[Refactor] PGD head

effec8c3 · ZCMax · ChaimZhu · d490f024 · effec8c3 · effec8c3
Commit effec8c3 authored Jun 09, 2022 by ZCMax Committed by ChaimZhu Jul 20, 2022
Showing with 320 additions and 129 deletions

mmdet3d/models/dense_heads/pgd_head.py mmdet3d/models/dense_heads/pgd_head.py +113 -129

tests/test_models/test_dense_heads/test_pgd_head.py tests/test_models/test_dense_heads/test_pgd_head.py +207 -0

No files found.
--- a/mmdet3d/models/dense_heads/pgd_head.py
+++ b/mmdet3d/models/dense_heads/pgd_head.py
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Tuple
+
 import numpy as np
 import torch
 from mmcv.cnn import Scale, bias_init_with_prob, normal_init
 from mmcv.runner import force_fp32
+from mmengine.data import InstanceData
 from torch import nn as nn
 from torch.nn import functional as F

 from mmdet3d.core import box3d_multiclass_nms, xywhr2xyxyr
 from mmdet3d.core.bbox import points_cam2img, points_img2cam
-from mmdet3d.models.builder import build_loss
 from mmdet3d.registry import MODELS
 from mmdet.core import distance2bbox, multi_apply
 from .fcos_mono3d_head import FCOSMono3DHead
@@ -57,23 +59,28 @@ class PGDHead(FCOSMono3DHead):
    """

    def __init__(self,
-                 use_depth_classifier=True,
-                 use_onlyreg_proj=False,
-                 weight_dim=-1,
-                 weight_branch=((256, ), ),
-                 depth_branch=(64, ),
-                 depth_range=(0, 70),
-                 depth_unit=10,
-                 division='uniform',
-                 depth_bins=8,
-                 loss_depth=dict(
-                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-                 loss_bbox2d=dict(
-                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-                 loss_consistency=dict(type='GIoULoss', loss_weight=1.0),
-                 pred_bbox2d=True,
-                 pred_keypoints=False,
-                 bbox_coder=dict(
+                 use_depth_classifier: bool = True,
+                 use_onlyreg_proj: bool = False,
+                 weight_dim: int = -1,
+                 weight_branch: Tuple[Tuple] = ((256, ), ),
+                 depth_branch: Tuple = (64, ),
+                 depth_range: Tuple = (0, 70),
+                 depth_unit: int = 10,
+                 division: str = 'uniform',
+                 depth_bins: int = 8,
+                 loss_depth: dict = dict(
+                     type='mmdet.SmoothL1Loss',
+                     beta=1.0 / 9.0,
+                     loss_weight=1.0),
+                 loss_bbox2d: dict = dict(
+                     type='mmdet.SmoothL1Loss',
+                     beta=1.0 / 9.0,
+                     loss_weight=1.0),
+                 loss_consistency: dict = dict(
+                     type='mmdet.GIoULoss', loss_weight=1.0),
+                 pred_bbox2d: bool = True,
+                 pred_keypoints: bool = False,
+                 bbox_coder: dict = dict(
                     type='PGDBBoxCoder',
                     base_depths=((28.01, 16.32), ),
                     base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),
@@ -107,10 +114,10 @@ class PGDHead(FCOSMono3DHead):
            self.num_depth_cls = depth_bins
        super().__init__(
            pred_bbox2d=pred_bbox2d, bbox_coder=bbox_coder, **kwargs)
-        self.loss_depth = build_loss(loss_depth)
+        self.loss_depth = MODELS.build(loss_depth)
        if self.pred_bbox2d:
-            self.loss_bbox2d = build_loss(loss_bbox2d)
-            self.loss_consistency = build_loss(loss_consistency)
+            self.loss_bbox2d = MODELS.build(loss_bbox2d)
+            self.loss_consistency = MODELS.build(loss_consistency)
        if self.pred_keypoints:
            self.kpts_start = 9 if self.pred_velo else 7

@@ -270,7 +277,7 @@ class PGDHead(FCOSMono3DHead):
                        bbox_targets_3d,
                        pos_points,
                        pos_inds,
-                        img_metas,
+                        batch_img_metas,
                        pos_depth_cls_preds=None,
                        pos_weights=None,
                        pos_cls_scores=None,
@@ -292,7 +299,7 @@ class PGDHead(FCOSMono3DHead):
            pos_points (Tensor): Foreground points.
            pos_inds (Tensor): Index of foreground points from flattened
                tensors.
-            img_metas (list[dict]): Meta information of each image, e.g.,
+            batch_img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            pos_depth_cls_preds (Tensor, optional): Probabilistic depth map of
                positive boxes on all the scale levels in shape
@@ -310,8 +317,8 @@ class PGDHead(FCOSMono3DHead):
            tuple[Tensor]: Exterior 2D boxes from projected 3D boxes,
                predicted 2D boxes and keypoint targets (if necessary).
        """
-        views = [np.array(img_meta['cam2img']) for img_meta in img_metas]
-        num_imgs = len(img_metas)
+        views = [np.array(img_meta['cam2img']) for img_meta in batch_img_metas]
+        num_imgs = len(batch_img_metas)
        img_idx = []
        for label in labels_3d:
            for idx in range(num_imgs):
@@ -411,13 +418,13 @@ class PGDHead(FCOSMono3DHead):
                centers2d_targets[:, 0] - cam2img[0, 2],
                cam2img[0, 0]) + pos_bbox_targets_3d[mask, 6]

-            corners = img_metas[0]['box_type_3d'](
+            corners = batch_img_metas[0]['box_type_3d'](
                pos_strided_bbox_preds[mask],
                box_dim=self.bbox_coder.bbox_code_size,
                origin=(0.5, 0.5, 0.5)).corners
            box_corners_in_image[mask] = points_cam2img(corners, cam2img)

-            corners_gt = img_metas[0]['box_type_3d'](
+            corners_gt = batch_img_metas[0]['box_type_3d'](
                pos_bbox_targets_3d[mask, :self.bbox_code_size],
                box_dim=self.bbox_coder.bbox_code_size,
                origin=(0.5, 0.5, 0.5)).corners
@@ -443,7 +450,7 @@ class PGDHead(FCOSMono3DHead):

    def get_pos_predictions(self, bbox_preds, dir_cls_preds, depth_cls_preds,
                            weights, attr_preds, centernesses, pos_inds,
-                            img_metas):
+                            batch_img_metas):
        """Flatten predictions and get positive ones.

        Args:
@@ -463,7 +470,7 @@ class PGDHead(FCOSMono3DHead):
                is a 4D-tensor, the channel number is num_points * 1.
            pos_inds (Tensor): Index of foreground points from flattened
                tensors.
-            img_metas (list[dict]): Meta information of each image, e.g.,
+            batch_img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.

        Returns:
@@ -532,15 +539,9 @@ class PGDHead(FCOSMono3DHead):
             weights,
             attr_preds,
             centernesses,
-             gt_bboxes,
-             gt_labels,
-             gt_bboxes_3d,
-             gt_labels_3d,
-             centers2d,
-             depths,
-             attr_labels,
-             img_metas,
-             gt_bboxes_ignore=None):
+             batch_gt_instances_3d,
+             batch_img_metas,
+             batch_gt_instances_ignore=None):
        """Compute loss of the head.

        Args:
@@ -564,21 +565,16 @@ class PGDHead(FCOSMono3DHead):
                num_points * num_attrs.
            centernesses (list[Tensor]): Centerness for each scale level, each
                is a 4D-tensor, the channel number is num_points * 1.
-            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
-                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
-            gt_labels (list[Tensor]): class indices corresponding to each box
-            gt_bboxes_3d (list[Tensor]): 3D boxes ground truth with shape of
-                (num_gts, code_size).
-            gt_labels_3d (list[Tensor]): same as gt_labels
-            centers2d (list[Tensor]): 2D centers on the image with shape of
-                (num_gts, 2).
-            depths (list[Tensor]): Depth ground truth with shape of
-                (num_gts, ).
-            attr_labels (list[Tensor]): Attributes indices of each box.
-            img_metas (list[dict]): Meta information of each image, e.g.,
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instance_3d. It usually includes ``bboxes``, ``labels``,
+                ``bboxes_3d``, ``labels_3d``, ``depths``, ``centers_2d`` and
+                optionally ``attr_labels``.
+            batch_img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
-            gt_bboxes_ignore (list[Tensor]): specify which bounding boxes can
-                be ignored when computing the loss. Defaults to None.
+            batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
+                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
+                data that is ignored during training and testing.
+                Defaults to None.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
@@ -595,8 +591,7 @@ class PGDHead(FCOSMono3DHead):
                                           bbox_preds[0].device)
        labels_3d, bbox_targets_3d, centerness_targets, attr_targets = \
            self.get_targets(
-                all_level_points, gt_bboxes, gt_labels, gt_bboxes_3d,
-                gt_labels_3d, centers2d, depths, attr_labels)
+                all_level_points, batch_gt_instances_3d)

        num_imgs = cls_scores[0].size(0)
        # flatten cls_scores and targets
@@ -629,7 +624,7 @@ class PGDHead(FCOSMono3DHead):
        pos_bbox_preds, pos_dir_cls_preds, pos_depth_cls_preds, pos_weights, \
            pos_attr_preds, pos_centerness = self.get_pos_predictions(
                bbox_preds, dir_cls_preds, depth_cls_preds, weights,
-                attr_preds, centernesses, pos_inds, img_metas)
+                attr_preds, centernesses, pos_inds, batch_img_metas)

        if num_pos > 0:
            pos_bbox_targets_3d = flatten_bbox_targets_3d[pos_inds]
@@ -679,7 +674,7 @@ class PGDHead(FCOSMono3DHead):

            proj_bbox2d_inputs = (bbox_preds, pos_dir_cls_preds, labels_3d,
                                  bbox_targets_3d, pos_points, pos_inds,
-                                  img_metas)
+                                  batch_img_metas)

            # direction classification loss
            # TODO: add more check for use_direction_classifier
@@ -793,7 +788,7 @@ class PGDHead(FCOSMono3DHead):
    @force_fp32(
        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',
                  'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))
-    def get_bboxes(self,
+    def get_results(self,
                    cls_scores,
                    bbox_preds,
                    dir_cls_preds,
@@ -801,7 +796,7 @@ class PGDHead(FCOSMono3DHead):
                    weights,
                    attr_preds,
                    centernesses,
-                   img_metas,
+                    batch_img_metas,
                    cfg=None,
                    rescale=None):
        """Transform network output for a batch into bbox predictions.
@@ -824,7 +819,7 @@ class PGDHead(FCOSMono3DHead):
                Has shape (N, num_points * num_attrs, H, W)
            centernesses (list[Tensor]): Centerness for each scale level with
                shape (N, num_points * 1, H, W)
-            img_metas (list[dict]): Meta information of each image, e.g.,
+            batch_img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cfg (mmcv.Config, optional): Test / postprocessing configuration,
                if None, test_cfg would be used. Defaults to None.
@@ -849,7 +844,7 @@ class PGDHead(FCOSMono3DHead):
        mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
                                      bbox_preds[0].device)
        result_list = []
-        for img_id in range(len(img_metas)):
+        for img_id in range(len(batch_img_metas)):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
@@ -902,15 +897,15 @@ class PGDHead(FCOSMono3DHead):
            centerness_pred_list = [
                centernesses[i][img_id].detach() for i in range(num_levels)
            ]
-            input_meta = img_metas[img_id]
-            det_bboxes = self._get_bboxes_single(
+            img_meta = batch_img_metas[img_id]
+            results = self._get_results_single(
                cls_score_list, bbox_pred_list, dir_cls_pred_list,
                depth_cls_pred_list, weight_list, attr_pred_list,
-                centerness_pred_list, mlvl_points, input_meta, cfg, rescale)
-            result_list.append(det_bboxes)
+                centerness_pred_list, mlvl_points, img_meta, cfg, rescale)
+            result_list.append(results)
        return result_list

-    def _get_bboxes_single(self,
+    def _get_results_single(self,
                            cls_scores,
                            bbox_preds,
                            dir_cls_preds,
@@ -919,7 +914,7 @@ class PGDHead(FCOSMono3DHead):
                            attr_preds,
                            centernesses,
                            mlvl_points,
-                           input_meta,
+                            img_meta,
                            cfg,
                            rescale=False):
        """Transform outputs for a single batch item into bbox predictions.
@@ -943,7 +938,7 @@ class PGDHead(FCOSMono3DHead):
                with shape (num_points, H, W).
            mlvl_points (list[Tensor]): Box reference for a single scale level
                with shape (num_total_points, 2).
-            input_meta (dict): Metadata of input image.
+            img_meta (dict): Metadata of input image.
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool, optional): If True, return boxes in original image
@@ -953,8 +948,8 @@ class PGDHead(FCOSMono3DHead):
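-            tuples[Tensor]: Predicted 3D boxes, scores, labels, attributes and
-                2D boxes (if necessary).
+            :obj:`InstanceData`: ``bboxes_3d``, ``scores_3d``, ``labels_3d``,
+                plus ``attr_labels`` and ``bboxes`` (2D) when predicted.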
        """
-        view = np.array(input_meta['cam2img'])
-        scale_factor = input_meta['scale_factor']
+        view = np.array(img_meta['cam2img'])
+        scale_factor = img_meta['scale_factor']
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        mlvl_centers2d = []
@@ -1045,7 +1040,7 @@ class PGDHead(FCOSMono3DHead):
            mlvl_depth_uncertainty.append(depth_uncertainty)
            if self.pred_bbox2d:
                bbox_pred2d = distance2bbox(
-                    points, bbox_pred2d, max_shape=input_meta['img_shape'])
+                    points, bbox_pred2d, max_shape=img_meta['img_shape'])
                mlvl_bboxes2d.append(bbox_pred2d)

        mlvl_centers2d = torch.cat(mlvl_centers2d)
@@ -1063,7 +1058,7 @@ class PGDHead(FCOSMono3DHead):
                                                 mlvl_dir_scores,
                                                 self.dir_offset, cam2img)

-        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
+        mlvl_bboxes_for_nms = xywhr2xyxyr(img_meta['box_type_3d'](
            mlvl_bboxes,
            box_dim=self.bbox_coder.bbox_code_size,
            origin=(0.5, 0.5, 0.5)).bev)
@@ -1084,13 +1079,14 @@ class PGDHead(FCOSMono3DHead):
            if self.weight_dim != -1:
                mlvl_depth_uncertainty = torch.cat(mlvl_depth_uncertainty)
                mlvl_nms_scores *= mlvl_depth_uncertainty[:, None]
-        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
+        nms_results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                           mlvl_nms_scores, cfg.score_thr,
-                                       cfg.max_per_img, cfg, mlvl_dir_scores,
-                                       mlvl_attr_scores, mlvl_bboxes2d)
-        bboxes, scores, labels, dir_scores, attrs = results[0:5]
+                                           cfg.max_per_img, cfg,
+                                           mlvl_dir_scores, mlvl_attr_scores,
+                                           mlvl_bboxes2d)
+        bboxes, scores, labels, dir_scores, attrs = nms_results[0:5]
        attrs = attrs.to(labels.dtype)  # change data type to int
-        bboxes = input_meta['box_type_3d'](
+        bboxes = img_meta['box_type_3d'](
            bboxes,
            box_dim=self.bbox_coder.bbox_code_size,
            origin=(0.5, 0.5, 0.5))
@@ -1101,37 +1097,32 @@ class PGDHead(FCOSMono3DHead):
        if not self.pred_attrs:
            attrs = None

-        outputs = (bboxes, scores, labels, attrs)
+        results = InstanceData()
+        results.bboxes_3d = bboxes
+        results.scores_3d = scores
+        results.labels_3d = labels
+
+        if attrs is not None:
+            results.attr_labels = attrs
+
        if self.pred_bbox2d:
-            bboxes2d = results[-1]
+            bboxes2d = nms_results[-1]
            bboxes2d = torch.cat([bboxes2d, scores[:, None]], dim=1)
-            outputs = outputs + (bboxes2d, )
+            results.bboxes = bboxes2d

-        return outputs
+        return results

-    def get_targets(self, points, gt_bboxes_list, gt_labels_list,
-                    gt_bboxes_3d_list, gt_labels_3d_list, centers2d_list,
-                    depths_list, attr_labels_list):
+    def get_targets(self, points, batch_gt_instances_3d):
-        """Compute regression, classification and centerss targets for points
+        """Compute regression, classification and centerness targets for points
        in multiple images.

        Args:
            points (list[Tensor]): Points of each fpn level, each has shape
                (num_points, 2).
-            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
-                each has shape (num_gt, 4).
-            gt_labels_list (list[Tensor]): Ground truth labels of each box,
-                each has shape (num_gt,).
-            gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each
-                image, each has shape (num_gt, bbox_code_size).
-            gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each
-                box, each has shape (num_gt,).
-            centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
-                each has shape (num_gt, 2).
-            depths_list (list[Tensor]): Depth of projected 3D centers onto 2D
-                image, each has shape (num_gt, 1).
-            attr_labels_list (list[Tensor]): Attribute labels of each box,
-                each has shape (num_gt,).
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instance_3d. It usually includes ``bboxes``, ``labels``,
+                ``bboxes_3d``, ``labels_3d``, ``depths``, ``centers_2d`` and
+                optionally ``attr_labels``.

        Returns:
            tuple:
@@ -1153,23 +1144,16 @@ class PGDHead(FCOSMono3DHead):
        # the number of points per img, per lvl
        num_points = [center.size(0) for center in points]

-        if attr_labels_list is None:
-            attr_labels_list = [
-                gt_labels.new_full(gt_labels.shape, self.attr_background_label)
-                for gt_labels in gt_labels_list
-            ]
+        if 'attr_labels' not in batch_gt_instances_3d[0]:
+            for gt_instances_3d in batch_gt_instances_3d:
+                gt_instances_3d.attr_labels = gt_instances_3d.labels.new_full(
+                    gt_instances_3d.labels.shape, self.attr_background_label)

        # get labels and bbox_targets of each image
        _, bbox_targets_list, labels_3d_list, bbox_targets_3d_list, \
            centerness_targets_list, attr_targets_list = multi_apply(
                self._get_target_single,
-                gt_bboxes_list,
-                gt_labels_list,
-                gt_bboxes_3d_list,
-                gt_labels_3d_list,
-                centers2d_list,
-                depths_list,
-                attr_labels_list,
+                batch_gt_instances_3d,
                points=concat_points,
                regress_ranges=concat_regress_ranges,
                num_points_per_lvl=num_points)

--- a/tests/test_models/test_dense_heads/test_pgd_head.py
+++ b/tests/test_models/test_dense_heads/test_pgd_head.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from unittest import TestCase
+
+import mmcv
+import numpy as np
+import torch
+from mmengine.data import InstanceData
+
+from mmdet3d.core.bbox import CameraInstance3DBoxes
+from mmdet3d.models.dense_heads import PGDHead
+
+
+class TestFGDHead(TestCase):
+    """Unit tests for :class:`PGDHead`.
+
+    NOTE(review): the class name looks like a typo for ``TestPGDHead``; it is
+    kept unchanged here so existing test selectors keep working.
+    """
+
+    def test_pgd_head_loss(self):
+        """Tests PGD head forward, loss and inference (``get_results``)."""
+
+        # A single 384x1248 image with a 4x4 camera projection matrix.
+        img_metas = [
+            dict(
+                img_shape=[384, 1248],
+                cam2img=[[721.5377, 0.0, 609.5593, 44.85728],
+                         [0.0, 721.5377, 172.854, 0.2163791],
+                         [0.0, 0.0, 1.0, 0.002745884], [0.0, 0.0, 0.0, 1.0]],
+                scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
+                box_type_3d=CameraInstance3DBoxes)
+        ]
+
+        # 27 weights, one per regression channel: the entries line up with
+        # ``group_reg_dims`` below (2 + 1 + 3 + 1 + 16 + 4 = 27).
+        train_cfg = dict(code_weight=[
+            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
+            0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0,
+            1.0
+        ])
+
+        # ``max_per_img=20`` is what the shape assertions at the end of this
+        # test rely on.
+        test_cfg = dict(
+            use_rotate_nms=True,
+            nms_across_levels=False,
+            nms_pre=100,
+            nms_thr=0.05,
+            score_thr=0.001,
+            min_bbox_size=0,
+            max_per_img=20)
+
+        train_cfg = mmcv.Config(train_cfg)
+        test_cfg = mmcv.Config(test_cfg)
+
+        pgd_head = PGDHead(
+            num_classes=3,
+            in_channels=256,
+            stacked_convs=2,
+            feat_channels=256,
+            use_direction_classifier=True,
+            bbox_code_size=7,
+            diff_rad_by_sin=True,
+            pred_attrs=False,
+            pred_velo=False,
+            pred_bbox2d=True,
+            pred_keypoints=True,
+            use_onlyreg_proj=True,
+            dir_offset=0.7854,  # pi/4
+            dir_limit_offset=0,
+            strides=(4, 8, 16, 32),
+            regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 1e8)),
+            group_reg_dims=(2, 1, 3, 1, 16,
+                            4),  # offset, depth, size, rot, kpts, bbox2d
+            cls_branch=(256, ),
+            reg_branch=(
+                (256, ),  # offset
+                (256, ),  # depth
+                (256, ),  # size
+                (256, ),  # rot
+                (256, ),  # kpts
+                (256, )  # bbox2d
+            ),
+            dir_branch=(256, ),
+            attr_branch=(256, ),
+            centerness_branch=(256, ),
+            loss_cls=dict(
+                type='mmdet.FocalLoss',
+                use_sigmoid=True,
+                gamma=2.0,
+                alpha=0.25,
+                loss_weight=1.0),
+            loss_bbox=dict(
+                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+            loss_dir=dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=False,
+                loss_weight=1.0),
+            loss_attr=dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=False,
+                loss_weight=1.0),
+            loss_centerness=dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=True,
+                loss_weight=1.0),
+            norm_on_bbox=True,
+            centerness_on_reg=True,
+            center_sampling=True,
+            conv_bias=True,
+            dcn_on_last_conv=False,
+            use_depth_classifier=True,
+            depth_branch=(256, ),
+            depth_range=(0, 70),
+            depth_unit=10,
+            division='uniform',
+            depth_bins=8,
+            weight_dim=1,
+            loss_depth=dict(
+                type='UncertainSmoothL1Loss',
+                alpha=1.0,
+                beta=3.0,
+                loss_weight=1.0),
+            bbox_coder=dict(
+                type='PGDBBoxCoder',
+                base_depths=((28.01, 16.32), ),
+                base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6), (3.9, 1.56,
+                                                                 1.6)),
+                code_size=7),
+            train_cfg=train_cfg,
+            test_cfg=test_cfg)
+
+        # PGD head expects multiple levels of features per image; the spatial
+        # sizes are the 384x1248 image divided by the strides 4, 8, 16 and 32.
+        feats = [
+            torch.rand([1, 256, 96, 312], dtype=torch.float32),
+            torch.rand([1, 256, 48, 156], dtype=torch.float32),
+            torch.rand([1, 256, 24, 78], dtype=torch.float32),
+            torch.rand([1, 256, 12, 39], dtype=torch.float32),
+        ]
+
+        # Test forward
+        ret_dict = pgd_head.forward(feats)
+
+        self.assertEqual(
+            len(ret_dict), 7, 'the length of forward feature should be 7')
+        self.assertEqual(
+            len(ret_dict[0]), 4, 'each feature should have 4 levels')
+        self.assertEqual(
+            ret_dict[0][0].shape, torch.Size([1, 3, 96, 312]),
+            'the first level feature shape should be [1, 3, 96, 312]')
+
+        # When truth is non-empty then all losses
+        # should be nonzero for random inputs
+        gt_instances_3d = InstanceData()
+
+        gt_bboxes = torch.rand([3, 4], dtype=torch.float32)
+        gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([3, 7]), box_dim=7)
+        gt_labels = torch.randint(0, 3, [3])
+        gt_labels_3d = gt_labels
+        centers_2d = torch.rand([3, 2], dtype=torch.float32)
+        depths = torch.rand([3], dtype=torch.float32)
+
+        # ``attr_labels`` is deliberately left unset here.
+        gt_instances_3d.bboxes_3d = gt_bboxes_3d
+        gt_instances_3d.labels_3d = gt_labels_3d
+        gt_instances_3d.bboxes = gt_bboxes
+        gt_instances_3d.labels = gt_labels
+        gt_instances_3d.centers_2d = centers_2d
+        gt_instances_3d.depths = depths
+
+        gt_losses = pgd_head.loss(*ret_dict, [gt_instances_3d], img_metas)
+
+        gt_cls_loss = gt_losses['loss_cls'].item()
+        gt_siz_loss = gt_losses['loss_size'].item()
+        gt_ctr_loss = gt_losses['loss_centerness'].item()
+        gt_off_loss = gt_losses['loss_offset'].item()
+        gt_dep_loss = gt_losses['loss_depth'].item()
+        gt_rot_loss = gt_losses['loss_rotsin'].item()
+        gt_kpt_loss = gt_losses['loss_kpts'].item()
+        gt_dir_loss = gt_losses['loss_dir'].item()
+        gt_box_loss = gt_losses['loss_bbox2d'].item()
+        gt_cos_loss = gt_losses['loss_consistency'].item()
+
+        self.assertGreater(gt_cls_loss, 0, 'cls loss should be positive')
+        self.assertGreater(gt_siz_loss, 0, 'size loss should be positive')
+        self.assertGreater(gt_ctr_loss, 0,
+                           'centerness loss should be positive')
+        self.assertGreater(gt_off_loss, 0, 'offset loss should be positive')
+        self.assertGreater(gt_dep_loss, 0, 'depth loss should be positive')
+        self.assertGreater(gt_rot_loss, 0, 'rotsin loss should be positive')
+        self.assertGreater(gt_kpt_loss, 0, 'keypoints loss should be positive')
+        self.assertGreater(gt_dir_loss, 0, 'direction loss should be positive')
+        self.assertGreater(gt_box_loss, 0, '2d bbox loss should be positive')
+        self.assertGreater(gt_cos_loss, 0,
+                           'consistency loss should be positive')
+
+        # test get_results
+        results_list = pgd_head.get_results(*ret_dict, img_metas)
+        self.assertEqual(
+            len(results_list), 1,
+            'there should be exactly one result per input image')
+        results = results_list[0]
+        pred_bboxes_3d = results.bboxes_3d
+        pred_scores_3d = results.scores_3d
+        pred_labels_3d = results.labels_3d
+        pred_bboxes_2d = results.bboxes
+        self.assertEqual(pred_bboxes_3d.tensor.shape, torch.Size([20, 7]),
+                         'the shape of predicted 3d bboxes should be [20, 7]')
+        self.assertEqual(
+            pred_scores_3d.shape, torch.Size([20]),
+            'the shape of predicted 3d bbox scores should be [20]')
+        self.assertEqual(
+            pred_labels_3d.shape, torch.Size([20]),
+            'the shape of predicted 3d bbox labels should be [20]')
+        # The head concatenates the score onto each 2D box, hence 5 columns.
+        self.assertEqual(
+            pred_bboxes_2d.shape, torch.Size([20, 5]),
+            'the shape of predicted 2d bboxes should be [20, 5]')