[Fix] Add smoke_mono3d_head unittest

98cc28e2 · ZCMax · ChaimZhu · f4f8ae22 · 98cc28e2 · 98cc28e2
Commit 98cc28e2 authored Jun 09, 2022 by ZCMax Committed by ChaimZhu Jul 20, 2022
3 changed files
--- a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py
+++ b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py
@@ -434,12 +434,12 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):

    @abstractmethod
    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
-    def get_bboxes(self,
+    def get_results(self,
                    cls_scores,
                    bbox_preds,
                    dir_cls_preds,
                    attr_preds,
-                   img_metas,
+                    batch_img_metas,
                    cfg=None,
                    rescale=None):
        """Transform network output for a batch into bbox predictions.
@@ -454,7 +454,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                the channel number is num_points * 2. (bin = 2)
            attr_preds (list[Tensor]): Attribute scores for each scale level
                Has shape (N, num_points * num_attrs, H, W)
-            img_metas (list[dict]): Meta information of each image, e.g.,
+            batch_img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used

--- a/mmdet3d/models/dense_heads/smoke_mono3d_head.py
+++ b/mmdet3d/models/dense_heads/smoke_mono3d_head.py
@@ -4,12 +4,12 @@ from typing import List, Optional, Tuple, Union
 import torch
 from mmcv.runner import force_fp32
 from mmengine.config import ConfigDict
+from mmengine.data import InstanceData
 from torch import Tensor
 from torch.nn import functional as F

-from mmdet3d.registry import MODELS
+from mmdet3d.registry import MODELS, TASK_UTILS
 from mmdet.core import multi_apply
-from mmdet.core.bbox.builder import build_bbox_coder
 from mmdet.models.utils import gaussian_radius, gen_gaussian_target
 from mmdet.models.utils.gaussian_target import (get_local_maximum,
                                                get_topk_from_heatmap,
@@ -77,7 +77,7 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
            **kwargs)
        self.dim_channel = dim_channel
        self.ori_channel = ori_channel
-        self.bbox_coder = build_bbox_coder(bbox_coder)
+        self.bbox_coder = TASK_UTILS.build(bbox_coder)

    def forward(self, feats: Tuple[Tensor]):
        """Forward features from the upstream network.
@@ -139,12 +139,12 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
        """
        assert len(cls_scores) == len(bbox_preds) == 1
        cam2imgs = torch.stack([
-            cls_scores[0].new_tensor(img_metas['cam2img'])
-            for img_metas in batch_img_metas
+            cls_scores[0].new_tensor(img_meta['cam2img'])
+            for img_meta in batch_img_metas
        ])
        trans_mats = torch.stack([
-            cls_scores[0].new_tensor(img_metas['trans_mat'])
-            for img_metas in batch_img_metas
+            cls_scores[0].new_tensor(img_meta['trans_mat'])
+            for img_meta in batch_img_metas
        ])
        batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(
            cls_scores[0],
@@ -170,7 +170,16 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
            bboxes = batch_img_metas[img_id]['box_type_3d'](
                bboxes, box_dim=self.bbox_code_size, origin=(0.5, 0.5, 0.5))
            attrs = None
-            result_list.append((bboxes, scores, labels, attrs))
+
+            results = InstanceData()
+            results.bboxes_3d = bboxes
+            results.labels_3d = labels
+            results.scores_3d = scores
+
+            if attrs is not None:
+                results.attr_labels = attrs
+
+            result_list.append(results)

        return result_list

@@ -267,12 +276,12 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):
        batch, channel = pred_reg.shape[0], pred_reg.shape[1]
        w = pred_reg.shape[3]
        cam2imgs = torch.stack([
-            gt_locations.new_tensor(img_metas['cam2img'])
-            for img_metas in batch_img_metas
+            gt_locations.new_tensor(img_meta['cam2img'])
+            for img_meta in batch_img_metas
        ])
        trans_mats = torch.stack([
-            gt_locations.new_tensor(img_metas['trans_mat'])
-            for img_metas in batch_img_metas
+            gt_locations.new_tensor(img_meta['trans_mat'])
+            for img_meta in batch_img_metas
        ])
        centers_2d_inds = centers_2d[:, 1] * w + centers_2d[:, 0]
        centers_2d_inds = centers_2d_inds.view(batch, -1)
@@ -359,8 +368,8 @@ class SMOKEMono3DHead(AnchorFreeMono3DHead):

        reg_mask = torch.stack([
            gt_bboxes[0].new_tensor(
-                not img_metas['affine_aug'], dtype=torch.bool)
-            for img_metas in batch_img_metas
+                not img_meta['affine_aug'], dtype=torch.bool)
+            for img_meta in batch_img_metas
        ])

        img_h, img_w = img_shape[:2]

--- a/tests/test_models/test_dense_heads/test_smoke_mono3d_head.py
+++ b/tests/test_models/test_dense_heads/test_smoke_mono3d_head.py
+# Copyright (c) OpenMMLab. All rights reserved.
+from unittest import TestCase
+
+import numpy as np
+import torch
+from mmengine.data import InstanceData
+
+from mmdet3d.core.bbox import CameraInstance3DBoxes
+from mmdet3d.models.dense_heads import SMOKEMono3DHead
+
+
+class TestSMOKEMono3DHead(TestCase):
+    """Unit test covering forward, loss and get_results of SMOKEMono3DHead."""
+
+    def test_smoke_mono3d_head_loss(self):
+        """Tests SMOKE head loss and inference."""
+
+        # One meta dict per image; a single entry means a batch of one.
+        # The head reads 'cam2img', 'trans_mat', 'affine_aug' and
+        # 'box_type_3d' from each entry.
+        img_metas = [
+            dict(
+                cam2img=[[1260.8474446004698, 0.0, 807.968244525554, 40.1111],
+                         [0.0, 1260.8474446004698, 495.3344268742088, 2.34422],
+                         [0.0, 0.0, 1.0, 0.00333333], [0.0, 0.0, 0.0, 1.0]],
+                scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
+                pad_shape=[128, 128],
+                trans_mat=np.array(
+                    [[0.25, 0., 0.], [0., 0.25, 0], [0., 0., 1.]],
+                    dtype=np.float32),
+                affine_aug=False,
+                box_type_3d=CameraInstance3DBoxes)
+        ]
+
+        # Build the head directly from keyword arguments (no config file).
+        smoke_mono3d_head = SMOKEMono3DHead(
+            num_classes=3,
+            in_channels=64,
+            dim_channel=[3, 4, 5],
+            ori_channel=[6, 7],
+            stacked_convs=0,
+            feat_channels=64,
+            use_direction_classifier=False,
+            diff_rad_by_sin=False,
+            pred_attrs=False,
+            pred_velo=False,
+            dir_offset=0,
+            strides=None,
+            group_reg_dims=(8, ),
+            cls_branch=(256, ),
+            reg_branch=((256, ), ),
+            num_attrs=0,
+            bbox_code_size=7,
+            dir_branch=(),
+            attr_branch=(),
+            bbox_coder=dict(
+                type='SMOKECoder',
+                base_depth=(28.01, 16.32),
+                base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63,
+                                                                    1.53)),
+                code_size=7),
+            loss_cls=dict(type='mmdet.GaussianFocalLoss', loss_weight=1.0),
+            loss_bbox=dict(
+                type='mmdet.L1Loss', reduction='sum', loss_weight=1 / 300),
+            loss_dir=dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=False,
+                loss_weight=1.0),
+            loss_attr=None,
+            conv_bias=True,
+            dcn_on_last_conv=False)
+
+        # SMOKE head expects a single level of features per image
+        feats = [torch.rand([1, 64, 32, 32], dtype=torch.float32)]
+
+        # Test forward
+        ret_dict = smoke_mono3d_head.forward(feats)
+
+        # Despite its name, ret_dict is indexed positionally below and is
+        # unpacked with ``*`` into loss() and get_results().
+        self.assertEqual(
+            len(ret_dict), 2, 'the length of forward feature should be 2')
+        self.assertEqual(
+            len(ret_dict[0]), 1, 'each feature should have 1 level')
+        self.assertEqual(
+            ret_dict[0][0].shape, torch.Size([1, 3, 32, 32]),
+            'the first level feature shape should be [1, 3, 32, 32]')
+
+        # When truth is non-empty then all losses
+        # should be nonzero for random inputs
+        gt_instances_3d = InstanceData()
+
+        # Three ground-truth objects: 2D boxes, random 7-dim 3D boxes,
+        # random class labels, random 2D centers and random depths.
+        gt_bboxes = torch.Tensor([[1.0, 2.0, 20.0, 40.0],
+                                  [45.0, 50.0, 80.0, 70.1],
+                                  [34.0, 39.0, 65.0, 64.0]])
+        gt_bboxes_3d = CameraInstance3DBoxes(torch.rand([3, 7]), box_dim=7)
+        gt_labels = torch.randint(0, 3, [3])
+        # The 2D and 3D labels deliberately share the same tensor.
+        gt_labels_3d = gt_labels
+        centers_2d = torch.randint(0, 60, (3, 2))
+        depths = torch.rand([3], dtype=torch.float32)
+
+        gt_instances_3d.bboxes_3d = gt_bboxes_3d
+        gt_instances_3d.labels_3d = gt_labels_3d
+        gt_instances_3d.bboxes = gt_bboxes
+        gt_instances_3d.labels = gt_labels
+        gt_instances_3d.centers_2d = centers_2d
+        gt_instances_3d.depths = depths
+
+        gt_losses = smoke_mono3d_head.loss(*ret_dict, [gt_instances_3d],
+                                           img_metas)
+
+        gt_cls_loss = gt_losses['loss_cls'].item()
+        gt_box_loss = gt_losses['loss_bbox'].item()
+
+        self.assertGreater(gt_cls_loss, 0, 'cls loss should be positive')
+        self.assertGreater(gt_box_loss, 0, 'bbox loss should be positive')
+
+        # test get_results
+        results_list = smoke_mono3d_head.get_results(*ret_dict, img_metas)
+        self.assertEqual(
+            len(results_list), 1, 'there should be one image results')
+        results = results_list[0]
+        pred_bboxes_3d = results.bboxes_3d
+        pred_scores_3d = results.scores_3d
+        pred_labels_3d = results.labels_3d
+
+        # NOTE(review): 100 is presumably the head's default number of
+        # top-k detections per image -- confirm against decode_heatmap.
+        self.assertEqual(
+            pred_bboxes_3d.tensor.shape, torch.Size([100, 7]),
+            'the shape of predicted 3d bboxes should be [100, 7]')
+        self.assertEqual(
+            pred_scores_3d.shape, torch.Size([100]),
+            'the shape of predicted 3d bbox scores should be [100]')
+        self.assertEqual(
+            pred_labels_3d.shape, torch.Size([100]),
+            'the shape of predicted 3d bbox labels should be [100]')