Commit 0be27ffb authored by Jingwei Zhang, committed by ZwwWayne

[Refactor] Refactor FCAF3D (#1945)

* add fcaf3d config

* support inference of fcaf3d on scannet-3d

* minor changes

* fix config of scannet

* align test precision of fcaf3d && support training

* add ut

* fix bugs of dataset conversion and indoor_metric

* support fcaf3d on the sunrgbd dataset and add rotated_iou_loss ut

* small fix

* add docstring and typehint

* add typehint for SparseTensor

* remove pdb

* fix fcaf3d ut
parent 1e0e50d5
@@ -8,7 +8,7 @@ metainfo = dict(
         'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
         'garbagebin'))
-file_client_args = dict(backend='disk')
+# file_client_args = dict(backend='disk')
 # Uncomment the following if use ceph or other file clients.
 # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
 # for more details.
model = dict(
type='MinkSingleStage3DDetector',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
backbone=dict(type='MinkResNet', in_channels=3, depth=34),
bbox_head=dict(
type='FCAF3DHead',
in_channels=(64, 128, 256, 512),
out_channels=128,
voxel_size=.01,
pts_prune_threshold=100000,
pts_assign_threshold=27,
pts_center_threshold=18,
num_classes=18,
num_reg_outs=6,
center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
bbox_loss=dict(type='AxisAlignedIoULoss'),
cls_loss=dict(type='mmdet.FocalLoss'),
),
train_cfg=dict(),
test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01))
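A minimal sketch (not part of this commit) of building the base model above through the registry, mirroring what the new unit test does; it assumes mmdet3d and MinkowskiEngine are installed:

from mmengine import DefaultScope
from mmengine.config import Config

from mmdet3d.registry import MODELS

# Register mmdet3d modules under the default scope, as the unit test does.
DefaultScope.get_instance('fcaf3d_demo', scope_name='mmdet3d')
cfg = Config.fromfile('configs/_base_/models/fcaf3d.py')
model = MODELS.build(cfg.model)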
_base_ = [
'../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
'../_base_/datasets/scannet-3d.py'
]
n_points = 100000
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D'),
dict(type='GlobalAlignment', rotation_axis=2),
dict(type='PointSample', num_points=n_points),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[.9, 1.1],
translation_std=[.1, .1, .1],
shift_height=False),
dict(type='NormalizePointsColor', color_mean=None),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
use_color=True,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=n_points),
dict(type='NormalizePointsColor', color_mean=None),
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
dataset=dict(
type='RepeatDataset',
times=10,
dataset=dict(pipeline=train_pipeline, filter_empty_gt=True)))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
clip_grad=dict(max_norm=10, norm_type=2))
# learning rate
param_scheduler = dict(
type='MultiStepLR',
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
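For orientation, a hedged sketch (not in the diff) of launching training with the ScanNet config above through MMEngine's Runner; the work_dir value is hypothetical:

from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('configs/fcaf3d/fcaf3d_8xb2_scannet-3d-18class.py')
cfg.work_dir = 'work_dirs/fcaf3d_scannet'  # hypothetical output directory
runner = Runner.from_cfg(cfg)
runner.train()  # 12 epochs, MultiStepLR decay at epochs 8 and 11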
_base_ = [
'../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
'../_base_/datasets/sunrgbd-3d.py'
]
n_points = 100000
model = dict(
bbox_head=dict(
num_classes=10,
num_reg_outs=8,
bbox_loss=dict(type='RotatedIoU3DLoss')))
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(type='LoadAnnotations3D'),
dict(type='PointSample', num_points=n_points),
dict(type='RandomFlip3D', sync_2d=False, flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.523599, 0.523599],
scale_ratio_range=[0.85, 1.15],
translation_std=[.1, .1, .1],
shift_height=False),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5]),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=n_points)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
batch_size=8,
dataset=dict(
type='RepeatDataset',
times=3,
dataset=dict(pipeline=train_pipeline, filter_empty_gt=True)))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
clip_grad=dict(max_norm=10, norm_type=2))
# learning rate
param_scheduler = dict(
type='MultiStepLR',
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
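The SUN RGB-D config above relies on MMEngine's recursive config merging: the model dict only lists the bbox_head fields that differ from the base. A hedged check of that behavior (the SUN RGB-D config filename is assumed by analogy with the ScanNet one):

from mmengine.config import Config

# Hypothetical path, inferred from the ScanNet config's naming scheme.
cfg = Config.fromfile('configs/fcaf3d/fcaf3d_8xb2_sunrgbd-3d-10class.py')
assert cfg.model.bbox_head.num_classes == 10    # overridden above
assert cfg.model.bbox_head.out_channels == 128  # inherited from the base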
@@ -78,8 +78,9 @@ class IndoorMetric(BaseMetric):
             ann_infos.append(eval_ann)
             pred_results.append(sinlge_pred_results)

+        # some checkpoints may not record the key "box_type_3d"
         box_type_3d, box_mode_3d = get_box_type(
-            self.dataset_meta['box_type_3d'])
+            self.dataset_meta.get('box_type_3d', 'depth'))

         ret_dict = indoor_eval(
             ann_infos,
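The fix above swaps a hard key lookup for dict.get with a 'depth' default; a tiny standalone illustration of the fallback:

# Illustration only: a legacy checkpoint's meta that lacks 'box_type_3d'.
legacy_meta = {'classes': ('chair', 'table')}
assert legacy_meta.get('box_type_3d', 'depth') == 'depth'  # safe fallback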
@@ -58,7 +58,7 @@ class MinkResNet(nn.Module):
         for i, num_blocks in enumerate(stage_blocks):
             setattr(
-                self, f'layer{i}',
+                self, f'layer{i + 1}',
                 self._make_layer(block, 64 * 2**i, stage_blocks[i], stride=2))

     def init_weights(self):
@@ -111,6 +111,6 @@ class MinkResNet(nn.Module):
         x = self.maxpool(x)
         outs = []
         for i in range(self.num_stages):
-            x = getattr(self, f'layer{i}')(x)
+            x = getattr(self, f'layer{i + 1}')(x)
             outs.append(x)
         return outs
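The two hunks above fix an off-by-one in the stage names: stages are now registered as layer1..layer4 (the torchvision ResNet convention), so setattr and getattr must both use f'layer{i + 1}'. A quick standalone sketch:

import torch.nn as nn

m = nn.Module()
for i in range(4):
    setattr(m, f'layer{i + 1}', nn.Identity())  # layer1 .. layer4
assert hasattr(m, 'layer1') and not hasattr(m, 'layer0')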
@@ -5,6 +5,7 @@ from .base_3d_dense_head import Base3DDenseHead
 from .base_conv_bbox_head import BaseConvBboxHead
 from .base_mono3d_dense_head import BaseMono3DDenseHead
 from .centerpoint_head import CenterHead
+from .fcaf3d_head import FCAF3DHead
 from .fcos_mono3d_head import FCOSMono3DHead
 from .free_anchor3d_head import FreeAnchor3DHead
 from .groupfree3d_head import GroupFree3DHead
@@ -22,5 +23,5 @@ __all__ = [
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
     'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
-    'MonoFlexHead', 'Base3DDenseHead'
+    'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead'
 ]
This diff is collapsed. (By file order it is the new mmdet3d/models/dense_heads/fcaf3d_head.py, imported in the hunk above.)
@@ -8,6 +8,7 @@ from .groupfree3dnet import GroupFree3DNet
 from .h3dnet import H3DNet
 from .imvotenet import ImVoteNet
 from .imvoxelnet import ImVoxelNet
+from .mink_single_stage import MinkSingleStage3DDetector
 from .multiview_dfm import MultiViewDfM
 from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
 from .mvx_two_stage import MVXTwoStageDetector
@@ -21,25 +22,9 @@ from .votenet import VoteNet
 from .voxelnet import VoxelNet

 __all__ = [
-    'Base3DDetector',
-    'DfM',
-    'VoxelNet',
-    'DynamicVoxelNet',
-    'MVXTwoStageDetector',
-    'DynamicMVXFasterRCNN',
-    'MVXFasterRCNN',
-    'MultiViewDfM',
-    'PartA2',
-    'VoteNet',
-    'H3DNet',
-    'CenterPoint',
-    'SSD3DNet',
-    'ImVoteNet',
-    'SingleStageMono3DDetector',
-    'FCOSMono3D',
-    'ImVoxelNet',
-    'GroupFree3DNet',
-    'PointRCNN',
-    'SMOKEMono3D',
-    'SASSD',
+    'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
+    'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
+    'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
+    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D',
+    'SASSD', 'MinkSingleStage3DDetector', 'MultiViewDfM', 'DfM'
 ]
# Copyright (c) OpenMMLab. All rights reserved.
# Adapted from https://github.com/SamsungLabs/fcaf3d/blob/master/mmdet3d/models/detectors/single_stage_sparse.py # noqa
from typing import Dict, Tuple, Union
import torch
from torch import Tensor
try:
import MinkowskiEngine as ME
except ImportError:
# Please follow getting_started.md to install MinkowskiEngine.
pass
from mmdet3d.registry import MODELS
from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
from .single_stage import SingleStage3DDetector
@MODELS.register_module()
class MinkSingleStage3DDetector(SingleStage3DDetector):
r"""MinkSingleStage3DDetector.
This class serves as a base class for single-stage 3D detectors based on
MinkowskiEngine `GSDN <https://arxiv.org/abs/2006.12356>`_.
Args:
backbone (dict): Config dict of detector's backbone.
neck (dict, optional): Config dict of neck. Defaults to None.
bbox_head (dict, optional): Config dict of box head. Defaults to None.
train_cfg (dict, optional): Config dict of training hyper-parameters.
Defaults to None.
test_cfg (dict, optional): Config dict of test hyper-parameters.
Defaults to None.
data_preprocessor (dict or ConfigDict, optional): The pre-process
        config of :class:`BaseDataPreprocessor`. It usually includes
            ``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
        init_cfg (dict or ConfigDict, optional): The config to control the
            initialization. Defaults to None.
"""
def __init__(self,
backbone: ConfigType,
neck: OptConfigType = None,
bbox_head: OptConfigType = None,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None) -> None:
super().__init__(
backbone=backbone,
neck=neck,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
data_preprocessor=data_preprocessor,
init_cfg=init_cfg)
self.voxel_size = bbox_head['voxel_size']
# # TODO: unify the keys
# self.head = self.bbox_head
def extract_feat(
self, batch_inputs_dict: Dict[str, Tensor]
) -> Union[Tuple[torch.Tensor], Dict[str, Tensor]]:
"""Directly extract features from the backbone+neck.
Args:
batch_inputs_dict (dict): The model input dict which includes
the 'points' key.
- points (list[torch.Tensor]): Point cloud of each sample.
Returns:
tuple[Tensor] | dict: For outside 3D object detection, we
typically obtain a tuple of features from the backbone + neck,
and for inside 3D object detection, usually a dict containing
features will be obtained.
"""
points = batch_inputs_dict['points']
coordinates, features = ME.utils.batch_sparse_collate(
[(p[:, :3] / self.voxel_size, p[:, 3:]) for p in points],
device=points[0].device)
x = ME.SparseTensor(coordinates=coordinates, features=features)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x
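A hedged usage sketch of the voxelization done in extract_feat above (requires MinkowskiEngine): xyz coordinates are divided by voxel_size before quantization, and the remaining channels (e.g. RGB) become per-voxel features:

import torch
import MinkowskiEngine as ME

voxel_size = 0.01  # matches the base model config above
points = [torch.rand(1000, 6)]  # one sample: xyz + rgb
coordinates, features = ME.utils.batch_sparse_collate(
    [(p[:, :3] / voxel_size, p[:, 3:]) for p in points])
x = ME.SparseTensor(coordinates=coordinates, features=features)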
@@ -143,7 +143,11 @@ class SingleStage3DDetector(Base3DDetector):
         """Directly extract features from the backbone+neck.

         Args:
-            points (torch.Tensor): Input points.
+            batch_inputs_dict (dict): The model input dict which include
+                'points', 'img' keys.
+
+                - points (list[torch.Tensor]): Point cloud of each sample.
+                - imgs (torch.Tensor, optional): Image of each sample.

         Returns:
             tuple[Tensor] | dict: For outside 3D object detection, we
@@ -4,11 +4,12 @@ from .axis_aligned_iou_loss import AxisAlignedIoULoss, axis_aligned_iou_loss
 from .chamfer_distance import ChamferDistance, chamfer_distance
 from .multibin_loss import MultiBinLoss
 from .paconv_regularization_loss import PAConvRegularizationLoss
+from .rotated_iou_loss import RotatedIoU3DLoss, rotated_iou_3d_loss
 from .uncertain_smooth_l1_loss import UncertainL1Loss, UncertainSmoothL1Loss

 __all__ = [
     'FocalLoss', 'SmoothL1Loss', 'binary_cross_entropy', 'ChamferDistance',
     'chamfer_distance', 'axis_aligned_iou_loss', 'AxisAlignedIoULoss',
     'PAConvRegularizationLoss', 'UncertainL1Loss', 'UncertainSmoothL1Loss',
-    'MultiBinLoss'
+    'MultiBinLoss', 'RotatedIoU3DLoss', 'rotated_iou_3d_loss'
 ]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
from mmcv.ops import diff_iou_rotated_3d
from torch import Tensor
from torch import nn as nn
from mmdet3d.registry import MODELS
from mmdet.models.losses.utils import weighted_loss
@weighted_loss
def rotated_iou_3d_loss(pred: Tensor, target: Tensor) -> Tensor:
"""Calculate the IoU loss (1-IoU) of two sets of rotated bounding boxes.
Note that predictions and targets correspond one-to-one.
Args:
pred (torch.Tensor): Bbox predictions with shape [N, 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [N, 7]
(x, y, z, w, l, h, alpha).
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
iou_loss = 1 - diff_iou_rotated_3d(pred.unsqueeze(0),
target.unsqueeze(0))[0]
return iou_loss
@MODELS.register_module()
class RotatedIoU3DLoss(nn.Module):
"""Calculate the IoU loss (1-IoU) of rotated bounding boxes.
Args:
        reduction (str): Method to reduce losses. The valid reduction
            methods are 'none', 'sum' and 'mean'. Defaults to 'mean'.
loss_weight (float, optional): Weight of loss. Defaults to 1.0.
"""
def __init__(self,
reduction: str = 'mean',
loss_weight: float = 1.0) -> None:
super().__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred: Tensor,
target: Tensor,
weight: Optional[Tensor] = None,
avg_factor: Optional[int] = None,
reduction_override: Optional[str] = None,
**kwargs) -> Tensor:
"""Forward function of loss calculation.
Args:
pred (torch.Tensor): Bbox predictions with shape [..., 7]
(x, y, z, w, l, h, alpha).
target (torch.Tensor): Bbox targets (gt) with shape [..., 7]
(x, y, z, w, l, h, alpha).
weight (torch.Tensor | float, optional): Weight of loss.
Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): Method to reduce losses.
The valid reduction methods are 'none', 'sum' or 'mean'.
Defaults to None.
Returns:
torch.Tensor: IoU loss between predictions and targets.
"""
if weight is not None and not torch.any(weight > 0):
return pred.sum() * weight.sum()  # evaluates to 0 but keeps the graph
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if weight is not None and weight.dim() > 1:
weight = weight.mean(-1)
loss = self.loss_weight * rotated_iou_3d_loss(
pred,
target,
weight,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
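A hedged usage sketch of the loss above; mmcv.ops.diff_iou_rotated_3d is a CUDA-only op, so inputs must live on the GPU (the new unit tests guard on this too):

import torch

from mmdet3d.models.losses import RotatedIoU3DLoss

if torch.cuda.is_available():
    loss_fn = RotatedIoU3DLoss(reduction='mean')
    pred = torch.rand(4, 7).cuda()    # (x, y, z, w, l, h, alpha)
    target = torch.rand(4, 7).cuda()
    loss = loss_fn(pred, target)      # differentiable scalar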
@@ -9,10 +9,19 @@ from matplotlib.collections import PatchCollection
 from matplotlib.patches import PathPatch
 from matplotlib.path import Path
 from mmengine.dist import master_only
+from mmengine.structures import InstanceData
+from mmengine.visualization.utils import check_type, tensor2ndarray
 from torch import Tensor

+from mmdet3d.registry import VISUALIZERS
+from mmdet3d.structures import (BaseInstance3DBoxes, CameraInstance3DBoxes,
+                                Coord3DMode, DepthInstance3DBoxes,
+                                Det3DDataSample, LiDARInstance3DBoxes,
+                                PointData, points_cam2img)
 from mmdet3d.structures.bbox_3d.box_3d_mode import Box3DMode
 from mmdet.visualization import DetLocalVisualizer
+from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
+                        proj_lidar_bbox3d_to_img, to_depth_mode)

 try:
     import open3d as o3d
@@ -21,17 +30,6 @@ except ImportError:
     raise ImportError(
         'Please run "pip install open3d" to install open3d first.')

-from mmengine.structures import InstanceData
-from mmengine.visualization.utils import check_type, tensor2ndarray
-
-from mmdet3d.registry import VISUALIZERS
-from mmdet3d.structures import (BaseInstance3DBoxes, CameraInstance3DBoxes,
-                                Coord3DMode, DepthInstance3DBoxes,
-                                Det3DDataSample, LiDARInstance3DBoxes,
-                                PointData, points_cam2img)
-from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
-                        proj_lidar_bbox3d_to_img, to_depth_mode)

 @VISUALIZERS.register_module()
 class Det3DLocalVisualizer(DetLocalVisualizer):
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase
import pytest
import torch
from mmdet3d import * # noqa
from mmdet3d.models.dense_heads import FCAF3DHead
from tests.utils.model_utils import _create_detector_inputs
class TestFCAF3DHead(TestCase):

    def test_fcaf3d_head_loss(self):
        """Test FCAF3D head loss with a non-empty ground truth."""
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
try:
import MinkowskiEngine as ME
except ImportError:
pytest.skip('test requires MinkowskiEngine installation')
# build head
fcaf3d_head = FCAF3DHead(
in_channels=(64, 128, 256, 512),
out_channels=128,
voxel_size=1.,
pts_prune_threshold=1000,
pts_assign_threshold=27,
pts_center_threshold=18,
num_classes=18,
num_reg_outs=6,
test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01),
center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
bbox_loss=dict(type='AxisAlignedIoULoss'),
cls_loss=dict(type='mmdet.FocalLoss'),
)
fcaf3d_head = fcaf3d_head.cuda()
# fake input of head
        coordinates = [torch.randn(500, 3).cuda() * 100]
        features = [torch.randn(500, 3).cuda()]
tensor_coordinates, tensor_features = ME.utils.sparse_collate(
coordinates, features)
x = ME.SparseTensor(
features=tensor_features, coordinates=tensor_coordinates)
# backbone
conv1 = ME.MinkowskiConvolution(
3, 64, kernel_size=3, stride=2, dimension=3).cuda()
conv2 = ME.MinkowskiConvolution(
64, 128, kernel_size=3, stride=2, dimension=3).cuda()
conv3 = ME.MinkowskiConvolution(
128, 256, kernel_size=3, stride=2, dimension=3).cuda()
conv4 = ME.MinkowskiConvolution(
256, 512, kernel_size=3, stride=2, dimension=3).cuda()
# backbone outputs of 4 levels
x1 = conv1(x)
x2 = conv2(x1)
x3 = conv3(x2)
x4 = conv4(x3)
x = (x1, x2, x3, x4)
# fake annotation
packed_inputs = _create_detector_inputs(
with_points=False,
with_img=False,
num_gt_instance=3,
num_classes=1,
points_feat_dim=6,
gt_bboxes_dim=6)
data_samples = [
sample.cuda() for sample in packed_inputs['data_samples']
]
gt_losses = fcaf3d_head.loss(x, data_samples)
        self.assertGreaterEqual(gt_losses['cls_loss'], 0,
                                'cls loss should be non-negative')
        self.assertGreaterEqual(gt_losses['bbox_loss'], 0,
                                'bbox loss should be non-negative')
        self.assertGreaterEqual(gt_losses['center_loss'], 0,
                                'center loss should be non-negative')
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestFCAF3d(unittest.TestCase):
def test_fcaf3d(self):
        try:
            import MinkowskiEngine  # noqa: F401
        except ImportError:
            self.skipTest('test requires MinkowskiEngine installation')
import mmdet3d.models
assert hasattr(mmdet3d.models, 'MinkSingleStage3DDetector')
DefaultScope.get_instance('test_fcaf3d', scope_name='mmdet3d')
_setup_seed(0)
fcaf3d_net_cfg = _get_detector_cfg(
'fcaf3d/fcaf3d_8xb2_scannet-3d-18class.py')
model = MODELS.build(fcaf3d_net_cfg)
num_gt_instance = 3
packed_inputs = _create_detector_inputs(
num_gt_instance=num_gt_instance,
num_classes=1,
points_feat_dim=6,
gt_bboxes_dim=6)
        if torch.cuda.is_available():
            model = model.cuda()
            with torch.no_grad():
                data = model.data_preprocessor(packed_inputs, False)
                torch.cuda.empty_cache()
                results = model.forward(**data, mode='predict')
            self.assertEqual(len(results), 1)
            self.assertIn('bboxes_3d', results[0].pred_instances_3d)
            self.assertIn('scores_3d', results[0].pred_instances_3d)
            self.assertIn('labels_3d', results[0].pred_instances_3d)
            losses = model.forward(**data, mode='loss')
            self.assertGreater(losses['center_loss'], 0)
            self.assertGreater(losses['bbox_loss'], 0)
            self.assertGreater(losses['cls_loss'], 0)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch

from mmdet3d.models.losses import RotatedIoU3DLoss


def test_rotated_iou_3d_loss():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
boxes1 = torch.tensor([[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 1., .0]]).cuda()
boxes2 = torch.tensor([[.5, .5, .5, 1., 1., 1., .0],
[.5, .5, .5, 1., 1., 2., np.pi / 2],
[.5, .5, .5, 1., 1., 1., np.pi / 4],
[1., 1., 1., 1., 1., 1., .0],
[-1.5, -1.5, -1.5, 2.5, 2.5, 2.5, .0]]).cuda()
    expected_loss = 1 - torch.tensor([[1., .5, .7071, 1 / 15, .0]]).cuda()
    loss = RotatedIoU3DLoss(reduction='none')(boxes1, boxes2)
    assert torch.allclose(loss, expected_loss, atol=1e-4)