Commit b37dc416 authored by Jingwei Zhang's avatar Jingwei Zhang Committed by ZwwWayne
Browse files

[Refactor]: Refactor SASSD (#1901)



* refactor sassd

* rename some variables

* add ut for sassd

* add some comments

* add some comments

* [Refactor] Use `mmeval.MeanIoU` for `SegMetric` (#1929)

* Use mmeval.MeanIoU

* fix comments and add self.reset

* solve some comments

* fix docstring
Co-authored-by: default avatarChaimZhu <zhuchenming@pjlab.org.cn>
parent 5002b8e4
......@@ -7,11 +7,14 @@ voxel_size = [0.05, 0.05, 0.1]
model = dict(
type='SASSD',
voxel_layer=dict(
max_num_points=5,
point_cloud_range=[0, -40, -3, 70.4, 40, 1],
voxel_size=voxel_size,
max_voxels=(16000, 40000)),
data_preprocessor=dict(
type='Det3DDataPreprocessor',
voxel=True,
voxel_layer=dict(
max_num_points=5,
point_cloud_range=[0, -40, -3, 70.4, 40, 1],
voxel_size=voxel_size,
max_voxels=(16000, 40000))),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseEncoderSASSD',
......@@ -42,39 +45,41 @@ model = dict(
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
......
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
from typing import Dict, Optional, Sequence
import warnings
from typing import Sequence
import mmcv
import numpy as np
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger
from mmengine.logging import print_log
from mmeval.metrics import MeanIoU
from terminaltables import AsciiTable
from mmdet3d.evaluation import seg_eval
from mmdet3d.registry import METRICS
@METRICS.register_module()
class SegMetric(BaseMetric):
"""3D semantic segmentation evaluation metric.
class SegMetric(MeanIoU):
"""A wrapper of ``mmeval.MeanIoU`` for 3D semantic segmentation.
This wrapper implements the `process` method that parses predictions and
labels from inputs. This enables ``mmengine.Evaluator`` to handle the data
flow of different tasks through a unified interface.
In addition, this wrapper also implements the ``evaluate`` method that
parses metric results and print pretty table of metrics per class.
Args:
collect_device (str, optional): Device name used for collecting
results from different ranks during distributed training.
Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
prefix (str): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
will be used instead. Default: None.
pklfile_prefix (str, optional): The prefix of pkl files, including
the file path and the prefix of filename, e.g., "a/b/prefix".
If not specified, a temp file will be created. Default: None.
submission_prefix (str, optional): The prefix of submission data.
If not specified, the submission data will not be generated.
Default: None.
dist_backend (str | None): The name of the distributed communication
backend. Refer to :class:`mmeval.BaseMetric`.
Defaults to 'torch_cuda'.
**kwargs: Keyword parameters passed to :class:`mmeval.MeanIoU`.
"""
def __init__(self,
collect_device: str = 'cpu',
prefix: Optional[str] = None,
pklfile_prefix: str = None,
submission_prefix: str = None,
**kwargs):
self.pklfile_prefix = pklfile_prefix
self.submission_prefix = submission_prefix
super(SegMetric, self).__init__(
prefix=prefix, collect_device=collect_device)
def __init__(self, dist_backend='torch_cpu', **kwargs):
iou_metrics = kwargs.pop('iou_metrics', None)
if iou_metrics is not None:
warnings.warn(
'DeprecationWarning: The `iou_metrics` parameter of '
'`IoUMetric` is deprecated, defaults return all metrics now!')
collect_device = kwargs.pop('collect_device', None)
if collect_device is not None:
warnings.warn(
'DeprecationWarning: The `collect_device` parameter of '
'`IoUMetric` is deprecated, use `dist_backend` instead.')
# Changes the default value of `classwise_results` to True.
super().__init__(
classwise_results=True, dist_backend=dist_backend, **kwargs)
def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
"""Process one batch of data samples and predictions.
......@@ -55,83 +55,60 @@ class SegMetric(BaseMetric):
data_samples (Sequence[dict]): A batch of outputs from
the model.
"""
predictions, labels = [], []
for data_sample in data_samples:
pred_3d = data_sample['pred_pts_seg']
eval_ann_info = data_sample['eval_ann_info']
cpu_pred_3d = dict()
for k, v in pred_3d.items():
if hasattr(v, 'to'):
cpu_pred_3d[k] = v.to('cpu').numpy()
else:
cpu_pred_3d[k] = v
self.results.append((eval_ann_info, cpu_pred_3d))
def format_results(self, results):
r"""Format the results to txt file. Refer to `ScanNet documentation
<http://kaldir.vc.in.tum.de/scannet_benchmark/documentation>`_.
Args:
outputs (list[dict]): Testing results of the dataset.
Returns:
tuple: (outputs, tmp_dir), outputs is the detection results,
tmp_dir is the temporal directory created for saving submission
files when ``submission_prefix`` is not specified.
# (num_points, ) -> (num_points, 1)
pred = data_sample['pred_pts_seg']['pts_semantic_mask'].unsqueeze(
-1)
label = data_sample['gt_pts_seg']['pts_semantic_mask'].unsqueeze(
-1)
predictions.append(pred)
labels.append(label)
self.add(predictions, labels)
def evaluate(self, *args, **kwargs):
"""Returns metric results and print pretty table of metrics per class.
This method would be invoked by ``mmengine.Evaluator``.
"""
submission_prefix = self.submission_prefix
if submission_prefix is None:
tmp_dir = tempfile.TemporaryDirectory()
submission_prefix = osp.join(tmp_dir.name, 'results')
mmcv.mkdir_or_exist(submission_prefix)
ignore_index = self.dataset_meta['ignore_index']
# need to map network output to original label idx
cat2label = np.zeros(len(self.dataset_meta['label2cat'])).astype(
np.int)
for original_label, output_idx in self.dataset_meta['label2cat'].items(
):
if output_idx != ignore_index:
cat2label[output_idx] = original_label
for i, (eval_ann, result) in enumerate(results):
sample_idx = eval_ann['point_cloud']['lidar_idx']
pred_sem_mask = result['semantic_mask'].numpy().astype(np.int)
pred_label = cat2label[pred_sem_mask]
curr_file = f'{submission_prefix}/{sample_idx}.txt'
np.savetxt(curr_file, pred_label, fmt='%d')
def compute_metrics(self, results: list) -> Dict[str, float]:
"""Compute the metrics from processed results.
Args:
results (list): The processed results of each batch.
Returns:
Dict[str, float]: The computed metrics. The keys are the names of
the metrics, and the values are corresponding results.
"""
logger: MMLogger = MMLogger.get_current_instance()
if self.submission_prefix:
self.format_results(results)
return None
label2cat = self.dataset_meta['label2cat']
ignore_index = self.dataset_meta['ignore_index']
gt_semantic_masks = []
pred_semantic_masks = []
for eval_ann, sinlge_pred_results in results:
gt_semantic_masks.append(eval_ann['pts_semantic_mask'])
pred_semantic_masks.append(
sinlge_pred_results['pts_semantic_mask'])
ret_dict = seg_eval(
gt_semantic_masks,
pred_semantic_masks,
label2cat,
ignore_index,
logger=logger)
return ret_dict
metric_results = self.compute(*args, **kwargs)
self.reset()
classwise_results = metric_results['classwise_results']
del metric_results['classwise_results']
# Ascii table of the metric results per class.
header = ['Class']
header += classwise_results.keys()
classes = self.dataset_meta['classes']
table_data = [header]
for i in range(self.num_classes):
row_data = [classes[i]]
for _, value in classwise_results.items():
row_data.append(f'{value[i]*100:.2f}')
table_data.append(row_data)
table = AsciiTable(table_data)
print_log('per class results:', logger='current')
print_log('\n' + table.table, logger='current')
# Ascii table of the metric results overall.
header = ['Class']
header += metric_results.keys()
table_data = [header]
row_data = ['results']
for _, value in metric_results.items():
row_data.append(f'{value*100:.2f}')
table_data.append(row_data)
table = AsciiTable(table_data)
table.inner_footing_row_border = True
print_log('overall results:', logger='current')
print_log('\n' + table.table, logger='current')
# Multiply value by 100 to convert to percentage and rounding.
evaluate_results = {
k: round(v * 100, 2)
for k, v in metric_results.items()
}
return evaluate_results
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv.ops import Voxelization
from torch.nn import functional as F
from typing import Tuple, Union
from mmdet3d.models.test_time_augs import merge_aug_bboxes_3d
from mmdet3d.structures.ops import bbox3d2result
from mmdet.models.builder import DETECTORS
from mmdet.registry import MODELS
from torch import Tensor
from mmdet3d.registry import MODELS
from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
from ...structures.det3d_data_sample import SampleList
from .single_stage import SingleStage3DDetector
@DETECTORS.register_module()
@MODELS.register_module()
class SASSD(SingleStage3DDetector):
r"""`SASSD <https://github.com/skyhehe123/SA-SSD>` _ for 3D detection."""
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
init_cfg=None,
pretrained=None):
voxel_encoder: ConfigType,
middle_encoder: ConfigType,
backbone: ConfigType,
neck: OptConfigType = None,
bbox_head: OptConfigType = None,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
init_cfg: OptMultiConfig = None):
super(SASSD, self).__init__(
backbone=backbone,
neck=neck,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=init_cfg,
pretrained=pretrained)
data_preprocessor=data_preprocessor,
init_cfg=init_cfg)
self.voxel_layer = Voxelization(**voxel_layer)
self.voxel_encoder = MODELS.build(voxel_encoder)
self.middle_encoder = MODELS.build(middle_encoder)
def extract_feat(self, points, img_metas=None, test_mode=False):
"""Extract features from points."""
voxels, num_points, coors = self.voxelize(points)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0].item() + 1
x, point_misc = self.middle_encoder(voxel_features, coors, batch_size,
def extract_feat(
self,
batch_inputs_dict: dict,
test_mode: bool = True
) -> Union[Tuple[Tuple[Tensor], Tuple], Tuple[Tensor]]:
"""Extract features from points.
Args:
batch_inputs_dict (dict): The batch inputs.
test_mode (bool, optional): Whether test mode. Defaults to True.
Returns:
Union[Tuple[Tuple[Tensor], Tuple], Tuple[Tensor]]: In test mode, it
returns the features of points from multiple levels. In training
mode, it returns the features of points from multiple levels and a
tuple containing the mean features of points and the targets of
classification and regression.
"""
voxel_dict = batch_inputs_dict['voxels']
voxel_features = self.voxel_encoder(voxel_dict['voxels'],
voxel_dict['num_points'],
voxel_dict['coors'])
batch_size = voxel_dict['coors'][-1, 0].item() + 1
# `point_misc` is a tuple containing the mean features of points and
        # the targets of classification and regression. It's only used for
# calculating auxiliary loss in training mode.
x, point_misc = self.middle_encoder(voxel_features,
voxel_dict['coors'], batch_size,
test_mode)
x = self.backbone(x)
if self.with_neck:
x = self.neck(x)
return x, point_misc
@torch.no_grad()
def voxelize(self, points):
"""Apply hard voxelization to points."""
voxels, coors, num_points = [], [], []
for res in points:
res_voxels, res_coors, res_num_points = self.voxel_layer(res)
voxels.append(res_voxels)
coors.append(res_coors)
num_points.append(res_num_points)
voxels = torch.cat(voxels, dim=0)
num_points = torch.cat(num_points, dim=0)
coors_batch = []
for i, coor in enumerate(coors):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
return (x, point_misc) if not test_mode else x
def forward_train(self,
points,
img_metas,
gt_bboxes_3d,
gt_labels_3d,
gt_bboxes_ignore=None):
"""Training forward function.
def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
**kwargs) -> dict:
"""Calculate losses from a batch of inputs dict and data samples.
Args:
points (list[torch.Tensor]): Point cloud of each sample.
img_metas (list[dict]): Meta information of each sample
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
gt_labels_3d (list[torch.Tensor]): Ground truth labels for
                boxes of each sample
gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
boxes to be ignored. Defaults to None.
batch_inputs_dict (dict): The model input dict which include
'points' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
dict: Losses of each branch.
dict: A dictionary of loss components.
"""
x, point_misc = self.extract_feat(points, img_metas, test_mode=False)
aux_loss = self.middle_encoder.aux_loss(*point_misc, gt_bboxes_3d)
outs = self.bbox_head(x)
loss_inputs = outs + (gt_bboxes_3d, gt_labels_3d, img_metas)
losses = self.bbox_head.loss(
*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
x, point_misc = self.extract_feat(batch_inputs_dict, test_mode=False)
batch_gt_bboxes_3d = [
data_sample.gt_instances_3d.bboxes_3d
for data_sample in batch_data_samples
]
aux_loss = self.middle_encoder.aux_loss(*point_misc,
batch_gt_bboxes_3d)
losses = self.bbox_head.loss(x, batch_data_samples)
losses.update(aux_loss)
return losses
def simple_test(self, points, img_metas, imgs=None, rescale=False):
"""Test function without augmentaiton."""
x, _ = self.extract_feat(points, img_metas, test_mode=True)
outs = self.bbox_head(x)
bbox_list = self.bbox_head.get_bboxes(
*outs, img_metas, rescale=rescale)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def aug_test(self, points, img_metas, imgs=None, rescale=False):
"""Test function with augmentaiton."""
feats = self.extract_feats(points, img_metas, test_mode=True)
# only support aug_test for one sample
aug_bboxes = []
for x, img_meta in zip(feats, img_metas):
outs = self.bbox_head(x)
bbox_list = self.bbox_head.get_bboxes(
*outs, img_meta, rescale=rescale)
bbox_list = [
dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
for bboxes, scores, labels in bbox_list
]
aug_bboxes.append(bbox_list[0])
# after merging, bboxes will be rescaled to the original image size
merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,
self.bbox_head.test_cfg)
return [merged_bboxes]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple
import torch
from mmcv.ops import points_in_boxes_all, three_interpolate, three_nn
from torch import Tensor
from torch import nn as nn
from mmdet3d.models.layers import SparseBasicBlock, make_sparse_convmodule
from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE
from mmdet3d.registry import MODELS
from mmdet3d.structures import BaseInstance3DBoxes
from mmdet.models.losses import sigmoid_focal_loss, smooth_l1_loss
if IS_SPCONV2_AVAILABLE:
......@@ -238,17 +242,17 @@ class SparseEncoderSASSD(SparseEncoder):
"""
def __init__(self,
in_channels,
sparse_shape,
order=('conv', 'norm', 'act'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
block_type='conv_module'):
in_channels: int,
sparse_shape: List[int],
order: Tuple[str] = ('conv', 'norm', 'act'),
norm_cfg: dict = dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels: int = 16,
output_channels: int = 128,
encoder_channels: Tuple[tuple] = ((16, ), (32, 32, 32),
(64, 64, 64), (64, 64, 64)),
encoder_paddings: Tuple[tuple] = ((1, ), (1, 1, 1), (1, 1, 1),
((0, 1, 1), 1, 1)),
block_type: str = 'conv_module'):
super(SparseEncoderSASSD, self).__init__(
in_channels=in_channels,
sparse_shape=sparse_shape,
......@@ -264,7 +268,11 @@ class SparseEncoderSASSD(SparseEncoder):
self.point_cls = nn.Linear(64, 1, bias=False)
self.point_reg = nn.Linear(64, 3, bias=False)
def forward(self, voxel_features, coors, batch_size, test_mode=False):
def forward(self,
voxel_features: Tensor,
coors: Tensor,
batch_size: Tensor,
test_mode: bool = False) -> Tuple[Tensor, tuple]:
"""Forward of SparseEncoder.
Args:
......@@ -276,7 +284,7 @@ class SparseEncoderSASSD(SparseEncoder):
Defaults to False.
Returns:
dict: Backbone features.
Tensor: Backbone features.
tuple[torch.Tensor]: Mean feature value of the points,
Classification result of the points,
Regression offsets of the points.
......@@ -333,14 +341,17 @@ class SparseEncoderSASSD(SparseEncoder):
return spatial_features, point_misc
def get_auxiliary_targets(self, nxyz, gt_boxes3d, enlarge=1.0):
def get_auxiliary_targets(self,
points_feats: Tensor,
gt_bboxes_3d: List[BaseInstance3DBoxes],
enlarge: float = 1.0) -> Tuple[Tensor, Tensor]:
"""Get auxiliary target.
Args:
nxyz (torch.Tensor): Mean features of the points.
gt_boxes3d (torch.Tensor): Coordinates in shape (N, 4),
the columns in the order of (batch_idx, z_idx, y_idx, x_idx).
            enlarge (int, optional): Enlarged scale. Defaults to 1.0.
points_feats (torch.Tensor): Mean features of the points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
            enlarge (float, optional): Enlarged scale. Defaults to 1.0.
Returns:
tuple[torch.Tensor]: Label of the points and
......@@ -348,31 +359,32 @@ class SparseEncoderSASSD(SparseEncoder):
"""
center_offsets = list()
pts_labels = list()
for i in range(len(gt_boxes3d)):
boxes3d = gt_boxes3d[i].tensor.cpu()
idx = torch.nonzero(nxyz[:, 0] == i).view(-1)
new_xyz = nxyz[idx, 1:].cpu()
for i in range(len(gt_bboxes_3d)):
boxes3d = gt_bboxes_3d[i].tensor.detach().clone()
idx = torch.nonzero(points_feats[:, 0] == i).view(-1)
point_xyz = points_feats[idx, 1:].detach().clone()
boxes3d[:, 3:6] *= enlarge
pts_in_flag, center_offset = self.calculate_pts_offsets(
new_xyz, boxes3d)
point_xyz, boxes3d)
pts_label = pts_in_flag.max(0)[0].byte()
pts_labels.append(pts_label)
center_offsets.append(center_offset)
center_offsets = torch.cat(center_offsets).cuda()
center_offsets = torch.cat(center_offsets)
pts_labels = torch.cat(pts_labels).to(center_offsets.device)
return pts_labels, center_offsets
def calculate_pts_offsets(self, points, boxes):
def calculate_pts_offsets(self, points: Tensor,
bboxes_3d: Tensor) -> Tuple[Tensor, Tensor]:
"""Find all boxes in which each point is, as well as the offsets from
the box centers.
Args:
points (torch.Tensor): [M, 3], [x, y, z] in LiDAR/DEPTH coordinate
boxes (torch.Tensor): [T, 7],
points (torch.Tensor): [M, 3], [x, y, z] in LiDAR coordinate
bboxes_3d (torch.Tensor): [T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
(x, y, z) is the bottom center.
......@@ -383,44 +395,41 @@ class SparseEncoderSASSD(SparseEncoder):
                if it belongs to the box, with the shape of (M, 3).
Default background = 0.
"""
boxes_num = len(boxes)
boxes_num = len(bboxes_3d)
pts_num = len(points)
points = points.cuda()
boxes = boxes.to(points.device)
box_idxs_of_pts = points_in_boxes_all(points[None, ...], boxes[None,
box_indices = points_in_boxes_all(points[None, ...], bboxes_3d[None,
...])
pts_indices = box_idxs_of_pts.squeeze(0).transpose(0, 1)
pts_indices = box_indices.squeeze(0).transpose(0, 1)
center_offsets = torch.zeros_like(points).to(points.device)
for i in range(boxes_num):
for j in range(pts_num):
if pts_indices[i][j] == 1:
center_offsets[j][0] = points[j][0] - boxes[i][0]
center_offsets[j][1] = points[j][1] - boxes[i][1]
center_offsets[j][0] = points[j][0] - bboxes_3d[i][0]
center_offsets[j][1] = points[j][1] - bboxes_3d[i][1]
center_offsets[j][2] = (
points[j][2] - (boxes[i][2] + boxes[i][2] / 2.0))
return pts_indices.cpu(), center_offsets.cpu()
points[j][2] -
(bboxes_3d[i][2] + bboxes_3d[i][2] / 2.0))
return pts_indices, center_offsets
def aux_loss(self, points, point_cls, point_reg, gt_bboxes):
def aux_loss(self, points: Tensor, point_cls: Tensor, point_reg: Tensor,
gt_bboxes_3d: Tensor) -> dict:
"""Calculate auxiliary loss.
Args:
points (torch.Tensor): Mean feature value of the points.
point_cls (torch.Tensor): Classification result of the points.
point_reg (torch.Tensor): Regression offsets of the points.
gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
Returns:
dict: Backbone features.
dict: Auxiliary loss.
"""
num_boxes = len(gt_bboxes)
num_boxes = len(gt_bboxes_3d)
pts_labels, center_targets = self.get_auxiliary_targets(
points, gt_bboxes)
points, gt_bboxes_3d)
rpn_cls_target = pts_labels.long()
pos = (pts_labels > 0).float()
......@@ -449,11 +458,13 @@ class SparseEncoderSASSD(SparseEncoder):
return dict(aux_loss_cls=aux_loss_cls, aux_loss_reg=aux_loss_reg)
def make_auxiliary_points(self,
source_tensor,
target,
offset=(0., -40., -3.),
voxel_size=(.05, .05, .1)):
def make_auxiliary_points(
self,
source_tensor: Tensor,
target: Tensor,
offset: Tuple = (0., -40., -3.),
voxel_size: Tuple = (.05, .05, .1)
) -> Tensor:
"""Make auxiliary points for loss computation.
Args:
......
# Copyright (c) OpenMMLab. All rights reserved.
import unittest
import numpy as np
import torch
from mmengine.structures import BaseDataElement
......@@ -13,19 +12,18 @@ class TestSegMetric(unittest.TestCase):
def _demo_mm_model_output(self):
"""Create a superset of inputs needed to run test or train batches."""
pred_pts_semantic_mask = torch.Tensor([
pred_pts_semantic_mask = torch.LongTensor([
0, 0, 1, 0, 0, 2, 1, 3, 1, 2, 1, 0, 2, 2, 2, 2, 1, 3, 0, 3, 3, 3, 3
])
pred_pts_seg_data = dict(pts_semantic_mask=pred_pts_semantic_mask)
data_sample = Det3DDataSample()
data_sample.pred_pts_seg = PointData(**pred_pts_seg_data)
gt_pts_semantic_mask = np.array([
0, 0, 0, 255, 0, 0, 1, 1, 1, 255, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 255
])
ann_info_data = dict(pts_semantic_mask=gt_pts_semantic_mask)
data_sample.eval_ann_info = ann_info_data
gt_pts_semantic_mask = torch.LongTensor(([
0, 0, 0, 4, 0, 0, 1, 1, 1, 4, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4
]))
gt_pts_seg_data = dict(pts_semantic_mask=gt_pts_semantic_mask)
data_sample.gt_pts_seg = PointData(**gt_pts_seg_data)
batch_data_samples = [data_sample]
......@@ -40,14 +38,8 @@ class TestSegMetric(unittest.TestCase):
def test_evaluate(self):
data_batch = {}
predictions = self._demo_mm_model_output()
label2cat = {
0: 'car',
1: 'bicycle',
2: 'motorcycle',
3: 'truck',
}
dataset_meta = dict(label2cat=label2cat, ignore_index=255)
seg_metric = SegMetric()
dataset_meta = dict(classes=('car', 'bicyle', 'motorcycle', 'truck'))
seg_metric = SegMetric(ignore_index=len(dataset_meta['classes']))
seg_metric.dataset_meta = dataset_meta
seg_metric.process(data_batch, predictions)
res = seg_metric.evaluate(1)
......
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestSDSSD(unittest.TestCase):
def test_3dssd(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'SASSD')
DefaultScope.get_instance('test_sassd', scope_name='mmdet3d')
_setup_seed(0)
voxel_net_cfg = _get_detector_cfg(
'sassd/sassd_8xb6-80e_kitti-3d-3class.py')
model = MODELS.build(voxel_net_cfg)
num_gt_instance = 3
packed_inputs = _create_detector_inputs(
num_gt_instance=num_gt_instance, num_classes=1)
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
data = model.data_preprocessor(packed_inputs, True)
torch.cuda.empty_cache()
results = model.forward(**data, mode='predict')
self.assertEqual(len(results), 1)
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
losses = model.forward(**data, mode='loss')
self.assertGreaterEqual(losses['loss_dir'][0], 0)
self.assertGreaterEqual(losses['loss_bbox'][0], 0)
self.assertGreaterEqual(losses['loss_cls'][0], 0)
self.assertGreater(losses['aux_loss_cls'][0], 0)
self.assertGreater(losses['aux_loss_reg'][0], 0)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment