Unverified commit 4cfae3f0 authored by xizaoqu, committed by GitHub

[Feature] Add panoptic segmentation metric (#2230)

* panoptic segmentation metric

* fix

* update

* update

* fix

* fix

* fix

* fix

* rename

* merge

* merge

* fix test

* update

* update

* update
parent f4ae6d75
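
A minimal sketch of how the new metric could be wired into an mmengine-style
config; the thing/stuff class indices and min_num_points below are
illustrative placeholders rather than values taken from this commit:

# hypothetical val_evaluator entry; adjust the indices to the target dataset
val_evaluator = dict(
    type='PanopticSegMetric',
    thing_class_inds=[0, 1, 2, 3, 4, 5, 6, 7],
    stuff_class_inds=[8, 9, 10, 11, 12, 13, 14, 15],
    min_num_points=50,
    id_offset=2**16)
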
@@ -6,10 +6,11 @@ from .functional import (aggregate_predictions, average_precision,
eval_det_cls, eval_map_recall, fast_hist, get_acc,
get_acc_cls, get_classwise_aps, get_single_class_aps,
indoor_eval, instance_seg_eval, load_lyft_gts,
load_lyft_predictions, lyft_eval, per_class_iou,
rename_gt, seg_eval)
load_lyft_predictions, lyft_eval, panoptic_seg_eval,
per_class_iou, rename_gt, seg_eval)
from .metrics import (IndoorMetric, InstanceSegMetric, KittiMetric, LyftMetric,
NuScenesMetric, SegMetric, WaymoMetric)
NuScenesMetric, PanopticSegMetric, SegMetric,
WaymoMetric)
__all__ = [
'kitti_eval_coco_style', 'kitti_eval', 'indoor_eval', 'lyft_eval',
@@ -19,5 +20,6 @@ __all__ = [
'get_classwise_aps', 'get_single_class_aps', 'fast_hist', 'per_class_iou',
'get_acc', 'get_acc_cls', 'seg_eval', 'KittiMetric', 'NuScenesMetric',
'IndoorMetric', 'LyftMetric', 'SegMetric', 'InstanceSegMetric',
'WaymoMetric', 'eval_class', 'do_eval'
'WaymoMetric', 'eval_class', 'do_eval', 'PanopticSegMetric',
'panoptic_seg_eval'
]
@@ -6,6 +6,7 @@ from .instance_seg_eval import (aggregate_predictions, instance_seg_eval,
from .kitti_utils import do_eval, kitti_eval, kitti_eval_coco_style
from .lyft_eval import (get_classwise_aps, get_single_class_aps, load_lyft_gts,
load_lyft_predictions, lyft_eval)
from .panoptic_seg_eval import panoptic_seg_eval
from .scannet_utils import evaluate_matches, scannet_eval
from .seg_eval import fast_hist, get_acc, get_acc_cls, per_class_iou, seg_eval
@@ -15,5 +16,5 @@ __all__ = [
'load_lyft_predictions', 'lyft_eval', 'get_classwise_aps',
'get_single_class_aps', 'fast_hist', 'per_class_iou', 'get_acc',
'get_acc_cls', 'seg_eval', 'kitti_eval', 'kitti_eval_coco_style',
'scannet_eval', 'evaluate_matches', 'do_eval'
'scannet_eval', 'evaluate_matches', 'do_eval', 'panoptic_seg_eval'
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Tuple
import numpy as np
from mmengine.logging import MMLogger, print_log
PQReturnsType = Tuple[np.double, np.double, np.ndarray, np.ndarray, np.ndarray]
class EvalPanoptic:
r"""Evaluate panoptic results for Semantickitti and NuScenes.
Please refer to the `semantic kitti api
<https://github.com/PRBonn/semantic-kitti-api/>`_ for more details
Args:
classes (list): Classes used in the dataset.
thing_classes (list): Thing classes used in the dataset.
stuff_classes (list): Stuff classes used in the dataset.
min_num_points (int): Minimum number of points of an object to be
counted as ground truth in evaluation.
        id_offset (int): Offset used to encode instance ids together with
            semantic labels.
label2cat (dict[str]): Mapping from label to category.
ignore_index (list[int]): Indices of ignored classes in evaluation.
logger (logging.Logger | str, optional): Logger used for printing.
Defaults to None.
"""
def __init__(self,
classes: List[str],
thing_classes: List[str],
stuff_classes: List[str],
min_num_points: int,
id_offset: int,
label2cat: Dict[str, str],
                 ignore_index: List[int],
logger: MMLogger = None):
self.classes = classes
self.thing_classes = thing_classes
self.stuff_classes = stuff_classes
self.ignore_index = np.array(ignore_index, dtype=int)
self.num_classes = len(classes)
self.label2cat = label2cat
self.logger = logger
self.include = np.array(
[n for n in range(self.num_classes) if n not in self.ignore_index],
dtype=int)
self.id_offset = id_offset
self.eps = 1e-15
self.min_num_points = min_num_points
self.reset()
def reset(self):
"""Reset class variables."""
# general things
# iou stuff
self.confusion_matrix = np.zeros((self.num_classes, self.num_classes),
dtype=int)
# panoptic stuff
self.pan_tp = np.zeros(self.num_classes, dtype=int)
self.pan_iou = np.zeros(self.num_classes, dtype=np.double)
self.pan_fp = np.zeros(self.num_classes, dtype=int)
self.pan_fn = np.zeros(self.num_classes, dtype=int)
self.evaluated_fnames = []
def evaluate(self, gt_labels: List[Dict[str, np.ndarray]],
seg_preds: List[Dict[str, np.ndarray]]) -> Dict[str, float]:
"""Evaluate the predictions.
Args:
            gt_labels (list[dict[np.ndarray]]): Ground truth semantic and
                instance masks of each frame.
            seg_preds (list[dict[np.ndarray]]): Predicted semantic and
                instance masks of each frame.
Returns:
dict[float]: The computed metrics. The keys are the names of
the metrics, and the values are corresponding results.
"""
assert len(seg_preds) == len(gt_labels)
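        # accumulate statistics frame by frame; the metrics themselves are
        # computed once over all frames in print_results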
for f in range(len(seg_preds)):
gt_semantic_seg = gt_labels[f]['pts_semantic_mask'].astype(int)
gt_instance_seg = gt_labels[f]['pts_instance_mask'].astype(int)
pred_semantic_seg = seg_preds[f]['pts_semantic_mask'].astype(int)
pred_instance_seg = seg_preds[f]['pts_instance_mask'].astype(int)
self.add_semantic_sample(pred_semantic_seg, gt_semantic_seg)
self.add_panoptic_sample(pred_semantic_seg, gt_semantic_seg,
pred_instance_seg, gt_instance_seg)
result_dicts = self.print_results()
return result_dicts
def print_results(self) -> Dict[str, float]:
"""Print results.
Returns:
dict[float]: The computed metrics. The keys are the names of
the metrics, and the values are corresponding results.
"""
pq, sq, rq, all_pq, all_sq, all_rq = self.get_pq()
miou, iou = self.get_iou()
# now make a nice dictionary
output_dict = {}
# make python variables
pq = pq.item()
sq = sq.item()
rq = rq.item()
all_pq = all_pq.flatten().tolist()
all_sq = all_sq.flatten().tolist()
all_rq = all_rq.flatten().tolist()
miou = miou.item()
iou = iou.flatten().tolist()
output_dict['all'] = {}
output_dict['all']['pq'] = pq
output_dict['all']['sq'] = sq
output_dict['all']['rq'] = rq
output_dict['all']['miou'] = miou
for idx, (_pq, _sq, _rq,
_iou) in enumerate(zip(all_pq, all_sq, all_rq, iou)):
class_str = self.classes[idx]
output_dict[class_str] = {}
output_dict[class_str]['pq'] = _pq
output_dict[class_str]['sq'] = _sq
output_dict[class_str]['rq'] = _rq
output_dict[class_str]['miou'] = _iou
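        # PQ-dagger replaces PQ with the semantic IoU for stuff classes,
        # following the SemanticKITTI panoptic benchmark convention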
pq_dagger = np.mean(
[float(output_dict[c]['pq']) for c in self.thing_classes] +
[float(output_dict[c]['miou']) for c in self.stuff_classes])
pq_things = np.mean(
[float(output_dict[c]['pq']) for c in self.thing_classes])
rq_things = np.mean(
[float(output_dict[c]['rq']) for c in self.thing_classes])
sq_things = np.mean(
[float(output_dict[c]['sq']) for c in self.thing_classes])
pq_stuff = np.mean(
[float(output_dict[c]['pq']) for c in self.stuff_classes])
rq_stuff = np.mean(
[float(output_dict[c]['rq']) for c in self.stuff_classes])
sq_stuff = np.mean(
[float(output_dict[c]['sq']) for c in self.stuff_classes])
result_dicts = {}
result_dicts['pq'] = float(pq)
result_dicts['pq_dagger'] = float(pq_dagger)
result_dicts['sq_mean'] = float(sq)
result_dicts['rq_mean'] = float(rq)
result_dicts['miou'] = float(miou)
result_dicts['pq_stuff'] = float(pq_stuff)
result_dicts['rq_stuff'] = float(rq_stuff)
result_dicts['sq_stuff'] = float(sq_stuff)
result_dicts['pq_things'] = float(pq_things)
result_dicts['rq_things'] = float(rq_things)
result_dicts['sq_things'] = float(sq_things)
if self.logger is not None:
print_log('| | IoU | PQ | RQ | SQ |',
self.logger)
for k, v in output_dict.items():
print_log(
'|{}| {:.4f} | {:.4f} | {:.4f} | {:.4f} |'.format(
k.ljust(8)[-8:], v['miou'], v['pq'], v['rq'], v['sq']),
self.logger)
print_log('True Positive: ', self.logger)
print_log('\t|\t'.join([str(x) for x in self.pan_tp]), self.logger)
            print_log('False Positive: ', self.logger)
print_log('\t|\t'.join([str(x) for x in self.pan_fp]), self.logger)
            print_log('False Negative: ', self.logger)
print_log('\t|\t'.join([str(x) for x in self.pan_fn]), self.logger)
else:
print('| | IoU | PQ | RQ | SQ |')
for k, v in output_dict.items():
print('|{}| {:.4f} | {:.4f} | {:.4f} | {:.4f} |'.format(
k.ljust(8)[-8:], v['miou'], v['pq'], v['rq'], v['sq']))
print('True Positive: ')
print('\t|\t'.join([str(x) for x in self.pan_tp]))
print('False Positive: ')
print('\t|\t'.join([str(x) for x in self.pan_fp]))
print('False Negative: ')
print('\t|\t'.join([str(x) for x in self.pan_fn]))
return result_dicts
def get_pq(self) -> PQReturnsType:
"""Get results of PQ metric.
Returns:
            tuple(np.ndarray): Mean PQ, SQ and RQ over the included classes,
                followed by the per-class PQ, SQ and RQ arrays.
"""
# get PQ and first calculate for all classes
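        # per class: SQ = sum(IoU of matched segments) / TP and
        # RQ = TP / (TP + 0.5 * FP + 0.5 * FN), so that PQ = SQ * RQ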
sq_all = self.pan_iou.astype(np.double) / np.maximum(
self.pan_tp.astype(np.double), self.eps)
rq_all = self.pan_tp.astype(np.double) / np.maximum(
self.pan_tp.astype(np.double) + 0.5 * self.pan_fp.astype(np.double)
+ 0.5 * self.pan_fn.astype(np.double), self.eps)
pq_all = sq_all * rq_all
# then do the REAL mean (no ignored classes)
sq = sq_all[self.include].mean()
rq = rq_all[self.include].mean()
pq = pq_all[self.include].mean()
return (pq, sq, rq, pq_all, sq_all, rq_all)
def get_iou(self) -> Tuple[np.double, np.ndarray]:
"""Get results of IOU metric.
Returns:
            tuple(np.ndarray): Mean IoU over the included classes and the
                per-class IoU.
"""
tp, fp, fn = self.get_iou_stats()
intersection = tp
union = tp + fp + fn
union = np.maximum(union, self.eps)
iou = intersection.astype(np.double) / union.astype(np.double)
iou_mean = (intersection[self.include].astype(np.double) /
union[self.include].astype(np.double)).mean()
return iou_mean, iou
def get_iou_stats(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Get IOU statistics of TP, FP and FN.
Returns:
            tuple(np.ndarray): Per-class TP, FP and FN counts.
"""
# copy to avoid modifying the real deal
conf = self.confusion_matrix.copy().astype(np.double)
        # remove FP on points whose ground truth belongs to an ignored class
        # but which were predicted as another class
        # (corresponds to zeroing the cols of those classes,
        # since the predictions go on the rows)
conf[:, self.ignore_index] = 0
# get the clean stats
tp = conf.diagonal()
fp = conf.sum(axis=1) - tp
fn = conf.sum(axis=0) - tp
return tp, fp, fn
def add_semantic_sample(self, semantic_preds: np.ndarray,
gt_semantics: np.ndarray):
"""Add one batch of semantic predictions and ground truths.
Args:
semantic_preds (np.ndarray): Semantic predictions.
gt_semantics (np.ndarray): Semantic ground truths.
"""
idxs = np.stack([semantic_preds, gt_semantics], axis=0)
# make confusion matrix (cols = gt, rows = pred)
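        # np.add.at accumulates repeated (pred, gt) index pairs, which a
        # plain fancy-index assignment would silently overwrite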
np.add.at(self.confusion_matrix, tuple(idxs), 1)
def add_panoptic_sample(self, semantic_preds: np.ndarray,
gt_semantics: np.ndarray,
instance_preds: np.ndarray,
gt_instances: np.ndarray):
"""Add one sample of panoptic predictions and ground truths for
evaluation.
Args:
semantic_preds (np.ndarray): Semantic predictions.
gt_semantics (np.ndarray): Semantic ground truths.
instance_preds (np.ndarray): Instance predictions.
gt_instances (np.ndarray): Instance ground truths.
"""
# avoid zero (ignored label)
instance_preds = instance_preds + 1
gt_instances = gt_instances + 1
# only interested in points that are
# outside the void area (not in excluded classes)
for cl in self.ignore_index:
# make a mask for this class
gt_not_in_excl_mask = gt_semantics != cl
# remove all other points
semantic_preds = semantic_preds[gt_not_in_excl_mask]
gt_semantics = gt_semantics[gt_not_in_excl_mask]
instance_preds = instance_preds[gt_not_in_excl_mask]
gt_instances = gt_instances[gt_not_in_excl_mask]
# first step is to count intersections > 0.5 IoU
# for each class (except the ignored ones)
for cl in self.include:
# get a class mask
pred_inst_in_cl_mask = semantic_preds == cl
gt_inst_in_cl_mask = gt_semantics == cl
# get instance points in class (makes outside stuff 0)
pred_inst_in_cl = instance_preds * pred_inst_in_cl_mask.astype(int)
gt_inst_in_cl = gt_instances * gt_inst_in_cl_mask.astype(int)
# generate the areas for each unique instance prediction
unique_pred, counts_pred = np.unique(
pred_inst_in_cl[pred_inst_in_cl > 0], return_counts=True)
id2idx_pred = {id: idx for idx, id in enumerate(unique_pred)}
matched_pred = np.array([False] * unique_pred.shape[0])
            # generate the areas for each unique gt instance
unique_gt, counts_gt = np.unique(
gt_inst_in_cl[gt_inst_in_cl > 0], return_counts=True)
id2idx_gt = {id: idx for idx, id in enumerate(unique_gt)}
matched_gt = np.array([False] * unique_gt.shape[0])
# generate intersection using offset
valid_combos = np.logical_and(pred_inst_in_cl > 0,
gt_inst_in_cl > 0)
id_offset_combo = pred_inst_in_cl[
valid_combos] + self.id_offset * gt_inst_in_cl[valid_combos]
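            # every overlapping (pred, gt) instance pair maps to a unique
            # code, so np.unique on the codes yields each intersection size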
unique_combo, counts_combo = np.unique(
id_offset_combo, return_counts=True)
# generate an intersection map
# count the intersections with over 0.5 IoU as TP
gt_labels = unique_combo // self.id_offset
pred_labels = unique_combo % self.id_offset
gt_areas = np.array([counts_gt[id2idx_gt[id]] for id in gt_labels])
pred_areas = np.array(
[counts_pred[id2idx_pred[id]] for id in pred_labels])
intersections = counts_combo
unions = gt_areas + pred_areas - intersections
ious = intersections.astype(float) / unions.astype(float)
tp_indexes = ious > 0.5
self.pan_tp[cl] += np.sum(tp_indexes)
self.pan_iou[cl] += np.sum(ious[tp_indexes])
matched_gt[[id2idx_gt[id] for id in gt_labels[tp_indexes]]] = True
matched_pred[[id2idx_pred[id]
for id in pred_labels[tp_indexes]]] = True
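            # segments with fewer than min_num_points points are excluded
            # from the FN/FP counts, as in the SemanticKITTI protocol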
# count the FN
if len(counts_gt) > 0:
self.pan_fn[cl] += np.sum(
np.logical_and(counts_gt >= self.min_num_points,
~matched_gt))
# count the FP
if len(matched_pred) > 0:
self.pan_fp[cl] += np.sum(
np.logical_and(counts_pred >= self.min_num_points,
~matched_pred))
def panoptic_seg_eval(gt_labels: List[Dict[str, np.ndarray]],
                      seg_preds: List[Dict[str, np.ndarray]],
classes: List[str],
thing_classes: List[str],
stuff_classes: List[str],
min_num_points: int,
id_offset: int,
label2cat: Dict[str, str],
ignore_index: List[int],
logger: MMLogger = None) -> Dict[str, float]:
"""Panoptic Segmentation Evaluation.
Evaluate the result of the panoptic segmentation.
Args:
        gt_labels (list[dict[np.ndarray]]): Ground truth semantic and
            instance masks of each frame.
        seg_preds (list[dict[np.ndarray]]): Predicted semantic and
            instance masks of each frame.
classes (list[str]): Classes used in the dataset.
thing_classes (list[str]): Thing classes used in the dataset.
stuff_classes (list[str]): Stuff classes used in the dataset.
        min_num_points (int): Minimum number of points of an object to be
            counted as ground truth in evaluation.
        id_offset (int): Offset used to encode instance ids together with
            semantic labels.
label2cat (dict[str]): Mapping from label to category.
ignore_index (list[int]): Indices of ignored classes in evaluation.
logger (logging.Logger | str, optional): Logger used for printing.
Defaults to None.
Returns:
dict[float]: Dict of results.
"""
    evaluator = EvalPanoptic(classes, thing_classes, stuff_classes,
                             min_num_points, id_offset, label2cat,
                             ignore_index, logger)
    ret_dict = evaluator.evaluate(gt_labels, seg_preds)
return ret_dict
@@ -4,10 +4,11 @@ from .instance_seg_metric import InstanceSegMetric # noqa: F401,F403
from .kitti_metric import KittiMetric # noqa: F401,F403
from .lyft_metric import LyftMetric # noqa: F401,F403
from .nuscenes_metric import NuScenesMetric # noqa: F401,F403
from .panoptic_seg_metric import PanopticSegMetric # noqa: F401,F403
from .seg_metric import SegMetric # noqa: F401,F403
from .waymo_metric import WaymoMetric # noqa: F401,F403
__all__ = [
'KittiMetric', 'NuScenesMetric', 'IndoorMetric', 'LyftMetric', 'SegMetric',
'InstanceSegMetric', 'WaymoMetric'
'InstanceSegMetric', 'WaymoMetric', 'PanopticSegMetric'
]
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional
from mmengine.logging import MMLogger
from mmdet3d.evaluation import panoptic_seg_eval
from mmdet3d.registry import METRICS
from .seg_metric import SegMetric
@METRICS.register_module()
class PanopticSegMetric(SegMetric):
"""3D Panoptic segmentation evaluation metric.
Args:
thing_class_inds (list[int]): Indices of thing classes.
stuff_class_inds (list[int]): Indices of stuff classes.
min_num_points (int): Minimum number of points of an object to be
counted as ground truth in evaluation.
        id_offset (int): Offset used to encode instance ids together with
            semantic labels.
collect_device (str, optional): Device name used for collecting
results from different ranks during distributed training.
Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
prefix (str, optional): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
pklfile_prefix (str, optional): The prefix of pkl files, including
the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Defaults to None.
submission_prefix (str, optional): The prefix of submission data.
If not specified, the submission data will not be generated.
            Defaults to None.
"""
def __init__(self,
thing_class_inds: List[int],
stuff_class_inds: List[int],
min_num_points: int,
id_offset: int,
collect_device: str = 'cpu',
prefix: Optional[str] = None,
                 pklfile_prefix: Optional[str] = None,
                 submission_prefix: Optional[str] = None,
**kwargs):
self.thing_class_inds = thing_class_inds
self.stuff_class_inds = stuff_class_inds
self.min_num_points = min_num_points
self.id_offset = id_offset
super(PanopticSegMetric, self).__init__(
pklfile_prefix=pklfile_prefix,
submission_prefix=submission_prefix,
prefix=prefix,
collect_device=collect_device,
**kwargs)
    # TODO: modify format_results for panoptic segmentation evaluation;
    # different datasets have different needs.
def compute_metrics(self, results: list) -> Dict[str, float]:
"""Compute the metrics from processed results.
Args:
results (list): The processed results of each batch.
Returns:
Dict[str, float]: The computed metrics. The keys are the names of
the metrics, and the values are corresponding results.
"""
logger: MMLogger = MMLogger.get_current_instance()
if self.submission_prefix:
self.format_results(results)
return None
label2cat = self.dataset_meta['label2cat']
ignore_index = self.dataset_meta['ignore_index']
classes = self.dataset_meta['classes']
thing_classes = [classes[i] for i in self.thing_class_inds]
stuff_classes = [classes[i] for i in self.stuff_class_inds]
gt_labels = []
seg_preds = []
        for eval_ann, single_pred_results in results:
            gt_labels.append(eval_ann)
            seg_preds.append(single_pred_results)
ret_dict = panoptic_seg_eval(gt_labels, seg_preds, classes,
thing_classes, stuff_classes,
self.min_num_points, self.id_offset,
label2cat, [ignore_index], logger)
return ret_dict
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from mmdet3d.evaluation.functional.panoptic_seg_eval import panoptic_seg_eval
def test_panoptic_seg_eval():
if not torch.cuda.is_available():
pytest.skip()
classes = ['unlabeled', 'person', 'dog', 'grass', 'sky']
label2cat = {
0: 'unlabeled',
1: 'person',
2: 'dog',
3: 'grass',
4: 'sky',
}
thing_classes = ['person', 'dog']
stuff_classes = ['grass', 'sky']
    ignore_index = [0]  # only ignore the 'unlabeled' class
min_points = 1 # for this example we care about all points
offset = 2**16
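    # instance ids in this test stay far below the offset, so the packed
    # (pred, gt) codes remain unique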
# generate ground truth and prediction
semantic_preds = []
instance_preds = []
gt_semantic = []
gt_instance = []
# some ignore stuff
num_ignore = 50
semantic_preds.extend([0 for i in range(num_ignore)])
instance_preds.extend([0 for i in range(num_ignore)])
gt_semantic.extend([0 for i in range(num_ignore)])
gt_instance.extend([0 for i in range(num_ignore)])
# grass segment
num_grass = 50
num_grass_pred = 40 # rest is sky
semantic_preds.extend([1 for i in range(num_grass_pred)]) # grass
semantic_preds.extend([2
for i in range(num_grass - num_grass_pred)]) # sky
instance_preds.extend([0 for i in range(num_grass)])
gt_semantic.extend([1 for i in range(num_grass)]) # grass
gt_instance.extend([0 for i in range(num_grass)])
# sky segment
num_sky = 50
num_sky_pred = 40 # rest is grass
semantic_preds.extend([2 for i in range(num_sky_pred)]) # sky
semantic_preds.extend([1 for i in range(num_sky - num_sky_pred)]) # grass
instance_preds.extend([0 for i in range(num_sky)]) # first instance
gt_semantic.extend([2 for i in range(num_sky)]) # sky
gt_instance.extend([0 for i in range(num_sky)]) # first instance
# wrong dog as person prediction
num_dog = 50
num_person = num_dog
semantic_preds.extend([3 for i in range(num_person)])
instance_preds.extend([35 for i in range(num_person)])
gt_semantic.extend([4 for i in range(num_dog)])
gt_instance.extend([22 for i in range(num_dog)])
# two persons in prediction, but three in gt
num_person = 50
semantic_preds.extend([3 for i in range(6 * num_person)])
instance_preds.extend([8 for i in range(4 * num_person)])
instance_preds.extend([95 for i in range(2 * num_person)])
gt_semantic.extend([3 for i in range(6 * num_person)])
gt_instance.extend([33 for i in range(3 * num_person)])
gt_instance.extend([42 for i in range(num_person)])
gt_instance.extend([11 for i in range(2 * num_person)])
# gt and pred to numpy
semantic_preds = np.array(semantic_preds, dtype=int).reshape(1, -1)
instance_preds = np.array(instance_preds, dtype=int).reshape(1, -1)
gt_semantic = np.array(gt_semantic, dtype=int).reshape(1, -1)
gt_instance = np.array(gt_instance, dtype=int).reshape(1, -1)
gt_labels = [{
'pts_semantic_mask': gt_semantic,
'pts_instance_mask': gt_instance
}]
seg_preds = [{
'pts_semantic_mask': semantic_preds,
'pts_instance_mask': instance_preds
}]
ret_value = panoptic_seg_eval(gt_labels, seg_preds, classes, thing_classes,
stuff_classes, min_points, offset, label2cat,
ignore_index)
assert np.isclose(ret_value['pq'], 0.47916666666666663)
assert np.isclose(ret_value['rq_mean'], 0.6666666666666666)
assert np.isclose(ret_value['sq_mean'], 0.5520833333333333)
assert np.isclose(ret_value['miou'], 0.5476190476190476)
# Copyright (c) OpenMMLab. All rights reserved.
import unittest
import numpy as np
import torch
from mmengine.structures import BaseDataElement
from mmdet3d.evaluation.metrics import PanopticSegMetric
from mmdet3d.structures import Det3DDataSample, PointData
class TestPanopticSegMetric(unittest.TestCase):
def _demo_mm_model_output(self):
"""Create a superset of inputs needed to run test or train batches."""
# generate ground truth and prediction
semantic_preds = []
instance_preds = []
gt_semantic = []
gt_instance = []
# some ignore stuff
num_ignore = 50
semantic_preds.extend([0 for i in range(num_ignore)])
instance_preds.extend([0 for i in range(num_ignore)])
gt_semantic.extend([0 for i in range(num_ignore)])
gt_instance.extend([0 for i in range(num_ignore)])
# grass segment
num_grass = 50
num_grass_pred = 40 # rest is sky
semantic_preds.extend([1 for i in range(num_grass_pred)]) # grass
semantic_preds.extend([2 for i in range(num_grass - num_grass_pred)
]) # sky
instance_preds.extend([0 for i in range(num_grass)])
gt_semantic.extend([1 for i in range(num_grass)]) # grass
gt_instance.extend([0 for i in range(num_grass)])
# sky segment
num_sky = 50
num_sky_pred = 40 # rest is grass
semantic_preds.extend([2 for i in range(num_sky_pred)]) # sky
semantic_preds.extend([1 for i in range(num_sky - num_sky_pred)
]) # grass
instance_preds.extend([0 for i in range(num_sky)]) # first instance
gt_semantic.extend([2 for i in range(num_sky)]) # sky
gt_instance.extend([0 for i in range(num_sky)]) # first instance
# wrong dog as person prediction
num_dog = 50
num_person = num_dog
semantic_preds.extend([3 for i in range(num_person)])
instance_preds.extend([35 for i in range(num_person)])
gt_semantic.extend([4 for i in range(num_dog)])
gt_instance.extend([22 for i in range(num_dog)])
# two persons in prediction, but three in gt
num_person = 50
semantic_preds.extend([3 for i in range(6 * num_person)])
instance_preds.extend([8 for i in range(4 * num_person)])
instance_preds.extend([95 for i in range(2 * num_person)])
gt_semantic.extend([3 for i in range(6 * num_person)])
gt_instance.extend([33 for i in range(3 * num_person)])
gt_instance.extend([42 for i in range(num_person)])
gt_instance.extend([11 for i in range(2 * num_person)])
# gt and pred to numpy
semantic_preds = np.array(semantic_preds, dtype=int).reshape(1, -1)
instance_preds = np.array(instance_preds, dtype=int).reshape(1, -1)
gt_semantic = np.array(gt_semantic, dtype=int).reshape(1, -1)
gt_instance = np.array(gt_instance, dtype=int).reshape(1, -1)
pred_pts_semantic_mask = torch.Tensor(semantic_preds)
pred_pts_instance_mask = torch.Tensor(instance_preds)
pred_pts_seg_data = dict(
pts_semantic_mask=pred_pts_semantic_mask,
pts_instance_mask=pred_pts_instance_mask)
data_sample = Det3DDataSample()
data_sample.pred_pts_seg = PointData(**pred_pts_seg_data)
ann_info_data = dict(
pts_semantic_mask=gt_semantic, pts_instance_mask=gt_instance)
data_sample.eval_ann_info = ann_info_data
batch_data_samples = [data_sample]
predictions = []
for pred in batch_data_samples:
if isinstance(pred, BaseDataElement):
pred = pred.to_dict()
predictions.append(pred)
return predictions
def test_evaluate(self):
data_batch = {}
predictions = self._demo_mm_model_output()
classes = ['unlabeled', 'person', 'dog', 'grass', 'sky']
label2cat = {
0: 'unlabeled',
1: 'person',
2: 'dog',
3: 'grass',
4: 'sky',
}
        ignore_index = [0]  # only ignore the 'unlabeled' class
min_num_points = 1 # for this example we care about all points
id_offset = 2**16
dataset_meta = dict(
label2cat=label2cat, ignore_index=ignore_index, classes=classes)
panoptic_seg_metric = PanopticSegMetric(
thing_class_inds=[0, 1],
stuff_class_inds=[2, 3],
min_num_points=min_num_points,
id_offset=id_offset,
)
panoptic_seg_metric.dataset_meta = dataset_meta
panoptic_seg_metric.process(data_batch, predictions)
res = panoptic_seg_metric.evaluate(1)
self.assertIsInstance(res, dict)