# pgd_bbox_coder.py

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from torch.nn import functional as F

from mmdet3d.registry import TASK_UTILS
from .fcos3d_bbox_coder import FCOS3DBBoxCoder


@TASK_UTILS.register_module()
class PGDBBoxCoder(FCOS3DBBoxCoder):
    """Bounding box coder for PGD.

    Extends the FCOS3D coder with decoding of the extra 2D attributes
    (keypoint offsets and 2D box offsets) and with decoding of a
    probabilistic depth map into a single depth value per location.
    """

    def encode(self, gt_bboxes_3d, gt_labels_3d, gt_bboxes, gt_labels):
        """Placeholder for target encoding.

        Not implemented yet: all arguments are ignored and ``None`` is
        returned.
        """
        # TODO: refactor the encoder codes in the FCOS3D and PGD head
        pass

    def decode_2d(self,
                  bbox,
                  scale,
                  stride,
                  max_regress_range,
                  training,
                  pred_keypoints=False,
                  pred_bbox2d=True):
        """Decode regressed 2D attributes.

        Note:
            ``bbox`` is updated in place and the very same tensor is
            returned.

        Args:
            bbox (torch.Tensor): Raw bounding box predictions in shape
                [N, C, H, W]. The 16 channels starting at
                ``self.bbox_code_size`` are treated as keypoint offsets and
                the last 4 channels as 2D box offsets.
            scale (tuple[`Scale`]): Learnable scale parameters. ``scale[3]``
                is applied to the keypoint offsets and ``scale[-1]`` to the
                2D box offsets.
            stride (int): Stride for a specific feature level. Multiplied
                onto the 2D box offsets when ``self.norm_on_bbox`` is set
                and ``training`` is False.
            max_regress_range (int): Maximum regression range for a specific
                feature level. Multiplied onto the keypoint offsets when
                ``self.norm_on_bbox`` is set and ``training`` is False.
            training (bool): Whether the decoding is in the training
                procedure.
            pred_keypoints (bool, optional): Whether to predict keypoints.
                Defaults to False.
            pred_bbox2d (bool, optional): Whether to predict 2D bounding
                boxes. Defaults to True.

        Returns:
            torch.Tensor: Decoded boxes.
        """
        # 2 dimension of offsets x 8 corners of a 3D bbox
        kpt_dims = (
            slice(self.bbox_code_size, self.bbox_code_size + 16)
            if pred_keypoints else None)

        # Snapshot the raw 2D box channels before anything is written back
        # into ``bbox``. Only the channels that are actually rescaled get
        # copied, instead of the whole prediction tensor.
        raw_bbox2d = bbox[:, -4:].clone() if pred_bbox2d else None

        if pred_keypoints:
            scale_kpts = scale[3]
            # The copy keeps the scale layer's input independent of the
            # in-place write into ``bbox``.
            raw_kpts = bbox[:, kpt_dims].clone()
            bbox[:, kpt_dims] = torch.tanh(scale_kpts(raw_kpts).float())

        if pred_bbox2d:
            scale_bbox2d = scale[-1]
            # The last four dimensions are offsets to four sides of a 2D bbox
            bbox[:, -4:] = scale_bbox2d(raw_bbox2d).float()

        if self.norm_on_bbox:
            if pred_bbox2d:
                # Normalized offsets are kept non-negative with a ReLU. The
                # slice is copied first, presumably so that autograd never
                # holds a view of the tensor that is overwritten here.
                bbox[:, -4:] = F.relu(bbox[:, -4:].clone())
            if not training:
                # Outside of training the normalized values are scaled back
                # by the per-level factors.
                if pred_keypoints:
                    bbox[:, kpt_dims] *= max_regress_range
                if pred_bbox2d:
                    bbox[:, -4:] *= stride
        elif pred_bbox2d:
            # Without normalization the offsets are made positive via exp.
            bbox[:, -4:] = bbox[:, -4:].clone().exp()
        return bbox

    def decode_prob_depth(self, depth_cls_preds, depth_range, depth_unit,
                          division, num_depth_cls):
        """Decode probabilistic depth map.

        A softmax over the last dimension turns the logits into a
        distribution over ``num_depth_cls`` depth bins, and the expectation
        under that distribution is returned. With ``i`` the bin index and
        ``n = num_depth_cls``, the value assigned to bin ``i`` is:

        - 'uniform': ``depth_unit * i`` (``depth_range`` is not used).
        - 'linear': ``depth_range[0] + (depth_range[1] - depth_range[0]) *
          i * (i + 1) / (n * (n - 1))``.
        - 'log': ``exp(log(start) + i * log(end / start) / (n - 1))`` with
          ``start = max(depth_range[0], 1)`` and ``end = depth_range[1]``.
        - 'loguniform': the same log-space values as 'log', but the
          expectation is taken over the log-depths and exponentiated
          afterwards.

        With a single depth class there is no interval to divide the range
        by; the only bin is then placed at the start of the range.

        Args:
            depth_cls_preds (torch.Tensor): Depth probabilistic map in shape
                [..., self.num_depth_cls] (raw output before softmax).
            depth_range (tuple[float]): Range of depth estimation.
            depth_unit (int): Unit of depth range division.
            division (str): Depth division method. Options include 'uniform',
                'linear', 'log', 'loguniform'.
            num_depth_cls (int): Number of depth classes.

        Returns:
            torch.Tensor: Decoded probabilistic depth estimation, with the
            depth-class dimension summed out.

        Raises:
            NotImplementedError: If ``division`` is not one of the supported
                methods.
        """
        if division not in ('uniform', 'linear', 'log', 'loguniform'):
            raise NotImplementedError(
                f'Unsupported depth division method: {division!r}. '
                "Expected one of 'uniform', 'linear', 'log', 'loguniform'.")

        # Bin indices 0 .. num_depth_cls - 1 on the same dtype/device as the
        # predictions, shaped [1, num_depth_cls] so they broadcast against
        # the trailing class dimension.
        split_pts = depth_cls_preds.new_tensor(
            list(range(num_depth_cls))).reshape([1, -1])
        # softmax is out-of-place, so the input needs no defensive copy.
        depth_probs = F.softmax(depth_cls_preds, dim=-1)
        # Number of intervals between the first and last bin, clamped so
        # that a single depth class does not divide by zero.
        num_intervals = max(num_depth_cls - 1, 1)

        if division == 'uniform':
            depth_multiplier = depth_unit * split_pts
        elif division == 'linear':
            depth_multiplier = depth_range[0] + (
                depth_range[1] - depth_range[0]) / \
                (num_depth_cls * num_intervals) * \
                (split_pts * (split_pts + 1))
        else:
            # 'log' and 'loguniform' share bins that are evenly spaced in
            # log space; the lower bound is clamped to at least 1.
            start = max(depth_range[0], 1)
            end = depth_range[1]
            # Plain Python floats for the scalar terms of the tensor math.
            log_start = float(np.log(start))
            log_ratio = float(np.log(end / start))
            log_multiplier = log_start + \
                split_pts * log_ratio / num_intervals
            if division == 'loguniform':
                # Expectation over log-depths, mapped back afterwards.
                return (depth_probs * log_multiplier).sum(dim=-1).exp()
            depth_multiplier = log_multiplier.exp()

        return (depth_probs * depth_multiplier).sum(dim=-1)