# target_transforms.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


class PanopticTargetGenerator(object):
    """
    Generates panoptic training target for Panoptic-DeepLab.
    Annotation is assumed to have Cityscapes format.

    Args:
        ignore_index (int): The ignore label for semantic segmentation.
        rgb2id (Function): Function, panoptic label is encoded in a colored image, this function converts color to the
            corresponding panoptic label.
        thing_list (list): A list of thing classes.
        sigma (int, optional): The sigma for Gaussian kernel. Default: 8.
        ignore_stuff_in_offset (bool, optional): Whether to ignore stuff region when training the offset branch. Default: False.
        small_instance_area (int, optional): Thing instances with fewer pixels than this value are treated as small
            instances. Default: 0.
        small_instance_weight (int|float, optional): Semantic loss weight assigned to the pixels of small instances.
            Default: 1.
        ignore_crowd_in_semantic (bool, optional): Whether to ignore crowd region in semantic segmentation branch,
            crowd region is ignored in the original TensorFlow implementation. Default: False.
    """

    def __init__(self,
                 ignore_index,
                 rgb2id,
                 thing_list,
                 sigma=8,
                 ignore_stuff_in_offset=False,
                 small_instance_area=0,
                 small_instance_weight=1,
                 ignore_crowd_in_semantic=False):
        self.ignore_index = ignore_index
        self.rgb2id = rgb2id
        self.thing_list = thing_list
        self.ignore_stuff_in_offset = ignore_stuff_in_offset
        self.small_instance_area = small_instance_area
        self.small_instance_weight = small_instance_weight
        self.ignore_crowd_in_semantic = ignore_crowd_in_semantic

        self.sigma = sigma
        # Precompute a square Gaussian patch of side 6 * sigma + 3 whose peak
        # (value 1) sits at index (3 * sigma + 1, 3 * sigma + 1). `__call__`
        # pastes crops of this patch into the center heatmap.
        size = 6 * sigma + 3
        x = np.arange(0, size, 1, float)
        y = x[:, np.newaxis]
        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
        self.g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    def __call__(self, panoptic, segments):
        """Generates the training target.
        reference: https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/createPanopticImgs.py
        reference: https://github.com/facebookresearch/detectron2/blob/master/datasets/prepare_panoptic_fpn.py#L18

        Args:
            panoptic (np.ndarray): Colored image encoding panoptic label.
            segments (list): A list of dictionary containing information of every segment, it has fields:
                - id: panoptic id, after decoding `panoptic`.
                - category_id: semantic class id.
                - area: segment area.
                - bbox: segment bounding box.
                - iscrowd: crowd region.
                Only `id`, `category_id` and `iscrowd` are read here.

        Returns:
            A dictionary with fields:
                - semantic: np.ndarray, semantic label, shape=(H, W).
                - foreground: np.ndarray, foreground mask label, shape=(H, W).
                - center: np.ndarray, center heatmap, shape=(1, H, W).
                - center_points: List, center coordinates, each entry is [y-coord, x-coord].
                - offset: np.ndarray, offset, shape=(2, H, W), first dim is (offset_y, offset_x).
                - semantic_weights: np.ndarray, loss weight for semantic prediction, shape=(H, W).
                - center_weights: np.ndarray, ignore region of center prediction, shape=(H, W), used as weights for center
                    regression 0 is ignore, 1 is has instance. Multiply this mask to loss.
                - offset_weights: np.ndarray, ignore region of offset prediction, shape=(H, W), used as weights for offset
                    regression 0 is ignore, 1 is has instance. Multiply this mask to loss.
        """
        panoptic = self.rgb2id(panoptic)
        height, width = panoptic.shape[0], panoptic.shape[1]
        # Labels are kept in int64 so that an `ignore_index` or category id
        # outside the uint8 range is stored as-is instead of wrapping/raising.
        semantic = np.full(panoptic.shape, self.ignore_index, dtype=np.int64)
        foreground = np.zeros_like(panoptic, dtype=np.uint8)
        center = np.zeros((1, height, width), dtype=np.float32)
        center_pts = []
        offset = np.zeros((2, height, width), dtype=np.float32)
        # Per-pixel row / column coordinate grids (0, 1, 2, ...).
        y_coord = np.ones_like(panoptic, dtype=np.float32)
        x_coord = np.ones_like(panoptic, dtype=np.float32)
        y_coord = np.cumsum(y_coord, axis=0) - 1
        x_coord = np.cumsum(x_coord, axis=1) - 1
        # Generate pixel-wise loss weights. A float buffer is used so that a
        # fractional `small_instance_weight` is not truncated.
        semantic_weights = np.ones_like(panoptic, dtype=np.float32)
        # 0: ignore, 1: has instance
        # three conditions for a region to be ignored for instance branches:
        # (1) It is labeled as `ignore_index`
        # (2) It is crowd region (iscrowd=1)
        # (3) (Optional) It is stuff region (for offset branch)
        center_weights = np.zeros_like(panoptic, dtype=np.uint8)
        offset_weights = np.zeros_like(panoptic, dtype=np.uint8)
        sigma = self.sigma
        for seg in segments:
            cat_id = seg["category_id"]
            is_crowd = seg['iscrowd']
            is_thing = cat_id in self.thing_list
            # Compute the segment mask once and reuse it for every target.
            seg_mask = panoptic == seg["id"]

            if not (self.ignore_crowd_in_semantic and is_crowd):
                semantic[seg_mask] = cat_id
            if is_thing:
                foreground[seg_mask] = 1
            if not is_crowd:
                # Ignored regions are not in `segments`, and crowd regions
                # keep weight 0 because this branch is skipped for them.
                center_weights[seg_mask] = 1
                # Stuff regions only contribute to the offset loss when
                # `ignore_stuff_in_offset` is disabled.
                if is_thing or not self.ignore_stuff_in_offset:
                    offset_weights[seg_mask] = 1
            if not is_thing:
                continue

            # find instance center
            mask_index = np.where(seg_mask)
            ins_area = len(mask_index[0])
            if ins_area == 0:
                # the instance is completely cropped
                continue

            if ins_area < self.small_instance_area:
                semantic_weights[seg_mask] = self.small_instance_weight

            center_y, center_x = np.mean(mask_index[0]), np.mean(mask_index[1])
            center_pts.append([center_y, center_x])

            # generate center heatmap
            y, x = int(center_y), int(center_x)
            # Defensive guard for a center outside the image boundary; note
            # that it also skips the offset target of this instance.
            if x < 0 or y < 0 or x >= width or y >= height:
                continue
            # upper left / bottom right corners (x, y) of the Gaussian patch
            # in image coordinates; they may fall outside the image.
            ul = int(np.round(x - 3 * sigma - 1)), int(
                np.round(y - 3 * sigma - 1))
            br = int(np.round(x + 3 * sigma + 2)), int(
                np.round(y + 3 * sigma + 2))

            # Crop of the Gaussian patch that falls inside the image ...
            c, d = max(0, -ul[0]), min(br[0], width) - ul[0]
            a, b = max(0, -ul[1]), min(br[1], height) - ul[1]
            # ... and the matching window of the heatmap.
            cc, dd = max(0, ul[0]), min(br[0], width)
            aa, bb = max(0, ul[1]), min(br[1], height)
            # Overlapping instances keep the element-wise maximum response.
            center[0, aa:bb, cc:dd] = np.maximum(center[0, aa:bb, cc:dd],
                                                 self.g[a:b, c:d])

            # generate offset (2, h, w) -> (y-dir, x-dir)
            offset_y_index = (np.zeros_like(mask_index[0]), mask_index[0],
                              mask_index[1])
            offset_x_index = (np.ones_like(mask_index[0]), mask_index[0],
                              mask_index[1])
            offset[offset_y_index] = center_y - y_coord[mask_index]
            offset[offset_x_index] = center_x - x_coord[mask_index]

        return dict(
            semantic=semantic.astype('long'),
            foreground=foreground.astype('long'),
            center=center.astype(np.float32),
            center_points=center_pts,
            offset=offset.astype(np.float32),
            semantic_weights=semantic_weights.astype(np.float32),
            center_weights=center_weights.astype(np.float32),
            offset_weights=offset_weights.astype(np.float32))


class SemanticTargetGenerator(object):
    """
    Generates semantic training target only for Panoptic-DeepLab (no instance).
    Annotation is assumed to have Cityscapes format.

    Args:
        ignore_index (int): The ignore label for semantic segmentation.
        rgb2id (function): Function, panoptic label is encoded in a colored image, this function converts color to the
            corresponding panoptic label.
    """

    def __init__(self, ignore_index, rgb2id):
        self.ignore_index = ignore_index
        self.rgb2id = rgb2id

    def __call__(self, panoptic, segments):
        """Generates the training target.
        reference: https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/createPanopticImgs.py
        reference: https://github.com/facebookresearch/detectron2/blob/master/datasets/prepare_panoptic_fpn.py#L18

        Args:
            panoptic (np.ndarray): Colored image encoding panoptic label.
            segments (list): A list of dictionary containing information of every segment, it has fields:
                - id: panoptic id, after decoding `panoptic`.
                - category_id: semantic class id.
                - area: segment area.
                - bbox: segment bounding box.
                - iscrowd: crowd region.
                Only `id` and `category_id` are read here.

        Returns:
            A dictionary with fields:
                - semantic: np.ndarray, semantic label, shape=(H, W). Pixels not covered by any segment
                    hold `ignore_index`.
        """
        panoptic = self.rgb2id(panoptic)
        # int64 storage keeps an `ignore_index` or category id outside the
        # uint8 range intact instead of wrapping or raising.
        semantic = np.full(panoptic.shape, self.ignore_index, dtype=np.int64)
        for seg in segments:
            semantic[panoptic == seg["id"]] = seg["category_id"]

        return dict(semantic=semantic.astype('long'))


class InstanceTargetGenerator(object):
    """
    Generates instance target only for Panoptic-DeepLab.
    Annotation is assumed to have Cityscapes format.

    Args:
        rgb2id (function): Function, panoptic label is encoded in a colored image, this function converts color to the
            corresponding panoptic label.
    """

    def __init__(self, rgb2id):
        self.rgb2id = rgb2id

    def __call__(self, panoptic):
        """Generates the instance target.

        Args:
            panoptic (np.ndarray): Colored image encoding panoptic label.

        Returns:
            A dictionary with fields:
                - instance: np.ndarray (int64), shape=(H, W). 0 is background. 1, 2, 3 ... is instance, so it is
                    class agnostic.
        """
        panoptic = self.rgb2id(panoptic)
        instance = np.zeros_like(panoptic, dtype=np.int64)
        unique_ids = np.unique(panoptic)
        # Only panoptic ids strictly greater than 1000 are treated as
        # instances; they are numbered 1, 2, 3, ... in ascending id order
        # (np.unique returns sorted values).
        instance_ids = unique_ids[unique_ids > 1000]
        for ins_id, pan_id in enumerate(instance_ids, start=1):
            instance[panoptic == pan_id] = ins_id

        return dict(instance=instance)


class RawPanopticTargetGenerator(object):
    """
    Generates the panoptic ground truth for evaluation, where values are 0,1,2,3,...
        11000, 11001, ..., 18000, 18001, ignore_index(general 255).

    Args:
        ignore_index (int): The ignore label for semantic segmentation.
        rgb2id (function): Function, panoptic label is encoded in a colored image, this function converts color to the
            corresponding panoptic label.
        label_divisor(int, optional): An Integer, used to convert panoptic id = semantic id * label_divisor + instance_id. Default: 1000.
    """

    def __init__(self, ignore_index, rgb2id, label_divisor=1000):
        self.ignore_index = ignore_index
        self.rgb2id = rgb2id
        self.label_divisor = label_divisor

    @property
    def ingore_index(self):
        # Misspelled alias of `ignore_index`, kept readable and writable so
        # existing code that uses the old attribute name keeps working.
        return self.ignore_index

    @ingore_index.setter
    def ingore_index(self, value):
        self.ignore_index = value

    def __call__(self, panoptic, segments):
        """
        Generates the raw panoptic target

        Args:
            panoptic (numpy.array): colored image encoding panoptic label.
            segments (list): A list of dictionary containing information of every segment, it has fields:
                - id: panoptic id, after decoding `panoptic`.
                - category_id: semantic class id.
                - area: segment area.
                - bbox: segment bounding box.
                - iscrowd: crowd region.
                Only `id` and `category_id` are read here.

        Returns:
            A dictionary with fields:
                - panoptic: np.ndarray, panoptic label, shape=(H, W). Pixels not covered by any segment
                    hold `ignore_index`.
        """
        panoptic = self.rgb2id(panoptic)
        # int64 storage so that `cat_id * label_divisor + ins_id` cannot
        # overflow a narrower dtype inherited from the decoded image.
        raw_panoptic = np.full(
            panoptic.shape, self.ignore_index, dtype=np.int64)
        for seg in segments:
            cat_id = seg['category_id']
            seg_id = seg['id']
            # Crowd segments are not skipped here; they are encoded like any
            # other segment.
            if seg_id < 1000:
                # Ids below 1000 carry no instance part: store the class id.
                raw_panoptic[panoptic == seg_id] = cat_id
            else:
                ins_id = seg_id % self.label_divisor
                raw_panoptic[panoptic ==
                             seg_id] = cat_id * self.label_divisor + ins_id
        return dict(panoptic=raw_panoptic.astype('long'))