Commit 52902342 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Merge pull request #10537 from srihari-humbarwadi:panoptic-deeplab

PiperOrigin-RevId: 452568716
parents 0290848d 1f765c55
...@@ -83,6 +83,12 @@ ResNet-50 | 3x | `panoptic_fpn_coco` | 40.64 | 36.29 ...@@ -83,6 +83,12 @@ ResNet-50 | 3x | `panoptic_fpn_coco` | 40.64 | 36.29
**Note**: Here 1x schedule refers to ~12 epochs
### Panoptic Deeplab
Backbone | Experiment name | Overall PQ | Things PQ | Stuff PQ | Checkpoints
:---------------------| :-------------------------------| ---------- | --------- | -------- | ------------:
Dilated ResNet-50 | `panoptic_deeplab_resnet_coco` | 36.80 | 37.51 | 35.73 | [ckpt](gs://tf_model_garden/vision/panoptic/panoptic_deeplab/coco/resnet50)
Dilated ResNet-101 | `panoptic_deeplab_resnet_coco` | 38.39 | 39.47 | 36.75 | [ckpt](gs://tf_model_garden/vision/panoptic/panoptic_deeplab/coco/resnet101)
___
## Citation
```
...@@ -94,4 +100,12 @@ ___ ...@@ -94,4 +100,12 @@ ___
archivePrefix={arXiv}, archivePrefix={arXiv},
primaryClass={cs.CV} primaryClass={cs.CV}
} }
@article{Cheng2020PanopticDeepLabAS,
title={Panoptic-DeepLab: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation},
author={Bowen Cheng and Maxwell D. Collins and Yukun Zhu and Ting Liu and Thomas S. Huang and Hartwig Adam and Liang-Chieh Chen},
journal={2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2020},
pages={12472-12482}
}
```
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Panoptic Deeplab configuration definition."""
import dataclasses
import os
from typing import List, Optional, Union
import numpy as np
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.configs import common
from official.vision.configs import decoders
from official.vision.configs.google import backbones
_COCO_INPUT_PATH_BASE = 'coco/tfrecords'
_COCO_TRAIN_EXAMPLES = 118287
_COCO_VAL_EXAMPLES = 5000
@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Panoptic deeplab parser."""
  ignore_label: int = 0
  # If resize_eval_groundtruth is set to False, original image sizes are used
  # for eval. In that case, groundtruth_padded_size has to be specified too to
  # allow for batching the variable input sizes of images.
  resize_eval_groundtruth: bool = True
  groundtruth_padded_size: List[int] = dataclasses.field(default_factory=list)
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  aug_rand_hflip: bool = True
  # Use a default_factory so each Parser config owns its Augmentation config
  # instead of sharing a single mutable default instance across configs.
  aug_type: common.Augmentation = dataclasses.field(
      default_factory=common.Augmentation)
  # Standard deviation of the 2D Gaussian used to encode instance centers.
  sigma: float = 8.0
  small_instance_area_threshold: int = 4096
  small_instance_weight: float = 3.0
  # The annotation is required: without it, `dtype` is a plain class attribute
  # rather than a dataclass field, so it could not be overridden via config.
  dtype: str = 'float32'
@dataclasses.dataclass
class TfExampleDecoder(common.TfExampleDecoder):
  """A simple TF Example decoder config."""
  # TF Example feature keys holding the encoded panoptic masks.
  panoptic_category_mask_key: str = 'image/panoptic/category_mask'
  panoptic_instance_mask_key: str = 'image/panoptic/instance_mask'
@dataclasses.dataclass
class DataDecoder(common.DataDecoder):
  """Data decoder config."""
  # default_factory avoids sharing one mutable TfExampleDecoder config object
  # as the default across every DataDecoder instance.
  simple_decoder: TfExampleDecoder = dataclasses.field(
      default_factory=TfExampleDecoder)
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  # default_factory so each DataConfig owns fresh decoder/parser sub-configs
  # rather than sharing mutable default instances.
  decoder: DataDecoder = dataclasses.field(default_factory=DataDecoder)
  parser: Parser = dataclasses.field(default_factory=Parser)
  input_path: str = ''
  drop_remainder: bool = True
  file_type: str = 'tfrecord'
  is_training: bool = True
  global_batch_size: int = 1
@dataclasses.dataclass
class PanopticDeeplabHead(hyperparams.Config):
  """Panoptic Deeplab head config."""
  # Decoder feature level the head reads from.
  level: int = 3
  # Number of conv layers before the prediction layer.
  num_convs: int = 2
  num_filters: int = 256
  kernel_size: int = 5
  use_depthwise_convolution: bool = False
  upsample_factor: int = 1
  # Backbone levels fused in as low-level (skip) features, with matching
  # per-level filter counts below.
  low_level: List[int] = dataclasses.field(default_factory=lambda: [3, 2])
  low_level_num_filters: List[int] = dataclasses.field(
      default_factory=lambda: [64, 32])
  fusion_num_output_filters: int = 256
@dataclasses.dataclass
class SemanticHead(PanopticDeeplabHead):
  """Semantic head config."""
  # Kernel size of the final prediction convolution.
  prediction_kernel_size: int = 1
@dataclasses.dataclass
class InstanceHead(PanopticDeeplabHead):
  """Instance head config."""
  # Kernel size of the final prediction convolution.
  prediction_kernel_size: int = 1
@dataclasses.dataclass
class PanopticDeeplabPostProcessor(hyperparams.Config):
  """Panoptic Deeplab PostProcessing config."""
  # [height, width] of the post-processed panoptic prediction.
  output_size: List[int] = dataclasses.field(
      default_factory=list)
  # Minimum center-heatmap score for a peak to be kept as an instance center.
  center_score_threshold: float = 0.1
  # Category ids treated as "things" (instances); everything else is "stuff".
  thing_class_ids: List[int] = dataclasses.field(default_factory=list)
  # NOTE(review): presumably the multiplier combining semantic and instance
  # ids into a single panoptic id — confirm against the merge op.
  label_divisor: int = 256 * 256 * 256
  # Area threshold (in pixels) applied to stuff segments during merging.
  stuff_area_limit: int = 4096
  ignore_label: int = 0
  # Kernel size of the max-pooling-based NMS on the center heatmap.
  nms_kernel: int = 7
  # Maximum number of instance centers kept after NMS.
  keep_k_centers: int = 200
  # If True, predictions are rescaled back to the original image size.
  rescale_predictions: bool = True
@dataclasses.dataclass
class PanopticDeeplab(hyperparams.Config):
  """Panoptic Deeplab model config."""
  num_classes: int = 2
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 6
  # default_factory so each model config owns fresh sub-config objects
  # instead of sharing mutable default instances across configs.
  norm_activation: common.NormActivation = dataclasses.field(
      default_factory=common.NormActivation)
  backbone: backbones.Backbone = dataclasses.field(
      default_factory=lambda: backbones.Backbone(
          type='resnet', resnet=backbones.ResNet()))
  decoder: decoders.Decoder = dataclasses.field(
      default_factory=lambda: decoders.Decoder(type='aspp'))
  semantic_head: SemanticHead = dataclasses.field(default_factory=SemanticHead)
  instance_head: InstanceHead = dataclasses.field(default_factory=InstanceHead)
  # If True, the instance branch reuses the semantic decoder.
  shared_decoder: bool = False
  generate_panoptic_masks: bool = True
  post_processor: PanopticDeeplabPostProcessor = dataclasses.field(
      default_factory=PanopticDeeplabPostProcessor)
@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Panoptic Deeplab losses config."""
  label_smoothing: float = 0.0
  # Pixels with this label are masked out of the losses.
  ignore_label: int = 0
  # Optional per-class weights for the segmentation loss; empty means uniform.
  class_weights: List[float] = dataclasses.field(default_factory=list)
  l2_weight_decay: float = 1e-4
  # Fraction of pixels kept for bootstrapped (top-k) cross entropy.
  top_k_percent_pixels: float = 0.15
  segmentation_loss_weight: float = 1.0
  center_heatmap_loss_weight: float = 200
  center_offset_loss_weight: float = 0.01
@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  """Evaluation config."""
  # Label excluded from the panoptic quality computation.
  ignored_label: int = 0
  max_instances_per_category: int = 256
  # NOTE(review): presumably the offset used by the panoptic quality
  # evaluator to combine category and instance ids — confirm in evaluator.
  offset: int = 256 * 256 * 256
  # Per-category thing/stuff flags, indexed by category id.
  is_thing: List[float] = dataclasses.field(
      default_factory=list)
  rescale_predictions: bool = True
  report_per_class_pq: bool = False
  report_per_class_iou: bool = False
  report_train_mean_iou: bool = True  # Turning this off can speed up training.
@dataclasses.dataclass
class PanopticDeeplabTask(cfg.TaskConfig):
  """Panoptic deeplab task config."""
  # default_factory so every task instance gets fresh sub-config objects
  # instead of sharing mutable default instances.
  model: PanopticDeeplab = dataclasses.field(default_factory=PanopticDeeplab)
  train_data: DataConfig = dataclasses.field(
      default_factory=lambda: DataConfig(is_training=True))
  validation_data: DataConfig = dataclasses.field(
      default_factory=lambda: DataConfig(
          is_training=False, drop_remainder=False))
  losses: Losses = dataclasses.field(default_factory=Losses)
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  evaluation: Evaluation = dataclasses.field(default_factory=Evaluation)
@exp_factory.register_config_factory('panoptic_deeplab_resnet_coco')
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Deeplab.

  Returns:
    A `cfg.ExperimentConfig` for training Panoptic Deeplab with a dilated
    ResNet-50 backbone on COCO panoptic segmentation.
  """
  train_steps = 200000
  train_batch_size = 64
  eval_batch_size = 1
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  # 200 panoptic categories plus index 0, which is reserved as the void label.
  num_panoptic_categories = 201
  num_thing_categories = 91
  ignore_label = 0

  # is_thing[idx] is True for "thing" categories (ids 1..num_thing_categories)
  # and False for "stuff" categories and the void class at index 0.
  is_thing = [False]
  for idx in range(1, num_panoptic_categories):
    is_thing.append(True if idx <= num_thing_categories else False)

  input_size = [640, 640, 3]
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  # Decoder/head feature level satisfying 2**level == output_stride.
  # `np.math.log2` is deprecated (removed in NumPy 2.0); use `np.log2`.
  level = int(np.log2(output_stride))

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=PanopticDeeplabTask(
          init_checkpoint='gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticDeeplab(
              num_classes=num_panoptic_categories,
              input_size=input_size,
              backbone=backbones.Backbone(
                  type='dilated_resnet', dilated_resnet=backbones.DilatedResNet(
                      model_id=50,
                      stem_type=stem_type,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      se_ratio=0.25,
                      last_stage_repeats=1,
                      stochastic_depth_drop_rate=0.2)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      num_filters=256,
                      pool_kernel_size=input_size[:2],
                      dilation_rates=aspp_dilation_rates,
                      use_depthwise_convolution=True,
                      dropout_rate=0.1)),
              semantic_head=SemanticHead(
                  level=level,
                  num_convs=1,
                  num_filters=256,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[64, 32],
                  fusion_num_output_filters=256,
                  prediction_kernel_size=1),
              instance_head=InstanceHead(
                  level=level,
                  num_convs=1,
                  num_filters=32,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[32, 16],
                  fusion_num_output_filters=128,
                  prediction_kernel_size=1),
              shared_decoder=False,
              generate_panoptic_masks=True,
              post_processor=PanopticDeeplabPostProcessor(
                  output_size=input_size[:2],
                  center_score_threshold=0.1,
                  # NOTE(review): range(1, num_thing_categories) yields ids
                  # 1..90; confirm whether thing id 91 should be included.
                  thing_class_ids=list(range(1, num_thing_categories)),
                  label_divisor=256,
                  stuff_area_limit=4096,
                  ignore_label=ignore_label,
                  nms_kernel=41,
                  keep_k_centers=200,
                  rescale_predictions=True)),
          losses=Losses(
              label_smoothing=0.0,
              ignore_label=ignore_label,
              l2_weight_decay=0.0,
              top_k_percent_pixels=0.2,
              segmentation_loss_weight=1.0,
              center_heatmap_loss_weight=200,
              center_offset_loss_weight=0.01),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_scale_min=0.5,
                  aug_scale_max=1.5,
                  aug_rand_hflip=True,
                  aug_type=common.Augmentation(
                      type='autoaug',
                      autoaug=common.AutoAugment(
                          augmentation_name='panoptic_deeplab_policy')),
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              parser=Parser(
                  resize_eval_groundtruth=False,
                  groundtruth_padded_size=[640, 640],
                  aug_scale_min=1.0,
                  aug_scale_max=1.0,
                  aug_rand_hflip=False,
                  aug_type=None,
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0),
              drop_remainder=False),
          evaluation=Evaluation(
              ignored_label=ignore_label,
              max_instances_per_category=256,
              offset=256*256*256,
              is_thing=is_thing,
              rescale_predictions=True,
              report_per_class_pq=False,
              report_per_class_iou=False,
              report_train_mean_iou=False)),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.0005,
                      'decay_steps': train_steps,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data parser and processing for Panoptic Deeplab."""
from typing import List, Optional
import numpy as np
import tensorflow as tf
from official.vision.configs import common
from official.vision.dataloaders import parser
from official.vision.dataloaders import tf_example_decoder
from official.vision.ops import augment
from official.vision.ops import preprocess_ops
def _compute_gaussian_from_std(sigma):
  """Computes the Gaussian and its size from a given standard deviation.

  Args:
    sigma: standard deviation of the Gaussian.

  Returns:
    A tuple (gaussian, size): a [size, size] float32 `tf.Tensor` holding an
    unnormalized 2D Gaussian whose peak sits at (3 * sigma + 1, 3 * sigma + 1),
    and the integer window size `int(6 * sigma + 3)`.
  """
  size = int(6 * sigma + 3)
  # `np.float` was removed in NumPy 1.24; the builtin `float` is the same
  # dtype (float64) and is the supported spelling.
  x = np.arange(size, dtype=float)
  y = x[:, np.newaxis]
  x0, y0 = 3 * sigma + 1, 3 * sigma + 1
  gaussian = tf.constant(
      np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2)),
      dtype=tf.float32)
  return gaussian, size
class TfExampleDecoder(tf_example_decoder.TfExampleDecoder):
  """Tensorflow Example proto decoder."""

  def __init__(
      self,
      regenerate_source_id: bool,
      panoptic_category_mask_key: str = 'image/panoptic/category_mask',
      panoptic_instance_mask_key: str = 'image/panoptic/instance_mask'):
    """Initializes the decoder.

    Args:
      regenerate_source_id: whether to regenerate the source id; forwarded to
        the base decoder.
      panoptic_category_mask_key: TF Example feature key of the encoded
        panoptic category mask.
      panoptic_instance_mask_key: TF Example feature key of the encoded
        panoptic instance mask.
    """
    super().__init__(
        include_mask=True, regenerate_source_id=regenerate_source_id)
    self._panoptic_category_mask_key = panoptic_category_mask_key
    self._panoptic_instance_mask_key = panoptic_instance_mask_key
    # Both panoptic masks are stored as encoded image strings.
    self._panoptic_keys_to_features = {
        key: tf.io.FixedLenFeature((), tf.string, default_value='')
        for key in (panoptic_category_mask_key, panoptic_instance_mask_key)
    }

  def decode(self, serialized_example):
    """Decodes a serialized example, adding panoptic mask tensors."""
    decoded_tensors = super().decode(serialized_example)
    panoptic_features = tf.io.parse_single_example(
        serialized_example, self._panoptic_keys_to_features)

    def _decode_mask(key):
      # Decode the single-channel mask and pin its static rank/channels.
      mask = tf.io.decode_image(panoptic_features[key], channels=1)
      mask.set_shape([None, None, 1])
      return mask

    decoded_tensors.update({
        'groundtruth_panoptic_category_mask':
            _decode_mask(self._panoptic_category_mask_key),
        'groundtruth_panoptic_instance_mask':
            _decode_mask(self._panoptic_instance_mask_key),
    })
    return decoded_tensors
class Parser(parser.Parser):
  """Parser to parse an image and its annotations into a dictionary of tensors."""

  def __init__(
      self,
      output_size: List[int],
      resize_eval_groundtruth: bool = True,
      groundtruth_padded_size: Optional[List[int]] = None,
      ignore_label: int = 0,
      aug_rand_hflip: bool = False,
      aug_scale_min: float = 1.0,
      aug_scale_max: float = 1.0,
      aug_type: Optional[common.Augmentation] = None,
      sigma: float = 8.0,
      small_instance_area_threshold: int = 4096,
      small_instance_weight: float = 3.0,
      dtype: str = 'float32'):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divided by the largest feature stride 2^max_level.
      resize_eval_groundtruth: `bool`, if True, eval groundtruth masks are
        resized to output_size.
      groundtruth_padded_size: `Tensor` or `list` for [height, width]. When
        resize_eval_groundtruth is set to False, the groundtruth masks are
        padded to this size.
      ignore_label: `int` the pixel with ignore label will not used for training
        and evaluation.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      aug_type: An optional Augmentation object with params for AutoAugment.
      sigma: `float`, standard deviation for generating 2D Gaussian to encode
        centers.
      small_instance_area_threshold: `int`, small instance area threshold.
      small_instance_weight: `float`, small instance weight.
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.

    Raises:
      ValueError: if `resize_eval_groundtruth` is False but
        `groundtruth_padded_size` is not provided, or if `aug_type.type` is an
        unsupported augmentation policy.
    """
    self._output_size = output_size
    self._resize_eval_groundtruth = resize_eval_groundtruth
    if (not resize_eval_groundtruth) and (groundtruth_padded_size is None):
      raise ValueError(
          'groundtruth_padded_size ([height, width]) needs to be'
          'specified when resize_eval_groundtruth is False.')
    self._groundtruth_padded_size = groundtruth_padded_size
    self._ignore_label = ignore_label

    # Data augmentation.
    self._aug_rand_hflip = aug_rand_hflip
    self._aug_scale_min = aug_scale_min
    self._aug_scale_max = aug_scale_max

    if aug_type and aug_type.type:
      if aug_type.type == 'autoaug':
        self._augmenter = augment.AutoAugment(
            augmentation_name=aug_type.autoaug.augmentation_name,
            cutout_const=aug_type.autoaug.cutout_const,
            translate_const=aug_type.autoaug.translate_const)
      else:
        raise ValueError('Augmentation policy {} not supported.'.format(
            aug_type.type))
    else:
      self._augmenter = None

    self._dtype = dtype
    self._sigma = sigma
    # Precompute the 2D Gaussian once; it is flattened so it can be applied
    # with a single tensor_scatter_nd_max per instance center.
    self._gaussian, self._gaussian_size = _compute_gaussian_from_std(
        self._sigma)
    self._gaussian = tf.reshape(self._gaussian, shape=[-1])
    self._small_instance_area_threshold = small_instance_area_threshold
    self._small_instance_weight = small_instance_weight

  def _resize_and_crop_mask(self, mask, image_info, is_training):
    """Resizes and crops mask using `image_info` dict."""
    height = image_info[0][0]
    width = image_info[0][1]
    mask = tf.reshape(mask, shape=[1, height, width, 1])
    # Shift labels up by 1 so that, after the zero-padding below, padded
    # pixels (which become -1 once the shift is undone) can be told apart
    # from real label 0.
    mask += 1

    if is_training or self._resize_eval_groundtruth:
      # image_info rows: [0] original size, [2] scale, [3] crop offset
      # (as used by preprocess_ops.resize_and_crop_*).
      image_scale = image_info[2, :]
      offset = image_info[3, :]
      mask = preprocess_ops.resize_and_crop_masks(
          mask,
          image_scale,
          self._output_size,
          offset)
    else:
      mask = tf.image.pad_to_bounding_box(
          mask, 0, 0,
          self._groundtruth_padded_size[0],
          self._groundtruth_padded_size[1])
    mask -= 1
    # Assign ignore label to the padded region.
    mask = tf.where(
        tf.equal(mask, -1),
        self._ignore_label * tf.ones_like(mask),
        mask)
    mask = tf.squeeze(mask, axis=0)
    return mask

  def _parse_data(self, data, is_training):
    """Parses decoded tensors into an (image, labels) pair.

    Args:
      data: dictionary of decoded tensors (as produced by `TfExampleDecoder`).
      is_training: `bool`, whether training-time augmentations are applied.

    Returns:
      A tuple of the preprocessed image and a labels dictionary containing
      category/instance masks, center heatmap/offsets, semantic weights,
      valid/things masks and `image_info`.
    """
    image = data['image']

    if self._augmenter is not None and is_training:
      image = self._augmenter.distort(image)

    image = preprocess_ops.normalize_image(image)

    category_mask = tf.cast(
        data['groundtruth_panoptic_category_mask'][:, :, 0],
        dtype=tf.float32)
    instance_mask = tf.cast(
        data['groundtruth_panoptic_instance_mask'][:, :, 0],
        dtype=tf.float32)

    # Flips image randomly during training.
    if self._aug_rand_hflip and is_training:
      # Stack both masks so image and masks are flipped consistently.
      masks = tf.stack([category_mask, instance_mask], axis=0)
      image, _, masks = preprocess_ops.random_horizontal_flip(
          image=image, masks=masks)
      category_mask = masks[0]
      instance_mask = masks[1]

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=self._aug_scale_min if is_training else 1.0,
        aug_scale_max=self._aug_scale_max if is_training else 1.0)

    category_mask = self._resize_and_crop_mask(
        category_mask,
        image_info,
        is_training=is_training)
    instance_mask = self._resize_and_crop_mask(
        instance_mask,
        image_info,
        is_training=is_training)
    (instance_centers_heatmap,
     instance_centers_offset,
     semantic_weights) = self._encode_centers_and_offets(
         instance_mask=instance_mask[:, :, 0])

    # Cast image and labels as self._dtype
    image = tf.cast(image, dtype=self._dtype)
    category_mask = tf.cast(category_mask, dtype=self._dtype)
    instance_mask = tf.cast(instance_mask, dtype=self._dtype)
    instance_centers_heatmap = tf.cast(
        instance_centers_heatmap, dtype=self._dtype)
    instance_centers_offset = tf.cast(
        instance_centers_offset, dtype=self._dtype)

    # valid_mask marks non-ignore pixels; things_mask marks pixels belonging
    # to some instance (non-ignore in the instance id mask).
    valid_mask = tf.not_equal(
        category_mask, self._ignore_label)
    things_mask = tf.not_equal(
        instance_mask, self._ignore_label)

    labels = {
        'category_mask': category_mask,
        'instance_mask': instance_mask,
        'instance_centers_heatmap': instance_centers_heatmap,
        'instance_centers_offset': instance_centers_offset,
        'semantic_weights': semantic_weights,
        'valid_mask': valid_mask,
        'things_mask': things_mask,
        'image_info': image_info
    }
    return image, labels

  def _parse_train_data(self, data):
    """Parses data for training."""
    return self._parse_data(data=data, is_training=True)

  def _parse_eval_data(self, data):
    """Parses data for evaluation."""
    return self._parse_data(data=data, is_training=False)

  def _encode_centers_and_offets(self, instance_mask):
    """Generates center heatmaps and offsets from instance id mask.

    (Note: "offets" in the method name is a historic typo for "offsets".)

    Args:
      instance_mask: `tf.Tensor` of shape [height, width] representing
        groundtruth instance id mask.

    Returns:
      instance_centers_heatmap: `tf.Tensor` of shape [height, width, 1]
      instance_centers_offset: `tf.Tensor` of shape [height, width, 2]
      semantic_weights: `tf.Tensor` of shape [height, width] with
        `small_instance_weight` on pixels of small instances, 1.0 elsewhere.
    """
    shape = tf.shape(instance_mask)
    height, width = shape[0], shape[1]

    # The heatmap is padded so Gaussian windows around centers near the
    # border can be scattered without out-of-range indices; the padding is
    # cropped off again at the end.
    padding_start = int(3 * self._sigma + 1)
    padding_end = int(3 * self._sigma + 2)

    # padding should be equal to self._gaussian_size which is calculated
    # as size = int(6 * sigma + 3)
    padding = padding_start + padding_end

    instance_centers_heatmap = tf.zeros(
        shape=[height + padding, width + padding],
        dtype=tf.float32)
    centers_offset_y = tf.zeros(
        shape=[height, width],
        dtype=tf.float32)
    centers_offset_x = tf.zeros(
        shape=[height, width],
        dtype=tf.float32)
    semantic_weights = tf.ones(
        shape=[height, width],
        dtype=tf.float32)

    unique_instance_ids, _ = tf.unique(tf.reshape(instance_mask, [-1]))

    # The following method for encoding center heatmaps and offets is inspired
    # by the reference implementation available at
    # https://github.com/google-research/deeplab2/blob/main/data/sample_generator.py # pylint: disable=line-too-long
    for instance_id in unique_instance_ids:
      if instance_id == self._ignore_label:
        continue

      mask = tf.equal(instance_mask, instance_id)
      mask_area = tf.reduce_sum(tf.cast(mask, dtype=tf.float32))

      mask_indices = tf.cast(tf.where(mask), dtype=tf.float32)
      # Mass center of the instance, rounded to the nearest pixel.
      mask_center = tf.reduce_mean(mask_indices, axis=0)
      mask_center_y = tf.cast(tf.round(mask_center[0]), dtype=tf.int32)
      mask_center_x = tf.cast(tf.round(mask_center[1]), dtype=tf.int32)

      # Up-weight pixels of small instances in the semantic loss.
      if mask_area < self._small_instance_area_threshold:
        semantic_weights = tf.where(
            mask,
            self._small_instance_weight,
            semantic_weights)

      gaussian_size = self._gaussian_size
      indices_y = tf.range(mask_center_y, mask_center_y + gaussian_size)
      indices_x = tf.range(mask_center_x, mask_center_x + gaussian_size)

      indices = tf.stack(tf.meshgrid(indices_y, indices_x))
      indices = tf.reshape(
          indices, shape=[2, gaussian_size * gaussian_size])
      indices = tf.transpose(indices)

      # Scattering the window starting at the (unpadded) center places the
      # Gaussian peak at center + padding_start in the padded map, which is
      # exactly what the crop below removes. `max` keeps the strongest
      # response where Gaussians of nearby instances overlap.
      instance_centers_heatmap = tf.tensor_scatter_nd_max(
          tensor=instance_centers_heatmap,
          indices=indices,
          updates=self._gaussian)

      # Per-pixel (y, x) offset from each instance pixel to its center.
      centers_offset_y = tf.tensor_scatter_nd_update(
          tensor=centers_offset_y,
          indices=tf.cast(mask_indices, dtype=tf.int32),
          updates=tf.cast(mask_center_y, dtype=tf.float32) - mask_indices[:, 0])

      centers_offset_x = tf.tensor_scatter_nd_update(
          tensor=centers_offset_x,
          indices=tf.cast(mask_indices, dtype=tf.int32),
          updates=tf.cast(mask_center_x, dtype=tf.float32) - mask_indices[:, 1])

    # Crop the padding back off so the heatmap matches the input resolution.
    instance_centers_heatmap = instance_centers_heatmap[
        padding_start:padding_start + height,
        padding_start:padding_start + width]
    instance_centers_heatmap = tf.expand_dims(instance_centers_heatmap, axis=-1)

    instance_centers_offset = tf.stack(
        [centers_offset_y, centers_offset_x],
        axis=-1)

    return (instance_centers_heatmap,
            instance_centers_offset,
            semantic_weights)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses used for panoptic deeplab model."""
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.beta.projects.panoptic_maskrcnn.ops import mask_ops
EPSILON = 1e-5
class WeightedBootstrappedCrossEntropyLoss:
  """Weighted semantic segmentation loss."""

  def __init__(self, label_smoothing, class_weights, ignore_label,
               top_k_percent_pixels=1.0):
    """Initializes the loss.

    Args:
      label_smoothing: `float`, amount of label smoothing applied to the
        one-hot labels.
      class_weights: list of per-class loss weights; empty/None means uniform.
      ignore_label: `int`, pixels with this label are masked out of the loss.
      top_k_percent_pixels: `float` in (0, 1]; when < 1, only the hardest
        top-k fraction of pixels per sample contributes (bootstrapping).
    """
    self._top_k_percent_pixels = top_k_percent_pixels
    self._class_weights = class_weights
    self._ignore_label = ignore_label
    self._label_smoothing = label_smoothing

  def __call__(self, logits, labels, sample_weight=None):
    """Computes the weighted (optionally bootstrapped) cross entropy.

    Args:
      logits: `tf.Tensor` of shape [batch, h, w, num_classes].
      labels: integer label `tf.Tensor` of shape [batch, height, width, 1].
      sample_weight: optional per-pixel weight tensor.

    Returns:
      A scalar loss tensor.

    Raises:
      ValueError: if `class_weights` is set but its length does not match
        `num_classes`.
    """
    _, _, _, num_classes = logits.get_shape().as_list()
    # Upsample logits to the label resolution before computing the loss.
    logits = tf.image.resize(
        logits, tf.shape(labels)[1:3],
        method=tf.image.ResizeMethod.BILINEAR)

    valid_mask = tf.not_equal(labels, self._ignore_label)
    normalizer = tf.reduce_sum(tf.cast(valid_mask, tf.float32)) + EPSILON
    # Assign pixel with ignore label to class 0 (background). The loss on the
    # pixel will later be masked out.
    labels = tf.where(valid_mask, labels, tf.zeros_like(labels))

    labels = tf.squeeze(tf.cast(labels, tf.int32), axis=3)
    valid_mask = tf.squeeze(tf.cast(valid_mask, tf.float32), axis=3)
    onehot_labels = tf.one_hot(labels, num_classes)
    onehot_labels = onehot_labels * (
        1 - self._label_smoothing) + self._label_smoothing / num_classes
    cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits)

    if not self._class_weights:
      class_weights = [1] * num_classes
    else:
      class_weights = self._class_weights

    if num_classes != len(class_weights):
      raise ValueError(
          'Length of class_weights should be {}'.format(num_classes))

    # Per-pixel weight looked up from each pixel's class.
    weight_mask = tf.einsum('...y,y->...',
                            tf.one_hot(labels, num_classes, dtype=tf.float32),
                            tf.constant(class_weights, tf.float32))
    valid_mask *= weight_mask

    if sample_weight is not None:
      valid_mask *= sample_weight
    cross_entropy_loss *= tf.cast(valid_mask, tf.float32)

    if self._top_k_percent_pixels >= 1.0:
      loss = tf.reduce_sum(cross_entropy_loss) / normalizer
    else:
      loss = self._compute_top_k_loss(cross_entropy_loss)
    return loss

  def _compute_top_k_loss(self, loss):
    """Computes top k loss."""
    batch_size = tf.shape(loss)[0]
    loss = tf.reshape(loss, shape=[batch_size, -1])

    top_k_pixels = tf.cast(
        self._top_k_percent_pixels *
        tf.cast(tf.shape(loss)[-1], dtype=tf.float32),
        dtype=tf.int32)

    # shape: [batch_size, top_k_pixels]
    per_sample_top_k_loss = tf.map_fn(
        fn=lambda x: tf.nn.top_k(x, k=top_k_pixels, sorted=False)[0],
        elems=loss,
        parallel_iterations=32,
        fn_output_signature=tf.float32)

    # shape: [batch_size]
    # Normalize by the count of non-zero top-k losses so masked-out pixels do
    # not dilute the per-sample mean.
    per_sample_normalizer = tf.reduce_sum(
        tf.cast(
            tf.not_equal(per_sample_top_k_loss, 0.0),
            dtype=tf.float32),
        axis=-1) + EPSILON
    per_sample_normalized_loss = tf.reduce_sum(
        per_sample_top_k_loss, axis=-1) / per_sample_normalizer

    normalized_loss = tf_utils.safe_mean(per_sample_normalized_loss)
    return normalized_loss
class CenterHeatmapLoss:
  """Center heatmap loss (mean squared error on center heatmaps)."""

  def __init__(self):
    self._loss_fn = tf.losses.mean_squared_error

  def __call__(self, logits, labels, sample_weight=None):
    """Resizes predictions to the label resolution and computes the MSE."""
    label_shape = labels.get_shape().as_list()
    target_height, target_width = label_shape[1], label_shape[2]
    resized_logits = tf.image.resize(
        logits,
        size=[target_height, target_width],
        method=tf.image.ResizeMethod.BILINEAR)
    loss = self._loss_fn(y_true=labels, y_pred=resized_logits)
    if sample_weight is not None:
      loss = loss * sample_weight
    return tf_utils.safe_mean(loss)
class CenterOffsetLoss:
  """Center offset loss (mean absolute error on center offset maps)."""

  def __init__(self):
    self._loss_fn = tf.losses.mean_absolute_error

  def __call__(self, logits, labels, sample_weight=None):
    """Resizes/rescales predicted offsets to the label size, then MAE."""
    label_shape = labels.get_shape().as_list()
    target_height, target_width = label_shape[1], label_shape[2]
    # Offsets must be rescaled along with the spatial resize.
    resized_logits = mask_ops.resize_and_rescale_offsets(
        logits, target_size=[target_height, target_width])
    loss = self._loss_fn(y_true=labels, y_pred=resized_logits)
    if sample_weight is not None:
      loss = loss * sample_weight
    return tf_utils.safe_mean(loss)
...@@ -13,12 +13,17 @@ ...@@ -13,12 +13,17 @@
# limitations under the License. # limitations under the License.
"""Factory method to build panoptic segmentation model.""" """Factory method to build panoptic segmentation model."""
from typing import Optional
import tensorflow as tf import tensorflow as tf
from official.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn from official.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator
from official.vision.modeling import backbones from official.vision.modeling import backbones
from official.vision.modeling.decoders import factory as decoder_factory from official.vision.modeling.decoders import factory as decoder_factory
...@@ -142,3 +147,104 @@ def build_panoptic_maskrcnn( ...@@ -142,3 +147,104 @@ def build_panoptic_maskrcnn(
aspect_ratios=model_config.anchor.aspect_ratios, aspect_ratios=model_config.anchor.aspect_ratios,
anchor_size=model_config.anchor.anchor_size) anchor_size=model_config.anchor.anchor_size)
return model return model
def build_panoptic_deeplab(
    input_specs: tf.keras.layers.InputSpec,
    model_config: panoptic_deeplab_cfg.PanopticDeeplab,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds Panoptic Deeplab model.

  Args:
    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
    model_config: Config instance for the panoptic deeplab model.
    l2_regularizer: Optional `tf.keras.regularizers.Regularizer`, if specified,
      the model is built with the provided regularization layer.

  Returns:
    tf.keras.Model for the panoptic segmentation model.
  """
  norm_activation_config = model_config.norm_activation
  backbone = backbones.factory.build_backbone(
      input_specs=input_specs,
      backbone_config=model_config.backbone,
      norm_activation_config=norm_activation_config,
      l2_regularizer=l2_regularizer)

  # Decoder type/params are taken from model_config by the decoder factory.
  semantic_decoder = decoder_factory.build_decoder(
      input_specs=backbone.output_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)

  if model_config.shared_decoder:
    # NOTE(review): presumably the model falls back to the semantic decoder
    # for the instance branch when this is None — confirm in
    # PanopticDeeplabModel.
    instance_decoder = None
  else:
    # semantic and instance share the same decoder type
    instance_decoder = decoder_factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  semantic_head_config = model_config.semantic_head
  instance_head_config = model_config.instance_head

  semantic_head = panoptic_deeplab_heads.SemanticHead(
      num_classes=model_config.num_classes,
      level=semantic_head_config.level,
      num_convs=semantic_head_config.num_convs,
      kernel_size=semantic_head_config.kernel_size,
      prediction_kernel_size=semantic_head_config.prediction_kernel_size,
      num_filters=semantic_head_config.num_filters,
      use_depthwise_convolution=semantic_head_config.use_depthwise_convolution,
      upsample_factor=semantic_head_config.upsample_factor,
      low_level=semantic_head_config.low_level,
      low_level_num_filters=semantic_head_config.low_level_num_filters,
      fusion_num_output_filters=semantic_head_config.fusion_num_output_filters,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  instance_head = panoptic_deeplab_heads.InstanceHead(
      level=instance_head_config.level,
      num_convs=instance_head_config.num_convs,
      kernel_size=instance_head_config.kernel_size,
      prediction_kernel_size=instance_head_config.prediction_kernel_size,
      num_filters=instance_head_config.num_filters,
      use_depthwise_convolution=instance_head_config.use_depthwise_convolution,
      upsample_factor=instance_head_config.upsample_factor,
      low_level=instance_head_config.low_level,
      low_level_num_filters=instance_head_config.low_level_num_filters,
      fusion_num_output_filters=instance_head_config.fusion_num_output_filters,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  # Only build the merge/post-processing layer when panoptic masks are
  # requested; otherwise the model emits raw head outputs.
  if model_config.generate_panoptic_masks:
    post_processing_config = model_config.post_processor
    post_processor = panoptic_deeplab_merge.PostProcessor(
        output_size=post_processing_config.output_size,
        center_score_threshold=post_processing_config.center_score_threshold,
        thing_class_ids=post_processing_config.thing_class_ids,
        label_divisor=post_processing_config.label_divisor,
        stuff_area_limit=post_processing_config.stuff_area_limit,
        ignore_label=post_processing_config.ignore_label,
        nms_kernel=post_processing_config.nms_kernel,
        keep_k_centers=post_processing_config.keep_k_centers,
        rescale_predictions=post_processing_config.rescale_predictions)
  else:
    post_processor = None

  model = panoptic_deeplab_model.PanopticDeeplabModel(
      backbone=backbone,
      semantic_decoder=semantic_decoder,
      instance_decoder=instance_decoder,
      semantic_head=semantic_head,
      instance_head=instance_head,
      post_processor=post_processor)

  return model
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory
from official.vision.configs import backbones
        model_config=model_config,
        l2_regularizer=l2_regularizer)
class PanopticDeeplabBuilderTest(parameterized.TestCase, tf.test.TestCase):
  """Smoke tests for building Panoptic Deeplab models from configs."""

  @combinations.generate(
      combinations.combine(
          input_size=[(640, 640), (512, 512)],
          backbone_type=['resnet', 'dilated_resnet'],
          decoder_type=['aspp', 'fpn'],
          level=[2, 3, 4],
          low_level=[(4, 3), (3, 2)],
          shared_decoder=[True, False],
          generate_panoptic_masks=[True, False]))
  def test_builder(self, input_size, backbone_type,
                   level, low_level, decoder_type,
                   shared_decoder, generate_panoptic_masks):
    """Builds a model for every combination and checks it does not raise."""
    height, width = input_size
    # Semantic and instance heads share the same level/low_level settings
    # in this test.
    semantic_head_config = panoptic_deeplab_cfg.SemanticHead(
        level=level,
        num_convs=1,
        kernel_size=5,
        prediction_kernel_size=1,
        low_level=low_level)
    instance_head_config = panoptic_deeplab_cfg.InstanceHead(
        level=level,
        num_convs=1,
        kernel_size=5,
        prediction_kernel_size=1,
        low_level=low_level)
    model_config = panoptic_deeplab_cfg.PanopticDeeplab(
        num_classes=10,
        input_size=input_size,
        backbone=backbones.Backbone(type=backbone_type),
        decoder=decoders.Decoder(type=decoder_type),
        semantic_head=semantic_head_config,
        instance_head=instance_head_config,
        shared_decoder=shared_decoder,
        generate_panoptic_masks=generate_panoptic_masks)
    # The factory call itself is the assertion: any incompatibility between
    # the options above should surface here as an exception.
    _ = factory.build_panoptic_deeplab(
        input_specs=tf.keras.layers.InputSpec(shape=[None, height, width, 3]),
        model_config=model_config,
        l2_regularizer=tf.keras.regularizers.l2(5e-5))
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for Panoptic Deeplab heads."""
from typing import List, Union, Optional, Mapping, Tuple
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import fusion_layers
from official.vision.ops import spatial_transform_ops
class PanopticDeeplabHead(tf.keras.layers.Layer):
  """Creates a panoptic deeplab head.

  The head fuses decoder output with low-level backbone features via a
  `fusion_layers.PanopticDeepLabFusion` layer, then applies a stack of
  (optionally depthwise-separable) convolutions, each followed by batch
  normalization and an activation. `SemanticHead` and `InstanceHead`
  subclass this and add their own prediction layers on top.
  """

  def __init__(
      self,
      level: Union[int, str],
      num_convs: int = 2,
      num_filters: int = 256,
      kernel_size: int = 3,
      use_depthwise_convolution: bool = False,
      upsample_factor: int = 1,
      low_level: Optional[List[int]] = None,
      low_level_num_filters: Optional[List[int]] = None,
      fusion_num_output_filters: int = 256,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a panoptic deeplab head.

    Args:
      level: An `int` or `str`, level of the decoder output used to build the
        head.
      num_convs: An `int` number of stacked convolutions before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      kernel_size: An `int` number to specify the kernel size of the
        stacked convolutions before the last prediction layer.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      low_level: A list of `int` backbone levels to be fused with the decoder
        output by the fusion layer.
      low_level_num_filters: A list of `int`, reduced numbers of filters for
        the low level features before fusing them with higher level features;
        one entry per level in `low_level`.
      fusion_num_output_filters: An `int` number to specify the number of
        filters used by output layer of fusion module. Default is 256.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(PanopticDeeplabHead, self).__init__(**kwargs)
    # Stored verbatim so get_config()/from_config() can round-trip the layer.
    self._config_dict = {
        'level': level,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'kernel_size': kernel_size,
        'use_depthwise_convolution': use_depthwise_convolution,
        'upsample_factor': upsample_factor,
        'low_level': low_level,
        'low_level_num_filters': low_level_num_filters,
        'fusion_num_output_filters': fusion_num_output_filters,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer
    }
    # Batch-norm axis follows the configured image data format.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the head."""
    kernel_size = self._config_dict['kernel_size']
    use_depthwise_convolution = self._config_dict['use_depthwise_convolution']
    random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
    conv_op = tf.keras.layers.Conv2D
    conv_kwargs = {
        # With depthwise-separable convs the spatial filtering happens in the
        # DepthwiseConv2D below, so the pointwise conv uses a 1x1 kernel.
        'kernel_size': kernel_size if not use_depthwise_convolution else 1,
        'padding': 'same',
        'use_bias': True,
        'kernel_initializer': random_initializer,
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
    }
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Fuses the decoder output with the configured low-level backbone features.
    self._panoptic_deeplab_fusion = fusion_layers.PanopticDeepLabFusion(
        level=self._config_dict['level'],
        low_level=self._config_dict['low_level'],
        num_projection_filters=self._config_dict['low_level_num_filters'],
        num_output_filters=self._config_dict['fusion_num_output_filters'],
        use_depthwise_convolution=self
        ._config_dict['use_depthwise_convolution'],
        activation=self._config_dict['activation'],
        use_sync_bn=self._config_dict['use_sync_bn'],
        norm_momentum=self._config_dict['norm_momentum'],
        norm_epsilon=self._config_dict['norm_epsilon'],
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])

    # Stacked convolutions layers. With depthwise convolution enabled, each
    # "conv" is a depthwise conv + norm followed by a pointwise conv + norm,
    # so _convs and _norms stay index-aligned for the zip() in call().
    self._convs = []
    self._norms = []
    for i in range(self._config_dict['num_convs']):
      if use_depthwise_convolution:
        self._convs.append(
            tf.keras.layers.DepthwiseConv2D(
                name='panoptic_deeplab_head_depthwise_conv_{}'.format(i),
                kernel_size=kernel_size,
                padding='same',
                use_bias=True,
                depthwise_initializer=random_initializer,
                depthwise_regularizer=self._config_dict['kernel_regularizer'],
                depth_multiplier=1))
        norm_name = 'panoptic_deeplab_head_depthwise_norm_{}'.format(i)
        self._norms.append(bn_op(name=norm_name, **bn_kwargs))
      conv_name = 'panoptic_deeplab_head_conv_{}'.format(i)
      self._convs.append(
          conv_op(
              name=conv_name,
              filters=self._config_dict['num_filters'],
              **conv_kwargs))
      norm_name = 'panoptic_deeplab_head_norm_{}'.format(i)
      self._norms.append(bn_op(name=norm_name, **bn_kwargs))

    super().build(input_shape)

  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                               Union[tf.Tensor, Mapping[str, tf.Tensor]]],
           training=None):
    """Forward pass of the head.

    It supports both a tuple of 2 tensors or 2 dictionaries. The first is
    backbone endpoints, and the second is decoder endpoints. When inputs are
    tensors, they are from a single level of feature maps. When inputs are
    dictionaries, they contain multiple levels of feature maps, where the key
    is the index of feature map.

    Args:
      inputs: A tuple of 2 feature map tensors of shape
        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].
      training: A bool, runs the model in training/eval mode.

    Returns:
      A `tf.Tensor` of the fused backbone and decoder features.
    """
    if training is None:
      training = tf.keras.backend.learning_phase()
    x = self._panoptic_deeplab_fusion(inputs, training=training)
    for conv, norm in zip(self._convs, self._norms):
      x = conv(x)
      x = norm(x, training=training)
      x = self._activation(x)
    # Optional nearest-neighbor upsampling to produce a finer output mask.
    if self._config_dict['upsample_factor'] > 1:
      x = spatial_transform_ops.nearest_upsampling(
          x, scale=self._config_dict['upsample_factor'])
    return x

  def get_config(self):
    """Returns the config of this layer, merging in the base layer config."""
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(self._config_dict.items()))

  @classmethod
  def from_config(cls, config):
    """Recreates the layer from its `get_config()` output."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class SemanticHead(PanopticDeeplabHead):
  """Creates a semantic segmentation head.

  Extends `PanopticDeeplabHead` with a final classifier conv that produces
  per-pixel class logits.
  """

  def __init__(
      self,
      num_classes: int,
      level: Union[int, str],
      num_convs: int = 2,
      num_filters: int = 256,
      kernel_size: int = 3,
      prediction_kernel_size: int = 3,
      use_depthwise_convolution: bool = False,
      upsample_factor: int = 1,
      low_level: Optional[List[int]] = None,
      low_level_num_filters: Optional[List[int]] = None,
      fusion_num_output_filters: int = 256,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a semantic segmentation head.

    Args:
      num_classes: An `int` number of mask classification categories. The number
        of classes does not include background class.
      level: An `int` or `str`, level of the decoder output used to build the
        head.
      num_convs: An `int` number of stacked convolutions before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      kernel_size: An `int` number to specify the kernel size of the
        stacked convolutions before the last prediction layer.
      prediction_kernel_size: An `int` number to specify the kernel size of the
        prediction layer.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      low_level: A list of `int` backbone levels to be fused with the decoder
        output by the fusion layer.
      low_level_num_filters: A list of `int`, reduced numbers of filters for
        the low level features before fusing them with higher level features;
        one entry per level in `low_level`.
      fusion_num_output_filters: An `int` number to specify the number of
        filters used by output layer of fusion module. Default is 256.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(SemanticHead, self).__init__(
        level=level,
        num_convs=num_convs,
        num_filters=num_filters,
        use_depthwise_convolution=use_depthwise_convolution,
        kernel_size=kernel_size,
        upsample_factor=upsample_factor,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters,
        fusion_num_output_filters=fusion_num_output_filters,
        activation=activation,
        use_sync_bn=use_sync_bn,
        norm_momentum=norm_momentum,
        norm_epsilon=norm_epsilon,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        **kwargs)
    # Record the subclass-only options so serialization round-trips them too.
    self._config_dict.update({
        'num_classes': num_classes,
        'prediction_kernel_size': prediction_kernel_size})

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the semantic head."""
    super(SemanticHead, self).build(input_shape)
    # Final conv producing `num_classes` logits per pixel.
    self._classifier = tf.keras.layers.Conv2D(
        name='semantic_output',
        filters=self._config_dict['num_classes'],
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])

  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                               Union[tf.Tensor, Mapping[str, tf.Tensor]]],
           training=None):
    """Forward pass of the head.

    Fuses the (backbone, decoder) features via the base class, then applies
    the classifier conv to produce per-pixel class logits.
    """
    if training is None:
      training = tf.keras.backend.learning_phase()
    x = super(SemanticHead, self).call(inputs, training=training)
    outputs = self._classifier(x)
    return outputs
@tf.keras.utils.register_keras_serializable(package='Vision')
class InstanceHead(PanopticDeeplabHead):
  """Creates an instance center head.

  Extends `PanopticDeeplabHead` with two prediction convs: one producing an
  instance-center heatmap and one producing per-pixel center offsets.
  """

  def __init__(
      self,
      level: Union[int, str],
      num_convs: int = 2,
      num_filters: int = 256,
      kernel_size: int = 3,
      prediction_kernel_size: int = 3,
      use_depthwise_convolution: bool = False,
      upsample_factor: int = 1,
      low_level: Optional[List[int]] = None,
      low_level_num_filters: Optional[List[int]] = None,
      fusion_num_output_filters: int = 256,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes an instance center head.

    Args:
      level: An `int` or `str`, level of the decoder output used to build the
        head.
      num_convs: An `int` number of stacked convolutions before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      kernel_size: An `int` number to specify the kernel size of the
        stacked convolutions before the last prediction layer.
      prediction_kernel_size: An `int` number to specify the kernel size of the
        prediction layer.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      low_level: A list of `int` backbone levels to be fused with the decoder
        output by the fusion layer.
      low_level_num_filters: A list of `int`, reduced numbers of filters for
        the low level features before fusing them with higher level features;
        one entry per level in `low_level`.
      fusion_num_output_filters: An `int` number to specify the number of
        filters used by output layer of fusion module. Default is 256.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(InstanceHead, self).__init__(
        level=level,
        num_convs=num_convs,
        num_filters=num_filters,
        use_depthwise_convolution=use_depthwise_convolution,
        kernel_size=kernel_size,
        upsample_factor=upsample_factor,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters,
        fusion_num_output_filters=fusion_num_output_filters,
        activation=activation,
        use_sync_bn=use_sync_bn,
        norm_momentum=norm_momentum,
        norm_epsilon=norm_epsilon,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        **kwargs)
    # Record the subclass-only option so serialization round-trips it too.
    self._config_dict.update({
        'prediction_kernel_size': prediction_kernel_size})

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the instance head."""
    super(InstanceHead, self).build(input_shape)
    # Single-channel heatmap of instance center scores.
    self._instance_center_prediction_conv = tf.keras.layers.Conv2D(
        name='instance_centers_heatmap',
        filters=1,
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])
    # Two-channel regression of per-pixel offsets to the instance center.
    self._instance_center_regression_conv = tf.keras.layers.Conv2D(
        name='instance_centers_offset',
        filters=2,
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])

  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                               Union[tf.Tensor, Mapping[str, tf.Tensor]]],
           training=None):
    """Forward pass of the head.

    Returns a dict with keys `instance_centers_heatmap` (1 channel) and
    `instance_centers_offset` (2 channels), both computed from the fused
    features produced by the base class.
    """
    if training is None:
      training = tf.keras.backend.learning_phase()
    x = super(InstanceHead, self).call(inputs, training=training)
    instance_centers_heatmap = self._instance_center_prediction_conv(x)
    instance_centers_offset = self._instance_center_regression_conv(x)
    outputs = {
        'instance_centers_heatmap': instance_centers_heatmap,
        'instance_centers_offset': instance_centers_offset
    }
    return outputs
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_deeplab_heads.py."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
class PanopticDeeplabHeadsTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the Panoptic Deeplab semantic and instance heads."""

  # Each tuple is (level, low_level, low_level_num_filters). The original
  # list repeated (2, (2,), (48,)) and (3, (2,), (48,)) three times each;
  # parameterized runs every tuple as its own case, so the duplicates only
  # re-ran identical tests and are removed here.
  @parameterized.parameters(
      (2, (2,), (48,)),
      (3, (2,), (48,)),
      (4, (4, 3), (64, 32)),
      (4, (3, 2), (64, 32)))
  def test_forward(self, level, low_level, low_level_num_filters):
    """Checks output shapes of both heads on multilevel feature dicts."""
    backbone_features = {
        '2': np.random.rand(2, 256, 256, 16),
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
        '5': np.random.rand(2, 32, 32, 16),
    }
    decoder_features = {
        '2': np.random.rand(2, 256, 256, 64),
        '3': np.random.rand(2, 128, 128, 64),
        '4': np.random.rand(2, 64, 64, 64),
        '5': np.random.rand(2, 32, 32, 64),
        '6': np.random.rand(2, 16, 16, 64),
    }
    num_classes = 10
    semantic_head = panoptic_deeplab_heads.SemanticHead(
        num_classes=num_classes,
        level=level,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters)
    instance_head = panoptic_deeplab_heads.InstanceHead(
        level=level,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters)

    semantic_outputs = semantic_head((backbone_features, decoder_features))
    instance_outputs = instance_head((backbone_features, decoder_features))

    if str(level) in decoder_features:
      # Outputs are at the resolution of the last (lowest) fused level.
      h, w = decoder_features[str(low_level[-1])].shape[1:3]
      self.assertAllEqual(
          semantic_outputs.numpy().shape,
          [2, h, w, num_classes])
      self.assertAllEqual(
          instance_outputs['instance_centers_heatmap'].numpy().shape,
          [2, h, w, 1])
      self.assertAllEqual(
          instance_outputs['instance_centers_offset'].numpy().shape,
          [2, h, w, 2])

  def test_serialize_deserialize(self):
    """Checks that heads survive a get_config()/from_config() round trip."""
    semantic_head = panoptic_deeplab_heads.SemanticHead(num_classes=2, level=3)
    instance_head = panoptic_deeplab_heads.InstanceHead(level=3)

    semantic_head_config = semantic_head.get_config()
    instance_head_config = instance_head.get_config()

    new_semantic_head = panoptic_deeplab_heads.SemanticHead.from_config(
        semantic_head_config)
    new_instance_head = panoptic_deeplab_heads.InstanceHead.from_config(
        instance_head_config)

    self.assertAllEqual(semantic_head.get_config(),
                        new_semantic_head.get_config())
    self.assertAllEqual(instance_head.get_config(),
                        new_instance_head.get_config())
# Run all tests in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains feature fusion blocks for panoptic segmentation models."""
from typing import Any, Callable, Dict, List, Mapping, Optional, Union
import tensorflow as tf
from official.modeling import tf_utils
# Type annotations.
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]
class PanopticDeepLabFusion(tf.keras.layers.Layer):
  """Creates a Panoptic DeepLab feature Fusion layer.

  This implements the feature fusion introduced in the paper:
  Cheng et al. Panoptic-DeepLab
  (https://arxiv.org/pdf/1911.10194.pdf)
  """

  def __init__(
      self,
      level: int,
      low_level: List[int],
      num_projection_filters: List[int],
      num_output_filters: int = 256,
      use_depthwise_convolution: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      **kwargs):
    """Initializes the Panoptic DeepLab feature fusion layer.

    Args:
      level: An `int` level at which the decoder was applied at.
      low_level: A list of `int` backbone levels to fuse, processed in the
        given order.
      num_projection_filters: A list of `int` with number of filters for
        projection conv2d layers, one entry per level in `low_level`.
      num_output_filters: An `int` number of filters in output conv2d layers.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      interpolation: A `str` interpolation method for upsampling. Defaults to
        `bilinear`.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(PanopticDeepLabFusion, self).__init__(**kwargs)
    # Stored verbatim so get_config()/from_config() can round-trip the layer.
    self._config_dict = {
        'level': level,
        'low_level': low_level,
        'num_projection_filters': num_projection_filters,
        'num_output_filters': num_output_filters,
        'use_depthwise_convolution': use_depthwise_convolution,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'interpolation': interpolation
    }
    # Channel axis follows the configured image data format.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._channel_axis = -1
    else:
      self._channel_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: List[tf.TensorShape]):
    """Creates one projection conv and one fusion conv per low level."""
    conv_op = tf.keras.layers.Conv2D
    conv_kwargs = {
        'padding': 'same',
        'use_bias': True,
        'kernel_initializer': tf.initializers.VarianceScaling(),
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
    }
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._channel_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    self._projection_convs = []
    self._projection_norms = []
    self._fusion_convs = []
    self._fusion_norms = []
    for i in range(len(self._config_dict['low_level'])):
      # 1x1 projection reduces the low-level feature channels before fusion.
      self._projection_convs.append(
          conv_op(
              filters=self._config_dict['num_projection_filters'][i],
              kernel_size=1,
              **conv_kwargs))
      if self._config_dict['use_depthwise_convolution']:
        depthwise_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
        # Depthwise-separable variant: 5x5 depthwise + norm + 1x1 pointwise.
        fusion_conv = tf.keras.Sequential([
            tf.keras.layers.DepthwiseConv2D(
                kernel_size=5,
                padding='same',
                use_bias=True,
                depthwise_initializer=depthwise_initializer,
                depthwise_regularizer=self._config_dict['kernel_regularizer'],
                depth_multiplier=1),
            bn_op(**bn_kwargs),
            conv_op(
                filters=self._config_dict['num_output_filters'],
                kernel_size=1,
                **conv_kwargs)])
      else:
        fusion_conv = conv_op(
            filters=self._config_dict['num_output_filters'],
            kernel_size=5,
            **conv_kwargs)
      self._fusion_convs.append(fusion_conv)
      self._projection_norms.append(bn_op(**bn_kwargs))
      self._fusion_norms.append(bn_op(**bn_kwargs))

  def call(self, inputs, training=None):
    """Fuses low-level backbone features into the decoder output.

    Args:
      inputs: A tuple of (backbone_features, decoder_features), each a dict
        keyed by the stringified feature level.
      training: A bool, runs the layer in training/eval mode.

    Returns:
      A `float` `tf.Tensor` of shape [batch_size, feature_height,
      feature_width, feature_channel] at the resolution of the last fused
      low level.
    """
    if training is None:
      training = tf.keras.backend.learning_phase()
    backbone_output = inputs[0]
    decoder_output = inputs[1][str(self._config_dict['level'])]

    x = decoder_output
    for i in range(len(self._config_dict['low_level'])):
      feature = backbone_output[str(self._config_dict['low_level'][i])]
      feature = self._projection_convs[i](feature)
      feature = self._projection_norms[i](feature, training=training)
      feature = self._activation(feature)

      # Upsample the running output to the low-level feature's resolution,
      # then concatenate along the channel axis and fuse.
      shape = tf.shape(feature)
      x = tf.image.resize(
          x, size=[shape[1], shape[2]],
          method=self._config_dict['interpolation'])
      # resize may promote dtype (e.g. under mixed precision); match feature.
      x = tf.cast(x, dtype=feature.dtype)
      x = tf.concat([x, feature], axis=self._channel_axis)
      x = self._fusion_convs[i](x)
      x = self._fusion_norms[i](x, training=training)
      x = self._activation(x)
    return x

  def get_config(self) -> Mapping[str, Any]:
    """Returns the config of this layer."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Recreates the layer from its `get_config()` output."""
    return cls(**config)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test for panoptic_deeplab_merge.py.
Note that the tests are branched from
https://raw.githubusercontent.com/google-research/deeplab2/main/model/post_processor/panoptic_deeplab_test.py
"""
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
class PostProcessingTest(tf.test.TestCase):
  """Tests for the Panoptic-DeepLab merge/post-processing functions.

  Branched from the upstream deeplab2 post-processor tests; expected values
  below come from that reference implementation.
  """

  def test_py_func_merge_semantic_and_instance_maps_can_run(self):
    """Merging random semantic/instance maps preserves the input shape."""
    batch = 1
    height = 5
    width = 5
    semantic_prediction = tf.random.uniform((batch, height, width),
                                            minval=0,
                                            maxval=20,
                                            dtype=tf.int32)
    instance_maps = tf.random.uniform((batch, height, width),
                                      minval=0,
                                      maxval=3,
                                      dtype=tf.int32)
    thing_class_ids = tf.convert_to_tensor([1, 2, 3])
    label_divisor = 256
    stuff_area_limit = 3
    void_label = 255
    panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps(
        semantic_prediction, instance_maps, thing_class_ids, label_divisor,
        stuff_area_limit, void_label)
    self.assertListEqual(semantic_prediction.get_shape().as_list(),
                         panoptic_prediction.get_shape().as_list())

  def test_merge_semantic_and_instance_maps_with_a_simple_example(self):
    """Checks exact panoptic IDs for a small hand-worked example."""
    semantic_prediction = tf.convert_to_tensor(
        [[[0, 0, 0, 0],
          [0, 1, 1, 0],
          [0, 2, 2, 0],
          [2, 2, 3, 3]]], dtype=tf.int32)
    instance_maps = tf.convert_to_tensor(
        [[[0, 0, 0, 0],
          [0, 0, 0, 0],
          [0, 1, 1, 0],
          [2, 2, 3, 3]]], dtype=tf.int32)
    thing_class_ids = tf.convert_to_tensor([2, 3])
    label_divisor = 256
    stuff_area_limit = 3
    void_label = 255
    # The expected_panoptic_prediction is computed as follows.
    # For `thing` segmentation, instance 1, 2, and 3 are kept, but instance 3
    # will have a new instance ID 1, since it is the first instance in its
    # own semantic label.
    # For `stuff` segmentation, class-0 region is kept, while class-1 region
    # is re-labeled as `void_label * label_divisor` since its area is smaller
    # than stuff_area_limit.
    expected_panoptic_prediction = tf.convert_to_tensor(
        [[[0, 0, 0, 0],
          [0, void_label * label_divisor, void_label * label_divisor, 0],
          [0, 2 * label_divisor + 1, 2 * label_divisor + 1, 0],
          [2 * label_divisor + 2, 2 * label_divisor + 2, 3 * label_divisor + 1,
           3 * label_divisor + 1]]], dtype=tf.int32)
    panoptic_prediction = panoptic_deeplab_merge._merge_semantic_and_instance_maps(
        semantic_prediction, instance_maps, thing_class_ids, label_divisor,
        stuff_area_limit, void_label)
    self.assertAllClose(expected_panoptic_prediction,
                        panoptic_prediction)

  def test_gets_panoptic_predictions_with_score(self):
    """Checks instance maps and scores from the full prediction pipeline."""
    batch = 1
    height = 5
    width = 5
    classes = 3

    # Uniform logits tiled over space: the semantic map is spatially constant.
    semantic_logits = tf.random.uniform((batch, 1, 1, classes))
    semantic_logits = tf.tile(semantic_logits, (1, height, width, 1))

    # Two true centers (scores 1.0 and 0.7); 0.8 and 0.1 should be suppressed
    # by the 3x3 NMS / top-k center selection below.
    center_heatmap = tf.convert_to_tensor([
        [1.0, 0.0, 0.0, 0.0, 0.0],
        [0.8, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.1, 0.7],
        [0.0, 0.0, 0.0, 0.0, 0.2],
    ], dtype=tf.float32)
    center_heatmap = tf.expand_dims(center_heatmap, 0)
    center_heatmap = tf.expand_dims(center_heatmap, 3)

    # Zero offsets: each pixel is assigned to its nearest surviving center.
    center_offsets = tf.zeros((batch, height, width, 2))
    center_threshold = 0.0
    thing_class_ids = tf.range(classes)  # No "stuff" classes.
    label_divisor = 256
    stuff_area_limit = 16
    void_label = classes
    nms_kernel_size = 3
    keep_k_centers = 2

    result = panoptic_deeplab_merge._get_panoptic_predictions(
        semantic_logits, center_heatmap, center_offsets, center_threshold,
        thing_class_ids, label_divisor, stuff_area_limit, void_label,
        nms_kernel_size, keep_k_centers)
    instance_maps = result[3].numpy()
    instance_scores = result[2].numpy()

    self.assertSequenceEqual(instance_maps.shape, (batch, height, width))
    expected_instances = [[
        [1, 1, 1, 1, 2],
        [1, 1, 1, 2, 2],
        [1, 1, 2, 2, 2],
        [1, 2, 2, 2, 2],
        [1, 2, 2, 2, 2],
    ]]
    np.testing.assert_array_equal(instance_maps, expected_instances)

    self.assertSequenceEqual(instance_scores.shape, (batch, height, width))
    expected_instance_scores = [[
        [1.0, 1.0, 1.0, 1.0, 0.7],
        [1.0, 1.0, 1.0, 0.7, 0.7],
        [1.0, 1.0, 0.7, 0.7, 0.7],
        [1.0, 0.7, 0.7, 0.7, 0.7],
        [1.0, 0.7, 0.7, 0.7, 0.7],
    ]]
    self.assertAllClose(result[2],
                        tf.constant(expected_instance_scores))
# Execute the test suite when this module is run directly.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build Panoptic Deeplab model."""
from typing import Any, Mapping, Optional, Union
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
@tf.keras.utils.register_keras_serializable(package='Vision')
class PanopticDeeplabModel(tf.keras.Model):
  """Panoptic Deeplab model: backbone + decoder(s) + semantic/instance heads."""

  def __init__(
      self,
      backbone: tf.keras.Model,
      semantic_decoder: tf.keras.Model,
      semantic_head: tf.keras.layers.Layer,
      instance_head: tf.keras.layers.Layer,
      instance_decoder: Optional[tf.keras.Model] = None,
      post_processor: Optional[panoptic_deeplab_merge.PostProcessor] = None,
      **kwargs):
    """Panoptic deeplab model initializer.

    Args:
      backbone: a backbone network.
      semantic_decoder: a decoder network. E.g. FPN.
      semantic_head: segmentation head.
      instance_head: instance center head.
      instance_decoder: Optional decoder network for instance predictions.
        When `None`, the semantic decoder output is shared by both heads.
      post_processor: Optional post processor layer that merges semantic and
        instance predictions into panoptic masks at inference time.
      **kwargs: keyword arguments to be passed.
    """
    super(PanopticDeeplabModel, self).__init__(**kwargs)

    # Retain the constructor arguments so get_config() can round-trip them.
    self._config_dict = dict(
        backbone=backbone,
        semantic_decoder=semantic_decoder,
        instance_decoder=instance_decoder,
        semantic_head=semantic_head,
        instance_head=instance_head,
        post_processor=post_processor)
    self.backbone = backbone
    self.semantic_decoder = semantic_decoder
    self.instance_decoder = instance_decoder
    self.semantic_head = semantic_head
    self.instance_head = instance_head
    self.post_processor = post_processor

  def call(
      self, inputs: tf.Tensor,
      image_info: tf.Tensor,
      training: bool = None):
    """Runs a forward pass.

    Returns the raw head outputs during training; at inference time, also
    merges them into panoptic masks when a post processor is configured.
    """
    if training is None:
      training = tf.keras.backend.learning_phase()

    backbone_features = self.backbone(inputs, training=training)
    semantic_features = self.semantic_decoder(
        backbone_features, training=training)
    # With no dedicated instance decoder, both heads share decoder features.
    instance_features = (
        semantic_features if self.instance_decoder is None
        else self.instance_decoder(backbone_features, training=training))

    segmentation_outputs = self.semantic_head(
        (backbone_features, semantic_features), training=training)
    instance_outputs = self.instance_head(
        (backbone_features, instance_features), training=training)

    outputs = {
        'segmentation_outputs': segmentation_outputs,
        'instance_centers_heatmap':
            instance_outputs['instance_centers_heatmap'],
        'instance_centers_offset':
            instance_outputs['instance_centers_offset'],
    }
    if training:
      return outputs

    if self.post_processor is not None:
      outputs.update(self.post_processor(outputs, image_info))
    return outputs

  @property
  def checkpoint_items(
      self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
    """Returns a dictionary of items to be additionally checkpointed."""
    items = {
        'backbone': self.backbone,
        'semantic_decoder': self.semantic_decoder,
        'semantic_head': self.semantic_head,
        'instance_head': self.instance_head,
    }
    # The instance decoder is only tracked when it is not shared with the
    # semantic decoder.
    if self.instance_decoder is not None:
      items['instance_decoder'] = self.instance_decoder
    return items

  def get_config(self) -> Mapping[str, Any]:
    """Returns the constructor arguments used to build this model."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Rebuilds the model from a config produced by `get_config`."""
    return cls(**config)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Panoptic Deeplab network."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.distribute import combinations
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
from official.vision.modeling import backbones
from official.vision.modeling.decoders import aspp
class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for building and serializing the PanopticDeeplabModel network."""

  def _build_model(self, num_classes, level, low_level, shared_decoder,
                   output_size):
    """Assembles a full panoptic deeplab model wired identically per test."""
    backbone = backbones.ResNet(model_id=50)
    semantic_decoder = aspp.ASPP(
        level=level, dilation_rates=[6, 12, 18])
    if shared_decoder:
      instance_decoder = semantic_decoder
    else:
      instance_decoder = aspp.ASPP(
          level=level, dilation_rates=[6, 12, 18])
    semantic_head = panoptic_deeplab_heads.SemanticHead(
        num_classes,
        level=level,
        low_level=low_level,
        low_level_num_filters=(64, 32))
    instance_head = panoptic_deeplab_heads.InstanceHead(
        level=level,
        low_level=low_level,
        low_level_num_filters=(64, 32))
    post_processor = panoptic_deeplab_merge.PostProcessor(
        output_size=output_size,
        center_score_threshold=0.1,
        thing_class_ids=[1, 2, 3, 4],
        label_divisor=[256],
        stuff_area_limit=4096,
        ignore_label=0,
        nms_kernel=41,
        keep_k_centers=41,
        rescale_predictions=True)
    return panoptic_deeplab_model.PanopticDeeplabModel(
        backbone=backbone,
        semantic_decoder=semantic_decoder,
        instance_decoder=instance_decoder,
        semantic_head=semantic_head,
        instance_head=instance_head,
        post_processor=post_processor)

  @combinations.generate(
      combinations.combine(
          level=[2, 3, 4],
          input_size=[256, 512],
          low_level=[[4, 3], [3, 2]],
          shared_decoder=[True, False],
          training=[True, False]))
  def test_panoptic_deeplab_network_creation(
      self, input_size, level, low_level, shared_decoder, training):
    """Test for creation of a panoptic deeplab network."""
    batch_size = 2 if training else 1
    num_classes = 10
    inputs = np.random.rand(batch_size, input_size, input_size, 3)
    image_info = tf.convert_to_tensor(
        [[[input_size, input_size], [input_size, input_size], [1, 1], [0, 0]]])
    image_info = tf.tile(image_info, [batch_size, 1, 1])

    tf.keras.backend.set_image_data_format('channels_last')

    model = self._build_model(
        num_classes=num_classes,
        level=level,
        low_level=low_level,
        shared_decoder=shared_decoder,
        output_size=[input_size, input_size])
    outputs = model(
        inputs=inputs,
        image_info=image_info,
        training=training)

    if training:
      # The heads operate at the stride of the last low-level feature.
      feature_size = input_size // (2 ** low_level[-1])
      for key, channels in (('segmentation_outputs', num_classes),
                            ('instance_centers_heatmap', 1),
                            ('instance_centers_offset', 2)):
        self.assertIn(key, outputs)
        self.assertAllEqual(
            [batch_size, feature_size, feature_size, channels],
            outputs[key].numpy().shape)
    else:
      # Inference additionally produces merged panoptic predictions.
      for key in ('panoptic_outputs', 'category_mask', 'instance_mask',
                  'instance_centers', 'instance_scores',
                  'segmentation_outputs'):
        self.assertIn(key, outputs)

  @combinations.generate(
      combinations.combine(
          level=[2, 3, 4],
          low_level=[(4, 3), (3, 2)],
          shared_decoder=[True, False]))
  def test_serialize_deserialize(self, level, low_level, shared_decoder):
    """Validate the network can be serialized and deserialized."""
    model = self._build_model(
        num_classes=10,
        level=level,
        low_level=low_level,
        shared_decoder=shared_decoder,
        output_size=[640, 640])

    config = model.get_config()
    new_model = panoptic_deeplab_model.PanopticDeeplabModel.from_config(config)
    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()
    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())
# Standard test entry point: discovers and runs the TestCase classes above.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for masks."""
import tensorflow as tf
def resize_and_rescale_offsets(input_tensor: tf.Tensor, target_size):
  """Bilinearly resizes and rescales the offsets.

  Reference:
  https://github.com/google-research/deeplab2/blob/main/model/utils.py#L157

  Args:
    input_tensor: A tf.Tensor of shape [batch, height, width, 2], whose last
      axis holds (y, x) offsets.
    target_size: A list or tuple or 1D tf.Tensor that specifies the height and
      width after resizing.

  Returns:
    The input_tensor resized to shape `[batch, target_height, target_width, 2]`.
    Moreover, the offsets along the y-axis are rescaled by a factor equal to
    (target_height - 1) / (reference_height - 1) and the offsets along the
    x-axis are rescaled by a factor equal to
    (target_width - 1) / (reference_width - 1).
  """
  input_size_y = tf.shape(input_tensor)[1]
  input_size_x = tf.shape(input_tensor)[2]

  # Offsets are displacements in pixel coordinates, so they scale with the
  # align-corners factor (target - 1) / (source - 1), not target / source.
  dtype = input_tensor.dtype
  scale_y = tf.cast(target_size[0] - 1, dtype=dtype) / tf.cast(
      input_size_y - 1, dtype=dtype)
  scale_x = tf.cast(target_size[1] - 1, dtype=dtype) / tf.cast(
      input_size_x - 1, dtype=dtype)

  target_y, target_x = tf.split(
      value=input_tensor, num_or_size_splits=2, axis=3)
  target_y *= scale_y
  target_x *= scale_x
  # Bug fix: the rescaled offsets were previously concatenated into a
  # discarded temporary (`_ = tf.concat(...)`) and the *unscaled*
  # input_tensor was resized instead, so the documented rescaling never
  # took effect. Resize the rescaled tensor.
  rescaled_offsets = tf.concat([target_y, target_x], 3)
  return tf.image.resize(
      rescaled_offsets,
      size=target_size,
      method=tf.image.ResizeMethod.BILINEAR)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Panoptic Deeplab task definition."""
from typing import Any, Dict, List, Mapping, Optional, Tuple
from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import task_factory
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as exp_cfg
from official.vision.beta.projects.panoptic_maskrcnn.dataloaders import panoptic_deeplab_input
from official.vision.beta.projects.panoptic_maskrcnn.losses import panoptic_deeplab_losses
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory
from official.vision.dataloaders import input_reader_factory
from official.vision.evaluation import panoptic_quality_evaluator
from official.vision.evaluation import segmentation_metrics
@task_factory.register_task_cls(exp_cfg.PanopticDeeplabTask)
class PanopticDeeplabTask(base_task.Task):
  """A task for Panoptic Deeplab."""

  def build_model(self):
    """Builds panoptic deeplab model.

    Returns:
      A PanopticDeeplabModel built from `self.task_config.model`.
    """
    input_specs = tf.keras.layers.InputSpec(
        shape=[None] + self.task_config.model.input_size)

    l2_weight_decay = self.task_config.losses.l2_weight_decay
    # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
    # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
    # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
    l2_regularizer = (tf.keras.regularizers.l2(
        l2_weight_decay / 2.0) if l2_weight_decay else None)

    model = factory.build_panoptic_deeplab(
        input_specs=input_specs,
        model_config=self.task_config.model,
        l2_regularizer=l2_regularizer)
    return model

  def initialize(self, model: tf.keras.Model):
    """Loads pretrained checkpoint.

    Restores either the full set of checkpoint items or only the modules
    listed in `task_config.init_checkpoint_modules` ('backbone'/'decoder').
    No-op when no init checkpoint is configured.

    Args:
      model: The keras model to load weights into.
    """
    if not self.task_config.init_checkpoint:
      return

    ckpt_dir_or_file = self.task_config.init_checkpoint
    if tf.io.gfile.isdir(ckpt_dir_or_file):
      ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)

    # Restoring checkpoint.
    if 'all' in self.task_config.init_checkpoint_modules:
      ckpt = tf.train.Checkpoint(**model.checkpoint_items)
      status = ckpt.read(ckpt_dir_or_file)
      status.expect_partial().assert_existing_objects_matched()
    else:
      ckpt_items = {}
      if 'backbone' in self.task_config.init_checkpoint_modules:
        ckpt_items.update(backbone=model.backbone)
      if 'decoder' in self.task_config.init_checkpoint_modules:
        ckpt_items.update(semantic_decoder=model.semantic_decoder)
        # A separate instance decoder only exists when it is not shared
        # with the semantic decoder.
        if not self.task_config.model.shared_decoder:
          ckpt_items.update(instance_decoder=model.instance_decoder)

      ckpt = tf.train.Checkpoint(**ckpt_items)
      status = ckpt.read(ckpt_dir_or_file)
      status.expect_partial().assert_existing_objects_matched()

    logging.info('Finished loading pretrained checkpoint from %s',
                 ckpt_dir_or_file)

  def build_inputs(self,
                   params: exp_cfg.DataConfig,
                   input_context: Optional[tf.distribute.InputContext] = None):
    """Builds panoptic deeplab input.

    Args:
      params: The dataset config.
      input_context: Optional distribution input pipeline context.

    Returns:
      A tf.data.Dataset of parsed panoptic deeplab examples.

    Raises:
      ValueError: If the configured decoder type is unknown.
    """
    decoder_cfg = params.decoder.get()
    if params.decoder.type == 'simple_decoder':
      decoder = panoptic_deeplab_input.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id,
          panoptic_category_mask_key=decoder_cfg.panoptic_category_mask_key,
          panoptic_instance_mask_key=decoder_cfg.panoptic_instance_mask_key)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type))

    parser = panoptic_deeplab_input.Parser(
        output_size=self.task_config.model.input_size[:2],
        ignore_label=params.parser.ignore_label,
        resize_eval_groundtruth=params.parser.resize_eval_groundtruth,
        groundtruth_padded_size=params.parser.groundtruth_padded_size,
        aug_scale_min=params.parser.aug_scale_min,
        aug_scale_max=params.parser.aug_scale_max,
        aug_rand_hflip=params.parser.aug_rand_hflip,
        aug_type=params.parser.aug_type,
        sigma=params.parser.sigma,
        dtype=params.parser.dtype)

    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))

    dataset = reader.read(input_context=input_context)
    return dataset

  def build_losses(self,
                   labels: Mapping[str, tf.Tensor],
                   model_outputs: Mapping[str, tf.Tensor],
                   aux_losses: Optional[Any] = None):
    """Panoptic deeplab losses.

    Combines the weighted segmentation loss, the instance center heatmap
    loss, and the instance center offset loss into a single model loss.

    Args:
      labels: labels.
      model_outputs: Output logits from panoptic deeplab.
      aux_losses: auxiliary loss tensors, i.e. `losses` in keras.Model.

    Returns:
      A dictionary with the total loss, the model loss, and each of the
      three component losses.
    """
    loss_config = self._task_config.losses
    segmentation_loss_fn = panoptic_deeplab_losses.WeightedBootstrappedCrossEntropyLoss(
        loss_config.label_smoothing,
        loss_config.class_weights,
        loss_config.ignore_label,
        top_k_percent_pixels=loss_config.top_k_percent_pixels)
    instance_center_heatmap_loss_fn = panoptic_deeplab_losses.CenterHeatmapLoss(
        )
    instance_center_offset_loss_fn = panoptic_deeplab_losses.CenterOffsetLoss()

    # Cast all sample weights to the model output dtype so losses are
    # computed in a single precision.
    semantic_weights = tf.cast(
        labels['semantic_weights'],
        dtype=model_outputs['instance_centers_heatmap'].dtype)
    things_mask = tf.cast(
        tf.squeeze(labels['things_mask'], axis=3),
        dtype=model_outputs['instance_centers_heatmap'].dtype)
    valid_mask = tf.cast(
        tf.squeeze(labels['valid_mask'], axis=3),
        dtype=model_outputs['instance_centers_heatmap'].dtype)

    segmentation_loss = segmentation_loss_fn(
        model_outputs['segmentation_outputs'],
        labels['category_mask'],
        sample_weight=semantic_weights)
    instance_center_heatmap_loss = instance_center_heatmap_loss_fn(
        model_outputs['instance_centers_heatmap'],
        labels['instance_centers_heatmap'],
        sample_weight=valid_mask)
    # Offsets are only supervised on "thing" pixels.
    instance_center_offset_loss = instance_center_offset_loss_fn(
        model_outputs['instance_centers_offset'],
        labels['instance_centers_offset'],
        sample_weight=things_mask)

    model_loss = (
        loss_config.segmentation_loss_weight * segmentation_loss +
        loss_config.center_heatmap_loss_weight * instance_center_heatmap_loss +
        loss_config.center_offset_loss_weight * instance_center_offset_loss)

    total_loss = model_loss
    if aux_losses:
      total_loss += tf.add_n(aux_losses)

    losses = {
        'total_loss': total_loss,
        'model_loss': model_loss,
        'segmentation_loss': segmentation_loss,
        'instance_center_heatmap_loss': instance_center_heatmap_loss,
        'instance_center_offset_loss': instance_center_offset_loss
    }
    return losses

  def build_metrics(self, training: bool = True) -> List[
      tf.keras.metrics.Metric]:
    """Build metrics.

    In training mode, returns mean-loss metrics (and optionally creates a
    train mean-IoU metric). In evaluation mode, creates per-class IoU and
    (optionally) panoptic quality evaluators as task attributes.

    Args:
      training: Whether to build training or evaluation metrics.

    Returns:
      A list of tf.keras metrics (empty in evaluation mode; eval metrics
      are stored on the task and driven via aggregate_logs).
    """
    eval_config = self.task_config.evaluation
    metrics = []
    if training:
      metric_names = [
          'total_loss',
          'segmentation_loss',
          'instance_center_heatmap_loss',
          'instance_center_offset_loss',
          'model_loss']
      for name in metric_names:
        metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))

      if eval_config.report_train_mean_iou:
        self.train_mean_iou = segmentation_metrics.MeanIoU(
            name='train_mean_iou',
            num_classes=self.task_config.model.num_classes,
            rescale_predictions=False,
            dtype=tf.float32)
    else:
      rescale_predictions = (not self.task_config.validation_data.parser
                             .resize_eval_groundtruth)
      self.perclass_iou_metric = segmentation_metrics.PerClassIoU(
          name='per_class_iou',
          num_classes=self.task_config.model.num_classes,
          rescale_predictions=rescale_predictions,
          dtype=tf.float32)

      if self.task_config.model.generate_panoptic_masks:
        self.panoptic_quality_metric = panoptic_quality_evaluator.PanopticQualityEvaluator(
            num_categories=self.task_config.model.num_classes,
            ignored_label=eval_config.ignored_label,
            max_instances_per_category=eval_config.max_instances_per_category,
            offset=eval_config.offset,
            is_thing=eval_config.is_thing,
            rescale_predictions=eval_config.rescale_predictions)

      # Update state on CPU if TPUStrategy due to dynamic resizing.
      # (Previously this flag was computed twice with an identical
      # isinstance check; the redundant first assignment was removed.)
      self._process_iou_metric_on_cpu = isinstance(
          tf.distribute.get_strategy(),
          tf.distribute.TPUStrategy)

    return metrics

  def train_step(
      self,
      inputs: Tuple[Any, Any],
      model: tf.keras.Model,
      optimizer: tf.keras.optimizers.Optimizer,
      metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
    """Does forward and backward.

    Args:
      inputs: a pair of (images, labels) tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    images, labels = inputs
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync

    with tf.GradientTape() as tape:
      outputs = model(
          inputs=images,
          image_info=labels['image_info'],
          training=True)
      # Cast outputs to float32 so losses are computed in full precision
      # even under a mixed-precision policy.
      outputs = tf.nest.map_structure(
          lambda x: tf.cast(x, tf.float32), outputs)

      # Computes per-replica loss.
      losses = self.build_losses(
          labels=labels,
          model_outputs=outputs,
          aux_losses=model.losses)
      scaled_loss = losses['total_loss'] / num_replicas

      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient when LossScaleOptimizer is used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    logs = {self.loss: losses['total_loss']}

    if metrics:
      for m in metrics:
        m.update_state(losses[m.name])

    if self.task_config.evaluation.report_train_mean_iou:
      segmentation_labels = {
          'masks': labels['category_mask'],
          'valid_masks': labels['valid_mask'],
          'image_info': labels['image_info']
      }
      self.process_metrics(
          metrics=[self.train_mean_iou],
          labels=segmentation_labels,
          model_outputs=outputs['segmentation_outputs'])
      logs.update({
          self.train_mean_iou.name:
              self.train_mean_iou.result()
      })

    return logs

  def validation_step(
      self,
      inputs: Tuple[Any, Any],
      model: tf.keras.Model,
      metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
    """Validation step.

    Args:
      inputs: a pair of (images, labels) tensors.
      model: the keras.Model.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs. When metrics must be updated on CPU, the raw
      (labels, outputs) pairs are forwarded through the logs for
      aggregate_logs to consume.
    """
    images, labels = inputs

    outputs = model(
        inputs=images,
        image_info=labels['image_info'],
        training=False)

    logs = {self.loss: 0}

    segmentation_labels = {
        'masks': labels['category_mask'],
        'valid_masks': labels['valid_mask'],
        'image_info': labels['image_info']
    }

    if self._process_iou_metric_on_cpu:
      logs.update({
          self.perclass_iou_metric.name:
              (segmentation_labels, outputs['segmentation_outputs'])
      })
    else:
      self.perclass_iou_metric.update_state(
          segmentation_labels,
          outputs['segmentation_outputs'])

    if self.task_config.model.generate_panoptic_masks:
      pq_metric_labels = {
          'category_mask':
              tf.squeeze(labels['category_mask'], axis=3),
          'instance_mask':
              tf.squeeze(labels['instance_mask'], axis=3),
          'image_info': labels['image_info']
      }
      panoptic_outputs = {
          'category_mask':
              outputs['category_mask'],
          'instance_mask':
              outputs['instance_mask'],
      }
      logs.update({
          self.panoptic_quality_metric.name:
              (pq_metric_labels, panoptic_outputs)})
    return logs

  def aggregate_logs(self, state=None, step_outputs=None):
    """Resets eval metrics on the first call, then accumulates CPU updates."""
    if state is None:
      self.perclass_iou_metric.reset_states()
      state = [self.perclass_iou_metric]
      if self.task_config.model.generate_panoptic_masks:
        state += [self.panoptic_quality_metric]

    if self._process_iou_metric_on_cpu:
      self.perclass_iou_metric.update_state(
          step_outputs[self.perclass_iou_metric.name][0],
          step_outputs[self.perclass_iou_metric.name][1])

    if self.task_config.model.generate_panoptic_masks:
      self.panoptic_quality_metric.update_state(
          step_outputs[self.panoptic_quality_metric.name][0],
          step_outputs[self.panoptic_quality_metric.name][1])

    return state

  def reduce_aggregated_logs(self, aggregated_logs, global_step=None):
    """Reduces accumulated eval metric state into scalar result logs."""
    result = {}
    ious = self.perclass_iou_metric.result()
    if self.task_config.evaluation.report_per_class_iou:
      for i, value in enumerate(ious.numpy()):
        result.update({'segmentation_iou/class_{}'.format(i): value})

    # Computes mean IoU
    result.update({'segmentation_mean_iou': tf.reduce_mean(ious).numpy()})

    if self.task_config.model.generate_panoptic_masks:
      panoptic_quality_results = self.panoptic_quality_metric.result()
      for k, value in panoptic_quality_results.items():
        if k.endswith('per_class'):
          # Per-class PQ breakdowns are only reported when requested.
          if self.task_config.evaluation.report_per_class_pq:
            for i, per_class_value in enumerate(value):
              metric_key = 'panoptic_quality/{}/class_{}'.format(k, i)
              result[metric_key] = per_class_value
          else:
            continue
        else:
          result['panoptic_quality/{}'.format(k)] = value
    return result
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_deeplab.py."""
import os
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as cfg
from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_deeplab
# TODO(b/234636381): add unit test for train and validation step
class PanopticDeeplabTaskTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for PanopticDeeplabTask model building and initialization."""

  @parameterized.parameters(
      (['all'], False),
      (['backbone'], False),
      (['decoder'], False),
      (['decoder'], True))
  def test_model_initializing(self, init_checkpoint_modules, shared_decoder):
    """Checks checkpoint restoration for each init_checkpoint_modules mode."""
    task_config = cfg.PanopticDeeplabTask(
        model=cfg.PanopticDeeplab(
            num_classes=10,
            input_size=[640, 640, 3],
            shared_decoder=shared_decoder))
    task = panoptic_deeplab.PanopticDeeplabTask(task_config)
    model = task.build_model()

    # Save a checkpoint of the freshly built model, then restore from it.
    ckpt = tf.train.Checkpoint(**model.checkpoint_items)
    ckpt_save_dir = self.create_tempdir().full_path
    ckpt.save(os.path.join(ckpt_save_dir, 'ckpt'))
    task._task_config.init_checkpoint = ckpt_save_dir
    task._task_config.init_checkpoint_modules = init_checkpoint_modules
    task.initialize(model)

  @parameterized.parameters(
      (True,),
      (False,))
  def test_build_metrics(self, training):
    """Checks that train/eval metrics are created as expected."""
    task_config = cfg.PanopticDeeplabTask(
        model=cfg.PanopticDeeplab(
            num_classes=10,
            input_size=[640, 640, 3],
            shared_decoder=False))
    task = panoptic_deeplab.PanopticDeeplabTask(task_config)
    metrics = task.build_metrics(training=training)

    if training:
      expected_metric_names = {
          'total_loss',
          'segmentation_loss',
          'instance_center_heatmap_loss',
          'instance_center_offset_loss',
          'model_loss'}
      self.assertEqual(
          expected_metric_names,
          set([metric.name for metric in metrics]))
    else:
      # Use unittest assertions instead of bare `assert`, which is stripped
      # under `python -O` and gives no failure message.
      self.assertTrue(hasattr(task, 'perclass_iou_metric'))
      self.assertTrue(hasattr(task, 'panoptic_quality_metric'))
# Standard test entry point: discovers and runs the TestCase classes above.
if __name__ == '__main__':
  tf.test.main()
...@@ -18,9 +18,12 @@ from absl import app ...@@ -18,9 +18,12 @@ from absl import app
from official.common import flags as tfm_flags from official.common import flags as tfm_flags
from official.vision import train from official.vision import train
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as cfg # pylint: disable=unused-import # pylint: disable=unused-import
from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_maskrcnn as task # pylint: disable=unused-import from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn
from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_deeplab as panoptic_deeplab_task
from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_maskrcnn as panoptic_maskrcnn_task
# pylint: enable=unused-import
if __name__ == '__main__': if __name__ == '__main__':
tfm_flags.define_flags() tfm_flags.define_flags()
......
...@@ -1583,6 +1583,7 @@ class AutoAugment(ImageAugment): ...@@ -1583,6 +1583,7 @@ class AutoAugment(ImageAugment):
'reduced_cifar10': self.policy_reduced_cifar10(), 'reduced_cifar10': self.policy_reduced_cifar10(),
'svhn': self.policy_svhn(), 'svhn': self.policy_svhn(),
'reduced_imagenet': self.policy_reduced_imagenet(), 'reduced_imagenet': self.policy_reduced_imagenet(),
'panoptic_deeplab_policy': self.panoptic_deeplab_policy(),
} }
if not policies: if not policies:
...@@ -1888,6 +1889,16 @@ class AutoAugment(ImageAugment): ...@@ -1888,6 +1889,16 @@ class AutoAugment(ImageAugment):
] ]
return policy return policy
@staticmethod
def panoptic_deeplab_policy():
policy = [
[('Sharpness', 0.4, 1.4), ('Brightness', 0.2, 2.0)],
[('Equalize', 0.0, 1.8), ('Contrast', 0.2, 2.0)],
[('Sharpness', 0.2, 1.8), ('Color', 0.2, 1.8)],
[('Solarize', 0.2, 1.4), ('Equalize', 0.6, 1.8)],
[('Sharpness', 0.2, 0.2), ('Equalize', 0.2, 1.4)]]
return policy
@staticmethod @staticmethod
def policy_test(): def policy_test():
"""Autoaugment test policy for debugging.""" """Autoaugment test policy for debugging."""
...@@ -2025,7 +2036,7 @@ class RandAugment(ImageAugment): ...@@ -2025,7 +2036,7 @@ class RandAugment(ImageAugment):
aug_image, aug_bboxes = tf.switch_case( aug_image, aug_bboxes = tf.switch_case(
branch_index=op_to_select, branch_index=op_to_select,
branch_fns=branch_fns, branch_fns=branch_fns,
default=lambda: (tf.identity(image), _maybe_identity(bboxes))) default=lambda: (tf.identity(image), _maybe_identity(bboxes))) # pylint: disable=cell-var-from-loop
if self.prob_to_apply is not None: if self.prob_to_apply is not None:
aug_image, aug_bboxes = tf.cond( aug_image, aug_bboxes = tf.cond(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment