Unverified Commit c18fc1bb authored by srihari-humbarwadi's avatar srihari-humbarwadi
Browse files

added `PanopticFPNFusion`

parent d3998b07
......@@ -65,11 +65,13 @@ class SegmentationHead(hyperparams.Config):
use_depthwise_convolution: bool = False
prediction_kernel_size: int = 1
upsample_factor: int = 1
feature_fusion: Optional[str] = None # None, deeplabv3plus, or pyramid_fusion
feature_fusion: Optional[str] = None # None, deeplabv3plus, panoptic_fpn_fusion or pyramid_fusion
# deeplabv3plus feature fusion params
low_level: Union[int, str] = 2
low_level_num_filters: int = 48
# panoptic_fpn_fusion params
decoder_min_level: Optional[Union[int, str]] = None
decoder_max_level: Optional[Union[int, str]] = None
@dataclasses.dataclass
class SemanticSegmentationModel(hyperparams.Config):
......
......@@ -21,7 +21,7 @@ from official.vision.beta.modeling.layers import nn_layers
from official.vision.beta.ops import spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
# @tf.keras.utils.register_keras_serializable(package='Vision')
class SegmentationHead(tf.keras.layers.Layer):
"""Creates a segmentation head."""
......@@ -35,8 +35,11 @@ class SegmentationHead(tf.keras.layers.Layer):
prediction_kernel_size: int = 1,
upsample_factor: int = 1,
feature_fusion: Optional[str] = None,
decoder_min_level: Optional[int] = None,
decoder_max_level: Optional[int] = None,
low_level: int = 2,
low_level_num_filters: int = 48,
num_decoder_filters: int = 256,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
......@@ -60,15 +63,24 @@ class SegmentationHead(tf.keras.layers.Layer):
prediction layer.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, or None. If
`deeplabv3plus`, features from decoder_features[level] will be fused
with low level feature maps from backbone. If `pyramid_fusion`,
multiscale features will be resized and fused at the target level.
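feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
`panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from
decoder_features[level] will be fused with low level feature maps from
backbone. If `pyramid_fusion`, multiscale features will be resized and
fused at the target level. If `panoptic_fpn_fusion`, decoder features from
`decoder_min_level` to `decoder_max_level` will be fused at the target
level using `nn_layers.PanopticFPNFusion`.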
decoder_min_level: An `int` of minimum level from decoder to use in
feature fusion. It is only used when feature_fusion is set to
`panoptic_fpn_fusion`.
decoder_max_level: An `int` of maximum level from decoder to use in
feature fusion. It is only used when feature_fusion is set to
`panoptic_fpn_fusion`.
low_level: An `int` of backbone level to be used for feature fusion. It is
used when feature_fusion is set to `deeplabv3plus`.
low_level_num_filters: An `int` of reduced number of filters for the low
level features before fusing it with higher level features. It is only
used when feature_fusion is set to `deeplabv3plus`.
num_decoder_filters: An `int` of number of filters in the decoder outputs.
It is only used when feature_fusion is set to `panoptic_fpn_fusion`.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
......@@ -91,14 +103,17 @@ class SegmentationHead(tf.keras.layers.Layer):
'prediction_kernel_size': prediction_kernel_size,
'upsample_factor': upsample_factor,
'feature_fusion': feature_fusion,
'decoder_min_level': decoder_min_level,
'decoder_max_level': decoder_max_level,
'low_level': low_level,
'low_level_num_filters': low_level_num_filters,
'num_decoder_filters': num_decoder_filters,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
'bias_regularizer': bias_regularizer
}
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
......@@ -141,6 +156,17 @@ class SegmentationHead(tf.keras.layers.Layer):
self._dlv3p_norm = bn_op(
name='segmentation_head_deeplabv3p_fusion_norm', **bn_kwargs)
elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion':
self._panoptic_fpn_fusion = nn_layers.PanopticFPNFusion(
min_level=self._config_dict['decoder_min_level'],
max_level=self._config_dict['decoder_max_level'],
target_level=self._config_dict['level'],
num_filters=self._config_dict['num_filters'],
num_fpn_filters=self._config_dict['num_decoder_filters'],
activation=self._config_dict['activation'],
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'])
# Segmentation head layers.
self._convs = []
self._norms = []
......@@ -210,6 +236,8 @@ class SegmentationHead(tf.keras.layers.Layer):
elif self._config_dict['feature_fusion'] == 'pyramid_fusion':
x = nn_layers.pyramid_feature_fusion(decoder_output,
self._config_dict['level'])
elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion':
x = self._panoptic_fpn_fusion(decoder_output)
else:
x = decoder_output[str(self._config_dict['level'])]
......
......@@ -22,24 +22,41 @@ import tensorflow as tf
from official.vision.beta.modeling.heads import segmentation_heads
class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(2, 'pyramid_fusion'),
(3, 'pyramid_fusion'),
)
def test_forward(self, level, feature_fusion):
head = segmentation_heads.SegmentationHead(
num_classes=10, level=level, feature_fusion=feature_fusion)
(2, 'pyramid_fusion', None, None),
(3, 'pyramid_fusion', None, None),
(2, 'panoptic_fpn_fusion', 2, 5),
(2, 'panoptic_fpn_fusion', 2, 6),
(3, 'panoptic_fpn_fusion', 3, 5),
(3, 'panoptic_fpn_fusion', 3, 6))
def test_forward(self, level, feature_fusion,
decoder_min_level, decoder_max_level):
backbone_features = {
'3': np.random.rand(2, 128, 128, 16),
'4': np.random.rand(2, 64, 64, 16),
'5': np.random.rand(2, 32, 32, 16),
}
decoder_features = {
'3': np.random.rand(2, 128, 128, 16),
'4': np.random.rand(2, 64, 64, 16),
'3': np.random.rand(2, 128, 128, 64),
'4': np.random.rand(2, 64, 64, 64),
'5': np.random.rand(2, 32, 32, 64),
'6': np.random.rand(2, 16, 16, 64),
}
if feature_fusion == 'panoptic_fpn_fusion':
backbone_features['2'] = np.random.rand(2, 256, 256, 16)
decoder_features['2'] = np.random.rand(2, 256, 256, 64)
head = segmentation_heads.SegmentationHead(
num_classes=10,
level=level,
feature_fusion=feature_fusion,
decoder_min_level=decoder_min_level,
decoder_max_level=decoder_max_level,
num_decoder_filters=64)
logits = head(backbone_features, decoder_features)
if level in decoder_features:
......
......@@ -13,12 +13,14 @@
# limitations under the License.
"""Contains common building blocks for neural networks."""
from typing import Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
from absl import logging
import tensorflow as tf
import tensorflow_addons as tfa
from official.modeling import tf_utils
from official.vision.beta.ops import spatial_transform_ops
# Type annotations.
......@@ -308,6 +310,113 @@ def pyramid_feature_fusion(inputs, target_level):
return tf.math.add_n(resampled_feats)
class PanopticFPNFusion(tf.keras.Model):
  """Creates a Panoptic FPN feature Fusion layer.

  This implements feature fusion for semantic segmentation head from the paper:
  Alexander Kirillov, Ross Girshick, Kaiming He and Piotr Dollar.
  Panoptic Feature Pyramid Networks.
  (https://arxiv.org/pdf/1901.02446.pdf)

  The model is built with the Keras functional API: one `tf.keras.Input` is
  created per level in `[min_level, max_level]` (keyed by `str(level)`), each
  level is passed through `max(1, level - target_level)` stages of
  3x3 conv -> group normalization -> activation, and every stage of a level
  other than `target_level` is followed by
  `spatial_transform_ops.nearest_upsampling(x, scale=2)`. The per-level
  results are summed with `tf.math.add_n` to form the single model output.
  """

  def __init__(
      self,
      min_level: int = 2,
      max_level: int = 5,
      target_level: int = 2,
      num_filters: int = 128,
      num_fpn_filters: int = 256,
      activation: str = 'relu',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes panoptic FPN feature fusion layer.

    Args:
      min_level: An `int` of minimum level to use in feature fusion.
      max_level: An `int` of maximum level to use in feature fusion.
      target_level: An `int` of the target feature level for feature fusion.
        Must not be greater than `min_level`, because levels are only ever
        upsampled towards the target.
      num_filters: An `int` number of filters in conv2d layers.
        NOTE(review): group normalization is created with `groups=32`, so this
        presumably has to be a multiple of 32 -- confirm against tfa docs.
      num_fpn_filters: An `int` number of filters in the FPN outputs, i.e. the
        channel count of every input feature map.
      activation: A `str` name of the activation function.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed to `tf.keras.Model`.

    Raises:
      ValueError: If `target_level` is greater than `max_level` or greater
        than `min_level`.
    """
    if target_level > max_level:
      raise ValueError(
          'target_level should be less than or equal to max_level')
    if target_level > min_level:
      # A level below the target would get one conv stage followed by a 2x
      # upsample, moving it further away from the target resolution, so the
      # final `add_n` could never combine it with the other levels.
      raise ValueError(
          'target_level should be less than or equal to min_level')

    # Stored before `super().__init__` is called; returned by `get_config`.
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'target_level': target_level,
        'num_filters': num_filters,
        'num_fpn_filters': num_fpn_filters,
        'activation': activation,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }

    norm = tfa.layers.GroupNormalization
    conv2d = tf.keras.layers.Conv2D
    # One activation layer instance is shared by every stage of every level.
    activation_fn = tf.keras.layers.Activation(
        tf_utils.get_activation(activation))

    if tf.keras.backend.image_data_format() == 'channels_last':
      norm_axis = -1
    else:
      norm_axis = 1

    inputs = self._build_inputs(num_fpn_filters, min_level, max_level)

    upscaled_features = []
    for level in range(min_level, max_level + 1):
      # The target level still gets a single conv stage (without upsampling);
      # every other level gets one stage per level of distance to the target.
      num_conv_layers = max(1, level - target_level)
      x = inputs[str(level)]
      for _ in range(num_conv_layers):
        x = conv2d(
            filters=num_filters,
            kernel_size=3,
            padding='same',
            kernel_initializer=tf.keras.initializers.VarianceScaling(),
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer)(x)
        x = norm(groups=32, axis=norm_axis)(x)
        x = activation_fn(x)
        if level != target_level:
          x = spatial_transform_ops.nearest_upsampling(x, scale=2)
      upscaled_features.append(x)

    fused_features = tf.math.add_n(upscaled_features)
    self._output_specs = {str(target_level): fused_features.get_shape()}

    super().__init__(inputs=inputs, outputs=fused_features, **kwargs)

  def _build_inputs(self, num_filters: int,
                    min_level: int, max_level: int) -> Dict[str, Any]:
    """Creates one Keras input per level, keyed by the level as a string.

    Args:
      num_filters: An `int` size of the last dimension of every input.
      min_level: An `int` of the first level to create an input for.
      max_level: An `int` of the last level (inclusive) to create an input for.

    Returns:
      A dict mapping `str(level)` to a `tf.keras.Input` of shape
      `[None, None, num_filters]` (excluding the batch dimension).
    """
    return {
        str(level): tf.keras.Input(shape=[None, None, num_filters])
        for level in range(min_level, max_level + 1)
    }

  def get_config(self) -> Mapping[str, Any]:
    """Returns a copy of the constructor arguments used to build the model."""
    # A shallow copy keeps callers from mutating the stored configuration.
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Rebuilds the model by passing `config` as keyword arguments."""
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
@tf.keras.utils.register_keras_serializable(package='Vision')
class Scale(tf.keras.layers.Layer):
"""Scales the input by a trainable scalar weight.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment