Commit 4c8e8c82 authored by Fan Yang, committed by A. Unique TensorFlower

Internal change to docstring.

PiperOrigin-RevId: 362334017
parent 3510aa1d
...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder."""

# Import libraries
import tensorflow as tf

...@@ -22,7 +22,7 @@ from official.vision import keras_cv
@tf.keras.utils.register_keras_serializable(package='Vision')
class ASPP(tf.keras.layers.Layer):
"""Creates an Atrous Spatial Pyramid Pooling (ASPP) layer."""

def __init__(self,
level,

...@@ -38,26 +38,28 @@ class ASPP(tf.keras.layers.Layer):
kernel_regularizer=None,
interpolation='bilinear',
**kwargs):
"""ASPP initialization function. """Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer.
Args: Args:
level: `int` level to apply ASPP. level: An `int` level to apply ASPP.
dilation_rates: `list` of dilation rates. dilation_rates: A `list` of dilation rates.
num_filters: `int` number of output filters in ASPP. num_filters: An `int` number of output filters in ASPP.
pool_kernel_size: `list` of [height, width] of pooling kernel size or pool_kernel_size: A `list` of [height, width] of pooling kernel size or
None. Pooling size is with respect to original image size, it will be None. Pooling size is with respect to original image size, it will be
scaled down by 2**level. If None, global average pooling is used. scaled down by 2**level. If None, global average pooling is used.
use_sync_bn: if True, use synchronized batch normalization. use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average. norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by norm_epsilon: A `float` added to variance to avoid dividing by zero.
zero. activation: A `str` activation to be used in ASPP.
activation: `str` activation to be used in ASPP. dropout_rate: A `float` rate for dropout regularization.
dropout_rate: `float` rate for dropout regularization. kernel_initializer: A `str` name of kernel_initializer for convolutional
kernel_initializer: kernel_initializer for convolutional layers. layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
interpolation: interpolation method, one of bilinear, nearest, bicubic, Conv2D. Default is None.
area, lanczos3, lanczos5, gaussian, or mitchellcubic. interpolation: A `str` of interpolation method. It should be one of
**kwargs: keyword arguments to be passed. `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
`gaussian`, or `mitchellcubic`.
**kwargs: Additional keyword arguments to be passed.
""" """
super(ASPP, self).__init__(**kwargs)
self._config_dict = {

...@@ -96,20 +98,22 @@ class ASPP(tf.keras.layers.Layer):
interpolation=self._config_dict['interpolation'])

def call(self, inputs):
"""ASPP call method. """Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input.
The output of ASPP will be a dict of level, Tensor even if only one The output of ASPP will be a dict of {`level`, `tf.Tensor`} even if only one
level is present. Hence, this will be compatible with the rest of the level is present. Hence, this will be compatible with the rest of the
segmentation model interfaces.. segmentation model interfaces.
Args: Args:
inputs: A dict of tensors inputs: A `dict` of `tf.Tensor` where
- key: `str`, the level of the multilevel feature maps. - key: A `str` of the level of the multilevel feature maps.
- values: `Tensor`, [batch, height_l, width_l, filter_size]. - values: A `tf.Tensor` of shape [batch, height_l, width_l,
filter_size].
Returns: Returns:
A dict of tensors A `dict` of `tf.Tensor` where
- key: `str`, the level of the multilevel feature maps. - key: A `str` of the level of the multilevel feature maps.
- values: `Tensor`, output of ASPP module. - values: A `tf.Tensor` of output of ASPP module.
""" """
outputs = {}
level = str(self._config_dict['level'])
...
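
Example usage of the ASPP layer documented above (not part of this change; the
import path and the feature shape are illustrative assumptions):

# A minimal sketch, assuming the ASPP class lives under the beta decoders
# package and that level-4 features of a 256x256 image are 16x16.
import tensorflow as tf
from official.vision.beta.modeling.decoders.aspp import ASPP  # assumed path

aspp = ASPP(level=4, dilation_rates=[6, 12, 18], num_filters=256)
features = {'4': tf.random.normal([2, 16, 16, 512])}
outputs = aspp(features)  # {'4': tf.Tensor of shape [2, 16, 16, 256]}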
...@@ -13,7 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the factory method to create decoders."""

# Import libraries
import tensorflow as tf

...@@ -26,13 +27,14 @@ def build_decoder(input_specs,
"""Builds decoder from a config.

Args:
  input_specs: A `dict` of input specifications. A dictionary consists of
    {level: TensorShape} from a backbone.
  model_config: A OneOfConfig. Model config.
  l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
    None.

Returns:
  A `tf.keras.Model` instance of the decoder.
"""
decoder_type = model_config.decoder.type
decoder_cfg = model_config.decoder.get()
...
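
Hedged sketch of how build_decoder is typically invoked (only the signature
comes from this diff; the import path, the backbone's `output_specs` attribute,
and the config object are assumptions):

import tensorflow as tf
from official.vision.beta.modeling.decoders import factory  # assumed path

def make_decoder(backbone, model_config):
  """Builds the decoder configured in `model_config.decoder` on a backbone."""
  return factory.build_decoder(
      input_specs=backbone.output_specs,  # assumed: {level: TensorShape}
      model_config=model_config,          # a OneOfConfig with a `decoder` field
      l2_regularizer=tf.keras.regularizers.l2(1e-4))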
...@@ -12,13 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definitions of Feature Pyramid Networks (FPN)."""

# Import libraries
import tensorflow as tf

...@@ -29,7 +23,14 @@ from official.vision.beta.ops import spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class FPN(tf.keras.Model):
"""Feature pyramid network.""" """Creates a Feature Pyramid Network (FPN).
This implemets the paper:
Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan, and
Serge Belongie.
Feature Pyramid Networks for Object Detection.
(https://arxiv.org/pdf/1612.03144)
"""
def __init__(self,
input_specs,

...@@ -45,25 +46,26 @@ class FPN(tf.keras.Model):
kernel_regularizer=None,
bias_regularizer=None,
**kwargs):
"""FPN initialization function. """Initializes a Feature Pyramid Network (FPN).
Args: Args:
input_specs: `dict` input specifications. A dictionary consists of input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone. {level: TensorShape} from a backbone.
min_level: `int` minimum level in FPN output feature maps. min_level: An `int` of minimum level in FPN output feature maps.
max_level: `int` maximum level in FPN output feature maps. max_level: An `int` of maximum level in FPN output feature maps.
num_filters: `int` number of filters in FPN layers. num_filters: An `int` number of filters in FPN layers.
use_separable_conv: `bool`, if True use separable convolution for use_separable_conv: A `bool`. If True use separable convolution for
convolution in FPN layers. convolution in FPN layers.
activation: `str` name of the activation function. activation: A `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization. use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average. norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by norm_epsilon: A `float` added to variance to avoid dividing by zero.
zero. kernel_initializer: A `str` name of kernel_initializer for convolutional
kernel_initializer: kernel_initializer for convolutional layers. layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. Conv2D. Default is None.
**kwargs: keyword arguments to be passed. bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
""" """
self._config_dict = {
'input_specs': input_specs,
...
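
Example usage of the FPN decoder documented above (not part of this change;
the import path and shapes are illustrative assumptions):

import tensorflow as tf
from official.vision.beta.modeling.decoders.fpn import FPN  # assumed path

# Backbone feature specs for levels 3-5 of a 640x640 image (shapes assumed).
input_specs = {
    '3': tf.TensorShape([None, 80, 80, 128]),
    '4': tf.TensorShape([None, 40, 40, 256]),
    '5': tf.TensorShape([None, 20, 20, 512]),
}
fpn = FPN(input_specs=input_specs, min_level=3, max_level=7, num_filters=256)
features = {
    level: tf.random.normal([2] + spec[1:].as_list())
    for level, spec in input_specs.items()
}
outputs = fpn(features)  # dict keyed '3'..'7', each [2, h_l, w_l, 256]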
...@@ -12,12 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions of NAS-FPN."""

# Import libraries
from absl import logging

...@@ -60,7 +55,13 @@ def build_block_specs(block_specs=None):
@tf.keras.utils.register_keras_serializable(package='Vision')
class NASFPN(tf.keras.Model):
"""NAS-FPN.""" """Creates a NAS-FPN model.
This implements the paper:
Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection.
(https://arxiv.org/abs/1904.07392)
"""
def __init__(self,
input_specs,

...@@ -78,29 +79,30 @@ class NASFPN(tf.keras.Model):
kernel_regularizer=None,
bias_regularizer=None,
**kwargs):
"""FPN initialization function. """Initializes a NAS-FPN model.
Args: Args:
input_specs: `dict` input specifications. A dictionary consists of input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone. {level: TensorShape} from a backbone.
min_level: `int` minimum level in FPN output feature maps. min_level: An `int` of minimum level in FPN output feature maps.
max_level: `int` maximum level in FPN output feature maps. max_level: An `int` of maximum level in FPN output feature maps.
block_specs: a list of BlockSpec objects that specifies the NAS-FPN block_specs: a list of BlockSpec objects that specifies the NAS-FPN
network topology. By default, the previously discovered architecture is network topology. By default, the previously discovered architecture is
used. used.
num_filters: `int` number of filters in FPN layers. num_filters: An `int` number of filters in FPN layers.
num_repeats: number of repeats for feature pyramid network. num_repeats: number of repeats for feature pyramid network.
use_separable_conv: `bool`, if True use separable convolution for use_separable_conv: A `bool`. If True use separable convolution for
convolution in FPN layers. convolution in FPN layers.
activation: `str` name of the activation function. activation: A `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization. use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average. norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by norm_epsilon: A `float` added to variance to avoid dividing by zero.
zero. kernel_initializer: A `str` name of kernel_initializer for convolutional
kernel_initializer: kernel_initializer for convolutional layers. layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. Conv2D. Default is None.
**kwargs: keyword arguments to be passed. bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
""" """
self._config_dict = {
'input_specs': input_specs,
...
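
NAS-FPN is constructed the same way as the FPN sketch above; the NAS-FPN
specific knobs are `num_repeats` and `block_specs` (defaulting to the
previously discovered architecture). A hedged sketch, reusing the assumed
`input_specs` and `features` from the FPN example:

from official.vision.beta.modeling.decoders.nasfpn import NASFPN  # assumed path

nasfpn = NASFPN(input_specs=input_specs, min_level=3, max_level=7,
                num_filters=256, num_repeats=3)
outputs = nasfpn(features)  # dict keyed '3'..'7', each [2, h_l, w_l, 256]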
...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions of dense prediction heads."""

# Import libraries
import numpy as np

...@@ -23,7 +23,7 @@ from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class RetinaNetHead(tf.keras.layers.Layer):
"""Creates a RetinaNet head."""

def __init__(self,
min_level,

...@@ -40,31 +40,30 @@ class RetinaNetHead(tf.keras.layers.Layer):
kernel_regularizer=None,
bias_regularizer=None,
**kwargs):
"""Initialize params to build RetinaNet head. """Initializes a RetinaNet head.
Args: Args:
min_level: `int` number of minimum feature level. min_level: An `int` number of minimum feature level.
max_level: `int` number of maximum feature level. max_level: An `int` number of maximum feature level.
num_classes: `int` number of classes to predict. num_classes: An `int` number of classes to predict.
num_anchors_per_location: `int` number of number of anchors per pixel num_anchors_per_location: An `int` number of number of anchors per pixel
location. location.
num_convs: `int` number that represents the number of the intermediate num_convs: An `int` number that represents the number of the intermediate
conv layers before the prediction. conv layers before the prediction.
num_filters: `int` number that represents the number of filters of the num_filters: An `int` number that represents the number of filters of the
intermediate conv layers. intermediate conv layers.
use_separable_conv: `bool`, indicating whether the separable conv layers use_separable_conv: A `bool` that indicates whether the separable
is used. convolution layers is used.
activation: `string`, indicating which activation is used, e.g. 'relu', activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc. 'swish', etc.
use_sync_bn: `bool`, whether to use synchronized batch normalization use_sync_bn: A `bool` that indicates whether to use synchronized batch
across different replicas. normalization across different replicas.
norm_momentum: `float`, the momentum parameter of the normalization norm_momentum: A `float` of normalization momentum for the moving average.
layers. norm_epsilon: A `float` added to variance to avoid dividing by zero.
norm_epsilon: `float`, the epsilon parameter of the normalization layers. kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer Conv2D. Default is None.
kernal. bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias. **kwargs: Additional keyword arguments to be passed.
**kwargs: other keyword arguments passed to Layer.
""" """
super(RetinaNetHead, self).__init__(**kwargs)
self._config_dict = {

...@@ -209,21 +208,22 @@ class RetinaNetHead(tf.keras.layers.Layer):
"""Forward pass of the RetinaNet head.

Args:
  features: A `dict` of `tf.Tensor` where
    - key: A `str` of the level of the multilevel features.
    - values: A `tf.Tensor`, the feature map tensors, whose shape is
      [batch, height_l, width_l, channels].

Returns:
  scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
    - key: A `str` of the level of the multilevel predictions.
    - values: A `tf.Tensor` of the class scores predicted from a particular
      feature level, whose shape is
      [batch, height_l, width_l, num_classes * num_anchors_per_location].
  boxes: A `dict` of `tf.Tensor` which includes coordinates of the
    predictions.
    - key: A `str` of the level of the multilevel predictions.
    - values: A `tf.Tensor` of the box coordinates predicted from a
      particular feature level, whose shape is
      [batch, height_l, width_l, 4 * num_anchors_per_location].
"""
scores = {}
...@@ -260,7 +260,7 @@ class RetinaNetHead(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
class RPNHead(tf.keras.layers.Layer):
"""Creates a Region Proposal Network (RPN) head."""

def __init__(self,
min_level,

...@@ -276,29 +276,29 @@ class RPNHead(tf.keras.layers.Layer):
kernel_regularizer=None,
bias_regularizer=None,
**kwargs):
"""Initialize params to build Region Proposal Network head. """Initializes a Region Proposal Network head.
Args: Args:
min_level: `int` number of minimum feature level. min_level: An `int` number of minimum feature level.
max_level: `int` number of maximum feature level. max_level: An `int` number of maximum feature level.
num_anchors_per_location: `int` number of number of anchors per pixel num_anchors_per_location: An `int` number of number of anchors per pixel
location. location.
num_convs: `int` number that represents the number of the intermediate num_convs: An `int` number that represents the number of the intermediate
conv layers before the prediction. convolution layers before the prediction.
num_filters: `int` number that represents the number of filters of the num_filters: An `int` number that represents the number of filters of the
intermediate conv layers. intermediate convolution layers.
use_separable_conv: `bool`, indicating whether the separable conv layers use_separable_conv: A `bool` that indicates whether the separable
is used. convolution layers is used.
activation: `string`, indicating which activation is used, e.g. 'relu', activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc. 'swish', etc.
use_sync_bn: `bool`, whether to use synchronized batch normalization use_sync_bn: A `bool` that indicates whether to use synchronized batch
across different replicas. normalization across different replicas.
norm_momentum: `float`, the momentum parameter of the normalizaton layers. norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: `float`, the epsilon parameter of the normalization layers. norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
kernel. Conv2D. Default is None.
bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias. bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: other keyword arguments passed to Layer. **kwargs: Additional keyword arguments to be passed.
""" """
super(RPNHead, self).__init__(**kwargs)
self._config_dict = {

...@@ -428,6 +428,27 @@ class RPNHead(tf.keras.layers.Layer):
super(RPNHead, self).build(input_shape)

def call(self, features):
"""Forward pass of the RPN head.
Args:
features: A `dict` of `tf.Tensor` where
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor`, the feature map tensors, whose shape is [batch,
height_l, width_l, channels].
Returns:
scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
- key: A `str` of the level of the multilevel predictions.
- values: A `tf.Tensor` of the box scores predicted from a particular
feature level, whose shape is
[batch, height_l, width_l, num_classes * num_anchors_per_location].
boxes: A `dict` of `tf.Tensor` which includes coordinates of the
predictions.
- key: A `str` of the level of the multilevel predictions.
- values: A `tf.Tensor` of the box scores predicted from a particular
feature level, whose shape is
[batch, height_l, width_l, 4 * num_anchors_per_location].
"""
scores = {}
boxes = {}
for i, level in enumerate(
...
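
Example usage of the two dense prediction heads documented above (not part of
this change; the import path, image size, and channel counts are assumptions):

import tensorflow as tf
from official.vision.beta.modeling.heads import dense_prediction_heads  # assumed path

# Multilevel features for levels 3-7 of a 512x512 image, 256 channels each.
features = {
    str(level): tf.random.normal([2, 512 // 2**level, 512 // 2**level, 256])
    for level in range(3, 8)
}

retinanet_head = dense_prediction_heads.RetinaNetHead(
    min_level=3, max_level=7, num_classes=80, num_anchors_per_location=9)
scores, boxes = retinanet_head(features)
# scores['3']: [2, 64, 64, 80 * 9]; boxes['3']: [2, 64, 64, 4 * 9]

rpn_head = dense_prediction_heads.RPNHead(
    min_level=3, max_level=7, num_anchors_per_location=3)
rpn_scores, rpn_boxes = rpn_head(features)
# rpn_scores['3']: [2, 64, 64, 3]; rpn_boxes['3']: [2, 64, 64, 4 * 3]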
...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions of instance prediction heads."""

# Import libraries
import tensorflow as tf

...@@ -22,7 +22,7 @@ from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class DetectionHead(tf.keras.layers.Layer):
"""Creates a detection head."""

def __init__(self,
num_classes,

...@@ -38,31 +38,30 @@ class DetectionHead(tf.keras.layers.Layer):
kernel_regularizer=None,
bias_regularizer=None,
**kwargs):
"""Initialize params to build the detection head. """Initializes a detection head.
Args: Args:
num_classes: a integer for the number of classes. num_classes: An `int` for the number of classes.
num_convs: `int` number that represents the number of the intermediate num_convs: An `int` number that represents the number of the intermediate
conv layers before the FC layers. convolution layers before the FC layers.
num_filters: `int` number that represents the number of filters of the num_filters: An `int` number that represents the number of filters of the
intermediate conv layers. intermediate convolution layers.
use_separable_conv: `bool`, indicating whether the separable conv layers use_separable_conv: A `bool` that indicates whether the separable
is used. convolution layers is used.
num_fcs: `int` number that represents the number of FC layers before the num_fcs: An `int` number that represents the number of FC layers before
predictions. the predictions.
fc_dims: `int` number that represents the number of dimension of the FC fc_dims: An `int` number that represents the number of dimension of the FC
layers. layers.
activation: `string`, indicating which activation is used, e.g. 'relu', activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc. 'swish', etc.
use_sync_bn: `bool`, whether to use synchronized batch normalization use_sync_bn: A `bool` that indicates whether to use synchronized batch
across different replicas. normalization across different replicas.
norm_momentum: `float`, the momentum parameter of the normalization norm_momentum: A `float` of normalization momentum for the moving average.
layers. norm_epsilon: A `float` added to variance to avoid dividing by zero.
norm_epsilon: `float`, the epsilon parameter of the normalization layers. kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer Conv2D. Default is None.
kernel. bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias. **kwargs: Additional keyword arguments to be passed.
**kwargs: other keyword arguments passed to Layer.
""" """
super(DetectionHead, self).__init__(**kwargs)
self._config_dict = {

...@@ -165,18 +164,17 @@ class DetectionHead(tf.keras.layers.Layer):
super(DetectionHead, self).build(input_shape)

def call(self, inputs, training=None):
"""Box and class branches for the Mask-RCNN model. """Forward pass of box and class branches for the Mask-RCNN model.
Args: Args:
inputs: ROI features, a tensor of shape inputs: A `tf.Tensor` of the shape [batch_size, num_instances, roi_height,
[batch_size, num_instances, roi_height, roi_width, roi_channels], roi_width, roi_channels], representing the ROI features.
representing the ROI features. training: a `bool` indicating whether it is in `training` mode.
training: a boolean indicating whether it is in `training` mode.
Returns: Returns:
class_outputs: a tensor with a shape of class_outputs: A `tf.Tensor` of the shape
[batch_size, num_rois, num_classes], representing the class predictions. [batch_size, num_rois, num_classes], representing the class predictions.
box_outputs: a tensor with a shape of box_outputs: A `tf.Tensor` of the shape
[batch_size, num_rois, num_classes * 4], representing the box [batch_size, num_rois, num_classes * 4], representing the box
predictions. predictions.
""" """
...@@ -211,7 +209,7 @@ class DetectionHead(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
class MaskHead(tf.keras.layers.Layer):
"""Creates a mask head."""

def __init__(self,
num_classes,

...@@ -227,31 +225,30 @@ class MaskHead(tf.keras.layers.Layer):
bias_regularizer=None,
class_agnostic=False,
**kwargs):
"""Initialize params to build the mask head. """Initializes a mask head.
Args: Args:
num_classes: `int`, the number of classes. num_classes: An `int` of the number of classes.
upsample_factor: `int`, >= 1, the upsample factor to generate the upsample_factor: An `int` that indicates the upsample factor to generate
final predicted masks. the final predicted masks. It should be >= 1.
num_convs: `int` number that represents the number of the intermediate num_convs: An `int` number that represents the number of the intermediate
conv layers before the mask prediction layers. convolution layers before the mask prediction layers.
num_filters: `int` number that represents the number of filters of the num_filters: An `int` number that represents the number of filters of the
intermediate conv layers. intermediate convolution layers.
use_separable_conv: `bool`, indicating whether the separable conv layers use_separable_conv: A `bool` that indicates whether the separable
is used. convolution layers is used.
activation: `string`, indicating which activation is used, e.g. 'relu', activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc. 'swish', etc.
use_sync_bn: `bool`, whether to use synchronized batch normalization use_sync_bn: A `bool` that indicates whether to use synchronized batch
across different replicas. normalization across different replicas.
norm_momentum: `float`, the momentum parameter of the normalization norm_momentum: A `float` of normalization momentum for the moving average.
layers. norm_epsilon: A `float` added to variance to avoid dividing by zero.
norm_epsilon: `float`, the epsilon parameter of the normalization layers. kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer Conv2D. Default is None.
kernel. bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias. class_agnostic: A `bool`. If set, we use a single channel mask head that
class_agnostic: `bool`, if set, we use a single channel mask head that
is shared between all classes. is shared between all classes.
**kwargs: other keyword arguments passed to Layer. **kwargs: Additional keyword arguments to be passed.
""" """
super(MaskHead, self).__init__(**kwargs)
self._config_dict = {

...@@ -368,19 +365,18 @@ class MaskHead(tf.keras.layers.Layer):
super(MaskHead, self).build(input_shape)

def call(self, inputs, training=None):
"""Mask branch for the Mask-RCNN model. """Forward pass of mask branch for the Mask-RCNN model.
Args: Args:
inputs: a list of two tensors inputs: A `list` of two tensors where
inputs[0]: ROI features, a tensor of shape inputs[0]: A `tf.Tensor` of shape [batch_size, num_instances,
[batch_size, num_instances, roi_height, roi_width, roi_channels], roi_height, roi_width, roi_channels], representing the ROI features.
representing the ROI features. inputs[1]: A `tf.Tensor` of shape [batch_size, num_instances],
inputs[1]: ROI classes, a tensor of shape representing the classes of the ROIs.
[batch_size, num_instances], representing the classes of the ROIs. training: A `bool` indicating whether it is in `training` mode.
training: a boolean indicating whether it is in `training` mode.
Returns: Returns:
mask_outputs: a tensor of shape mask_outputs: A `tf.Tensor` of shape
[batch_size, num_instances, roi_height * upsample_factor, [batch_size, num_instances, roi_height * upsample_factor,
roi_width * upsample_factor], representing the mask predictions. roi_width * upsample_factor], representing the mask predictions.
""" """
......
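
Example usage of the instance prediction heads documented above (not part of
this change; the import path and ROI shapes are assumptions):

import tensorflow as tf
from official.vision.beta.modeling.heads import instance_heads  # assumed path

detection_head = instance_heads.DetectionHead(num_classes=91)
roi_features = tf.random.normal([2, 100, 7, 7, 256])
class_outputs, box_outputs = detection_head(roi_features, training=False)
# class_outputs: [2, 100, 91]; box_outputs: [2, 100, 91 * 4]

mask_head = instance_heads.MaskHead(num_classes=91, upsample_factor=2)
mask_roi_features = tf.random.normal([2, 100, 14, 14, 256])
roi_classes = tf.random.uniform([2, 100], maxval=91, dtype=tf.int32)
mask_outputs = mask_head([mask_roi_features, roi_classes], training=False)
# mask_outputs: [2, 100, 28, 28] per the call docstring above.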
...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions of segmentation heads."""

import tensorflow as tf

...@@ -23,7 +23,7 @@ from official.vision.beta.ops import spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class SegmentationHead(tf.keras.layers.Layer):
"""Creates a segmentation head."""

def __init__(self,
num_classes,

...@@ -41,38 +41,37 @@ class SegmentationHead(tf.keras.layers.Layer):
kernel_regularizer=None,
bias_regularizer=None,
**kwargs):
"""Initialize params to build segmentation head. """Initializes a segmentation head.
Args: Args:
num_classes: `int` number of mask classification categories. The number of num_classes: An `int` number of mask classification categories. The number
classes does not include background class. of classes does not include background class.
level: `int` or `str`, level to use to build segmentation head. level: An `int` or `str`, level to use to build segmentation head.
num_convs: `int` number of stacked convolution before the last prediction num_convs: An `int` number of stacked convolution before the last
layer. prediction layer.
num_filters: `int` number to specify the number of filters used. num_filters: An `int` number to specify the number of filters used.
Default is 256. Default is 256.
upsample_factor: `int` number to specify the upsampling factor to generate upsample_factor: An `int` number to specify the upsampling factor to
finer mask. Default 1 means no upsampling is applied. generate finer mask. Default 1 means no upsampling is applied.
feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, or None. If feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, or None. If
`deeplabv3plus`, features from decoder_features[level] will be fused `deeplabv3plus`, features from decoder_features[level] will be fused
with low level feature maps from backbone. If `pyramid_fusion`, with low level feature maps from backbone. If `pyramid_fusion`,
multiscale features will be resized and fused at the target level. multiscale features will be resized and fused at the target level.
low_level: `int`, backbone level to be used for feature fusion. This arg low_level: An `int` of backbone level to be used for feature fusion. It is
is used when feature_fusion is set to deeplabv3plus. used when feature_fusion is set to `deeplabv3plus`.
low_level_num_filters: `int`, reduced number of filters for the low low_level_num_filters: An `int` of reduced number of filters for the low
level features before fusing it with higher level features. This args is level features before fusing it with higher level features. It is only
only used when feature_fusion is set to deeplabv3plus. used when feature_fusion is set to `deeplabv3plus`.
activation: `string`, indicating which activation is used, e.g. 'relu', activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc. 'swish', etc.
use_sync_bn: `bool`, whether to use synchronized batch normalization use_sync_bn: A `bool` that indicates whether to use synchronized batch
across different replicas. normalization across different replicas.
norm_momentum: `float`, the momentum parameter of the normalization norm_momentum: A `float` of normalization momentum for the moving average.
layers. norm_epsilon: A `float` added to variance to avoid dividing by zero.
norm_epsilon: `float`, the epsilon parameter of the normalization layers. kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
kernel_regularizer: `tf.keras.regularizers.Regularizer` object for layer Conv2D. Default is None.
kernel. bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
bias_regularizer: `tf.keras.regularizers.Regularizer` object for bias. **kwargs: Additional keyword arguments to be passed.
**kwargs: other keyword arguments passed to Layer.
""" """
super(SegmentationHead, self).__init__(**kwargs)

...@@ -160,17 +159,17 @@ class SegmentationHead(tf.keras.layers.Layer):
"""Forward pass of the segmentation head.

Args:
  backbone_output: A `dict` of tensors
    - key: A `str` of the level of the multilevel features.
    - values: A `tf.Tensor` of the feature map tensors, whose shape is
      [batch, height_l, width_l, channels].
  decoder_output: A `dict` of tensors
    - key: A `str` of the level of the multilevel features.
    - values: A `tf.Tensor` of the feature map tensors, whose shape is
      [batch, height_l, width_l, channels].

Returns:
  segmentation prediction mask: A `tf.Tensor` of the segmentation mask
    scores predicted from input features.
"""
if self._config_dict['feature_fusion'] == 'deeplabv3plus':
# deeplabv3+ feature fusion
...
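
Hedged sketch of the segmentation head documented above, following the
two-argument forward pass in its call docstring (the import path, level choice,
and shapes are assumptions):

import tensorflow as tf
from official.vision.beta.modeling.heads import segmentation_heads  # assumed path

seg_head = segmentation_heads.SegmentationHead(num_classes=21, level=3)
backbone_output = {
    '2': tf.random.normal([2, 128, 128, 64]),
    '3': tf.random.normal([2, 64, 64, 128]),
}
decoder_output = {'3': tf.random.normal([2, 64, 64, 256])}
logits = seg_head(backbone_output, decoder_output)
# logits: [2, 64, 64, 21] segmentation mask scores at the selected level.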