Merge branch 'master' of https://github.com/tensorflow/models

78c43ef1 · Gunho Park · 67cfc95b · e3c7e300 · 78c43ef1 · 78c43ef1
Commit 78c43ef1 authored Jul 26, 2021 by Gunho Park
20 changed files
--- a/official/vision/beta/modeling/backbones/spinenet.py
+++ b/official/vision/beta/modeling/backbones/spinenet.py
@@ -393,8 +393,10 @@ class SpineNet(tf.keras.Model):
              block_spec.level))
        if (block_spec.level < self._min_level or
            block_spec.level > self._max_level):
-          raise ValueError('Output level is out of range [{}, {}]'.format(
+          logging.warning(
-              self._min_level, self._max_level))
+              'SpineNet output level out of range [min_level, max_level] = '
+              '[%s, %s] will not be used for further processing.',
+              self._min_level, self._max_level)
        endpoints[str(block_spec.level)] = x
    return endpoints

--- a/official/vision/beta/modeling/backbones/spinenet_mobile.py
+++ b/official/vision/beta/modeling/backbones/spinenet_mobile.py
@@ -152,6 +152,7 @@ class SpineNetMobile(tf.keras.Model):
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
+      use_keras_upsampling_2d: bool = False,
      **kwargs):
    """Initializes a Mobile SpineNet model.
@@ -181,6 +182,7 @@ class SpineNetMobile(tf.keras.Model):
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A small `float` added to variance to avoid dividing by zero.
+      use_keras_upsampling_2d: If True, use keras UpSampling2D layer.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._input_specs = input_specs
@@ -200,12 +202,7 @@ class SpineNetMobile(tf.keras.Model):
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
-    if activation == 'relu':
+    self._use_keras_upsampling_2d = use_keras_upsampling_2d
-      self._activation_fn = tf.nn.relu
-    elif activation == 'swish':
-      self._activation_fn = tf.nn.swish
-    else:
-      raise ValueError('Activation {} not implemented.'.format(activation))
    self._num_init_blocks = 2
    if use_sync_bn:
@@ -271,7 +268,7 @@ class SpineNetMobile(tf.keras.Model):
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(
              inputs)
-    return tf.identity(x, name=name)
+    return tf.keras.layers.Activation('linear', name=name)(x)
  def _build_stem(self, inputs):
    """Builds SpineNet stem."""
@@ -290,7 +287,7 @@ class SpineNetMobile(tf.keras.Model):
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(
            x)
-    x = tf_utils.get_activation(self._activation_fn)(x)
+    x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
    net = []
    stem_strides = [1, 2]
@@ -365,14 +362,15 @@ class SpineNetMobile(tf.keras.Model):
        parent_weights = [
            tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
                i, j)), dtype=dtype)) for j in range(len(parents))]
-        weights_sum = tf.add_n(parent_weights)
+        weights_sum = layers.Add()(parent_weights)
        parents = [
            parents[i] * parent_weights[i] / (weights_sum + 0.0001)
            for i in range(len(parents))
        ]
      # Fuse all parent nodes then build a new block.
-      x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents))
+      x = tf_utils.get_activation(
+          self._activation, use_keras_layer=True)(layers.Add()(parents))
      x = self._block_group(
          inputs=x,
          in_filters=target_num_filters,
@@ -421,7 +419,7 @@ class SpineNetMobile(tf.keras.Model):
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(
              x)
-      x = tf_utils.get_activation(self._activation_fn)(x)
+      x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
      endpoints[str(level)] = x
    return endpoints
@@ -446,11 +444,13 @@ class SpineNetMobile(tf.keras.Model):
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon)(
                x)
-        x = tf_utils.get_activation(self._activation_fn)(x)
+        x = tf_utils.get_activation(
+            self._activation, use_keras_layer=True)(x)
        input_width /= 2
    elif input_width < target_width:
      scale = target_width // input_width
-      x = spatial_transform_ops.nearest_upsampling(x, scale=scale)
+      x = spatial_transform_ops.nearest_upsampling(
+          x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)
    # Last 1x1 conv to match filter size.
    x = layers.Conv2D(
@@ -485,7 +485,8 @@ class SpineNetMobile(tf.keras.Model):
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon
+        'norm_epsilon': self._norm_epsilon,
+        'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
    }
    return config_dict
@@ -531,4 +532,5 @@ def build_spinenet_mobile(
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
-      norm_epsilon=norm_activation_config.norm_epsilon)
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)
--- a/official/vision/beta/modeling/backbones/spinenet_mobile_test.py
+++ b/official/vision/beta/modeling/backbones/spinenet_mobile_test.py
@@ -90,6 +90,7 @@ class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
+        use_keras_upsampling_2d=False,
    )
    network = spinenet_mobile.SpineNetMobile(**kwargs)

--- a/official/vision/beta/modeling/backbones/spinenet_test.py
+++ b/official/vision/beta/modeling/backbones/spinenet_test.py
@@ -24,17 +24,16 @@ from official.vision.beta.modeling.backbones import spinenet
 class SpineNetTest(parameterized.TestCase, tf.test.TestCase):
  @parameterized.parameters(
-      (128, 0.65, 1, 0.5, 128),
+      (128, 0.65, 1, 0.5, 128, 4, 6),
-      (256, 1.0, 1, 0.5, 256),
+      (256, 1.0, 1, 0.5, 256, 3, 6),
-      (384, 1.0, 2, 0.5, 256),
+      (384, 1.0, 2, 0.5, 256, 4, 7),
-      (512, 1.0, 3, 1.0, 256),
+      (512, 1.0, 3, 1.0, 256, 3, 7),
-      (640, 1.3, 4, 1.0, 384),
+      (640, 1.3, 4, 1.0, 384, 3, 7),
  )
  def test_network_creation(self, input_size, filter_size_scale, block_repeats,
-                            resample_alpha, endpoints_num_filters):
+                            resample_alpha, endpoints_num_filters, min_level,
+                            max_level):
    """Test creation of SpineNet models."""
-    min_level = 3
-    max_level = 7
    tf.keras.backend.set_image_data_format('channels_last')

--- a/official/vision/beta/modeling/decoders/aspp.py
+++ b/official/vision/beta/modeling/decoders/aspp.py
@@ -13,12 +13,15 @@
 # limitations under the License.
 """Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder."""
-from typing import Any, List, Optional, Mapping
+from typing import Any, List, Mapping, Optional
 # Import libraries
 import tensorflow as tf
+from official.modeling import hyperparams
 from official.vision import keras_cv
+from official.vision.beta.modeling.decoders import factory
 @tf.keras.utils.register_keras_serializable(package='Vision')
@@ -128,3 +131,46 @@ class ASPP(tf.keras.layers.Layer):
  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
+@factory.register_decoder_builder('aspp')
+def build_aspp_decoder(
+    input_specs: Mapping[str, tf.TensorShape],
+    model_config: hyperparams.Config,
+    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
+) -> tf.keras.Model:
+  """Builds ASPP decoder from a config.
+  Args:
+    input_specs: A `dict` of input specifications. A dictionary consists of
+      {level: TensorShape} from a backbone. Note this is for a consistent
+      interface, and is not used by the ASPP decoder.
+    model_config: A OneOfConfig. Model config.
+    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
+      None.
+  Returns:
+    A `tf.keras.Model` instance of the ASPP decoder.
+  Raises:
+    ValueError: If the model_config.decoder.type is not `aspp`.
+  """
+  del input_specs  # input_specs is not used by ASPP decoder.
+  decoder_type = model_config.decoder.type
+  decoder_cfg = model_config.decoder.get()
+  if decoder_type != 'aspp':
+    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
+                     'Need to be `aspp`.')
+  norm_activation_config = model_config.norm_activation
+  return ASPP(
+      level=decoder_cfg.level,
+      dilation_rates=decoder_cfg.dilation_rates,
+      num_filters=decoder_cfg.num_filters,
+      pool_kernel_size=decoder_cfg.pool_kernel_size,
+      dropout_rate=decoder_cfg.dropout_rate,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      activation=norm_activation_config.activation,
+      kernel_regularizer=l2_regularizer)
--- a/official/vision/beta/modeling/decoders/factory.py
+++ b/official/vision/beta/modeling/decoders/factory.py
@@ -12,80 +12,124 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Lint as: python3
+"""Decoder registers and factory method.
-"""Contains the factory method to create decoders."""
-from typing import Mapping, Optional
+One can register a new decoder model by the following two steps:
+1 Import the factory and register the build in the decoder file.
+2 Import the decoder class and add a build in __init__.py.
+```
+# my_decoder.py
+from modeling.decoders import factory
+class MyDecoder():
+  ...
+@factory.register_decoder_builder('my_decoder')
+def build_my_decoder():
+  return MyDecoder()
+# decoders/__init__.py adds import
+from modeling.decoders.my_decoder import MyDecoder
+```
+If one wants the MyDecoder class to be used only by a specific binary,
+then don't import the decoder module in decoders/__init__.py, but import it
+in the place that uses it.
+"""
+from typing import Any, Callable, Mapping, Optional, Union
 # Import libraries
 import tensorflow as tf
+from official.core import registry
 from official.modeling import hyperparams
-from official.vision.beta.modeling import decoders
+_REGISTERED_DECODER_CLS = {}
+def register_decoder_builder(key: str) -> Callable[..., Any]:
+  """Decorates a builder of decoder class.
+  The builder should be a Callable (a class or a function).
+  This decorator supports registration of decoder builder as follows:
+  ```
+  class MyDecoder(tf.keras.Model):
+    pass
+  @register_decoder_builder('mydecoder')
+  def builder(input_specs, config, l2_reg):
+    return MyDecoder(...)
+  # Builds a MyDecoder object.
+  my_decoder = build_decoder(input_specs, config, l2_reg)
+  ```
+  Args:
+    key: A `str` of key to look up the builder.
+  Returns:
+    A callable for use as a decorator that registers the decorated builder
+    under `key`, so that it can later be looked up by that key.
+  """
+  return registry.register(_REGISTERED_DECODER_CLS, key)
+@register_decoder_builder('identity')
+def build_identity(
+    input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
+    model_config: Optional[hyperparams.Config] = None,
+    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None) -> None:
+  """Builds identity decoder from a config.
+  All the input arguments are not used by identity decoder but kept here to
+  ensure the interface is consistent.
+  Args:
+    input_specs: A `dict` of input specifications. A dictionary consists of
+      {level: TensorShape} from a backbone.
+    model_config: A `OneOfConfig` of model config.
+    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
+      None.
+  Returns:
+    None. The identity decoder is represented by the absence of a decoder.
+  """
+  del input_specs, model_config, l2_regularizer  # Unused by identity decoder.
 def build_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
-    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
+    l2_regularizer: tf.keras.regularizers.Regularizer = None,
-) -> tf.keras.Model:
+    **kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:
  """Builds decoder from a config.
+  A decoder can be a keras.Model, a keras.layers.Layer, or None. If it is not
+  None, the decoder will take features from the backbone as input and generate
+  decoded feature maps. If it is None, such as an identity decoder, the decoder
+  is skipped and features from the backbone are regarded as model output.
  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
-    model_config: A OneOfConfig. Model config.
+    model_config: A `OneOfConfig` of model config.
-    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
+    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
      None.
+    **kwargs: Additional keyword args to be passed to decoder builder.
  Returns:
-    A `tf.keras.Model` instance of the decoder.
+    An instance of the decoder.
  """
-  decoder_type = model_config.decoder.type
+  decoder_builder = registry.lookup(_REGISTERED_DECODER_CLS,
-  decoder_cfg = model_config.decoder.get()
+                                    model_config.decoder.type)
-  norm_activation_config = model_config.norm_activation
+  return decoder_builder(
-  if decoder_type == 'identity':
+      input_specs=input_specs,
-    decoder = None
+      model_config=model_config,
-  elif decoder_type == 'fpn':
+      l2_regularizer=l2_regularizer,
-    decoder = decoders.FPN(
+      **kwargs)
-        input_specs=input_specs,
-        min_level=model_config.min_level,
-        max_level=model_config.max_level,
-        num_filters=decoder_cfg.num_filters,
-        use_separable_conv=decoder_cfg.use_separable_conv,
-        activation=norm_activation_config.activation,
-        use_sync_bn=norm_activation_config.use_sync_bn,
-        norm_momentum=norm_activation_config.norm_momentum,
-        norm_epsilon=norm_activation_config.norm_epsilon,
-        kernel_regularizer=l2_regularizer)
-  elif decoder_type == 'nasfpn':
-    decoder = decoders.NASFPN(
-        input_specs=input_specs,
-        min_level=model_config.min_level,
-        max_level=model_config.max_level,
-        num_filters=decoder_cfg.num_filters,
-        num_repeats=decoder_cfg.num_repeats,
-        use_separable_conv=decoder_cfg.use_separable_conv,
-        activation=norm_activation_config.activation,
-        use_sync_bn=norm_activation_config.use_sync_bn,
-        norm_momentum=norm_activation_config.norm_momentum,
-        norm_epsilon=norm_activation_config.norm_epsilon,
-        kernel_regularizer=l2_regularizer)
-  elif decoder_type == 'aspp':
-    decoder = decoders.ASPP(
-        level=decoder_cfg.level,
-        dilation_rates=decoder_cfg.dilation_rates,
-        num_filters=decoder_cfg.num_filters,
-        pool_kernel_size=decoder_cfg.pool_kernel_size,
-        dropout_rate=decoder_cfg.dropout_rate,
-        use_sync_bn=norm_activation_config.use_sync_bn,
-        norm_momentum=norm_activation_config.norm_momentum,
-        norm_epsilon=norm_activation_config.norm_epsilon,
-        activation=norm_activation_config.activation,
-        kernel_regularizer=l2_regularizer)
-  else:
-    raise ValueError('Decoder {!r} not implement'.format(decoder_type))
-  return decoder
--- a/official/vision/beta/modeling/decoders/factory_test.py
+++ b/official/vision/beta/modeling/decoders/factory_test.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for decoder factory functions."""
+from absl.testing import parameterized
+import tensorflow as tf
+from tensorflow.python.distribute import combinations
+from official.vision.beta import configs
+from official.vision.beta.configs import decoders as decoders_cfg
+from official.vision.beta.modeling import decoders
+from official.vision.beta.modeling.decoders import factory
+class FactoryTest(tf.test.TestCase, parameterized.TestCase):
+  @combinations.generate(
+      combinations.combine(
+          num_filters=[128, 256], use_separable_conv=[True, False]))
+  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
+    """Test creation of FPN decoder."""
+    min_level = 3
+    max_level = 7
+    input_specs = {}
+    for level in range(min_level, max_level):
+      input_specs[str(level)] = tf.TensorShape(
+          [1, 128 // (2**level), 128 // (2**level), 3])
+    network = decoders.FPN(
+        input_specs=input_specs,
+        num_filters=num_filters,
+        use_separable_conv=use_separable_conv,
+        use_sync_bn=True)
+    model_config = configs.retinanet.RetinaNet()
+    model_config.min_level = min_level
+    model_config.max_level = max_level
+    model_config.num_classes = 10
+    model_config.input_size = [None, None, 3]
+    model_config.decoder = decoders_cfg.Decoder(
+        type='fpn',
+        fpn=decoders_cfg.FPN(
+            num_filters=num_filters, use_separable_conv=use_separable_conv))
+    factory_network = factory.build_decoder(
+        input_specs=input_specs, model_config=model_config)
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+    self.assertEqual(network_config, factory_network_config)
+  @combinations.generate(
+      combinations.combine(
+          num_filters=[128, 256],
+          num_repeats=[3, 5],
+          use_separable_conv=[True, False]))
+  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
+                                   use_separable_conv):
+    """Test creation of NASFPN decoder."""
+    min_level = 3
+    max_level = 7
+    input_specs = {}
+    for level in range(min_level, max_level):
+      input_specs[str(level)] = tf.TensorShape(
+          [1, 128 // (2**level), 128 // (2**level), 3])
+    network = decoders.NASFPN(
+        input_specs=input_specs,
+        num_filters=num_filters,
+        num_repeats=num_repeats,
+        use_separable_conv=use_separable_conv,
+        use_sync_bn=True)
+    model_config = configs.retinanet.RetinaNet()
+    model_config.min_level = min_level
+    model_config.max_level = max_level
+    model_config.num_classes = 10
+    model_config.input_size = [None, None, 3]
+    model_config.decoder = decoders_cfg.Decoder(
+        type='nasfpn',
+        nasfpn=decoders_cfg.NASFPN(
+            num_filters=num_filters,
+            num_repeats=num_repeats,
+            use_separable_conv=use_separable_conv))
+    factory_network = factory.build_decoder(
+        input_specs=input_specs, model_config=model_config)
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+    self.assertEqual(network_config, factory_network_config)
+  @combinations.generate(
+      combinations.combine(
+          level=[3, 4],
+          dilation_rates=[[6, 12, 18], [6, 12]],
+          num_filters=[128, 256]))
+  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
+    """Test creation of ASPP decoder."""
+    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}
+    network = decoders.ASPP(
+        level=level,
+        dilation_rates=dilation_rates,
+        num_filters=num_filters,
+        use_sync_bn=True)
+    model_config = configs.semantic_segmentation.SemanticSegmentationModel()
+    model_config.num_classes = 10
+    model_config.input_size = [None, None, 3]
+    model_config.decoder = decoders_cfg.Decoder(
+        type='aspp',
+        aspp=decoders_cfg.ASPP(
+            level=level, dilation_rates=dilation_rates,
+            num_filters=num_filters))
+    factory_network = factory.build_decoder(
+        input_specs=input_specs, model_config=model_config)
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+    self.assertEqual(network_config, factory_network_config)
+  def test_identity_decoder_creation(self):
+    """Test creation of identity decoder."""
+    model_config = configs.retinanet.RetinaNet()
+    model_config.num_classes = 2
+    model_config.input_size = [None, None, 3]
+    model_config.decoder = decoders_cfg.Decoder(
+        type='identity', identity=decoders_cfg.Identity())
+    factory_network = factory.build_decoder(
+        input_specs=None, model_config=model_config)
+    self.assertIsNone(factory_network)
+if __name__ == '__main__':
+  tf.test.main()
--- a/official/vision/beta/modeling/decoders/fpn.py
+++ b/official/vision/beta/modeling/decoders/fpn.py
@@ -16,9 +16,12 @@
 from typing import Any, Mapping, Optional
 # Import libraries
 import tensorflow as tf
+from official.modeling import hyperparams
 from official.modeling import tf_utils
+from official.vision.beta.modeling.decoders import factory
 from official.vision.beta.ops import spatial_transform_ops
@@ -187,3 +190,43 @@ class FPN(tf.keras.Model):
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
+@factory.register_decoder_builder('fpn')
+def build_fpn_decoder(
+    input_specs: Mapping[str, tf.TensorShape],
+    model_config: hyperparams.Config,
+    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
+) -> tf.keras.Model:
+  """Builds FPN decoder from a config.
+  Args:
+    input_specs: A `dict` of input specifications. A dictionary consists of
+      {level: TensorShape} from a backbone.
+    model_config: A OneOfConfig. Model config.
+    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
+      None.
+  Returns:
+    A `tf.keras.Model` instance of the FPN decoder.
+  Raises:
+    ValueError: If the model_config.decoder.type is not `fpn`.
+  """
+  decoder_type = model_config.decoder.type
+  decoder_cfg = model_config.decoder.get()
+  if decoder_type != 'fpn':
+    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
+                     'Need to be `fpn`.')
+  norm_activation_config = model_config.norm_activation
+  return FPN(
+      input_specs=input_specs,
+      min_level=model_config.min_level,
+      max_level=model_config.max_level,
+      num_filters=decoder_cfg.num_filters,
+      use_separable_conv=decoder_cfg.use_separable_conv,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
--- a/official/vision/beta/modeling/decoders/fpn_test.py
+++ b/official/vision/beta/modeling/decoders/fpn_test.py
@@ -19,6 +19,7 @@
 from absl.testing import parameterized
 import tensorflow as tf
+from official.vision.beta.modeling.backbones import mobilenet
 from official.vision.beta.modeling.backbones import resnet
 from official.vision.beta.modeling.decoders import fpn
@@ -52,6 +53,33 @@ class FPNTest(parameterized.TestCase, tf.test.TestCase):
          [1, input_size // 2**level, input_size // 2**level, 256],
          feats[str(level)].shape.as_list())
+  @parameterized.parameters(
+      (256, 3, 7, False),
+      (256, 3, 7, True),
+  )
+  def test_network_creation_with_mobilenet(self, input_size, min_level,
+                                           max_level, use_separable_conv):
+    """Test creation of FPN with mobilenet backbone."""
+    tf.keras.backend.set_image_data_format('channels_last')
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    backbone = mobilenet.MobileNet(model_id='MobileNetV2')
+    network = fpn.FPN(
+        input_specs=backbone.output_specs,
+        min_level=min_level,
+        max_level=max_level,
+        use_separable_conv=use_separable_conv)
+    endpoints = backbone(inputs)
+    feats = network(endpoints)
+    for level in range(min_level, max_level + 1):
+      self.assertIn(str(level), feats)
+      self.assertAllEqual(
+          [1, input_size // 2**level, input_size // 2**level, 256],
+          feats[str(level)].shape.as_list())
  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(

--- a/official/vision/beta/modeling/decoders/nasfpn.py
+++ b/official/vision/beta/modeling/decoders/nasfpn.py
@@ -13,12 +13,16 @@
 # limitations under the License.
 """Contains definitions of NAS-FPN."""
-from typing import Any, Mapping, List, Tuple, Optional
+from typing import Any, List, Mapping, Optional, Tuple
 # Import libraries
 from absl import logging
 import tensorflow as tf
+from official.modeling import hyperparams
+from official.vision.beta.modeling.decoders import factory
 from official.vision.beta.ops import spatial_transform_ops
@@ -316,3 +320,45 @@ class NASFPN(tf.keras.Model):
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
+@factory.register_decoder_builder('nasfpn')
+def build_nasfpn_decoder(
+    input_specs: Mapping[str, tf.TensorShape],
+    model_config: hyperparams.Config,
+    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
+) -> tf.keras.Model:
+  """Builds NASFPN decoder from a config.
+  Args:
+    input_specs: A `dict` of input specifications. A dictionary consists of
+      {level: TensorShape} from a backbone.
+    model_config: A OneOfConfig. Model config.
+    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
+      None.
+  Returns:
+    A `tf.keras.Model` instance of the NASFPN decoder.
+  Raises:
+    ValueError: If the model_config.decoder.type is not `nasfpn`.
+  """
+  decoder_type = model_config.decoder.type
+  decoder_cfg = model_config.decoder.get()
+  if decoder_type != 'nasfpn':
+    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
+                     'Need to be `nasfpn`.')
+  norm_activation_config = model_config.norm_activation
+  return NASFPN(
+      input_specs=input_specs,
+      min_level=model_config.min_level,
+      max_level=model_config.max_level,
+      num_filters=decoder_cfg.num_filters,
+      num_repeats=decoder_cfg.num_repeats,
+      use_separable_conv=decoder_cfg.use_separable_conv,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
--- a/official/vision/beta/modeling/factory.py
+++ b/official/vision/beta/modeling/factory.py
@@ -24,10 +24,10 @@ from official.vision.beta.configs import retinanet as retinanet_cfg
 from official.vision.beta.configs import semantic_segmentation as segmentation_cfg
 from official.vision.beta.modeling import backbones
 from official.vision.beta.modeling import classification_model
+from official.vision.beta.modeling import decoders
 from official.vision.beta.modeling import maskrcnn_model
 from official.vision.beta.modeling import retinanet_model
 from official.vision.beta.modeling import segmentation_model
-from official.vision.beta.modeling.decoders import factory as decoder_factory
 from official.vision.beta.modeling.heads import dense_prediction_heads
 from official.vision.beta.modeling.heads import instance_heads
 from official.vision.beta.modeling.heads import segmentation_heads
@@ -78,7 +78,7 @@ def build_maskrcnn(
      l2_regularizer=l2_regularizer)
  backbone(tf.keras.Input(input_specs.shape[1:]))
-  decoder = decoder_factory.build_decoder(
+  decoder = decoders.factory.build_decoder(
      input_specs=backbone.output_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)
@@ -253,7 +253,7 @@ def build_retinanet(
      l2_regularizer=l2_regularizer)
  backbone(tf.keras.Input(input_specs.shape[1:]))
-  decoder = decoder_factory.build_decoder(
+  decoder = decoders.factory.build_decoder(
      input_specs=backbone.output_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)
@@ -313,7 +313,7 @@ def build_segmentation_model(
      norm_activation_config=norm_activation_config,
      l2_regularizer=l2_regularizer)
-  decoder = decoder_factory.build_decoder(
+  decoder = decoders.factory.build_decoder(
      input_specs=backbone.output_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)

--- a/official/vision/beta/modeling/layers/nn_layers.py
+++ b/official/vision/beta/modeling/layers/nn_layers.py
@@ -68,6 +68,39 @@ def round_filters(filters: int,
  return int(new_filters)
+def hard_swish(x: tf.Tensor) -> tf.Tensor:
+  """A Swish6/H-Swish activation function.
+  Reference: Section 5.2 of Howard et al. "Searching for MobileNet V3."
+  https://arxiv.org/pdf/1905.02244.pdf
+  Args:
+    x: the input tensor.
+  Returns:
+    The activation output.
+  """
+  return x * tf.nn.relu6(x + 3.) * (1. / 6.)
+tf.keras.utils.get_custom_objects().update({'hard_swish': hard_swish})
+def simple_swish(x: tf.Tensor) -> tf.Tensor:
+  """A swish/silu activation function without custom gradients.
+  Useful for exporting to SavedModel to avoid custom gradient warnings.
+  Args:
+    x: the input tensor.
+  Returns:
+    The activation output.
+  """
+  return x * tf.math.sigmoid(x)
+tf.keras.utils.get_custom_objects().update({'simple_swish': simple_swish})
 @tf.keras.utils.register_keras_serializable(package='Vision')
 class SqueezeExcitation(tf.keras.layers.Layer):
  """Creates a squeeze and excitation layer."""
@@ -706,9 +739,10 @@ class CausalConvMixin:
    self._use_buffered_input = variable
  def _compute_buffered_causal_padding(self,
-                                       inputs: Optional[tf.Tensor] = None,
+                                       inputs: tf.Tensor,
                                       use_buffered_input: bool = False,
-                                       time_axis: int = 1) -> List[List[int]]:
+                                       time_axis: int = 1,
+                                       ) -> List[List[int]]:
    """Calculates padding for 'causal' option for conv layers.
    Args:
@@ -720,7 +754,7 @@ class CausalConvMixin:
    Returns:
      A list of paddings for `tf.pad`.
    """
-    del inputs
+    input_shape = tf.shape(inputs)[1:-1]
    if tf.keras.backend.image_data_format() == 'channels_first':
      raise ValueError('"channels_first" mode is unsupported.')
@@ -730,7 +764,10 @@ class CausalConvMixin:
         (self.kernel_size[i] - 1) * (self.dilation_rate[i] - 1))
        for i in range(self.rank)
    ]
-    pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)]
+    pad_total = [kernel_size_effective[0] - 1]
+    for i in range(1, self.rank):
+      overlap = (input_shape[i] - 1) % self.strides[i] + 1
+      pad_total.append(tf.maximum(kernel_size_effective[i] - overlap, 0))
    pad_beg = [pad_total[i] // 2 for i in range(self.rank)]
    pad_end = [pad_total[i] - pad_beg[i] for i in range(self.rank)]
    padding = [[pad_beg[i], pad_end[i]] for i in range(self.rank)]
@@ -763,7 +800,8 @@ class CausalConvMixin:
    # across time should be the input shape minus any padding, assuming
    # the stride across time is 1.
    if self._use_buffered_input and spatial_output_shape[0] is not None:
-      padding = self._compute_buffered_causal_padding(use_buffered_input=False)
+      padding = self._compute_buffered_causal_padding(
+          tf.zeros([1] + spatial_output_shape + [1]), use_buffered_input=False)
      spatial_output_shape[0] -= sum(padding[1])
    return spatial_output_shape
@@ -911,15 +949,13 @@ class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
    base_config = super(Conv3D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
-  def build(self, input_shape):
+  def call(self, inputs):
-    """Builds the layer with the given input shape."""
+    """Call the layer with the given inputs."""
-    super(Conv3D, self).build(input_shape)
+    # Note: tf.nn.conv3d with depthwise kernels on CPU is currently only
+    # supported when compiling with TF graph (XLA) using tf.function, so it
-    # TODO(b/177662019): tf.nn.conv3d with depthwise kernels on CPU
+    # is compiled by default here (b/186463870).
-    # in eager mode may produce incorrect output or cause a segfault.
+    conv_fn = tf.function(super(Conv3D, self).call, jit_compile=True)
-    # To avoid this issue, compile the op to TF graph using tf.function.
+    return conv_fn(inputs)
-    self._convolution_op = tf.function(
-        self._convolution_op, experimental_compile=True)
  def _compute_causal_padding(self, inputs):
    """Computes causal padding dimensions for the given inputs."""

--- a/official/vision/beta/modeling/layers/nn_layers_test.py
+++ b/official/vision/beta/modeling/layers/nn_layers_test.py
@@ -24,6 +24,11 @@ from official.vision.beta.modeling.layers import nn_layers
 class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
+  def test_hard_swish(self):
+    """Checks the 'hard_swish' activation, looked up by name, on fixed inputs."""
+    activation = tf.keras.layers.Activation('hard_swish')
+    output = activation(tf.constant([-3, -1.5, 0, 3]))
+    # The outputs are floating point, so compare with a tolerance like the
+    # other tests in this class instead of requiring bit-exact equality.
+    self.assertAllClose(output, [0., -0.375, 0., 3.])
  def test_scale(self):
    scale = nn_layers.Scale(initializer=tf.keras.initializers.constant(10.))
    output = scale(3.)
@@ -274,14 +279,14 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
    predicted = conv3d(padded_inputs)
    expected = tf.constant(
-        [[[[[12., 12., 12.],
+        [[[[[27., 27., 27.],
            [18., 18., 18.]],
           [[18., 18., 18.],
-            [27., 27., 27.]]],
+            [12., 12., 12.]]],
-          [[[24., 24., 24.],
+          [[[54., 54., 54.],
            [36., 36., 36.]],
           [[36., 36., 36.],
-            [54., 54., 54.]]]]])
+            [24., 24., 24.]]]]])
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
@@ -311,14 +316,17 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
    predicted = conv3d(padded_inputs)
    expected = tf.constant(
-        [[[[[4.0, 4.0, 4.0],
+        [[[[[9.0, 9.0, 9.0],
            [6.0, 6.0, 6.0]],
           [[6.0, 6.0, 6.0],
-            [9.0, 9.0, 9.0]]],
+            [4.0, 4.0, 4.0]]],
-          [[[8.0, 8.0, 8.0],
+          [[[18.0, 18.0, 18.0],
            [12., 12., 12.]],
           [[12., 12., 12.],
-            [18., 18., 18.]]]]])
+            [8., 8., 8.]]]]])
+    output_shape = conv3d._spatial_output_shape([4, 4, 4])
+    self.assertAllClose(output_shape, [2, 2, 2])
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
@@ -329,5 +337,74 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
+  def test_conv3d_causal_padding_2d(self):
+    """Checks 'causal' padding against Keras 'same' padding on spatial axes.
+    The kernel has size 1 along its first dimension, so only the two remaining
+    dimensions are padded; the output is compared with a stock Keras Conv3D
+    layer that uses 'same' padding and with hard-coded values.
+    """
+    shared_kwargs = dict(
+        filters=1,
+        kernel_size=(1, 3, 3),
+        strides=(1, 2, 2),
+        kernel_initializer='ones',
+        use_bias=False,
+    )
+    causal_layer = nn_layers.Conv3D(
+        padding='causal', use_buffered_input=False, **shared_kwargs)
+    reference_layer = tf.keras.layers.Conv3D(padding='same', **shared_kwargs)
+    inputs = tf.ones([1, 1, 4, 4, 1])
+    predicted = causal_layer(inputs)
+    expected = reference_layer(inputs)
+    self.assertEqual(predicted.shape, expected.shape)
+    self.assertAllClose(predicted, expected)
+    expected_values = [[[[[9.], [6.]],
+                         [[6.], [4.]]]]]
+    self.assertAllClose(predicted, expected_values)
+  def test_conv3d_causal_padding_1d(self):
+    """Checks 'causal' padding along the first axis against a Keras Conv1D.
+    The kernel only extends along its first dimension, so the layer is
+    compared with a Keras Conv1D layer that uses 'causal' padding and with
+    hard-coded values.
+    """
+    causal_layer = nn_layers.Conv3D(
+        filters=1,
+        kernel_size=(3, 1, 1),
+        strides=(2, 1, 1),
+        padding='causal',
+        use_buffered_input=False,
+        kernel_initializer='ones',
+        use_bias=False,
+    )
+    reference_layer = tf.keras.layers.Conv1D(
+        filters=1,
+        kernel_size=3,
+        strides=2,
+        padding='causal',
+        kernel_initializer='ones',
+        use_bias=False,
+    )
+    inputs = tf.ones([1, 4, 1, 1, 1])
+    predicted = causal_layer(inputs)
+    # Drop the two singleton axes for the 1D reference layer, then restore
+    # them on its output so both results are 5D.
+    reference_output = reference_layer(tf.squeeze(inputs, axis=[2, 3]))
+    expected = tf.reshape(reference_output, [1, 2, 1, 1, 1])
+    self.assertEqual(predicted.shape, expected.shape)
+    self.assertAllClose(predicted, expected)
+    expected_values = [[[[[1.]]], [[[3.]]]]]
+    self.assertAllClose(predicted, expected_values)
 if __name__ == '__main__':
  tf.test.main()
--- a/official/vision/beta/modeling/maskrcnn_model.py
+++ b/official/vision/beta/modeling/maskrcnn_model.py
@@ -16,7 +16,6 @@
 from typing import Any, List, Mapping, Optional, Union
-# Import libraries
 import tensorflow as tf
 from official.vision.beta.ops import anchor
@@ -147,14 +146,18 @@ class MaskRCNNModel(tf.keras.Model):
    model_outputs = {}
    # Feature extraction.
-    features = self.backbone(images)
+    backbone_features = self.backbone(images)
    if self.decoder:
-      features = self.decoder(features)
+      features = self.decoder(backbone_features)
+    else:
+      features = backbone_features
    # Region proposal network.
    rpn_scores, rpn_boxes = self.rpn_head(features)
    model_outputs.update({
+        'backbone_features': backbone_features,
+        'decoder_features': features,
        'rpn_boxes': rpn_boxes,
        'rpn_scores': rpn_scores
    })

--- a/official/vision/beta/ops/augment.py
+++ b/official/vision/beta/ops/augment.py
@@ -1205,7 +1205,8 @@ class RandAugment(ImageAugment):
    self.magnitude = float(magnitude)
    self.cutout_const = float(cutout_const)
    self.translate_const = float(translate_const)
-    self.prob_to_apply = prob_to_apply
+    self.prob_to_apply = (
+        float(prob_to_apply) if prob_to_apply is not None else None)
    self.available_ops = [
        'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize', 'Solarize',
        'Color', 'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'ShearY',

--- a/official/vision/beta/ops/spatial_transform_ops.py
+++ b/official/vision/beta/ops/spatial_transform_ops.py
@@ -198,7 +198,8 @@ def multilevel_crop_and_resize(features,
    # Assigns boxes to the right level.
    box_width = boxes[:, :, 3] - boxes[:, :, 1]
    box_height = boxes[:, :, 2] - boxes[:, :, 0]
-    areas_sqrt = tf.cast(tf.sqrt(box_height * box_width), tf.float32)
+    areas_sqrt = tf.sqrt(
+        tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32))
    levels = tf.cast(
        tf.math.floordiv(
            tf.math.log(tf.divide(areas_sqrt, 224.0)),
@@ -456,6 +457,12 @@ def crop_mask_in_target_box(masks,
    [batch_size, num_boxes, output_size, output_size].
  """
  with tf.name_scope('crop_mask_in_target_box'):
+    # Cast to float32, as the y_transform and other transform variables may
+    # overflow in float16
+    masks = tf.cast(masks, tf.float32)
+    boxes = tf.cast(boxes, tf.float32)
+    target_boxes = tf.cast(target_boxes, tf.float32)
    batch_size, num_masks, height, width = masks.get_shape().as_list()
    if batch_size is None:
      batch_size = tf.shape(masks)[0]
@@ -504,18 +511,22 @@ def crop_mask_in_target_box(masks,
  return cropped_masks
-def nearest_upsampling(data, scale):
+def nearest_upsampling(data, scale, use_keras_layer=False):
  """Nearest neighbor upsampling implementation.
  Args:
    data: A tensor with a shape of [batch, height_in, width_in, channels].
    scale: An integer multiple to scale resolution of input data.
+    use_keras_layer: If True, use keras Upsampling2D layer.
  Returns:
    data_up: A tensor with a shape of
      [batch, height_in*scale, width_in*scale, channels]. Same dtype as input
      data.
  """
+  if use_keras_layer:
+    return tf.keras.layers.UpSampling2D(size=(scale, scale),
+                                        interpolation='nearest')(data)
  with tf.name_scope('nearest_upsampling'):
    bs, _, _, c = data.get_shape().as_list()
    shape = tf.shape(input=data)

--- a/official/vision/beta/projects/example/example_config.py
+++ b/official/vision/beta/projects/example/example_config.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Example experiment configuration definition."""
+from typing import List
+import dataclasses
+from official.core import config_definitions as cfg
+from official.core import exp_factory
+from official.modeling import hyperparams
+from official.modeling import optimization
+@dataclasses.dataclass
+class ExampleDataConfig(cfg.DataConfig):
+  """Input config for training. Add more fields as needed."""
+  # input_path and global_batch_size default to empty/zero; the experiment
+  # factory in this file sets both explicitly for each split.
+  input_path: str = ''
+  global_batch_size: int = 0
+  # ExampleTask builds one config with is_training=True (train) and one with
+  # is_training=False (validation).
+  is_training: bool = True
+  dtype: str = 'float32'
+  shuffle_buffer_size: int = 10000
+  cycle_length: int = 10
+  # NOTE(review): presumably selects the dataset reader -- confirm the
+  # accepted values against cfg.DataConfig.
+  file_type: str = 'tfrecord'
+@dataclasses.dataclass
+class ExampleModel(hyperparams.Config):
+  """The model config. Used by build_example_model function."""
+  # Defaults to 0; the example experiment in this file sets 10.
+  num_classes: int = 0
+  # default_factory gives every instance its own list rather than one shared
+  # list object. The example experiment sets [128, 128, 3]; presumably
+  # [height, width, channels] -- confirm against build_example_model.
+  input_size: List[int] = dataclasses.field(default_factory=list)
+@dataclasses.dataclass
+class Losses(hyperparams.Config):
+  # Loss-related settings for ExampleTask.
+  # Defaults to 0.0; the example experiment in this file sets 1e-4.
+  l2_weight_decay: float = 0.0
+@dataclasses.dataclass
+class Evaluation(hyperparams.Config):
+  # Evaluation-related settings for ExampleTask.
+  # NOTE(review): presumably the k of a top-k accuracy metric -- confirm
+  # against the task that reads this config.
+  top_k: int = 5
+@dataclasses.dataclass
+class ExampleTask(cfg.TaskConfig):
+  """The task config.
+  Sub-configs are created through `default_factory` so that every
+  `ExampleTask` owns its own instances. The sub-config classes are mutable
+  dataclasses; using an instance directly as a field default would share it
+  between all `ExampleTask` objects, and the dataclasses module rejects such
+  unhashable defaults with a ValueError on Python 3.11 and later.
+  """
+  model: ExampleModel = dataclasses.field(default_factory=ExampleModel)
+  train_data: ExampleDataConfig = dataclasses.field(
+      default_factory=lambda: ExampleDataConfig(is_training=True))
+  validation_data: ExampleDataConfig = dataclasses.field(
+      default_factory=lambda: ExampleDataConfig(is_training=False))
+  losses: Losses = dataclasses.field(default_factory=Losses)
+  evaluation: Evaluation = dataclasses.field(default_factory=Evaluation)
+@exp_factory.register_config_factory('tf_vision_example_experiment')
+def tf_vision_example_experiment() -> cfg.ExperimentConfig:
+  """Returns the full example experiment config.
+  Returns:
+    A `cfg.ExperimentConfig` combining the example task, the trainer settings
+    (including the optimizer config) and the config restrictions.
+  """
+  train_batch_size = 256
+  eval_batch_size = 256
+  steps_per_epoch = 10
+  task = ExampleTask(
+      model=ExampleModel(num_classes=10, input_size=[128, 128, 3]),
+      losses=Losses(l2_weight_decay=1e-4),
+      train_data=ExampleDataConfig(
+          input_path='/path/to/train*',
+          is_training=True,
+          global_batch_size=train_batch_size),
+      validation_data=ExampleDataConfig(
+          input_path='/path/to/valid*',
+          is_training=False,
+          global_batch_size=eval_batch_size))
+  # NOTE(review): the cosine schedule decays over 350 epochs while train_steps
+  # below covers 90 epochs -- confirm this is intended.
+  optimizer_config = optimization.OptimizationConfig({
+      'optimizer': {
+          'type': 'sgd',
+          'sgd': {
+              'momentum': 0.9
+          }
+      },
+      'learning_rate': {
+          'type': 'cosine',
+          'cosine': {
+              'initial_learning_rate': 1.6,
+              'decay_steps': 350 * steps_per_epoch
+          }
+      },
+      'warmup': {
+          'type': 'linear',
+          'linear': {
+              'warmup_steps': 5 * steps_per_epoch,
+              'warmup_learning_rate': 0
+          }
+      }
+  })
+  # Every trainer interval is expressed as a multiple of steps_per_epoch.
+  trainer = cfg.TrainerConfig(
+      steps_per_loop=steps_per_epoch,
+      summary_interval=steps_per_epoch,
+      checkpoint_interval=steps_per_epoch,
+      train_steps=90 * steps_per_epoch,
+      validation_steps=steps_per_epoch,
+      validation_interval=steps_per_epoch,
+      optimizer_config=optimizer_config)
+  restrictions = [
+      'task.train_data.is_training != None',
+      'task.validation_data.is_training != None'
+  ]
+  return cfg.ExperimentConfig(
+      task=task, trainer=trainer, restrictions=restrictions)
--- a/official/vision/beta/projects/example/example_config_local.yaml
+++ b/official/vision/beta/projects/example/example_config_local.yaml
+# Overrides for a local run: both splits use a global batch size of 64.
+# NOTE(review): the trainer step counts and learning-rate boundaries match
+# example_config_tpu.yaml although the batch size here is 64 rather than
+# 4096 -- confirm this is intended.
+task:
+  model:
+    num_classes: 1001
+    input_size: [128, 128, 3]
+  train_data:
+    # Relative file pattern; presumably resolved against the working
+    # directory -- confirm for your setup.
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 64
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 64
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  # train_steps is 200 x 312; validation, summaries and checkpoints all use a
+  # 312-step interval.
+  train_steps: 62400
+  validation_steps: 13
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'stepwise'
+      stepwise:
+        # Three boundaries with four values, each value 10x smaller than the
+        # previous one.
+        boundaries: [18750, 37500, 50000]
+        values: [0.1, 0.01, 0.001, 0.0001]
--- a/official/vision/beta/projects/example/example_config_tpu.yaml
+++ b/official/vision/beta/projects/example/example_config_tpu.yaml
+# Overrides for a TPU run: TPU distribution strategy, bfloat16 mixed
+# precision, and a global batch size of 4096 for both splits.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    num_classes: 1001
+    input_size: [128, 128, 3]
+  train_data:
+    # Relative file pattern; presumably resolved against the working
+    # directory or a storage prefix -- confirm for your setup.
+    input_path: 'imagenet-2012-tfrecord/train*'
+    is_training: true
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+  validation_data:
+    input_path: 'imagenet-2012-tfrecord/valid*'
+    is_training: false
+    global_batch_size: 4096
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  # train_steps is 200 x 312; validation, summaries and checkpoints all use a
+  # 312-step interval.
+  train_steps: 62400
+  validation_steps: 13
+  validation_interval: 312
+  steps_per_loop: 312
+  summary_interval: 312
+  checkpoint_interval: 312
+  optimizer_config:
+    optimizer:
+      type: 'sgd'
+      sgd:
+        momentum: 0.9
+    learning_rate:
+      type: 'stepwise'
+      stepwise:
+        # Three boundaries with four values, each value 10x smaller than the
+        # previous one.
+        boundaries: [18750, 37500, 50000]
+        values: [0.1, 0.01, 0.001, 0.0001]
--- a/official/vision/beta/projects/example/example_input.py
+++ b/official/vision/beta/projects/example/example_input.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Example classification decoder and parser.
+This file defines the Decoder and Parser to load data. The example is shown on
+loading standard tf.Example data but non-standard tf.Example or other data
+format can be supported by implementing proper decoder and parser.
+"""
+from typing import Mapping, List, Tuple
+# Import libraries
+import tensorflow as tf
+from official.vision.beta.dataloaders import decoder
+from official.vision.beta.dataloaders import parser
+from official.vision.beta.ops import preprocess_ops
+# Per-channel offset and scale passed to preprocess_ops.normalize_image in
+# Parser._parse_data. Each entry is a fraction multiplied by 255; presumably
+# the usual ImageNet RGB mean/stddev on a 0-255 pixel scale -- confirm.
+MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
+STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
+class Decoder(decoder.Decoder):
+  """A tf.Example decoder for classification task."""
+  def __init__(self):
+    """Initializes the decoder.
+    The constructor defines the mapping between the field name and the value
+    from an input tf.Example. For example, we define two fields for image bytes
+    and labels. There is no limit on the number of fields to decode.
+    """
+    # Both features are scalars with a default value ('' for the image bytes,
+    # -1 for the label), which is used when an example lacks the feature.
+    self._keys_to_features = {
+        'image/encoded':
+            tf.io.FixedLenFeature((), tf.string, default_value=''),
+        'image/class/label':
+            tf.io.FixedLenFeature((), tf.int64, default_value=-1)
+    }
+  def decode(self,
+             serialized_example: tf.Tensor) -> Mapping[str, tf.Tensor]:
+    """Decodes a serialized tf.Example to a dictionary.
+    This function decodes a serialized tf.Example to a dictionary. The output
+    will be consumed by `_parse_train_data` and `_parse_eval_data` in Parser.
+    Args:
+      serialized_example: A scalar string tensor holding one serialized
+        tf.Example; it is passed unchanged to `tf.io.parse_single_example`.
+    Returns:
+      A dictionary of field key name and decoded tensor mapping.
+    """
+    return tf.io.parse_single_example(
+        serialized_example, self._keys_to_features)
+class Parser(parser.Parser):
+  """Parser to parse an image and its annotations.
+  To define own Parser, client should override _parse_train_data and
+  _parse_eval_data functions, where decoded tensors are parsed with optional
+  pre-processing steps. The output from the two functions can be any structure
+  like tuple, list or dictionary.
+  """
+  def __init__(self, output_size: List[int], num_classes: int):
+    """Initializes parameters for parsing annotations in the dataset.
+    This example only takes two arguments but one can freely add as many
+    arguments as needed. For example, pre-processing and augmentations usually
+    happen in Parser, and related parameters can be passed in by this
+    constructor.
+    Args:
+      output_size: `list` of [height, width] of the output image. It must be a
+        Python list because `_parse_data` concatenates it with `[3]` to build
+        the expected image shape.
+      num_classes: `int`, number of classes.
+    """
+    self._output_size = output_size
+    self._num_classes = num_classes
+    # Images returned by `_parse_data` are converted to this dtype.
+    self._dtype = tf.float32
+  def _parse_data(
+      self, decoded_tensors: Mapping[str,
+                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
+    """Builds an (image, label) pair from the decoded tf.Example fields.
+    Args:
+      decoded_tensors: A dictionary of field key name and decoded tensor mapping
+        from Decoder.
+    Returns:
+      A tuple of (image, label) tensors.
+    """
+    label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
+    encoded_jpeg = decoded_tensors['image/encoded']
+    decoded_image = tf.io.decode_jpeg(encoded_jpeg, channels=3)
+    resized_image = tf.image.resize(
+        decoded_image,
+        self._output_size,
+        method=tf.image.ResizeMethod.BILINEAR)
+    # Pin the static shape to the requested output size plus three channels.
+    expected_shape = self._output_size + [3]
+    resized_image = tf.ensure_shape(resized_image, expected_shape)
+    # Normalizes image with mean and std pixel values.
+    normalized_image = preprocess_ops.normalize_image(
+        resized_image, offset=MEAN_RGB, scale=STDDEV_RGB)
+    image = tf.image.convert_image_dtype(normalized_image, self._dtype)
+    return image, label
+  def _parse_train_data(
+      self, decoded_tensors: Mapping[str,
+                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
+    """Produces the (image, label) pair used for training.
+    No training-specific processing is applied; the work is delegated to
+    `_parse_data`.
+    Args:
+      decoded_tensors: A dictionary of field key name and decoded tensor mapping
+        from Decoder.
+    Returns:
+      A tuple of (image, label) tensors.
+    """
+    image, label = self._parse_data(decoded_tensors)
+    return image, label
+  def _parse_eval_data(
+      self, decoded_tensors: Mapping[str,
+                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
+    """Produces the (image, label) pair used for evaluation.
+    No evaluation-specific processing is applied; the work is delegated to
+    `_parse_data`.
+    Args:
+      decoded_tensors: A dictionary of field key name and decoded tensor mapping
+        from Decoder.
+    Returns:
+      A tuple of (image, label) tensors.
+    """
+    image, label = self._parse_data(decoded_tensors)
+    return image, label