Unverified Commit 5ffcc5b6 authored by Anirudh Vegesana's avatar Anirudh Vegesana Committed by GitHub
Browse files

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tfds factory functions."""
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.dataloaders import decoder as base_decoder
from official.vision.beta.dataloaders import tfds_factory
class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
  """Tests decoder factory lookup for supported and unsupported TFDS names."""

  @parameterized.parameters(
      ('imagenet2012'),
      ('cifar10'),
      ('cifar100'),
  )
  def test_classification_decoder(self, tfds_name):
    """Supported classification datasets yield a `Decoder` instance."""
    decoder = tfds_factory.get_classification_decoder(tfds_name)
    self.assertIsInstance(decoder, base_decoder.Decoder)

  @parameterized.parameters(
      ('flowers'),
      ('coco'),
  )
  # Renamed from `test_doesnt_exit_*`: "exit" was a typo for "exist".
  def test_doesnt_exist_classification_decoder(self, tfds_name):
    """Unsupported classification datasets raise `ValueError`."""
    with self.assertRaises(ValueError):
      _ = tfds_factory.get_classification_decoder(tfds_name)

  @parameterized.parameters(
      ('coco'),
      ('coco/2014'),
      ('coco/2017'),
  )
  def test_detection_decoder(self, tfds_name):
    """Supported detection datasets yield a `Decoder` instance."""
    decoder = tfds_factory.get_detection_decoder(tfds_name)
    self.assertIsInstance(decoder, base_decoder.Decoder)

  @parameterized.parameters(
      ('pascal'),
      ('cityscapes'),
  )
  def test_doesnt_exist_detection_decoder(self, tfds_name):
    """Unsupported detection datasets raise `ValueError`."""
    with self.assertRaises(ValueError):
      _ = tfds_factory.get_detection_decoder(tfds_name)

  @parameterized.parameters(
      ('cityscapes'),
      ('cityscapes/semantic_segmentation'),
      ('cityscapes/semantic_segmentation_extra'),
  )
  def test_segmentation_decoder(self, tfds_name):
    """Supported segmentation datasets yield a `Decoder` instance."""
    decoder = tfds_factory.get_segmentation_decoder(tfds_name)
    self.assertIsInstance(decoder, base_decoder.Decoder)

  @parameterized.parameters(
      ('coco'),
      ('imagenet'),
  )
  def test_doesnt_exist_segmentation_decoder(self, tfds_name):
    """Unsupported segmentation datasets raise `ValueError`."""
    with self.assertRaises(ValueError):
      _ = tfds_factory.get_segmentation_decoder(tfds_name)
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -143,3 +143,24 @@ def create_classification_example(
int64_list=tf.train.Int64List(value=labels))),
})).SerializeToString()
return serialized_example
def create_3d_image_test_example(image_height: int, image_width: int,
                                 image_volume: int,
                                 image_channel: int) -> tf.train.Example:
  """Creates a `tf.train.Example` with a random 3D image and binary label map.

  Args:
    image_height: Height of the volume.
    image_width: Width of the volume.
    image_volume: Depth (number of slices) of the volume.
    image_channel: Number of channels.

  Returns:
    A `tf.train.Example` whose `IMAGE_KEY` feature holds the raw float32
    image bytes and whose `CLASSIFICATION_LABEL_KEY` feature holds raw
    float32 label bytes with values in {0.0, 1.0}.
  """
  images = np.random.rand(image_height, image_width, image_volume,
                          image_channel)
  images = images.astype(np.float32)
  # Binary labels. Both bounds are spelled out: the previous
  # `np.random.randint(low=2, ...)` relied on numpy treating `low` as the
  # exclusive upper bound when `high` is None, which reads as "minimum 2".
  labels = np.random.randint(
      low=0, high=2,
      size=(image_height, image_width, image_volume, image_channel))
  labels = labels.astype(np.float32)
  feature = {
      IMAGE_KEY: (tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[images.tobytes()]))),
      CLASSIFICATION_LABEL_KEY: (tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[labels.tobytes()])))
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
......@@ -393,8 +393,10 @@ class SpineNet(tf.keras.Model):
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
logging.warning(
'SpineNet output level out of range [min_level, max_level] = '
'[%s, %s] will not be used for further processing.',
self._min_level, self._max_level)
endpoints[str(block_spec.level)] = x
return endpoints
......
......@@ -152,6 +152,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
use_keras_upsampling_2d: bool = False,
**kwargs):
"""Initializes a Mobile SpineNet model.
......@@ -181,6 +182,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
use_keras_upsampling_2d: If True, use keras UpSampling2D layer.
**kwargs: Additional keyword arguments to be passed.
"""
self._input_specs = input_specs
......@@ -200,12 +202,7 @@ class SpineNetMobile(tf.keras.Model):
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation_fn = tf.nn.relu
elif activation == 'swish':
self._activation_fn = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._use_keras_upsampling_2d = use_keras_upsampling_2d
self._num_init_blocks = 2
if use_sync_bn:
......@@ -271,7 +268,7 @@ class SpineNetMobile(tf.keras.Model):
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
return tf.identity(x, name=name)
return tf.keras.layers.Activation('linear', name=name)(x)
def _build_stem(self, inputs):
"""Builds SpineNet stem."""
......@@ -290,7 +287,7 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
net = []
stem_strides = [1, 2]
......@@ -365,14 +362,15 @@ class SpineNetMobile(tf.keras.Model):
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
weights_sum = layers.Add()(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents))
x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(layers.Add()(parents))
x = self._block_group(
inputs=x,
in_filters=target_num_filters,
......@@ -421,7 +419,7 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
endpoints[str(level)] = x
return endpoints
......@@ -446,11 +444,13 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = spatial_transform_ops.nearest_upsampling(x, scale=scale)
x = spatial_transform_ops.nearest_upsampling(
x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)
# Last 1x1 conv to match filter size.
x = layers.Conv2D(
......@@ -485,7 +485,8 @@ class SpineNetMobile(tf.keras.Model):
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
'norm_epsilon': self._norm_epsilon,
'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
}
return config_dict
......@@ -531,4 +532,5 @@ def build_spinenet_mobile(
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon)
norm_epsilon=norm_activation_config.norm_epsilon,
use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)
......@@ -90,6 +90,7 @@ class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
use_keras_upsampling_2d=False,
)
network = spinenet_mobile.SpineNetMobile(**kwargs)
......
......@@ -24,17 +24,16 @@ from official.vision.beta.modeling.backbones import spinenet
class SpineNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(128, 0.65, 1, 0.5, 128),
(256, 1.0, 1, 0.5, 256),
(384, 1.0, 2, 0.5, 256),
(512, 1.0, 3, 1.0, 256),
(640, 1.3, 4, 1.0, 384),
(128, 0.65, 1, 0.5, 128, 4, 6),
(256, 1.0, 1, 0.5, 256, 3, 6),
(384, 1.0, 2, 0.5, 256, 4, 7),
(512, 1.0, 3, 1.0, 256, 3, 7),
(640, 1.3, 4, 1.0, 384, 3, 7),
)
def test_network_creation(self, input_size, filter_size_scale, block_repeats,
resample_alpha, endpoints_num_filters):
resample_alpha, endpoints_num_filters, min_level,
max_level):
"""Test creation of SpineNet models."""
min_level = 3
max_level = 7
tf.keras.backend.set_image_data_format('channels_last')
......
......@@ -26,6 +26,10 @@ from official.modeling import tf_utils
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]
# TODO(dankondratyuk): keep legacy padding until new checkpoints are trained.
# Otherwise, accuracy will be affected.
LEGACY_PADDING = True
def make_divisible(value: float,
divisor: int,
......@@ -68,6 +72,23 @@ def round_filters(filters: int,
return int(new_filters)
def hard_swish(x: tf.Tensor) -> tf.Tensor:
  """Hard-swish (a.k.a. Swish6 / H-Swish) activation.

  Computes ``x * relu6(x + 3) / 6``.

  Reference: Section 5.2 of Howard et al. "Searching for MobileNet V3."
  https://arxiv.org/pdf/1905.02244.pdf

  Args:
    x: the input tensor.

  Returns:
    The activation output.
  """
  # Keep the original left-to-right evaluation order so float results match.
  gate = tf.nn.relu6(x + 3.)
  return x * gate * (1. / 6.)


# Make the activation resolvable by name, e.g. Activation('hard_swish').
tf.keras.utils.get_custom_objects().update({'hard_swish': hard_swish})
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
"""Creates a squeeze and excitation layer."""
......@@ -706,9 +727,10 @@ class CausalConvMixin:
self._use_buffered_input = variable
def _compute_buffered_causal_padding(self,
inputs: Optional[tf.Tensor] = None,
inputs: tf.Tensor,
use_buffered_input: bool = False,
time_axis: int = 1) -> List[List[int]]:
time_axis: int = 1,
) -> List[List[int]]:
"""Calculates padding for 'causal' option for conv layers.
Args:
......@@ -720,7 +742,7 @@ class CausalConvMixin:
Returns:
A list of paddings for `tf.pad`.
"""
del inputs
input_shape = tf.shape(inputs)[1:-1]
if tf.keras.backend.image_data_format() == 'channels_first':
raise ValueError('"channels_first" mode is unsupported.')
......@@ -730,7 +752,14 @@ class CausalConvMixin:
(self.kernel_size[i] - 1) * (self.dilation_rate[i] - 1))
for i in range(self.rank)
]
pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)]
if LEGACY_PADDING:
# Apply legacy padding that does not take into account spatial strides
pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)]
else:
pad_total = [kernel_size_effective[0] - 1]
for i in range(1, self.rank):
overlap = (input_shape[i] - 1) % self.strides[i] + 1
pad_total.append(tf.maximum(kernel_size_effective[i] - overlap, 0))
pad_beg = [pad_total[i] // 2 for i in range(self.rank)]
pad_end = [pad_total[i] - pad_beg[i] for i in range(self.rank)]
padding = [[pad_beg[i], pad_end[i]] for i in range(self.rank)]
......@@ -763,7 +792,8 @@ class CausalConvMixin:
# across time should be the input shape minus any padding, assuming
# the stride across time is 1.
if self._use_buffered_input and spatial_output_shape[0] is not None:
padding = self._compute_buffered_causal_padding(use_buffered_input=False)
padding = self._compute_buffered_causal_padding(
tf.zeros([1] + spatial_output_shape + [1]), use_buffered_input=False)
spatial_output_shape[0] -= sum(padding[1])
return spatial_output_shape
......@@ -911,15 +941,13 @@ class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
base_config = super(Conv3D, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
"""Builds the layer with the given input shape."""
super(Conv3D, self).build(input_shape)
# TODO(b/177662019): tf.nn.conv3d with depthwise kernels on CPU
# in eager mode may produce incorrect output or cause a segfault.
# To avoid this issue, compile the op to TF graph using tf.function.
self._convolution_op = tf.function(
self._convolution_op, experimental_compile=True)
def call(self, inputs):
"""Call the layer with the given inputs."""
# Note: tf.nn.conv3d with depthwise kernels on CPU is currently only
# supported when compiling with TF graph (XLA) using tf.function, so it
# is compiled by default here (b/186463870).
conv_fn = tf.function(super(Conv3D, self).call, jit_compile=True)
return conv_fn(inputs)
def _compute_causal_padding(self, inputs):
"""Computes causal padding dimensions for the given inputs."""
......
......@@ -24,6 +24,15 @@ from official.vision.beta.modeling.layers import nn_layers
class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
  def setUp(self):
    super().setUp()
    # Exercise the new stride-aware causal padding path rather than the
    # legacy one (module-level flag read by the causal conv padding code).
    nn_layers.LEGACY_PADDING = False
  def test_hard_swish(self):
    """Checks hard_swish is registered with Keras and matches known values."""
    activation = tf.keras.layers.Activation('hard_swish')
    # hard_swish(x) = x * relu6(x + 3) / 6, e.g. -1.5 * 1.5 / 6 = -0.375.
    output = activation(tf.constant([-3, -1.5, 0, 3]))
    self.assertAllEqual(output, [0., -0.375, 0., 3.])
def test_scale(self):
scale = nn_layers.Scale(initializer=tf.keras.initializers.constant(10.))
output = scale(3.)
......@@ -274,14 +283,14 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted = conv3d(padded_inputs)
expected = tf.constant(
[[[[[12., 12., 12.],
[[[[[27., 27., 27.],
[18., 18., 18.]],
[[18., 18., 18.],
[27., 27., 27.]]],
[[[24., 24., 24.],
[12., 12., 12.]]],
[[[54., 54., 54.],
[36., 36., 36.]],
[[36., 36., 36.],
[54., 54., 54.]]]]])
[24., 24., 24.]]]]])
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
......@@ -311,14 +320,17 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted = conv3d(padded_inputs)
expected = tf.constant(
[[[[[4.0, 4.0, 4.0],
[[[[[9.0, 9.0, 9.0],
[6.0, 6.0, 6.0]],
[[6.0, 6.0, 6.0],
[9.0, 9.0, 9.0]]],
[[[8.0, 8.0, 8.0],
[4.0, 4.0, 4.0]]],
[[[18.0, 18.0, 18.0],
[12., 12., 12.]],
[[12., 12., 12.],
[18., 18., 18.]]]]])
[8., 8., 8.]]]]])
output_shape = conv3d._spatial_output_shape([4, 4, 4])
self.assertAllClose(output_shape, [2, 2, 2])
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
......@@ -329,5 +341,74 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
  def test_conv3d_causal_padding_2d(self):
    """Test to ensure causal padding works like standard padding.

    With a kernel of 1 along the time axis, 'causal' padding should be
    indistinguishable from Keras 'same' padding over the two spatial axes.
    Relies on `nn_layers.LEGACY_PADDING = False` set in `setUp`.
    """
    conv3d = nn_layers.Conv3D(
        filters=1,
        kernel_size=(1, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        use_buffered_input=False,
        kernel_initializer='ones',
        use_bias=False,
    )
    # Reference: stock Keras Conv3D with 'same' padding, same weights (ones).
    keras_conv3d = tf.keras.layers.Conv3D(
        filters=1,
        kernel_size=(1, 3, 3),
        strides=(1, 2, 2),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
    )
    inputs = tf.ones([1, 1, 4, 4, 1])
    predicted = conv3d(inputs)
    expected = keras_conv3d(inputs)
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
    # Hand-computed sums of ones under a 3x3 all-ones kernel with stride 2.
    self.assertAllClose(predicted,
                        [[[[[9.],
                            [6.]],
                           [[6.],
                            [4.]]]]])
  def test_conv3d_causal_padding_1d(self):
    """Test to ensure causal padding works like standard padding.

    With a 1x1 spatial kernel, a causal Conv3D striding only over time should
    reproduce a causal Conv1D applied to the (squeezed) time dimension.
    """
    conv3d = nn_layers.Conv3D(
        filters=1,
        kernel_size=(3, 1, 1),
        strides=(2, 1, 1),
        padding='causal',
        use_buffered_input=False,
        kernel_initializer='ones',
        use_bias=False,
    )
    # Reference: Keras Conv1D with built-in 'causal' padding, same weights.
    keras_conv1d = tf.keras.layers.Conv1D(
        filters=1,
        kernel_size=3,
        strides=2,
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
    )
    inputs = tf.ones([1, 4, 1, 1, 1])
    predicted = conv3d(inputs)
    # Squeeze the singleton spatial dims so Conv1D sees [batch, time, chans].
    expected = keras_conv1d(tf.squeeze(inputs, axis=[2, 3]))
    expected = tf.reshape(expected, [1, 2, 1, 1, 1])
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
    # First output sees only 1 real input (rest is causal zero padding).
    self.assertAllClose(predicted,
                        [[[[[1.]]],
                          [[[3.]]]]])
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -16,7 +16,6 @@
from typing import Any, List, Mapping, Optional, Union
# Import libraries
import tensorflow as tf
from official.vision.beta.ops import anchor
......@@ -147,14 +146,18 @@ class MaskRCNNModel(tf.keras.Model):
model_outputs = {}
# Feature extraction.
features = self.backbone(images)
backbone_features = self.backbone(images)
if self.decoder:
features = self.decoder(features)
features = self.decoder(backbone_features)
else:
features = backbone_features
# Region proposal network.
rpn_scores, rpn_boxes = self.rpn_head(features)
model_outputs.update({
'backbone_features': backbone_features,
'decoder_features': features,
'rpn_boxes': rpn_boxes,
'rpn_scores': rpn_scores
})
......
......@@ -1205,7 +1205,8 @@ class RandAugment(ImageAugment):
self.magnitude = float(magnitude)
self.cutout_const = float(cutout_const)
self.translate_const = float(translate_const)
self.prob_to_apply = prob_to_apply
self.prob_to_apply = (
float(prob_to_apply) if prob_to_apply is not None else None)
self.available_ops = [
'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize', 'Solarize',
'Color', 'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'ShearY',
......
......@@ -198,7 +198,8 @@ def multilevel_crop_and_resize(features,
# Assigns boxes to the right level.
box_width = boxes[:, :, 3] - boxes[:, :, 1]
box_height = boxes[:, :, 2] - boxes[:, :, 0]
areas_sqrt = tf.cast(tf.sqrt(box_height * box_width), tf.float32)
areas_sqrt = tf.sqrt(
tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32))
levels = tf.cast(
tf.math.floordiv(
tf.math.log(tf.divide(areas_sqrt, 224.0)),
......@@ -456,6 +457,12 @@ def crop_mask_in_target_box(masks,
[batch_size, num_boxes, output_size, output_size].
"""
with tf.name_scope('crop_mask_in_target_box'):
# Cast to float32, as the y_transform and other transform variables may
# overflow in float16
masks = tf.cast(masks, tf.float32)
boxes = tf.cast(boxes, tf.float32)
target_boxes = tf.cast(target_boxes, tf.float32)
batch_size, num_masks, height, width = masks.get_shape().as_list()
if batch_size is None:
batch_size = tf.shape(masks)[0]
......@@ -504,18 +511,22 @@ def crop_mask_in_target_box(masks,
return cropped_masks
def nearest_upsampling(data, scale):
def nearest_upsampling(data, scale, use_keras_layer=False):
"""Nearest neighbor upsampling implementation.
Args:
data: A tensor with a shape of [batch, height_in, width_in, channels].
scale: An integer multiple to scale resolution of input data.
use_keras_layer: If True, use keras Upsampling2D layer.
Returns:
data_up: A tensor with a shape of
[batch, height_in*scale, width_in*scale, channels]. Same dtype as input
data.
"""
if use_keras_layer:
return tf.keras.layers.UpSampling2D(size=(scale, scale),
interpolation='nearest')(data)
with tf.name_scope('nearest_upsampling'):
bs, _, _, c = data.get_shape().as_list()
shape = tf.shape(input=data)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example experiment configuration definition."""
from typing import List
import dataclasses
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
@dataclasses.dataclass
class ExampleDataConfig(cfg.DataConfig):
  """Input config for training. Add more fields as needed."""
  input_path: str = ''  # File pattern/path of the input data.
  global_batch_size: int = 0  # Batch size summed over all replicas.
  is_training: bool = True
  dtype: str = 'float32'
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10
  file_type: str = 'tfrecord'  # Selects the dataset reader (see build_inputs).
@dataclasses.dataclass
class ExampleModel(hyperparams.Config):
  """The model config. Used by build_example_model function."""
  num_classes: int = 0  # Number of output classes.
  input_size: List[int] = dataclasses.field(default_factory=list)  # [h, w, c].
@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss config: weight of the L2 regularization term."""
  l2_weight_decay: float = 0.0
@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  """Evaluation config: k for top-k accuracy."""
  top_k: int = 5
@dataclasses.dataclass
class ExampleTask(cfg.TaskConfig):
  """The task config.

  Sub-configs use `default_factory` so each `ExampleTask` instance gets its
  own fresh config objects; a plain class-level default instance would be
  shared (and mutable) across all tasks.
  """
  model: ExampleModel = dataclasses.field(default_factory=ExampleModel)
  train_data: ExampleDataConfig = dataclasses.field(
      default_factory=lambda: ExampleDataConfig(is_training=True))
  validation_data: ExampleDataConfig = dataclasses.field(
      default_factory=lambda: ExampleDataConfig(is_training=False))
  losses: Losses = dataclasses.field(default_factory=Losses)
  evaluation: Evaluation = dataclasses.field(default_factory=Evaluation)
@exp_factory.register_config_factory('tf_vision_example_experiment')
def tf_vision_example_experiment() -> cfg.ExperimentConfig:
  """Definition of a full example experiment.

  Registers the experiment under the name 'tf_vision_example_experiment'.

  Returns:
    A `cfg.ExperimentConfig` wiring together the example task, a SGD +
    cosine-decay trainer, and basic data configs.
  """
  train_batch_size = 256
  eval_batch_size = 256
  steps_per_epoch = 10
  config = cfg.ExperimentConfig(
      task=ExampleTask(
          model=ExampleModel(num_classes=10, input_size=[128, 128, 3]),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=ExampleDataConfig(
              input_path='/path/to/train*',
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=ExampleDataConfig(
              input_path='/path/to/valid*',
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=steps_per_epoch,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 1.6,
                      # NOTE(review): decay_steps spans 350 epochs while
                      # train_steps above is 90 epochs, so the cosine schedule
                      # never completes — confirm whether this is intentional.
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
task:
model:
num_classes: 1001
input_size: [128, 128, 3]
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 64
dtype: 'bfloat16'
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 64
dtype: 'bfloat16'
drop_remainder: false
trainer:
train_steps: 62400
validation_steps: 13
validation_interval: 312
steps_per_loop: 312
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'stepwise'
stepwise:
boundaries: [18750, 37500, 50000]
values: [0.1, 0.01, 0.001, 0.0001]
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 1001
input_size: [128, 128, 3]
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 4096
dtype: 'bfloat16'
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 4096
dtype: 'bfloat16'
drop_remainder: false
trainer:
train_steps: 62400
validation_steps: 13
validation_interval: 312
steps_per_loop: 312
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'stepwise'
stepwise:
boundaries: [18750, 37500, 50000]
values: [0.1, 0.01, 0.001, 0.0001]
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example classification decoder and parser.
This file defines the Decoder and Parser to load data. The example is shown on
loading standard tf.Example data but non-standard tf.Example or other data
format can be supported by implementing proper decoder and parser.
"""
from typing import Mapping, List, Tuple
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import preprocess_ops
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
class Decoder(decoder.Decoder):
  """A tf.Example decoder for classification task."""

  def __init__(self):
    """Initializes the decoder.

    The constructor defines the mapping between the field name and the value
    from an input tf.Example. For example, we define two fields for image bytes
    and labels. There is no limit on the number of fields to decode.
    """
    # Defaults ('' / -1) are used when a serialized example lacks the field,
    # so missing labels are distinguishable from real class ids.
    self._keys_to_features = {
        'image/encoded':
            tf.io.FixedLenFeature((), tf.string, default_value=''),
        'image/class/label':
            tf.io.FixedLenFeature((), tf.int64, default_value=-1)
    }

  def decode(self,
             serialized_example: tf.train.Example) -> Mapping[str, tf.Tensor]:
    """Decodes a tf.Example to a dictionary.

    This function decodes a serialized tf.Example to a dictionary. The output
    will be consumed by `_parse_train_data` and `_parse_validation_data` in
    Parser.

    Args:
      serialized_example: A serialized tf.Example.

    Returns:
      A dictionary of field key name and decoded tensor mapping.
    """
    return tf.io.parse_single_example(
        serialized_example, self._keys_to_features)
class Parser(parser.Parser):
  """Parser to parse an image and its annotations.

  To define own Parser, client should override _parse_train_data and
  _parse_eval_data functions, where decoded tensors are parsed with optional
  pre-processing steps. The output from the two functions can be any structure
  like tuple, list or dictionary.
  """

  def __init__(self, output_size: List[int], num_classes: int):
    """Initializes parameters for parsing annotations in the dataset.

    This example only takes two arguments but one can freely add as many
    arguments as needed. For example, pre-processing and augmentations usually
    happen in Parser, and related parameters can be passed in by this
    constructor.

    Args:
      output_size: `list` of [height, width] of the output image. A plain
        list is required since `_parse_data` concatenates it with `[3]`.
      num_classes: `int`, number of classes.
    """
    self._output_size = output_size
    # NOTE(review): stored but not read by any method in this class;
    # presumably kept for subclasses or future one-hot encoding.
    self._num_classes = num_classes
    self._dtype = tf.float32

  def _parse_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Decodes, resizes and normalizes one image; shared by train/eval paths."""
    label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
    image_bytes = decoded_tensors['image/encoded']
    image = tf.io.decode_jpeg(image_bytes, channels=3)
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    # Give the graph a static [height, width, 3] shape after resize.
    image = tf.ensure_shape(image, self._output_size + [3])
    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)
    image = tf.image.convert_image_dtype(image, self._dtype)
    return image, label

  def _parse_train_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for training.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor mapping
        from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)

  def _parse_eval_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for evaluation.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor mapping
        from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A sample model implementation.
This is only a dummy example to showcase how a model is composed. It is usually
not needed to implement a modedl from scratch. Most SoTA models can be found and
directly used from `official/vision/beta/modeling` directory.
"""
from typing import Any, Mapping
# Import libraries
import tensorflow as tf
from official.vision.beta.projects.example import example_config as example_cfg
@tf.keras.utils.register_keras_serializable(package='Vision')
class ExampleModel(tf.keras.Model):
  """A example model class.

  A model is a subclass of tf.keras.Model where layers are built in the
  constructor.
  """

  def __init__(
      self,
      num_classes: int,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      **kwargs):
    """Initializes the example model.

    All layers are defined in the constructor, and config is recorded in the
    `_config_dict` object for serialization.

    Args:
      num_classes: The number of classes in classification task.
      input_specs: A `tf.keras.layers.InputSpec` spec of the input tensor.
      **kwargs: Additional keyword arguments to be passed.
    """
    inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name)
    # Stack of stride-2 convolutions that progressively widen the channels.
    x = inputs
    for num_filters in (16, 32, 64):
      x = tf.keras.layers.Conv2D(
          filters=num_filters,
          kernel_size=3,
          strides=2,
          padding='same',
          use_bias=False)(x)
    # Classifier head: global pooling followed by two dense layers; the
    # final layer emits raw logits (no activation).
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    x = tf.keras.layers.Dense(num_classes)(x)
    super().__init__(inputs=inputs, outputs=x, **kwargs)
    self._input_specs = input_specs
    self._config_dict = {'num_classes': num_classes, 'input_specs': input_specs}

  def get_config(self) -> Mapping[str, Any]:
    """Gets the config of this model."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Constructs an instance of this model from input config."""
    return cls(**config)
def build_example_model(input_specs: tf.keras.layers.InputSpec,
                        model_config: example_cfg.ExampleModel,
                        **kwargs) -> tf.keras.Model:
  """Builds and returns the example model.

  This function is the main entry point to build a model. Commonly, it builds
  a model by assembling a backbone, decoder and head; an example of building a
  classification model lives under
  third_party/tensorflow_models/official/vision/beta/modeling/backbones/resnet.py.
  Not every model needs all three pieces — depending on the task a model can be
  as simple as this example or a more complex multi-head architecture.

  Args:
    input_specs: The specs of the input layer that defines input size.
    model_config: The config containing parameters to build a model.
    **kwargs: Additional keyword arguments to be passed.

  Returns:
    A tf.keras.Model object.
  """
  model = ExampleModel(
      num_classes=model_config.num_classes,
      input_specs=input_specs,
      **kwargs)
  return model
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example task definition for image classification."""
from typing import Any, List, Optional, Tuple, Sequence, Mapping
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.projects.example import example_config as exp_cfg
from official.vision.beta.projects.example import example_input
from official.vision.beta.projects.example import example_model
@task_factory.register_task_cls(exp_cfg.ExampleTask)
class ExampleTask(base_task.Task):
  """Class of an example task.

  A task is a subclass of base_task.Task that defines model, input, loss,
  metric and one training and evaluation step, etc.
  """

  def build_model(self) -> tf.keras.Model:
    """Builds and returns a model from the task's model config.

    Returns:
      A tf.keras.Model built by `example_model.build_example_model`.
    """
    # Leading `None` leaves the batch dimension unspecified.
    input_specs = tf.keras.layers.InputSpec(shape=[None] +
                                            self.task_config.model.input_size)
    model = example_model.build_example_model(
        input_specs=input_specs, model_config=self.task_config.model)
    return model

  def build_inputs(
      self,
      params: exp_cfg.ExampleDataConfig,
      input_context: Optional[tf.distribute.InputContext] = None
  ) -> tf.data.Dataset:
    """Builds input.

    The input from this function is a tf.data.Dataset that has gone through
    pre-processing steps, such as augmentation, batching, shuffling, etc.

    Args:
      params: The experiment config.
      input_context: An optional InputContext used by input reader.

    Returns:
      A tf.data.Dataset object.
    """
    num_classes = self.task_config.model.num_classes
    input_size = self.task_config.model.input_size
    # Decoder deserializes raw records; Parser converts decoded examples into
    # model-ready (features, labels) tensors of the configured spatial size.
    decoder = example_input.Decoder()
    parser = example_input.Parser(
        output_size=input_size[:2], num_classes=num_classes)
    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))
    dataset = reader.read(input_context=input_context)
    return dataset

  def build_losses(self,
                   labels: tf.Tensor,
                   model_outputs: tf.Tensor,
                   aux_losses: Optional[Any] = None) -> tf.Tensor:
    """Builds losses for training and validation.

    Args:
      labels: Input groundtruth labels.
      model_outputs: Output logits of the model.
      aux_losses: The auxiliary loss tensors, i.e. `losses` in tf.keras.Model.

    Returns:
      The total (scalar) loss tensor.
    """
    # Sparse categorical cross-entropy on logits, averaged over the batch.
    total_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, model_outputs, from_logits=True)
    total_loss = tf_utils.safe_mean(total_loss)
    if aux_losses:
      # Include model-internal losses (e.g. weight regularization), if any.
      total_loss += tf.add_n(aux_losses)
    return total_loss

  def build_metrics(self,
                    training: bool = True) -> Sequence[tf.keras.metrics.Metric]:
    """Gets streaming metrics for training/validation.

    This function builds and returns a list of metrics to compute during
    training and validation. The list contains objects of subclasses of
    tf.keras.metrics.Metric. Training and validation can have different
    metrics (here they share the same ones).

    Args:
      training: Whether the metric is for training or not.

    Returns:
      A list of tf.keras.metrics.Metric objects.
    """
    # `top_k` comes from the evaluation section of the task config.
    k = self.task_config.evaluation.top_k
    metrics = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(
            k=k, name='top_{}_accuracy'.format(k))
    ]
    return metrics

  def train_step(self,
                 inputs: Tuple[Any, Any],
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
    """Does forward and backward.

    This example assumes input is a tuple of (features, labels), which follows
    the output from data loader, i.e., Parser. The output from Parser is fed
    into train_step to perform one step forward and backward pass. Other data
    structure, such as dictionary, can also be used, as long as it is
    consistent between output from Parser and input used here.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      optimizer: The optimizer for this training step.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Casting output layer as float32 is necessary when mixed_precision is
      # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
      outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
      # Computes per-replica loss.
      loss = self.build_losses(
          model_outputs=outputs, labels=labels, aux_losses=model.losses)
      # Scales loss as the default gradients allreduce performs sum inside the
      # optimizer.
      scaled_loss = loss / num_replicas
      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)
    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient before apply_gradients when LossScaleOptimizer is
    # used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))
    # `self.loss` is the canonical loss key expected by the training loop.
    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    return logs

  def validation_step(self,
                      inputs: Tuple[Any, Any],
                      model: tf.keras.Model,
                      metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
    """Runs validation step.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs
    outputs = self.inference_step(features, model)
    # Cast to float32 so loss/metrics are computed at full precision even
    # under a mixed-precision policy.
    outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)
    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    return logs

  def inference_step(self, inputs: tf.Tensor, model: tf.keras.Model) -> Any:
    """Performs the forward step (training=False). Used in validation_step."""
    return model(inputs, training=False)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All necessary imports for registration.
Custom models, task, configs, etc need to be imported to registry so they can be
picked up by the trainer. They can be included in this file so you do not need
to handle each file separately.
"""
# pylint: disable=unused-import
from official.common import registry_imports
from official.vision.beta.projects.example import example_config
from official.vision.beta.projects.example import example_input
from official.vision.beta.projects.example import example_model
from official.vision.beta.projects.example import example_task
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TensorFlow Model Garden Vision trainer.
All custom registry are imported from registry_imports. Here we use default
trainer so we directly call train.main. If you need to customize the trainer,
branch from `official/vision/beta/train.py` and make changes.
"""
from absl import app
from official.common import flags as tfm_flags
from official.vision.beta import train
from official.vision.beta.projects.example import registry_imports # pylint: disable=unused-import
# Entry point: define the standard TF Model Garden flags, then hand control to
# the default vision trainer's `main` via absl's app runner.
if __name__ == '__main__':
  tfm_flags.define_flags()
  app.run(train.main)
......@@ -44,6 +44,13 @@ class Movinet(hyperparams.Config):
# 2plus1d: (2+1)D convolution with Conv2D (2D reshaping)
# 3d_2plus1d: (2+1)D convolution with Conv3D (no 2D reshaping)
conv_type: str = '3d'
# Choose from ['3d', '2d', '2plus3d']
# 3d: default 3D global average pooling.
# 2d: 2D global average pooling.
# 2plus3d: concatenation of 2D and 3D global average pooling.
se_type: str = '3d'
activation: str = 'swish'
gating_activation: str = 'sigmoid'
stochastic_depth_drop_rate: float = 0.2
use_external_states: bool = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment