Unverified Commit 8b641b13 authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'tensorflow:master' into panoptic-deeplab

parents 7cffacfe 357fa547
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Decoders configurations."""
import dataclasses
from typing import Optional, Sequence
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Semantic segmentation configuration definition."""
import dataclasses
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for semantic_segmentation."""
# pylint: disable=unused-import
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Backbones package definition."""
from official.projects.volumetric_models.modeling.backbones.unet_3d import UNet3D
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for 3D UNet backbone."""
# Import libraries
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Decoders package definition."""
from official.projects.volumetric_models.modeling.decoders.unet_3d_decoder import UNet3DDecoder
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for 3D UNet decoder."""
# Import libraries
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for segmentation_heads.py."""
from absl.testing import parameterized
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for 3D volumeric convoluion blocks."""
# Import libraries
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for segmentation network."""
from absl.testing import parameterized
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Image segmentation task definition."""
from typing import Any, Dict, Mapping, Optional, Sequence, Union
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for semantic segmentation task."""
# pylint: disable=unused-import
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
import json
import os
......
......@@ -17,7 +17,7 @@ Some abbreviations used in the code base include:
- ml-20m: MovieLens 20 million dataset
## Dataset
The [MovieLens datasets](http://files.grouplens.org/datasets/movielens/) are used for model training and evaluation. Specifically, we use two datasets: **ml-1m** (short for MovieLens 1 million) and **ml-20m** (short for MovieLens 20 million).
The [MovieLens datasets](https://files.grouplens.org/datasets/movielens/) are used for model training and evaluation. Specifically, we use two datasets: **ml-1m** (short for MovieLens 1 million) and **ml-20m** (short for MovieLens 20 million).
### ml-1m
The ml-1m dataset contains 1,000,209 anonymous ratings of approximately 3,706 movies made by 6,040 users who joined MovieLens in 2000. All ratings are contained in the file "ratings.dat" without a header row, in the following format (a parsing sketch follows below):
......
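For reference, each `ratings.dat` line is `::`-delimited in the published ml-1m layout (`UserID::MovieID::Rating::Timestamp`); a minimal, hedged parsing sketch, not code from this repo:

```python
def parse_ml1m_ratings(path):
  """Parses ml-1m `ratings.dat` lines: UserID::MovieID::Rating::Timestamp."""
  ratings = []
  with open(path, encoding="latin-1") as f:  # MovieLens files are ISO-8859-1
    for line in f:
      user_id, movie_id, rating, timestamp = line.strip().split("::")
      ratings.append(
          (int(user_id), int(movie_id), float(rating), int(timestamp)))
  return ratings
```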
......@@ -49,7 +49,7 @@ RATINGS_FILE = "ratings.csv"
MOVIES_FILE = "movies.csv"
# URL to download dataset
_DATA_URL = "http://files.grouplens.org/datasets/movielens/"
_DATA_URL = "https://files.grouplens.org/datasets/movielens/"
GENRE_COLUMN = "genres"
ITEM_COLUMN = "item_id" # movies
......
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Tool to generate api_docs for tensorflow_models/official library.
Example:
$> pip install -U git+https://github.com/tensorflow/docs
$> python build_nlp_api_docs \
--output_dir=/tmp/api_docs
"""
import pathlib
from absl import app
from absl import flags
from absl import logging
from tensorflow_docs.api_generator import generate_lib
from tensorflow_docs.api_generator import public_api
import tensorflow_models as tfm
from official.utils.docs import build_api_docs_lib
FLAGS = flags.FLAGS
flags.DEFINE_string('output_dir', None, 'Where to write the resulting docs to.')
flags.DEFINE_string(
'code_url_prefix',
'https://github.com/tensorflow/models/blob/master/tensorflow_models',
'The url prefix for links to code.')
flags.DEFINE_bool('search_hints', True,
'Include metadata search hints in the generated files')
flags.DEFINE_string('site_path', '/api_docs/python',
'Path prefix in the _toc.yaml')
PROJECT_SHORT_NAME = 'tfm'
PROJECT_FULL_NAME = 'TensorFlow Official Models - Modeling Library'
def custom_filter(path, parent, children):
if len(path) <= 2:
# Don't filter the contents of the top level `tfm.vision` package.
return children
else:
return public_api.explicit_package_contents_filter(path, parent, children)
def gen_api_docs(code_url_prefix, site_path, output_dir, project_short_name,
project_full_name, search_hints):
"""Generates api docs for the tensorflow docs package."""
build_api_docs_lib.hide_module_model_and_layer_methods()
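  # `MultiHeadAttention` and `EinsumDense` here are re-exported Keras layers;
  # deleting them keeps the generated docs from duplicating the Keras API
  # reference (an assumption based on the symbols' origin, not a repo comment).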
del tfm.nlp.layers.MultiHeadAttention
del tfm.nlp.layers.EinsumDense
url_parts = code_url_prefix.strip('/').split('/')
url_parts = url_parts[:url_parts.index('tensorflow_models')]
url_parts.append('official')
official_url_prefix = '/'.join(url_parts)
tfm_base_dir = pathlib.Path(tfm.__file__).parent
# The `layers` submodule (and others) are actually defined in the `official`
# package. Find the path to `official`.
official_base_dir = [
p for p in pathlib.Path(tfm.vision.layers.__file__).parents
if p.name == 'official'
][0]
doc_generator = generate_lib.DocGenerator(
root_title=project_full_name,
py_modules=[(project_short_name, tfm)],
base_dir=[tfm_base_dir, official_base_dir],
code_url_prefix=[
code_url_prefix,
official_url_prefix,
],
search_hints=search_hints,
site_path=site_path,
callbacks=[custom_filter],
)
doc_generator.build(output_dir)
logging.info('Output docs to: %s', output_dir)
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
gen_api_docs(
code_url_prefix=FLAGS.code_url_prefix,
site_path=FLAGS.site_path,
output_dir=FLAGS.output_dir,
project_short_name=PROJECT_SHORT_NAME,
project_full_name=PROJECT_FULL_NAME,
search_hints=FLAGS.search_hints)
if __name__ == '__main__':
flags.mark_flag_as_required('output_dir')
app.run(main)
......@@ -12,31 +12,39 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses utilities for detection models."""
"""Tests for official.tools.build_docs."""
import os
import shutil
import tensorflow as tf
from official.utils.docs import build_all_api_docs
class BuildDocsTest(tf.test.TestCase):
def setUp(self):
super(BuildDocsTest, self).setUp()
self.workdir = self.get_temp_dir()
if os.path.exists(self.workdir):
shutil.rmtree(self.workdir)
os.makedirs(self.workdir)
def test_api_gen(self):
build_all_api_docs.gen_api_docs(
code_url_prefix="https://github.com/tensorflow/models/blob/master/tensorflow_models",
site_path="tf_modeling/api_docs/python",
output_dir=self.workdir,
project_short_name="tfm",
project_full_name="TensorFlow Modeling",
search_hints=True)
# Check that the "defined in" section is working
with open(os.path.join(self.workdir, "tfm.md")) as f:
content = f.read()
self.assertIn("__init__.py", content)
def multi_level_flatten(multi_level_inputs, last_dim=None):
"""Flattens a multi-level input.
Args:
multi_level_inputs: Ordered Dict with level to [batch, d1, ..., dm].
last_dim: Whether the output should be [batch_size, None], or [batch_size,
None, last_dim]. Defaults to `None`.
Returns:
Concatenated output [batch_size, None], or [batch_size, None, dm]
"""
flattened_inputs = []
batch_size = None
for level in multi_level_inputs.keys():
single_input = multi_level_inputs[level]
if batch_size is None:
batch_size = single_input.shape[0] or tf.shape(single_input)[0]
if last_dim is not None:
flattened_input = tf.reshape(single_input, [batch_size, -1, last_dim])
else:
flattened_input = tf.reshape(single_input, [batch_size, -1])
flattened_inputs.append(flattened_input)
return tf.concat(flattened_inputs, axis=1)
if __name__ == "__main__":
tf.test.main()
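# A small usage sketch for `multi_level_flatten` above; the shapes are
# illustrative, not taken from the repo.
inputs = {
    '3': tf.ones([2, 8, 8, 4]),  # level 3: 8x8 spatial grid, 4 channels
    '4': tf.ones([2, 4, 4, 4]),  # level 4: 4x4 spatial grid, 4 channels
}
flat = multi_level_flatten(inputs, last_dim=4)
print(flat.shape)  # (2, 80, 4): 8*8 + 4*4 = 80 flattened locations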
......@@ -24,10 +24,10 @@ segmentation.
| Model | Resolution | Epochs | Top-1 | Top-5 | Download |
| ------------ |:-------------:|--------:|--------:|--------:|---------:|
| ResNet-50 | 224x224 | 90 | 76.1 | 92.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) |
| ResNet-50 | 224x224 | 200 | 77.1 | 93.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) |
| ResNet-101 | 224x224 | 200 | 78.3 | 94.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml) |
| ResNet-152 | 224x224 | 200 | 78.7 | 94.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml) |
| ResNet-50 | 224x224 | 90 | 76.1 | 92.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) |
| ResNet-50 | 224x224 | 200 | 77.1 | 93.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) |
| ResNet-101 | 224x224 | 200 | 78.3 | 94.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml) |
| ResNet-152 | 224x224 | 200 | 78.7 | 94.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml) |
#### ResNet-RS models trained with various settings
......@@ -40,20 +40,20 @@ classification models with features:
depth, label smoothing and dropout.
* New training methods including a 350-epoch schedule, cosine learning rate and
EMA.
* Configs are in this [directory](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification).
* Configs are in this [directory](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification).
| Model | Resolution | Params (M) | Top-1 | Top-5 | Download |
| --------- | :--------: | ---------: | ----: | ----: | --------:|
| ResNet-RS-50 | 160x160 | 35.7 | 79.1 | 94.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs50_i160.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-50-i160.tar.gz) |
| ResNet-RS-101 | 160x160 | 63.7 | 80.2 | 94.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-101-i160.tar.gz) |
| ResNet-RS-101 | 192x192 | 63.7 | 81.3 | 95.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-101-i192.tar.gz) |
| ResNet-RS-152 | 192x192 | 86.8 | 81.9 | 95.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i192.tar.gz) |
| ResNet-RS-152 | 224x224 | 86.8 | 82.5 | 96.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i224.tar.gz) |
| ResNet-RS-152 | 256x256 | 86.8 | 83.1 | 96.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i256.tar.gz) |
| ResNet-RS-200 | 256x256 | 93.4 | 83.5 | 96.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs200_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-200-i256.tar.gz) |
| ResNet-RS-270 | 256x256 | 130.1 | 83.6 | 96.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs270_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-270-i256.tar.gz) |
| ResNet-RS-350 | 256x256 | 164.3 | 83.7 | 96.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i256.tar.gz) |
| ResNet-RS-350 | 320x320 | 164.3 | 84.2 | 96.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs420_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i320.tar.gz) |
| ResNet-RS-50 | 160x160 | 35.7 | 79.1 | 94.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs50_i160.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-50-i160.tar.gz) |
| ResNet-RS-101 | 160x160 | 63.7 | 80.2 | 94.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-101-i160.tar.gz) |
| ResNet-RS-101 | 192x192 | 63.7 | 81.3 | 95.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-101-i192.tar.gz) |
| ResNet-RS-152 | 192x192 | 86.8 | 81.9 | 95.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i192.tar.gz) |
| ResNet-RS-152 | 224x224 | 86.8 | 82.5 | 96.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i224.tar.gz) |
| ResNet-RS-152 | 256x256 | 86.8 | 83.1 | 96.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i256.tar.gz) |
| ResNet-RS-200 | 256x256 | 93.4 | 83.5 | 96.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs200_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-200-i256.tar.gz) |
| ResNet-RS-270 | 256x256 | 130.1 | 83.6 | 96.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs270_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-270-i256.tar.gz) |
| ResNet-RS-350 | 256x256 | 164.3 | 83.7 | 96.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i256.tar.gz) |
| ResNet-RS-350 | 320x320 | 164.3 | 84.2 | 96.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs420_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i320.tar.gz) |
#### Vision Transformer (ViT)
......@@ -109,16 +109,16 @@ evaluated on [COCO](https://cocodataset.org/) val2017.
| Backbone | Resolution | Epochs | FLOPs (B) | Params (M) | Box AP | Download |
| ------------ |:-------------:| -------:|--------------:|-----------:|--------:|---------:|
| SpineNet-49 | 640x640 | 500 | 85.4| 28.5 | 44.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
| SpineNet-96 | 1024x1024 | 500 | 265.4 | 43.0 | 48.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet96_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
| SpineNet-143 | 1280x1280 | 500 | 524.0 | 67.0 | 50.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet143_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
| SpineNet-49 | 640x640 | 500 | 85.4| 28.5 | 44.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet49_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
| SpineNet-96 | 1024x1024 | 500 | 265.4 | 43.0 | 48.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet96_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
| SpineNet-143 | 1280x1280 | 500 | 524.0 | 67.0 | 50.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet143_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
#### Mobile-size RetinaNet (Trained from scratch):
| Backbone | Resolution | Epochs | FLOPs (B) | Params (M) | Box AP | Download |
| ----------- | :--------: | -----: | --------: | ---------: | -----: | --------:|
| MobileNetv2 | 256x256 | 600 | - | 2.27 | 23.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_mobilenetv2_tpu.yaml) |
| Mobile SpineNet-49 | 384x384 | 600 | 1.0 | 2.32 | 28.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/retinanet/spinenet49mobile.tar.gz) |
| MobileNetv2 | 256x256 | 600 | - | 2.27 | 23.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_mobilenetv2_tpu.yaml) |
| Mobile SpineNet-49 | 384x384 | 600 | 1.0 | 2.32 | 28.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/retinanet/spinenet49mobile.tar.gz) |
### Instance Segmentation Baselines
......@@ -126,19 +126,19 @@ evaluated on [COCO](https://cocodataset.org/) val2017.
| Backbone | Resolution | Epochs | FLOPs (B) | Params (M) | Box AP | Mask AP | Download |
| ------------ |:-------------:| -------:|-----------:|-----------:|-------:|--------:|---------:|
| ResNet50-FPN | 640x640 | 350 | 227.7 | 46.3 | 42.3 | 37.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/r50fpn_640_coco_scratch_tpu4x4.yaml) |
| SpineNet-49 | 640x640 | 350 | 215.7 | 40.8 | 42.6 | 37.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_mrcnn_tpu.yaml) |
| SpineNet-96 | 1024x1024 | 500 | 315.0 | 55.2 | 48.1 | 42.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_mrcnn_tpu.yaml) |
| SpineNet-143 | 1280x1280 | 500 | 498.8 | 79.2 | 49.3 | 43.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_mrcnn_tpu.yaml) |
| ResNet50-FPN | 640x640 | 350 | 227.7 | 46.3 | 42.3 | 37.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/r50fpn_640_coco_scratch_tpu4x4.yaml) |
| SpineNet-49 | 640x640 | 350 | 215.7 | 40.8 | 42.6 | 37.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet49_mrcnn_tpu.yaml) |
| SpineNet-96 | 1024x1024 | 500 | 315.0 | 55.2 | 48.1 | 42.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet96_mrcnn_tpu.yaml) |
| SpineNet-143 | 1280x1280 | 500 | 498.8 | 79.2 | 49.3 | 43.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet143_mrcnn_tpu.yaml) |
#### Cascade RCNN-RS (Trained from scratch)
| Backbone | Resolution | Epochs | Params (M) | Box AP | Mask AP | Download
------------ | :--------: | -----: | ---------: | -----: | ------: | -------:
| SpineNet-49 | 640x640 | 500 | 56.4 | 46.4 | 40.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_cascadercnn_tpu.yaml)|
| SpineNet-96 | 1024x1024 | 500 | 70.8 | 50.9 | 43.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_cascadercnn_tpu.yaml)|
| SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_cascadercnn_tpu.yaml)|
| SpineNet-49 | 640x640 | 500 | 56.4 | 46.4 | 40.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet49_cascadercnn_tpu.yaml)|
| SpineNet-96 | 1024x1024 | 500 | 70.8 | 50.9 | 43.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet96_cascadercnn_tpu.yaml)|
| SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet143_cascadercnn_tpu.yaml)|
## Semantic Segmentation
......@@ -171,8 +171,10 @@ evaluated on [COCO](https://cocodataset.org/) val2017.
[Spatiotemporal Contrastive Video Representation Learning](https://arxiv.org/abs/2008.03800).
* ResNet-3D-RS (R3D-RS) in
[Revisiting 3D ResNets for Video Recognition](https://arxiv.org/pdf/2109.01696.pdf).
* Mobile Video Networks (MoViNets) in
[MoViNets: Mobile Video Networks for Efficient Video Recognition](https://arxiv.org/abs/2103.11511).
* Training and evaluation details:
* Training and evaluation details (SlowFast and ResNet):
* All models are trained from scratch with the vision modality (RGB) for 200
epochs.
* We use a batch size of 1024 and cosine learning rate decay with linear warmup
......@@ -184,20 +186,32 @@ evaluated on [COCO](https://cocodataset.org/) val2017.
| Model | Input (frame x stride) | Top-1 | Top-5 | Download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) |
| SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) |
| R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) |
| R3D-RS-50 | 32 x 2 | 78.2 | 93.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml) |
| SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) |
| SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) |
| R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) |
| R3D-RS-50 | 32 x 2 | 78.2 | 93.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml) |
| R3D-RS-101 | 32 x 2 | 79.5 | 94.2 | -
| R3D-RS-152 | 32 x 2 | 79.9 | 94.3 | -
| R3D-RS-200 | 32 x 2 | 80.4 | 94.4 | -
| R3D-RS-200 | 48 x 2 | 81.0 | - | -
| MoViNet-A0-Base | 50 x 5 | 69.40 | 89.18 | -
| MoViNet-A1-Base | 50 x 5 | 74.57 | 92.03 | -
| MoViNet-A2-Base | 50 x 5 | 75.91 | 92.63 | -
| MoViNet-A3-Base | 120 x 2 | 79.34 | 94.52 | -
| MoViNet-A4-Base | 80 x 3 | 80.64 | 94.93 | -
| MoViNet-A5-Base | 120 x 2 | 81.39 | 95.06 | -
### Kinetics-600 Action Recognition Baselines
| Model | Input (frame x stride) | Top-1 | Top-5 | Download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 77.3 | 93.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) |
| R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) |
| SlowOnly | 8 x 8 | 77.3 | 93.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) |
| R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) |
| R3D-RS-200 | 32 x 2 | 83.1 | - | -
| R3D-RS-200 | 48 x 2 | 83.8 | - | -
| MoViNet-A0-Base | 50 x 5 | 72.05 | 90.92 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a0_k600_8x8.yaml) |
| MoViNet-A1-Base | 50 x 5 | 76.69 | 93.40 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a1_k600_8x8.yaml) |
| MoViNet-A2-Base | 50 x 5 | 78.62 | 94.17 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a2_k600_8x8.yaml) |
| MoViNet-A3-Base | 120 x 2 | 81.79 | 95.67 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a3_k600_8x8.yaml) |
| MoViNet-A4-Base | 80 x 3 | 83.48 | 96.16 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a4_k600_8x8.yaml) |
| MoViNet-A5-Base | 120 x 2 | 84.27 | 96.39 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a5_k600_8x8.yaml) |
# TF-Vision Model Garden
⚠️ Disclaimer: The datasets hyperlinked from this page are not owned or
distributed by Google. They are made available by third parties.
Please review the terms and conditions made available by the third parties
before using the data.
## Table of Contents
- [Introduction](#introduction)
- [Image Classification](#image-classification)
* [ResNet models trained with vanilla settings](#resnet-models-trained-with-vanilla-settings)
* [ResNet-RS models trained with various settings](#resnet-rs-models-trained-with-various-settings)
* [Vision Transformer (ViT)](#vision-transformer-ViT)
- [Object Detection and Instance Segmentation](#object-detection-and-instance-segmentation)
* [Common Settings and Notes](#Common-Settings-and-Notes)
- [COCO Object Detection Baselines](#COCO-Object-Detection-Baselines)
* [RetinaNet (ImageNet pretrained)](#RetinaNet-ImageNet-pretrained)
* [RetinaNet (Trained from scratch)](#RetinaNet-Trained-from-scratch)
* [Mobile-size RetinaNet (Trained from scratch)](#Mobile-size-RetinaNet-Trained-from-scratch)
- [Instance Segmentation Baselines](#Instance-Segmentation-Baselines)
* [Mask R-CNN (Trained from scratch)](#Mask-R-CNN-Trained-from-scratch)
* [Cascade RCNN-RS (Trained from scratch)](#Cascade-RCNN-RS-Trained-from-scratch)
- [Semantic Segmentation](#semantic-segmentation)
* [PASCAL-VOC](#PASCAL-VOC)
* [CITYSCAPES](#CITYSCAPES)
- [Video Classification](#video-classification)
* [Common Settings and Notes](#Common-Settings-and-Notes)
* [Kinetics-400 Action Recognition Baselines](#Kinetics-400-Action-Recognition-Baselines)
* [Kinetics-600 Action Recognition Baselines](#Kinetics-600-Action-Recognition-Baselines)
## Introduction
TF-Vision modeling library for computer vision provides a collection of
baselines and checkpoints for image classification, object detection, and
segmentation.
## Image Classification
### ResNet models trained with vanilla settings
<details>
* Models are trained from scratch with batch size 4096 and an initial learning
rate of 1.6.
* Linear warmup is applied for the first 5 epochs.
* Models are trained with l2 weight regularization and ReLU activation; a
minimal schedule sketch follows the table below.
| Model | Resolution | Epochs | Top-1 | Top-5 | Download |
| ------------ |:-------------:|--------:|--------:|--------:|---------:|
| ResNet-50 | 224x224 | 90 | 76.1 | 92.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) |
| ResNet-50 | 224x224 | 200 | 77.1 | 93.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) |
| ResNet-101 | 224x224 | 200 | 78.3 | 94.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml) |
| ResNet-152 | 224x224 | 200 | 78.7 | 94.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml) |
</details>
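To make the schedule above concrete, here is a minimal, hedged sketch of linear warmup into a stepwise decay as a Keras `LearningRateSchedule`; the step counts are hypothetical, and the authoritative values live in the linked configs.

```python
import tensorflow as tf

class WarmupStepwise(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Linear warmup for `warmup_steps`, then a piecewise-constant decay."""

  def __init__(self, base_lr, warmup_steps, boundaries, rates):
    super().__init__()
    self.base_lr = base_lr
    self.warmup_steps = warmup_steps
    self.stepwise = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries, rates)

  def __call__(self, step):
    step = tf.cast(step, tf.float32)
    warmup_lr = self.base_lr * step / float(self.warmup_steps)
    return tf.cond(step < self.warmup_steps,
                   lambda: warmup_lr,
                   lambda: self.stepwise(step))

# Hypothetical step counts: ~313 steps/epoch for ImageNet at batch size 4096.
schedule = WarmupStepwise(base_lr=1.6, warmup_steps=5 * 313,
                          boundaries=[30 * 313, 60 * 313, 80 * 313],
                          rates=[1.6, 0.16, 0.016, 0.0016])
```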
### ResNet-RS models trained with various settings
<details>
We support state-of-the-art [ResNet-RS](https://arxiv.org/abs/2103.07579) image
classification models with features:
* ResNet-RS architectural changes and Swish activation. (Note that ResNet-RS
adopts ReLU activation in the paper.)
* Regularization methods including Random Augment, 4e-5 weight decay, stochastic
depth, label smoothing and dropout.
* New training methods including a 350-epoch schedule, cosine learning rate and
EMA.
* Configs are in this [directory](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification); a cosine-schedule and EMA sketch follows the table below.
| Model | Resolution | Params (M) | Top-1 | Top-5 | Download |
| --------- | :--------: | ---------: | ----: | ----: | --------:|
| ResNet-RS-50 | 160x160 | 35.7 | 79.1 | 94.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs50_i160.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-50-i160.tar.gz) |
| ResNet-RS-101 | 160x160 | 63.7 | 80.2 | 94.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-101-i160.tar.gz) |
| ResNet-RS-101 | 192x192 | 63.7 | 81.3 | 95.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-101-i192.tar.gz) |
| ResNet-RS-152 | 192x192 | 86.8 | 81.9 | 95.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i192.tar.gz) |
| ResNet-RS-152 | 224x224 | 86.8 | 82.5 | 96.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i224.tar.gz) |
| ResNet-RS-152 | 256x256 | 86.8 | 83.1 | 96.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs152_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-152-i256.tar.gz) |
| ResNet-RS-200 | 256x256 | 93.4 | 83.5 | 96.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs200_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-200-i256.tar.gz) |
| ResNet-RS-270 | 256x256 | 130.1 | 83.6 | 96.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs270_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-270-i256.tar.gz) |
| ResNet-RS-350 | 256x256 | 164.3 | 83.7 | 96.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i256.tar.gz) |
| ResNet-RS-350 | 320x320 | 164.3 | 84.2 | 96.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/image_classification/imagenet_resnetrs420_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i320.tar.gz) |
</details>
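A similarly hedged sketch of the 350-epoch cosine schedule with weight EMA; `tf.train.ExponentialMovingAverage` stands in for the Model Garden's own optimizer wrapper, and the step counts are assumptions.

```python
import tensorflow as tf

steps_per_epoch = 1251  # hypothetical: ImageNet at batch size 1024
schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=1.6, decay_steps=350 * steps_per_epoch)
optimizer = tf.keras.optimizers.SGD(learning_rate=schedule, momentum=0.9)

# Keep an exponential moving average of the weights; evaluation then uses
# the averaged weights rather than the raw ones.
ema = tf.train.ExponentialMovingAverage(decay=0.9999)
```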
### Vision Transformer (ViT)
<details>
We support [ViT](https://arxiv.org/abs/2010.11929) and [DEIT](https://arxiv.org/abs/2012.12877) implementations in a TF-Vision
[project](https://github.com/tensorflow/models/tree/master/official/projects/vit). ViT models are trained under the DEIT settings:
model | resolution | Top-1 | Top-5 |
--------- | :--------: | ----: | ----: |
ViT-s16 | 224x224 | 79.4 | 94.7 |
ViT-b16 | 224x224 | 81.8 | 95.8 |
ViT-l16 | 224x224 | 82.2 | 95.8 |
</details>
## Object Detection and Instance Segmentation
### Common Settings and Notes
<details>
* We provide models adopting [ResNet-FPN](https://arxiv.org/abs/1612.03144)
and [SpineNet](https://arxiv.org/abs/1912.05027) backbones based on the
following detection frameworks:
* [RetinaNet](https://arxiv.org/abs/1708.02002) and
[RetinaNet-RS](https://arxiv.org/abs/2107.00057)
* [Mask R-CNN](https://arxiv.org/abs/1703.06870)
* [Cascade RCNN](https://arxiv.org/abs/1712.00726) and
[Cascade RCNN-RS](https://arxiv.org/abs/2107.00057)
* Models are all trained on [COCO](https://cocodataset.org/) train2017 and
evaluated on [COCO](https://cocodataset.org/) val2017.
* Training details:
* Models finetuned from [ImageNet](https://www.image-net.org/) pretrained
checkpoints adopt the 12 or 36 epochs schedule. Models trained from
scratch adopt the 350 epochs schedule.
* The default training data augmentation implements horizontal flipping
and scale jittering with a random scale between [0.5, 2.0].
* Unless noted, all models are trained with l2 weight regularization and
ReLU activation.
* We use batch size 256 and a stepwise learning rate that decays at the last
30 and 10 epochs.
* We use a square image as input by resizing the long side of an image to
the target size and then padding the short side with zeros (see the sketch
after this section).
</details>
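The square-input rule in the last bullet can be sketched as follows; this is a minimal illustration, not the library's own preprocessing op, which also handles boxes, masks, and scale jittering:

```python
import tensorflow as tf

def resize_and_pad_to_square(image, target_size):
  """Resizes the long side to `target_size`, then zero-pads to a square."""
  shape = tf.cast(tf.shape(image)[:2], tf.float32)
  scale = target_size / tf.reduce_max(shape)  # training would jitter this
  new_hw = tf.cast(tf.round(shape * scale), tf.int32)
  image = tf.image.resize(image, new_hw)
  return tf.image.pad_to_bounding_box(image, 0, 0, target_size, target_size)
```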
## COCO Object Detection Baselines
### RetinaNet (ImageNet pretrained)
<details>
| Backbone | Resolution | Epochs | FLOPs (B) | Params (M) | Box AP | Download |
| ------------ |:-------------:| -------:|--------------:|-----------:|-------:|---------:|
| R50-FPN | 640x640 | 12 | 97.0 | 34.0 | 34.3 | config|
| R50-FPN | 640x640 | 72 | 97.0 | 34.0 | 36.8 | config \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/retinanet/retinanet-resnet50fpn.tar.gz) |
</details>
### RetinaNet (Trained from scratch)
<details>
Models are trained from scratch with training features including:
* Stochastic depth with drop rate 0.2 (a sketch follows this section).
* Swish activation.
| Backbone | Resolution | Epochs | FLOPs (B) | Params (M) | Box AP | Download |
| ------------ |:-------------:| -------:|--------------:|-----------:|--------:|---------:|
| SpineNet-49 | 640x640 | 500 | 85.4| 28.5 | 44.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet49_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
| SpineNet-96 | 1024x1024 | 500 | 265.4 | 43.0 | 48.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet96_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
| SpineNet-143 | 1280x1280 | 500 | 524.0 | 67.0 | 50.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet143_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)|
</details>
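For reference, stochastic depth with drop rate 0.2 keeps a residual branch with probability 0.8 per example and rescales it. A common formulation, sketched under the assumption of NHWC inputs:

```python
import tensorflow as tf

def stochastic_depth(inputs, survival_prob, training):
  """Randomly zeroes the residual branch per example, rescaled when kept."""
  if not training:
    return inputs
  batch_size = tf.shape(inputs)[0]
  random_tensor = survival_prob + tf.random.uniform(
      [batch_size, 1, 1, 1], dtype=inputs.dtype)
  binary_mask = tf.floor(random_tensor)  # 1 with prob survival_prob, else 0
  return inputs / survival_prob * binary_mask

# In a residual block: x = shortcut + stochastic_depth(branch, 0.8, training)
```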
### Mobile-size RetinaNet (Trained from scratch):
<details>
| Backbone | Resolution | Epochs | FLOPs (B) | Params (M) | Box AP | Download |
| ----------- | :--------: | -----: | --------: | ---------: | -----: | --------:|
| MobileNetv2 | 256x256 | 600 | - | 2.27 | 23.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_mobilenetv2_tpu.yaml) |
| Mobile SpineNet-49 | 384x384 | 600 | 1.0 | 2.32 | 28.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/retinanet/spinenet49mobile.tar.gz) |
</details>
## Instance Segmentation Baselines
### Mask R-CNN (Trained from scratch)
<details>
| Backbone | Resolution | Epochs | FLOPs (B) | Params (M) | Box AP | Mask AP | Download |
| ------------ |:-------------:| -------:|-----------:|-----------:|-------:|--------:|---------:|
| ResNet50-FPN | 640x640 | 350 | 227.7 | 46.3 | 42.3 | 37.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/r50fpn_640_coco_scratch_tpu4x4.yaml) |
| SpineNet-49 | 640x640 | 350 | 215.7 | 40.8 | 42.6 | 37.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet49_mrcnn_tpu.yaml) |
| SpineNet-96 | 1024x1024 | 500 | 315.0 | 55.2 | 48.1 | 42.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet96_mrcnn_tpu.yaml) |
| SpineNet-143 | 1280x1280 | 500 | 498.8 | 79.2 | 49.3 | 43.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet143_mrcnn_tpu.yaml) |
</details>
### Cascade RCNN-RS (Trained from scratch)
<details>
| Backbone | Resolution | Epochs | Params (M) | Box AP | Mask AP | Download
------------ | :--------: | -----: | ---------: | -----: | ------: | -------:
| SpineNet-49 | 640x640 | 500 | 56.4 | 46.4 | 40.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet49_cascadercnn_tpu.yaml)|
| SpineNet-96 | 1024x1024 | 500 | 70.8 | 50.9 | 43.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet96_cascadercnn_tpu.yaml)|
| SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/maskrcnn/coco_spinenet143_cascadercnn_tpu.yaml)|
</details>
## Semantic Segmentation
* We support [DeepLabV3](https://arxiv.org/pdf/1706.05587.pdf) and
[DeepLabV3+](https://arxiv.org/pdf/1802.02611.pdf) architectures, with
Dilated ResNet backbones.
* Backbones are pre-trained on ImageNet.
### PASCAL-VOC
<details>
| Model | Backbone | Resolution | Steps | mIoU | Download |
| ---------- | :----------------: | :--------: | ----: | ---: | --------:|
| DeepLabV3 | Dilated Resnet-101 | 512x512 | 30k | 78.7 | |
| DeepLabV3+ | Dilated Resnet-101 | 512x512 | 30k | 79.2 | |
</details>
### CITYSCAPES
<details>
| Model | Backbone | Resolution | Steps | mIoU | Download |
| ---------- | :----------------: | :--------: | ----: | ----: | --------:|
| DeepLabV3+ | Dilated Resnet-101 | 1024x2048 | 90k | 78.79 | |
</details>
## Video Classification
### Common Settings and Notes
<details>
* We provide models for video classification with backbones:
* SlowOnly in
[SlowFast Networks for Video Recognition](https://arxiv.org/abs/1812.03982).
* ResNet-3D (R3D) in
[Spatiotemporal Contrastive Video Representation Learning](https://arxiv.org/abs/2008.03800).
* ResNet-3D-RS (R3D-RS) in
[Revisiting 3D ResNets for Video Recognition](https://arxiv.org/pdf/2109.01696.pdf).
* Mobile Video Networks (MoViNets) in
[MoViNets: Mobile Video Networks for Efficient Video Recognition](https://arxiv.org/abs/2103.11511).
* Training and evaluation details (SlowFast and ResNet):
* All models are trained from scratch with the vision modality (RGB) for 200
epochs.
* We use a batch size of 1024 and cosine learning rate decay with linear warmup
in the first 5 epochs.
* We follow [SlowFast](https://arxiv.org/abs/1812.03982) to perform 30-view
evaluation (see the sketch after this section).
</details>
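The 30-view protocol averages predictions over 10 uniformly sampled temporal clips with 3 spatial crops each; a hedged sketch of the aggregation step, where the `views` layout is an assumption:

```python
import tensorflow as tf

def thirty_view_prediction(model, views):
  """Averages softmax scores over 30 views of one video.

  `views` is assumed to be [30, frames, height, width, 3]: 10 temporal
  clips x 3 spatial crops.
  """
  logits = model(views, training=False)  # [30, num_classes]
  probs = tf.nn.softmax(logits, axis=-1)
  return tf.reduce_mean(probs, axis=0)   # [num_classes]
```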
### Kinetics-400 Action Recognition Baselines
<details>
| Model | Input (frame x stride) | Top-1 | Top-5 | Download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) |
| SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) |
| R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) |
| R3D-RS-50 | 32 x 2 | 78.2 | 93.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml) |
| R3D-RS-101 | 32 x 2 | 79.5 | 94.2 | -
| R3D-RS-152 | 32 x 2 | 79.9 | 94.3 | -
| R3D-RS-200 | 32 x 2 | 80.4 | 94.4 | -
| R3D-RS-200 | 48 x 2 | 81.0 | - | -
| MoViNet-A0-Base | 50 x 5 | 69.40 | 89.18 | -
| MoViNet-A1-Base | 50 x 5 | 74.57 | 92.03 | -
| MoViNet-A2-Base | 50 x 5 | 75.91 | 92.63 | -
| MoViNet-A3-Base | 120 x 2 | 79.34 | 94.52 | -
| MoViNet-A4-Base | 80 x 3 | 80.64 | 94.93 | -
| MoViNet-A5-Base | 120 x 2 | 81.39 | 95.06 | -
</details>
### Kinetics-600 Action Recognition Baselines
<details>
| Model | Input (frame x stride) | Top-1 | Top-5 | Download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 77.3 | 93.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) |
| R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) |
| R3D-RS-200 | 32 x 2 | 83.1 | - | -
| R3D-RS-200 | 48 x 2 | 83.8 | - | -
| MoViNet-A0-Base | 50 x 5 | 72.05 | 90.92 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a0_k600_8x8.yaml) |
| MoViNet-A1-Base | 50 x 5 | 76.69 | 93.40 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a1_k600_8x8.yaml) |
| MoViNet-A2-Base | 50 x 5 | 78.62 | 94.17 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a2_k600_8x8.yaml) |
| MoViNet-A3-Base | 120 x 2 | 81.79 | 95.67 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a3_k600_8x8.yaml) |
| MoViNet-A4-Base | 80 x 3 | 83.48 | 96.16 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a4_k600_8x8.yaml) |
| MoViNet-A5-Base | 120 x 2 | 84.27 | 96.39 | [config](https://github.com/tensorflow/models/blob/master/official/projects/movinet/configs/yaml/movinet_a5_k600_8x8.yaml) |
</details>
This directory contains the new design of the TF Model Garden vision framework.