Commit a04d9e0e authored by Vishnu Banna's avatar Vishnu Banna
Browse files

merged

parents 64f16d61 bcbce005
......@@ -329,7 +329,7 @@ def load_eval_image(filename: Text, image_size: int = IMAGE_SIZE) -> tf.Tensor:
def build_eval_dataset(filenames: List[Text],
labels: List[int] = None,
labels: Optional[List[int]] = None,
image_size: int = IMAGE_SIZE,
batch_size: int = 1) -> tf.Tensor:
"""Builds a tf.data.Dataset from a list of filenames and labels.
......
......@@ -14,8 +14,10 @@
"""Defines exported symbols for the `orbit` package."""
from orbit import actions
from orbit import utils
from orbit.controller import Action
from orbit.controller import Controller
from orbit.runner import AbstractEvaluator
......
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Defines an "action" abstraction for use with `orbit.Controller`.
"Actions" are simply arbitrary callables that are applied by the `Controller`
to the output of train steps (after each inner loop of `steps_per_loop` steps)
or an evaluation. This provides a hook mechanism, enabling things like reporting
metrics to Vizier, model exporting, additional logging, etc.
The basic `Action` abstraction (just a type alias) is defined in the
`controller` module. This `actions` module adds a `ConditionalAction` utility
class to make it easy to trigger actions conditionally based on reusable
predicates, as well as a small handful of predefined conditions/actions (in
particular, a `NewBestMetric` condition and an `ExportSavedModel` action).
One example of using actions to do metric-conditional export:
new_best_metric = orbit.actions.NewBestMetric('accuracy')
export_action = orbit.actions.ConditionalAction(
condition=lambda x: x['accuracy'] > 0.9 and new_best_metric(x),
action=orbit.actions.ExportSavedModel(
model,
orbit.actions.ExportFileManager(
base_name=f'{FLAGS.model_dir}/saved_model',
next_id_fn=trainer.global_step.numpy),
signatures=model.infer))
controller = orbit.Controller(
strategy=strategy,
trainer=trainer,
evaluator=evaluator,
eval_actions=[export_action],
global_step=trainer.global_step,
steps_per_loop=FLAGS.steps_per_loop,
checkpoint_manager=checkpoint_manager,
summary_interval=1000)
Note: In multi-client settings where each client runs its own `Controller`
instance, some care should be taken in deciding which clients should run certain
actions. Isolating actions to an individual client (say client 0) can be
achieved using `ConditionalAction` as follows:
client_0_actions = orbit.actions.ConditionalAction(
condition=lambda _: client_id() == 0,
action=[
...
])
In particular, the `NewBestMetric` condition may be used in multi-client
settings if all clients are guaranteed to compute the same metric (ensuring this
is up to client code, not Orbit). However, when saving metrics it may be helpful
to avoid unnecessary writes by setting the `write_value` parameter to `False`
for most clients.
"""
from orbit.actions.conditional_action import ConditionalAction
from orbit.actions.export_saved_model import ExportFileManager
from orbit.actions.export_saved_model import ExportSavedModel
from orbit.actions.new_best_metric import JSONPersistedValue
from orbit.actions.new_best_metric import NewBestMetric
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides a `ConditionalAction` abstraction."""
from typing import Any, Callable, Sequence, Union
from orbit import controller
from orbit import runner
import tensorflow as tf
Condition = Callable[[runner.Output], Union[bool, tf.Tensor]]
def _as_sequence(maybe_sequence: Union[Any, Sequence[Any]]) -> Sequence[Any]:
if isinstance(maybe_sequence, Sequence):
return maybe_sequence
return [maybe_sequence]
class ConditionalAction:
  """Represents an action that is only taken when a given condition is met.

  This class is itself an `Action` (a callable that can be applied to train or
  eval outputs), but is intended to make it easier to write modular and
  reusable conditions by decoupling "when" something happens (the condition)
  from "what" happens (the action).
  """

  def __init__(
      self,
      condition: Condition,
      action: Union[controller.Action, Sequence[controller.Action]],
  ):
    """Initializes the instance.

    Args:
      condition: A callable accepting train or eval outputs and returning a
        bool.
      action: The action (or optionally sequence of actions) to perform when
        `condition` is met.
    """
    self.condition = condition
    self.action = action

  def __call__(self, output: runner.Output) -> None:
    # Do nothing at all unless the condition holds for this output.
    if not self.condition(output):
      return
    for wrapped in _as_sequence(self.action):
      wrapped(output)
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.conditional_action."""
from orbit import actions
import tensorflow as tf
class ConditionalActionTest(tf.test.TestCase):
  """Tests for `actions.ConditionalAction`."""

  def test_conditional_action(self):
    # Define a function to raise an AssertionError, since we can't in a lambda.
    def raise_assertion(arg):
      raise AssertionError(str(arg))

    conditional_action = actions.ConditionalAction(
        condition=lambda x: x['value'], action=raise_assertion)

    conditional_action({'value': False})  # Nothing is raised.
    with self.assertRaises(AssertionError) as ctx:
      conditional_action({'value': True})
    # Fix: `BaseException` has no `.message` attribute in Python 3 (accessing
    # it raises AttributeError); compare against `str(exception)` instead.
    self.assertEqual(str(ctx.exception), "{'value': True}")
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides the `ExportSavedModel` action and associated helper classes."""
from typing import Callable, Optional
import tensorflow as tf
class _CounterIdFn:
"""Implements a counter-based ID function for `ExportFileManager`."""
def __init__(self, base_name: str):
filenames = tf.io.gfile.glob(f'{base_name}-*')
max_counter = -1
for filename in filenames:
try:
_, file_number = filename.rsplit('-', maxsplit=1)
max_counter = max(max_counter, int(file_number))
except ValueError:
continue
self.value = max_counter + 1
def __call__(self):
output = self.value
self.value += 1
return output
class ExportFileManager:
  """Utility class that manages a group of files with a shared base name.

  For actions like SavedModel exporting, there are potentially many different
  file naming and cleanup strategies that may be desirable. This class provides
  a basic interface allowing SavedModel export to be decoupled from these
  details, and a default implementation that should work for many basic
  scenarios. Users may subclass this class to alter behavior and define more
  customized naming and cleanup strategies.
  """

  def __init__(self,
               base_name: str,
               max_to_keep: int = 5,
               next_id_fn: Optional[Callable[[], int]] = None):
    """Initializes the instance.

    Args:
      base_name: A shared base name for file names generated by this class.
      max_to_keep: The maximum number of files matching `base_name` to keep
        after each call to `cleanup`. The most recent (as determined by file
        modification time) `max_to_keep` files are preserved; the rest are
        deleted. If < 0, all files are preserved.
      next_id_fn: An optional callable that returns integer IDs to append to
        base name (formatted as `'{base_name}-{id}'`). The order of integers is
        used to sort files to determine the oldest ones deleted by `clean_up`.
        If not supplied, a default ID based on an incrementing counter is used.
        One common alternative may be to use the current global step count,
        for instance passing `next_id_fn=global_step.numpy`.
    """
    self._base_name = base_name
    self._max_to_keep = max_to_keep
    if next_id_fn is None:
      next_id_fn = _CounterIdFn(base_name)
    self._next_id_fn = next_id_fn

  @property
  def managed_files(self):
    """Returns all files managed by this instance, in sorted order.

    Returns:
      The list of files matching the `base_name` provided when constructing
      this `ExportFileManager` instance, sorted in increasing integer order of
      the IDs returned by `next_id_fn`.
    """

    def extract_id(filename):
      # The numeric ID is whatever follows the final '-' in the file name.
      return int(filename.rpartition('-')[2])

    return sorted(tf.io.gfile.glob(f'{self._base_name}-*'), key=extract_id)

  def clean_up(self):
    """Cleans up old files matching `{base_name}-*`.

    The most recent `max_to_keep` files are preserved.
    """
    if self._max_to_keep < 0:
      return
    # Delete everything except the `max_to_keep` files with the highest IDs.
    for stale_file in self.managed_files[:-self._max_to_keep]:
      tf.io.gfile.rmtree(stale_file)

  def next_name(self) -> str:
    """Returns a new file name based on `base_name` and `next_id_fn()`."""
    return f'{self._base_name}-{self._next_id_fn()}'
class ExportSavedModel:
  """Action that exports the given model as a SavedModel."""

  def __init__(self,
               model: tf.Module,
               file_manager: ExportFileManager,
               signatures,
               options: Optional[tf.saved_model.SaveOptions] = None):
    """Initializes the instance.

    Args:
      model: The model to export.
      file_manager: An instance of `ExportFileManager` (or a subclass), that
        provides file naming and cleanup functionality.
      signatures: The signatures to forward to `tf.saved_model.save()`.
      options: Optional options to forward to `tf.saved_model.save()`.
    """
    self.model = model
    self.file_manager = file_manager
    self.signatures = signatures
    self.options = options

  def __call__(self, _):
    """Exports the SavedModel, then removes stale exports."""
    tf.saved_model.save(self.model, self.file_manager.next_name(),
                        self.signatures, self.options)
    self.file_manager.clean_up()
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.export_saved_model."""
import os
from orbit import actions
import tensorflow as tf
def _id_key(name):
_, id_num = name.rsplit('-', maxsplit=1)
return int(id_num)
def _id_sorted_file_base_names(dir_path):
  """Lists files in `dir_path` sorted by their trailing integer IDs."""
  return sorted(tf.io.gfile.listdir(dir_path), key=_id_key)
class TestModel(tf.Module):
  """Minimal stateful `tf.Module` used to exercise SavedModel export."""

  def __init__(self):
    # Mutable state so tests can distinguish exports made at different times.
    self.value = tf.Variable(0)

  @tf.function(input_signature=[])
  def __call__(self):
    # Traced with an empty input signature so the exported SavedModel exposes
    # a no-argument concrete function.
    return self.value
class ExportSavedModelTest(tf.test.TestCase):
  """Tests for `ExportFileManager` and the `ExportSavedModel` action."""

  def test_export_file_manager_default_ids(self):
    # With the default counter-based IDs, names are basename-0, -1, -2, ...
    directory = self.create_tempdir()
    base_name = os.path.join(directory.full_path, 'basename')
    manager = actions.ExportFileManager(base_name, max_to_keep=3)
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 0)
    directory.create_file(manager.next_name())
    manager.clean_up()  # Shouldn't do anything...
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 1)
    directory.create_file(manager.next_name())
    manager.clean_up()  # Shouldn't do anything...
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 2)
    directory.create_file(manager.next_name())
    manager.clean_up()  # Shouldn't do anything...
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 3)
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 4)
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-0', 'basename-1', 'basename-2', 'basename-3'])
    manager.clean_up()  # Should delete file with lowest ID.
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-1', 'basename-2', 'basename-3'])
    # A fresh manager should resume counting from the highest existing ID.
    manager = actions.ExportFileManager(base_name, max_to_keep=3)
    self.assertEqual(os.path.basename(manager.next_name()), 'basename-4')

  def test_export_file_manager_custom_ids(self):
    directory = self.create_tempdir()
    base_name = os.path.join(directory.full_path, 'basename')

    # `next_id` reads `id_num` at call time, so reassigning it below controls
    # the ID used for each subsequent `next_name()` call.
    id_num = 0

    def next_id():
      return id_num

    manager = actions.ExportFileManager(
        base_name, max_to_keep=2, next_id_fn=next_id)
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 0)
    id_num = 30
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 1)
    manager.clean_up()  # Shouldn't do anything...
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path), ['basename-30'])
    id_num = 200
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 2)
    manager.clean_up()  # Shouldn't do anything...
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-30', 'basename-200'])
    id_num = 1000
    directory.create_file(manager.next_name())
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 3)
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-30', 'basename-200', 'basename-1000'])
    manager.clean_up()  # Should delete file with lowest ID.
    self.assertLen(tf.io.gfile.listdir(directory.full_path), 2)
    self.assertEqual(
        _id_sorted_file_base_names(directory.full_path),
        ['basename-200', 'basename-1000'])

  def test_export_saved_model(self):
    directory = self.create_tempdir()
    base_name = os.path.join(directory.full_path, 'basename')
    file_manager = actions.ExportFileManager(base_name, max_to_keep=2)
    model = TestModel()
    export_action = actions.ExportSavedModel(
        model, file_manager=file_manager, signatures=model.__call__)
    model.value.assign(3)
    self.assertEqual(model(), 3)
    self.assertEmpty(file_manager.managed_files)
    export_action({})
    self.assertLen(file_manager.managed_files, 1)
    # Each export should round-trip the current variable value.
    reloaded_model = tf.saved_model.load(file_manager.managed_files[-1])
    self.assertEqual(reloaded_model(), 3)
    model.value.assign(5)
    self.assertEqual(model(), 5)
    export_action({})
    self.assertLen(file_manager.managed_files, 2)
    reloaded_model = tf.saved_model.load(file_manager.managed_files[-1])
    self.assertEqual(reloaded_model(), 5)
    model.value.assign(7)
    self.assertEqual(model(), 7)
    export_action({})
    self.assertLen(file_manager.managed_files, 2)  # Still 2, due to clean up.
    reloaded_model = tf.saved_model.load(file_manager.managed_files[-1])
    self.assertEqual(reloaded_model(), 7)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides the `NewBestMetric` condition and associated helper classes."""
import json
import os
import sys
from typing import Any, Callable, Optional, Union
import uuid
from orbit import runner
from orbit import utils
import tensorflow as tf
MetricFn = Callable[[runner.Output], Union[float, tf.Tensor]]
class NewBestMetric:
  """Condition that is satisfied when a new best metric is achieved.

  This class keeps track of the best metric value seen so far, optionally in a
  persistent (preemption-safe) way.

  Two methods are provided, which each satisfy the `Action` protocol: `test`
  for only testing whether a new best metric is achieved by a given train/eval
  output, and `commit`, which both tests and records the new best metric value
  if it is achieved. These separate methods enable the same `NewBestMetric`
  instance to be reused as a condition multiple times, and can also provide
  additional preemption/failure safety. For example, to avoid updating the
  best metric if a model export fails or is pre-empted:

      new_best_metric = orbit.actions.NewBestMetric(
          'accuracy', filename='/model/dir/best_metric')
      action = orbit.actions.ConditionalAction(
          condition=new_best_metric.test,
          action=[
              orbit.actions.ExportSavedModel(...),
              new_best_metric.commit
          ])

  The default `__call__` implementation is equivalent to `commit`.

  This class is safe to use in multi-client settings if all clients can be
  guaranteed to compute the same metric. However when saving metrics it may be
  helpful to avoid unnecessary writes by setting the `write_value` parameter
  to `False` for most clients.

  Attributes:
    metric: The metric passed to __init__ (may be a string key or a callable
      that can be applied to train/eval output).
    higher_is_better: Whether higher metric values are better.
  """

  def __init__(self,
               metric: Union[str, MetricFn],
               higher_is_better: bool = True,
               filename: Optional[str] = None,
               write_metric: bool = True):
    """Initializes the instance.

    Args:
      metric: Either a string key name to use to look up a metric (assuming the
        train/eval output is a dictionary), or a callable that accepts the
        train/eval output and returns a metric value.
      higher_is_better: Whether higher metric values are better. If `True`, a
        new best metric is achieved when the metric value is strictly greater
        than the previous best metric. If `False`, a new best metric is
        achieved when the metric value is strictly less than the previous best
        metric.
      filename: A filename to use for storage of the best metric value seen so
        far, to allow persistence of the value across preemptions. If `None`
        (default), values aren't persisted.
      write_metric: If `filename` is set, this controls whether this instance
        will write new best metric values to the file, or just read from the
        file to obtain the initial value. Setting this to `False` for most
        clients in some multi-client setups can avoid unnecessary file writes.
        Has no effect if `filename` is `None`.
    """
    self.metric = metric
    self.higher_is_better = higher_is_better
    float_max = sys.float_info.max
    # Seed with the worst representable value so any real metric beats it.
    self._best_value = JSONPersistedValue(
        initial_value=-float_max if higher_is_better else float_max,
        filename=filename,
        write_value=write_metric)

  def __call__(self, output: runner.Output) -> bool:
    """Tests `output` and updates the current best value if necessary.

    This is equivalent to `commit` below.

    Args:
      output: The train or eval output to test.

    Returns:
      `True` if `output` contains a new best metric value, `False` otherwise.
    """
    return self.commit(output)

  def metric_value(self, output: runner.Output) -> float:
    """Computes the metric value for the given `output`."""
    if callable(self.metric):
      value = self.metric(output)
    else:
      value = output[self.metric]
    return float(utils.get_value(value))

  @property
  def best_value(self) -> float:
    """Returns the best metric value seen so far."""
    return self._best_value.read()

  def test(self, output: runner.Output) -> bool:
    """Tests `output` to see if it contains a new best metric value.

    If `output` does contain a new best metric value, this method does *not*
    save it (i.e., calling this method multiple times in a row with the same
    `output` will continue to return `True`).

    Args:
      output: The train or eval output to test.

    Returns:
      `True` if `output` contains a new best metric value, `False` otherwise.
    """
    metric_value = self.metric_value(output)
    if self.higher_is_better:
      if metric_value > self.best_value:
        return True
    else:  # Lower is better.
      if metric_value < self.best_value:
        return True
    return False

  def commit(self, output: runner.Output) -> bool:
    """Tests `output` and updates the current best value if necessary.

    Unlike `test` above, if `output` does contain a new best metric value, this
    method *does* save it (i.e., subsequent calls to this method with the same
    `output` will return `False`).

    Args:
      output: The train or eval output to test.

    Returns:
      `True` if `output` contains a new best metric value, `False` otherwise.
    """
    if self.test(output):
      self._best_value.write(self.metric_value(output))
      return True
    return False
class JSONPersistedValue:
  """Represents a value that is persisted via a file-based backing store.

  The value must be JSON-serializable. Each time the value is updated, it will
  be written to the backing file. It is only read from the file at
  initialization.
  """

  def __init__(self,
               initial_value: Any,
               filename: Optional[str],
               write_value: bool = True):
    """Initializes the instance.

    Args:
      initial_value: The initial value to use if no backing file exists or was
        given. This must be a JSON-serializable value (possibly nested
        combination of lists, dicts, and primitive values).
      filename: The path to use for persistent storage of the value. This may
        be `None`, in which case the value is not stable across preemptions.
      write_value: If `True`, new values will be written to `filename` on calls
        to `write()`. If `False`, `filename` is only read once to restore any
        persisted value, and new values will not be written to it. This can be
        useful in certain multi-client settings to avoid race conditions or
        excessive file writes. If `filename` is `None`, this parameter has no
        effect.
    """
    self._value = None
    self._filename = filename
    self._write_value = write_value
    if self._filename is not None:
      if tf.io.gfile.exists(self._filename):
        # Only attempt to parse a non-empty file; an empty file means no value
        # has been persisted yet.
        if tf.io.gfile.stat(self._filename).length > 0:
          with tf.io.gfile.GFile(self._filename, 'r') as f:
            self._value = json.load(f)
      elif self._write_value:
        tf.io.gfile.makedirs(os.path.dirname(self._filename))
    # Fall back to `initial_value` when nothing was restored from disk.
    if self._value is None:
      self.write(initial_value)

  def read(self):
    """Returns the value."""
    return self._value

  def write(self, value):
    """Writes the value, updating the backing store if one was provided."""
    self._value = value
    if self._filename is not None and self._write_value:
      # To achieve atomic writes, we first write to a temporary file, and then
      # rename it to `self._filename`.
      tmp_filename = f'{self._filename}.tmp.{uuid.uuid4().hex}'
      with tf.io.gfile.GFile(tmp_filename, 'w') as f:
        json.dump(self._value, f)
      tf.io.gfile.rename(tmp_filename, self._filename, overwrite=True)
# Copyright 2021 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for orbit.actions.new_best_metric."""
import os
from orbit import actions
import tensorflow as tf
class NewBestMetricTest(tf.test.TestCase):
  """Tests for `NewBestMetric` and `JSONPersistedValue`."""

  def test_new_best_metric_higher_is_better(self):
    new_best_metric = actions.NewBestMetric(
        lambda x: x['value'], higher_is_better=True)
    # `test` does not record the value, so it keeps returning True until
    # `commit` saves it.
    self.assertTrue(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.commit({'value': 0.0}))
    self.assertFalse(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.test({'value': 1.0}))

  def test_new_best_metric_lower_is_better(self):
    new_best_metric = actions.NewBestMetric('value', higher_is_better=False)
    self.assertTrue(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.commit({'value': 0.0}))
    self.assertFalse(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.test({'value': -1.0}))

  def test_new_best_metric_persistence(self):
    backing_file = self.create_tempfile()
    new_best_metric = actions.NewBestMetric(
        'value',
        higher_is_better=True,
        filename=backing_file.full_path,
        write_metric=False)
    self.assertTrue(new_best_metric.test({'value': 0.0}))
    self.assertTrue(new_best_metric.commit({'value': 0.0}))
    self.assertFalse(new_best_metric.test({'value': 0.0}))
    # write_metric=False above, so the 0.0 value was never persisted; a new
    # instance restores the (very negative) initial value instead.
    new_best_metric = actions.NewBestMetric(
        'value', higher_is_better=True, filename=backing_file.full_path)
    self.assertLess(new_best_metric.best_value, 0.0)
    self.assertTrue(new_best_metric.commit({'value': 5.0}))
    self.assertEqual(new_best_metric.best_value, 5.0)
    new_best_metric = actions.NewBestMetric(
        'value', higher_is_better=True, filename=backing_file.full_path)
    self.assertEqual(new_best_metric.best_value, 5.0)

  def test_json_persisted_value(self):
    tempfile = self.create_tempfile().full_path
    value = {'a': 1, 'b': 2}
    persisted_value = actions.JSONPersistedValue(value, tempfile)
    # The initial value is used since tempfile is empty.
    self.assertEqual(persisted_value.read(), value)
    persisted_value = actions.JSONPersistedValue('ignored', tempfile)
    # Initial value of 'ignored' is ignored, since there's a value in tempfile.
    self.assertEqual(persisted_value.read(), value)
    value = [1, 2, 3]
    persisted_value.write(value)
    # Now that a new value is written, it gets read on initialization.
    persisted_value = actions.JSONPersistedValue(['also ignored'], tempfile)
    self.assertEqual(persisted_value.read(), value)
    # Writes can be disabled.
    persisted_value = actions.JSONPersistedValue(
        'ignored', tempfile, write_value=False)
    self.assertEqual(persisted_value.read(), value)
    persisted_value.write("won't get persisted")
    persisted_value = actions.JSONPersistedValue(
        'ignored', tempfile, write_value=False)
    self.assertEqual(persisted_value.read(), value)

  def test_json_persisted_value_create_dirs(self):
    tempfile = os.path.join(self.create_tempdir().full_path, 'subdir/value')
    value = {'a': 1, 'b': 2}
    # The directory is not created if write_value=False.
    actions.JSONPersistedValue(value, tempfile, write_value=False)
    self.assertFalse(tf.io.gfile.exists(os.path.dirname(tempfile)))
    actions.JSONPersistedValue(value, tempfile)
    self.assertTrue(tf.io.gfile.exists(tempfile))
if __name__ == '__main__':
tf.test.main()
......@@ -17,7 +17,7 @@
import pprint
import time
from typing import Callable, Optional, Union
from typing import Callable, List, Optional, Union
from absl import logging
......@@ -46,6 +46,9 @@ def _format_output(output, indent=4):
return "\n" + "\n".join(lines)
Action = Callable[[runner.Output], None]
class Controller:
"""Class that controls the outer loop of model training and evaluation.
......@@ -53,10 +56,9 @@ class Controller:
loops are implemented by users in the form of `AbstractTrainer` and
`AbstractEvaluator` subclasses, and define how to run a given number of
training or evaluation steps. The outer loop is provided by this `Controller`,
and interleaves calls to the user provided inner loops with additional actions
such as saving checkpoints, running evaluations, and writing summaries
(depending on the arguments passed to `Controller.__init__` and the method
being called).
and interleaves calls to the user-provided inner loops with additional actions
such as saving checkpoints, running evaluations, writing summaries, as well as
(optionally) user provided `Action`s (see below).
There are four top-level "outer loops" provided:
......@@ -70,6 +72,15 @@ class Controller:
training and evaluation use cases, the internal details and method
implementations are also intended to be simple enough to make subclassing or
other custom outer loop implementations easy to achieve.
Some additional customization can be achieved by supplying `train_actions` or
`eval_actions` when constructing the `Controller`. These are just lists of
arbitrary callables that are applied by the `Controller` to the output of
train steps (after each inner loop of `steps_per_loop` steps) or an
evaluation. This provides a hook mechanism, enabling things like reporting
metrics to Vizier, model exporting, additional logging, etc. See the
`orbit.actions` package for a small handful of predefined actions and some
utility classes that may be useful in defining your own.
"""
def __init__(
......@@ -79,6 +90,9 @@ class Controller:
trainer: Optional[runner.AbstractTrainer] = None,
evaluator: Optional[runner.AbstractEvaluator] = None,
strategy: Optional[tf.distribute.Strategy] = None,
# Actions
train_actions: Optional[List[Action]] = None,
eval_actions: Optional[List[Action]] = None,
# Train related
steps_per_loop: Optional[int] = None,
checkpoint_manager: Optional[tf.train.CheckpointManager] = None,
......@@ -86,7 +100,8 @@ class Controller:
summary_interval: Optional[int] = None,
summary_dir: Optional[str] = None,
# Evaluation related
eval_summary_dir: Optional[str] = None):
eval_summary_dir: Optional[str] = None,
):
"""Initializes a `Controller` instance.
Note that if `checkpoint_manager` is provided and there are checkpoints in
......@@ -110,6 +125,12 @@ class Controller:
strategy: An instance of `tf.distribute.Strategy`. If not provided, the
strategy will be initialized from the current in-scope strategy using
`tf.distribute.get_strategy()`.
train_actions: An optional list of `orbit.Action`s to call after each
block of `steps_per_loop` training steps are run. These will be called
with the output of `trainer.train`.
eval_actions: An optional list of `orbit.Action`s to call after each
evaluation. These will be called with the output of
`evaluator.evaluate`.
steps_per_loop: The number of steps to run in each inner loop of training
(passed as the `num_steps` parameter of `trainer.train`).
checkpoint_manager: An instance of `tf.train.CheckpointManager`. If
......@@ -138,6 +159,7 @@ class Controller:
"""
if trainer is None and evaluator is None:
raise ValueError("`trainer` and `evaluator` should not both be `None`.")
if trainer is not None:
if steps_per_loop is None:
raise ValueError(
......@@ -163,6 +185,9 @@ class Controller:
self.strategy = strategy or tf.distribute.get_strategy()
self.train_actions = train_actions or []
self.eval_actions = eval_actions or []
self.global_step = global_step
self.checkpoint_manager = checkpoint_manager
......@@ -255,9 +280,13 @@ class Controller:
with self.eval_summary_manager.summary_writer().as_default():
steps_tensor = tf.convert_to_tensor(steps, dtype=tf.int32)
eval_output = self.evaluator.evaluate(steps_tensor)
eval_output = tf.nest.map_structure(utils.get_value, eval_output or {})
elapsed = time.time() - start
eval_output = eval_output or {}
for action in self.eval_actions:
action(eval_output)
eval_output = tf.nest.map_structure(utils.get_value, eval_output)
_log(f" eval | step: {current_step: 6d} | "
f"eval time: {elapsed: 6.1f} sec | "
f"output: {_format_output(eval_output)}")
......@@ -338,7 +367,7 @@ class Controller:
self.restore_checkpoint(checkpoint_path)
self.evaluate(steps)
def restore_checkpoint(self, checkpoint_path: str = None):
def restore_checkpoint(self, checkpoint_path: Optional[str] = None):
"""Restores the model from a checkpoint.
Args:
......@@ -408,7 +437,6 @@ class Controller:
with tf.summary.record_if(should_record):
num_steps_tensor = tf.convert_to_tensor(num_steps, dtype=tf.int32)
train_output = self.trainer.train(num_steps_tensor)
train_output = tf.nest.map_structure(utils.get_value, train_output or {})
# Verify that global_step was updated properly, then update current_step.
expected_step = current_step + num_steps
......@@ -420,6 +448,11 @@ class Controller:
logging.warning(message)
return
train_output = train_output or {}
for action in self.train_actions:
action(train_output)
train_output = tf.nest.map_structure(utils.get_value, train_output)
current_step = expected_step
steps_per_second = self.step_timer.steps_per_second()
_log(f"train | step: {current_step: 6d} | "
......
......@@ -583,7 +583,7 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase):
test_runner = TestRunner()
class EarlyStopController(controller.Controller):
"""A subclass of Controller supports early stopping."""
"""A subclass of Controller that supports early stopping."""
def train_and_evaluate(self,
train_steps: int = None,
......@@ -724,5 +724,52 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase):
summaries_with_matching_keyword(
"accuracy", os.path.join(self.model_dir, "dataset2")))
def test_actions(self):
test_runner = TestRunner()
checkpoint = tf.train.Checkpoint(
model=test_runner.model, optimizer=test_runner.optimizer)
checkpoint_manager = tf.train.CheckpointManager(
checkpoint,
self.model_dir,
max_to_keep=None,
step_counter=test_runner.global_step,
checkpoint_interval=10)
class OutputRecorderAction:
"""Simple `Action` that just saves the outputs passed to `__call__`."""
def __init__(self):
self.outputs = []
def __call__(self, output):
self.outputs.append(output)
train_output_recorder = OutputRecorderAction()
eval_output_recorder = OutputRecorderAction()
test_controller = controller.Controller(
trainer=test_runner,
evaluator=test_runner,
train_actions=[train_output_recorder],
eval_actions=[eval_output_recorder],
global_step=test_runner.global_step,
steps_per_loop=2,
summary_dir=os.path.join(self.model_dir, "summaries/train"),
checkpoint_manager=checkpoint_manager,
eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
test_controller.train_and_evaluate(
train_steps=10, eval_steps=2, eval_interval=6)
self.assertLen(train_output_recorder.outputs, 5)
for output in train_output_recorder.outputs:
self.assertIn("loss", output)
self.assertGreaterEqual(output["loss"], 0)
self.assertLen(eval_output_recorder.outputs, 2)
for output in eval_output_recorder.outputs:
self.assertIn("eval_loss", output)
self.assertGreaterEqual(output["eval_loss"], 0)
# Standard TensorFlow test entry point: runs the test cases above when the
# file is executed as a script.
if __name__ == "__main__":
  tf.test.main()
......@@ -83,7 +83,9 @@ class StandardTrainer(runner.AbstractTrainer, metaclass=abc.ABCMeta):
`tf.function`, as determined by the `options` passed to `__init__`.
"""
def __init__(self, train_dataset, options: StandardTrainerOptions = None):
def __init__(self,
train_dataset,
options: Optional[StandardTrainerOptions] = None):
"""Initializes the `StandardTrainer` instance.
Args:
......@@ -256,7 +258,9 @@ class StandardEvaluator(runner.AbstractEvaluator, metaclass=abc.ABCMeta):
is recommended in this case.
"""
def __init__(self, eval_dataset, options: StandardEvaluatorOptions = None):
def __init__(self,
eval_dataset,
options: Optional[StandardEvaluatorOptions] = None):
"""Initializes the `StandardEvaluator` instance.
Args:
......@@ -403,7 +407,7 @@ class StandardEvaluator(runner.AbstractEvaluator, metaclass=abc.ABCMeta):
pass
def eval_reduce(self,
state: Any = None,
state: Optional[Any] = None,
step_outputs: Optional[runner.Output] = None) -> Any:
"""A function to perform per-step reduction on the evaluation outputs.
......
......@@ -170,8 +170,7 @@ the postprocessor can be run after inference.
If you don't need to use the released embeddings or YouTube-8M, then you could
skip postprocessing and use raw embeddings.
A [Colab](https://colab.research.google.com/)
showing how to download the model and calculate the embeddings on your
A Colab showing how to download the model and calculate the embeddings on your
own sound data is available here:
[AudioSet Embedding Colab](https://colab.research.google.com/drive/1TbX92UL9sYWbdwdGE0rJ9owmezB-Rl1C).
[VGGish Embedding Colab](https://colab.research.google.com/drive/1E3CaPAqCai9P9QhJ3WYPNCVmrJU4lAhF).
......@@ -35,6 +35,8 @@ class AttentionModel(tf.keras.Model):
Uses two [kernel_size x kernel_size] convolutions and softplus as activation
to compute an attention map with the same resolution as the featuremap.
Features are l2-normalized and aggregated using attention probabilities as weights.
The features (targets) to be aggregated can be the input featuremap, or a
different one with the same resolution.
"""
def __init__(self, kernel_size=1, decay=_DECAY, name='attention'):
......@@ -65,7 +67,7 @@ class AttentionModel(tf.keras.Model):
name='attn_conv2')
self.activation_layer = layers.Activation('softplus')
def call(self, inputs, training=True):
def call(self, inputs, targets=None, training=True):
x = self.conv1(inputs)
x = self.bn_conv1(x, training=training)
x = tf.nn.relu(x)
......@@ -73,9 +75,13 @@ class AttentionModel(tf.keras.Model):
score = self.conv2(x)
prob = self.activation_layer(score)
# Aggregate inputs if targets is None.
if targets is None:
targets = inputs
# L2-normalize the featuremap before pooling.
inputs = tf.nn.l2_normalize(inputs, axis=-1)
feat = tf.reduce_mean(tf.multiply(inputs, prob), [1, 2], keepdims=False)
targets = tf.nn.l2_normalize(targets, axis=-1)
feat = tf.reduce_mean(tf.multiply(targets, prob), [1, 2], keepdims=False)
return feat, prob, score
......@@ -208,7 +214,9 @@ class Delf(tf.keras.Model):
block3 = tf.stop_gradient(block3)
if self._use_dim_reduction:
(dim_expanded_features, dim_reduced_features) = self.autoencoder(block3)
attn_prelogits, attn_scores, _ = self.attention(dim_expanded_features,
attn_prelogits, attn_scores, _ = self.attention(
block3,
targets=dim_expanded_features,
training=training)
else:
attn_prelogits, attn_scores, _ = self.attention(block3, training=training)
......
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the DELG model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import tensorflow as tf
from delf.python.training.model import delg_model
class DelgTest(tf.test.TestCase, parameterized.TestCase):
  """Smoke tests for the DELG model's global and attention/local heads.

  Each test is parameterized over `block3_strides`; when enabled, the
  attention feature map's spatial resolution is halved (see
  `test_forward_pass` below).
  """

  @parameterized.named_parameters(
      ('block3_stridesTrue', True),
      ('block3_stridesFalse', False),
  )
  def test_forward_pass(self, block3_strides):
    """Checks the output shapes of a complete forward pass (`build_call`)."""
    image_size = 321
    num_classes = 1000
    batch_size = 2
    input_shape = (batch_size, image_size, image_size, 3)
    local_feature_dim = 64
    feature_map_size = image_size // 16 # reduction factor for resnet50.
    if block3_strides:
      # Extra stride halves the attention feature-map resolution.
      feature_map_size //= 2

    model = delg_model.Delg(block3_strides=block3_strides,
                            use_dim_reduction=True,
                            reduced_dimension=local_feature_dim)
    model.init_classifiers(num_classes)

    images = tf.random.uniform(input_shape, minval=-1.0, maxval=1.0, seed=0)

    # Run a complete forward pass of the model.
    global_feature, attn_scores, local_features = model.build_call(images)

    # Global descriptor is 2048-D; attention scores and local features share
    # the attention feature-map's spatial resolution.
    self.assertAllEqual(global_feature.shape, (batch_size, 2048))
    self.assertAllEqual(
        attn_scores.shape,
        (batch_size, feature_map_size, feature_map_size, 1))
    self.assertAllEqual(
        local_features.shape,
        (batch_size, feature_map_size, feature_map_size, local_feature_dim))

  @parameterized.named_parameters(
      ('block3_stridesTrue', True),
      ('block3_stridesFalse', False),
  )
  def test_build_model(self, block3_strides):
    """Runs the backbone and classifier heads piecewise and checks shapes."""
    image_size = 321
    num_classes = 1000
    batch_size = 2
    input_shape = (batch_size, image_size, image_size, 3)

    model = delg_model.Delg(
        block3_strides=block3_strides,
        use_dim_reduction=True)
    model.init_classifiers(num_classes)

    images = tf.random.uniform(input_shape, minval=-1.0, maxval=1.0, seed=0)
    # NOTE(review): `maxval` is exclusive in `tf.random.uniform`, so the last
    # class id (`num_classes - 1`) is never sampled here.
    labels = tf.random.uniform((batch_size,),
                               minval=0,
                               maxval=model.num_classes - 1,
                               dtype=tf.int64)

    # Collect intermediate backbone feature maps into `blocks`.
    blocks = {}
    desc_prelogits = model.backbone(
        images, intermediates_dict=blocks, training=False)
    desc_logits = model.desc_classification(desc_prelogits, labels)
    self.assertAllEqual(desc_prelogits.shape, (batch_size, 2048))
    self.assertAllEqual(desc_logits.shape, (batch_size, num_classes))

    features = blocks['block3']
    attn_prelogits, _, _ = model.attention(features)
    attn_logits = model.attn_classification(attn_prelogits)
    self.assertAllEqual(attn_prelogits.shape, (batch_size, 1024))
    self.assertAllEqual(attn_logits.shape, (batch_size, num_classes))

  @parameterized.named_parameters(
      ('block3_stridesTrue', True),
      ('block3_stridesFalse', False),
  )
  def test_train_step(self, block3_strides):
    """Exercises one optimizer step over the combined DELG losses."""
    image_size = 321
    num_classes = 1000
    batch_size = 2
    clip_val = 10.0
    input_shape = (batch_size, image_size, image_size, 3)

    model = delg_model.Delg(
        block3_strides=block3_strides,
        use_dim_reduction=True)
    model.init_classifiers(num_classes)

    optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)

    images = tf.random.uniform(input_shape, minval=0.0, maxval=1.0, seed=0)
    # NOTE(review): `maxval` is exclusive, so the last class id is never drawn.
    labels = tf.random.uniform((batch_size,),
                               minval=0,
                               maxval=model.num_classes - 1,
                               dtype=tf.int64)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

    def compute_loss(labels, predictions):
      # Average per-example loss over the (global) batch size.
      per_example_loss = loss_object(labels, predictions)
      return tf.nn.compute_average_loss(
          per_example_loss, global_batch_size=batch_size)

    with tf.GradientTape() as gradient_tape:
      (desc_prelogits, attn_prelogits, _, backbone_blocks,
       dim_expanded_features, _) = model.global_and_local_forward_pass(images)
      # Calculate global loss by applying the descriptor classifier.
      desc_logits = model.desc_classification(desc_prelogits, labels)
      desc_loss = compute_loss(labels, desc_logits)
      # Calculate attention loss by applying the attention block classifier.
      attn_logits = model.attn_classification(attn_prelogits)
      attn_loss = compute_loss(labels, attn_logits)
      # Calculate reconstruction loss between the attention prelogits and the
      # backbone.
      block3 = tf.stop_gradient(backbone_blocks['block3'])
      reconstruction_loss = tf.math.reduce_mean(
          tf.keras.losses.MSE(block3, dim_expanded_features))
      # Cumulate global loss and attention loss and backpropagate through the
      # descriptor layer and attention layer together.
      total_loss = desc_loss + attn_loss + reconstruction_loss

    # Clip gradients by global norm before applying, bounding update size.
    gradients = gradient_tape.gradient(total_loss, model.trainable_weights)
    clipped, _ = tf.clip_by_global_norm(gradients, clip_norm=clip_val)
    optimizer.apply_gradients(zip(clipped, model.trainable_weights))
# Standard TensorFlow test entry point: runs the test cases above when the
# file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -926,11 +926,27 @@ def object_detection_proto_to_params(od_config):
losses_pb2.WeightedSigmoidClassificationLoss())
loss.localization_loss.CopyFrom(od_config.localization_loss)
_, localization_loss, _, _, _, _, _ = (losses_builder.build(loss))
if od_config.HasField('scale_head_params'):
scale_head_num_filters = list(od_config.scale_head_params.num_filters)
scale_head_kernel_sizes = list(od_config.scale_head_params.kernel_sizes)
else:
scale_head_num_filters = [256]
scale_head_kernel_sizes = [3]
if od_config.HasField('offset_head_params'):
offset_head_num_filters = list(od_config.offset_head_params.num_filters)
offset_head_kernel_sizes = list(od_config.offset_head_params.kernel_sizes)
else:
offset_head_num_filters = [256]
offset_head_kernel_sizes = [3]
return center_net_meta_arch.ObjectDetectionParams(
localization_loss=localization_loss,
scale_loss_weight=od_config.scale_loss_weight,
offset_loss_weight=od_config.offset_loss_weight,
task_loss_weight=od_config.task_loss_weight)
task_loss_weight=od_config.task_loss_weight,
scale_head_num_filters=scale_head_num_filters,
scale_head_kernel_sizes=scale_head_kernel_sizes,
offset_head_num_filters=offset_head_num_filters,
offset_head_kernel_sizes=offset_head_kernel_sizes)
def object_center_proto_to_params(oc_config):
......@@ -973,13 +989,21 @@ def mask_proto_to_params(mask_config):
losses_pb2.WeightedL2LocalizationLoss())
loss.classification_loss.CopyFrom(mask_config.classification_loss)
classification_loss, _, _, _, _, _, _ = (losses_builder.build(loss))
if mask_config.HasField('mask_head_params'):
mask_head_num_filters = list(mask_config.mask_head_params.num_filters)
mask_head_kernel_sizes = list(mask_config.mask_head_params.kernel_sizes)
else:
mask_head_num_filters = [256]
mask_head_kernel_sizes = [3]
return center_net_meta_arch.MaskParams(
classification_loss=classification_loss,
task_loss_weight=mask_config.task_loss_weight,
mask_height=mask_config.mask_height,
mask_width=mask_config.mask_width,
score_threshold=mask_config.score_threshold,
heatmap_bias_init=mask_config.heatmap_bias_init)
heatmap_bias_init=mask_config.heatmap_bias_init,
mask_head_num_filters=mask_head_num_filters,
mask_head_kernel_sizes=mask_head_kernel_sizes)
def densepose_proto_to_params(densepose_config):
......
......@@ -188,7 +188,7 @@ class ModelBuilderTF2Test(
return text_format.Merge(proto_txt,
center_net_pb2.CenterNet.ObjectCenterParams())
def get_fake_object_detection_proto(self):
def get_fake_object_detection_proto(self, customize_head_params=False):
proto_txt = """
task_loss_weight: 0.5
offset_loss_weight: 0.1
......@@ -198,10 +198,19 @@ class ModelBuilderTF2Test(
}
}
"""
if customize_head_params:
proto_txt += """
scale_head_params {
num_filters: 128
num_filters: 64
kernel_sizes: 5
kernel_sizes: 3
}
"""
return text_format.Merge(proto_txt,
center_net_pb2.CenterNet.ObjectDetection())
def get_fake_mask_proto(self):
def get_fake_mask_proto(self, customize_head_params=False):
proto_txt = """
task_loss_weight: 0.7
classification_loss {
......@@ -212,6 +221,15 @@ class ModelBuilderTF2Test(
score_threshold: 0.7
heatmap_bias_init: -2.0
"""
if customize_head_params:
proto_txt += """
mask_head_params {
num_filters: 128
num_filters: 64
kernel_sizes: 5
kernel_sizes: 3
}
"""
return text_format.Merge(proto_txt,
center_net_pb2.CenterNet.MaskEstimation())
......@@ -266,14 +284,16 @@ class ModelBuilderTF2Test(
self.get_fake_object_center_proto(
customize_head_params=customize_head_params))
config.center_net.object_detection_task.CopyFrom(
self.get_fake_object_detection_proto())
self.get_fake_object_detection_proto(
customize_head_params=customize_head_params))
config.center_net.keypoint_estimation_task.append(
self.get_fake_keypoint_proto(
customize_head_params=customize_head_params))
config.center_net.keypoint_label_map_path = (
self.get_fake_label_map_file_path())
config.center_net.mask_estimation_task.CopyFrom(
self.get_fake_mask_proto())
self.get_fake_mask_proto(
customize_head_params=customize_head_params))
config.center_net.densepose_estimation_task.CopyFrom(
self.get_fake_densepose_proto())
......@@ -303,6 +323,14 @@ class ModelBuilderTF2Test(
self.assertAlmostEqual(model._od_params.task_loss_weight, 0.5)
self.assertIsInstance(model._od_params.localization_loss,
losses.L1LocalizationLoss)
self.assertEqual(model._od_params.offset_head_num_filters, [256])
self.assertEqual(model._od_params.offset_head_kernel_sizes, [3])
if customize_head_params:
self.assertEqual(model._od_params.scale_head_num_filters, [128, 64])
self.assertEqual(model._od_params.scale_head_kernel_sizes, [5, 3])
else:
self.assertEqual(model._od_params.scale_head_num_filters, [256])
self.assertEqual(model._od_params.scale_head_kernel_sizes, [3])
# Check keypoint estimation related parameters.
kp_params = model._kp_params_dict['human_pose']
......@@ -352,6 +380,12 @@ class ModelBuilderTF2Test(
self.assertAlmostEqual(model._mask_params.score_threshold, 0.7)
self.assertAlmostEqual(
model._mask_params.heatmap_bias_init, -2.0, places=4)
if customize_head_params:
self.assertEqual(model._mask_params.mask_head_num_filters, [128, 64])
self.assertEqual(model._mask_params.mask_head_kernel_sizes, [5, 3])
else:
self.assertEqual(model._mask_params.mask_head_num_filters, [256])
self.assertEqual(model._mask_params.mask_head_kernel_sizes, [3])
# Check DensePose related parameters.
self.assertEqual(model._densepose_params.class_id, 0)
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "RD3uxzaJweYr"
},
"source": [
"##### Copyright 2021 The TensorFlow Authors."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"cellView": "form",
"id": "C-vBUz5IhJs8"
},
"outputs": [],
"source": [
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "pHTibyMehTvH"
},
"source": [
"# Tutorial: Convert models trained using TensorFlow Object Detection API to TensorFlow Lite\n",
"\n",
    "This tutorial demonstrates these steps:\n",
"* Convert TensorFlow models trained using the TensorFlow Object Detection API to [TensorFlow Lite](https://www.tensorflow.org/lite).\n",
"* Add the required metadata using [TFLite Metadata Writer API](https://www.tensorflow.org/lite/convert/metadata_writer_tutorial#object_detectors). This will make the TFLite model compatible with [TFLite Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector), so that the model can be integrated in mobile apps in 3 lines of code."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QIR1IFpnLJJA"
},
"source": [
"\u003ctable align=\"left\"\u003e\u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/object_detection/colab_tutorials/convert_odt_model_to_TFLite.ipynb\"\u003e\n",
" \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\n",
" \u003c/a\u003e\n",
"\u003c/td\u003e\u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/research/object_detection/colab_tutorials/convert_odt_model_to_TFLite.ipynb\"\u003e\n",
" \u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
"\u003c/td\u003e\u003c/table\u003e"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ok_Rpv7XNaFJ"
},
"source": [
"## Preparation"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t7CAW5C1cmel"
},
"source": [
"### Install the TFLite Support Library"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DwtFa0jSnNU4"
},
"outputs": [],
"source": [
"!pip install -q tflite_support"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XRfJR9QXctAR"
},
"source": [
"### Install the TensorFlow Object Detection API\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7PP2P5XAqeI5"
},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"\n",
"# Clone the tensorflow models repository if it doesn't already exist\n",
"if \"models\" in pathlib.Path.cwd().parts:\n",
" while \"models\" in pathlib.Path.cwd().parts:\n",
" os.chdir('..')\n",
"elif not pathlib.Path('models').exists():\n",
" !git clone --depth 1 https://github.com/tensorflow/models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bP6SSh6zqi07"
},
"outputs": [],
"source": [
"%%bash\n",
"cd models/research/\n",
"protoc object_detection/protos/*.proto --python_out=.\n",
"cp object_detection/packages/tf2/setup.py .\n",
"pip install -q ."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "i0to7aXKc0O9"
},
"source": [
"### Import the necessary libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4M8CC1PgqnSf"
},
"outputs": [],
"source": [
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import os\n",
"import random\n",
"import io\n",
"import imageio\n",
"import glob\n",
"import scipy.misc\n",
"import numpy as np\n",
"from six import BytesIO\n",
"from PIL import Image, ImageDraw, ImageFont\n",
"from IPython.display import display, Javascript\n",
"from IPython.display import Image as IPyImage\n",
"\n",
"import tensorflow as tf\n",
"\n",
"from object_detection.utils import label_map_util\n",
"from object_detection.utils import config_util\n",
"from object_detection.utils import visualization_utils as viz_utils\n",
"from object_detection.utils import colab_utils\n",
"from object_detection.utils import config_util\n",
"from object_detection.builders import model_builder\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "s9WIOOMTNti5"
},
"source": [
"## Download a pretrained model from Model Zoo\n",
"\n",
"In this tutorial, we demonstrate converting a pretrained model `SSD MobileNet V2 FPNLite 640x640` in the [TensorFlow 2 Model Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md). You can replace the model with your own model and the rest will work the same."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TIY3cxDgsxuZ"
},
"outputs": [],
"source": [
"!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz\n",
"!tar -xf ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz\n",
"!rm ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0gV8vr6nN-z9"
},
"source": [
"## Generate TensorFlow Lite Model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Z8FjeSmmxpXz"
},
"source": [
"### Step 1: Export TFLite inference graph\n",
"\n",
"First, we invoke `export_tflite_graph_tf2.py` to generate a TFLite-friendly intermediate SavedModel. This will then be passed to the TensorFlow Lite Converter for generating the final model.\n",
"\n",
"Use `--help` with the above script to get the full list of supported parameters.\n",
"These can fine-tune accuracy and speed for your model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ChfN-tzBXqko"
},
"outputs": [],
"source": [
"!python models/research/object_detection/export_tflite_graph_tf2.py \\\n",
" --trained_checkpoint_dir {'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint'} \\\n",
" --output_directory {'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/tflite'} \\\n",
" --pipeline_config_path {'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/pipeline.config'}"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IPr06cZ3OY3H"
},
"source": [
"### Step 2: Convert to TFLite\n",
"\n",
"Use the [TensorFlow Lite Converter](https://www.tensorflow.org/lite/convert) to\n",
"convert the `SavedModel` to TFLite. Note that you need to use `from_saved_model`\n",
"for TFLite conversion with the Python API.\n",
"\n",
"You can also leverage\n",
"[Post-training Quantization](https://www.tensorflow.org/lite/performance/post_training_quantization)\n",
"to\n",
"[optimize performance](https://www.tensorflow.org/lite/performance/model_optimization)\n",
"and obtain a smaller model. In this tutorial, we use the [dynamic range quantization](https://www.tensorflow.org/lite/performance/post_training_quant)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "JMpy3Rlpq-Yq"
},
"outputs": [],
"source": [
"_TFLITE_MODEL_PATH = \"ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/model.tflite\"\n",
"\n",
"converter = tf.lite.TFLiteConverter.from_saved_model('ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/tflite/saved_model')\n",
"converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
"tflite_model = converter.convert()\n",
"\n",
"with open(_TFLITE_MODEL_PATH, 'wb') as f:\n",
" f.write(tflite_model)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fyjlnmaEOtKp"
},
"source": [
"### Step 3: Add Metadata\n",
"\n",
"The model needs to be packed with [TFLite Metadata](https://www.tensorflow.org/lite/convert/metadata) to enable easy integration into mobile apps using the [TFLite Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector). This metadata helps the inference code perform the correct pre \u0026 post processing as required by the model. Use the following code to create the metadata."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-ecGLG_Ovjcr"
},
"outputs": [],
"source": [
    "# Download the COCO dataset label map that was used to train the SSD MobileNet V2 FPNLite 640x640 model\n",
"!wget https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/mscoco_label_map.pbtxt -q\n",
"\n",
"# We need to convert the Object Detection API's labelmap into what the Task API needs:\n",
"# a txt file with one class name on each line from index 0 to N.\n",
"# The first '0' class indicates the background.\n",
"# This code assumes COCO detection which has 90 classes, you can write a label\n",
"# map file for your model if re-trained.\n",
"_ODT_LABEL_MAP_PATH = 'mscoco_label_map.pbtxt'\n",
"_TFLITE_LABEL_PATH = \"ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/tflite_label_map.txt\"\n",
"\n",
"category_index = label_map_util.create_category_index_from_labelmap(\n",
" _ODT_LABEL_MAP_PATH)\n",
"f = open(_TFLITE_LABEL_PATH, 'w')\n",
"for class_id in range(1, 91):\n",
" if class_id not in category_index:\n",
" f.write('???\\n')\n",
" continue\n",
" name = category_index[class_id]['name']\n",
" f.write(name+'\\n')\n",
"f.close()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YJSyXq5Qss9X"
},
"source": [
"Then we'll add the label map and other necessary metadata (e.g. normalization config) to the TFLite model.\n",
"\n",
    "As the `SSD MobileNet V2 FPNLite 640x640` model takes input images with pixel values in the range of [-1..1] ([code](https://github.com/tensorflow/models/blob/b09e75828e2c65ead9e624a5c7afed8d214247aa/research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py#L132)), we need to set `norm_mean = 127.5` and `norm_std = 127.5`. See this [documentation](https://www.tensorflow.org/lite/convert/metadata#normalization_and_quantization_parameters) for more details on the normalization parameters."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "CRQpfDAWsPeK"
},
"outputs": [],
"source": [
"from tflite_support.metadata_writers import object_detector\n",
"from tflite_support.metadata_writers import writer_utils\n",
"\n",
"_TFLITE_MODEL_WITH_METADATA_PATH = \"ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/model_with_metadata.tflite\"\n",
"\n",
"writer = object_detector.MetadataWriter.create_for_inference(\n",
" writer_utils.load_file(_TFLITE_MODEL_PATH), input_norm_mean=[127.5], \n",
" input_norm_std=[127.5], label_file_paths=[_TFLITE_LABEL_PATH])\n",
"writer_utils.save_file(writer.populate(), _TFLITE_MODEL_WITH_METADATA_PATH)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YFEAjRBdPCQb"
},
"source": [
"Optional: Print out the metadata added to the TFLite model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FT3-38PJsSOt"
},
"outputs": [],
"source": [
"from tflite_support import metadata\n",
"\n",
"displayer = metadata.MetadataDisplayer.with_model_file(_TFLITE_MODEL_WITH_METADATA_PATH)\n",
"print(\"Metadata populated:\")\n",
"print(displayer.get_metadata_json())\n",
"print(\"=============================\")\n",
"print(\"Associated file(s) populated:\")\n",
"print(displayer.get_packed_associated_file_list())"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "l7zVslTRnEHX"
},
"source": [
"The TFLite model now can be integrated into a mobile app using the TFLite Task Library. See the [documentation](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector) for more details."
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Convert TF Object Detection API model to TFLite.ipynb",
"private_outputs": true,
"provenance": [
{
"file_id": "1R4_y-u14YTdvBzhmvC0HQwh3HkcCN2Bd",
"timestamp": 1623114733432
},
{
"file_id": "1Rey5kAzNQhJ77tsXGjhcAV0UZ6du0Sla",
"timestamp": 1622897882140
}
],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
......@@ -37,7 +37,6 @@ from abc import ABCMeta
from abc import abstractmethod
import six
from six.moves import zip
import tensorflow.compat.v1 as tf
......@@ -107,11 +106,9 @@ class AnchorGenerator(six.with_metaclass(ABCMeta, object)):
with tf.name_scope(self.name_scope()):
anchors_list = self._generate(feature_map_shape_list, **params)
if self.check_num_anchors:
with tf.control_dependencies([
self._assert_correct_number_of_anchors(
anchors_list, feature_map_shape_list)]):
for item in anchors_list:
item.set(tf.identity(item.get()))
return anchors_list
@abstractmethod
......@@ -146,26 +143,3 @@ class AnchorGenerator(six.with_metaclass(ABCMeta, object)):
feature_map_indices_list.append(
i * tf.ones([boxes.num_boxes()], dtype=tf.int32))
return tf.concat(feature_map_indices_list, axis=0)
def _assert_correct_number_of_anchors(self, anchors_list,
                                      feature_map_shape_list):
  """Builds an assertion op checking the generated anchor count.

  Walks the per-layer anchor grids in lockstep with the feature map
  shapes and compares the total number of anchors actually produced
  against the number implied by `num_anchors_per_location()`.

  Args:
    anchors_list: A list of box_list.BoxList object holding anchors generated.
    feature_map_shape_list: list of (height, width) pairs in the format
      [(height_0, width_0), (height_1, width_1), ...] that the generated
      anchors must align with.

  Returns:
    Op that raises InvalidArgumentError if the number of anchors does not
    match the number of expected anchors.
  """
  total_expected = 0
  total_actual = 0
  # One triple per feature-map layer: anchors-per-cell, (h, w), boxes.
  layer_triples = zip(self.num_anchors_per_location(),
                      feature_map_shape_list, anchors_list)
  for anchors_per_cell, fmap_shape, layer_boxes in layer_triples:
    # Expected count for this layer is anchors-per-cell * height * width.
    total_expected += anchors_per_cell * fmap_shape[0] * fmap_shape[1]
    total_actual += layer_boxes.num_boxes()
  return tf.assert_equal(total_expected, total_actual)
......@@ -101,7 +101,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
fields.BoxListFields.{boxes,classes,masks,mask_weights,keypoints,
keypoint_visibilities, densepose_*, track_ids,
temporal_offsets, track_match_flags}
fields.InputDataFields.is_annotated.
......@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
fields.BoxListFields.{boxes,classes,masks,mask_weights,keypoints,
keypoint_visibilities, densepose_*, track_ids} or
fields.InputDataFields.is_annotated.
......@@ -299,6 +299,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_mask_weights_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_dp_num_points_list=None,
......@@ -334,6 +335,8 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
masks with values in {0, 1}. If None, no masks are provided.
Mask resolution `height_in`x`width_in` must agree with the resolution
of the input image tensor provided to the `preprocess` function.
groundtruth_mask_weights_list: a list of 1-D tf.float32 tensors of shape
[num_boxes] with weights for each instance mask.
groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
......@@ -399,6 +402,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
if groundtruth_masks_list:
self._groundtruth_lists[
fields.BoxListFields.masks] = groundtruth_masks_list
if groundtruth_mask_weights_list:
self._groundtruth_lists[
fields.BoxListFields.mask_weights] = groundtruth_mask_weights_list
if groundtruth_keypoints_list:
self._groundtruth_lists[
fields.BoxListFields.keypoints] = groundtruth_keypoints_list
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment