Internal change

PiperOrigin-RevId: 326286926

Internal change
PiperOrigin-RevId: 326286926
88253ce5 · Hongkun Yu · A. Unique TensorFlower · 52371ffe · 88253ce5 · 88253ce5
Commit 88253ce5 authored Aug 12, 2020 by Hongkun Yu Committed by A. Unique TensorFlower Aug 12, 2020
20 changed files
--- a/official/utils/misc/distribution_utils.py
+++ b/official/utils/misc/distribution_utils.py
@@ -93,9 +93,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
  Args:
    distribution_strategy: a string specifying which distribution strategy to
      use. Accepted values are "off", "one_device", "mirrored",
-      "parameter_server", "multi_worker_mirrored", and "tpu" -- case insensitive.
+      "parameter_server", "multi_worker_mirrored", and "tpu" -- case
-      "off" means not to use Distribution Strategy; "tpu" means to use
+      insensitive. "off" means not to use Distribution Strategy; "tpu" means to
-      TPUStrategy using `tpu_address`.
+      use TPUStrategy using `tpu_address`.
    num_gpus: Number of GPUs to run this model.
    all_reduce_alg: Optional. Specifies which algorithm to use when performing
      all-reduce. For `MirroredStrategy`, valid values are "nccl" and
@@ -104,8 +104,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
      device topology.
    num_packs: Optional.  Sets the `num_packs` in `tf.distribute.NcclAllReduce`
      or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
-    tpu_address: Optional. String that represents TPU to connect to. Must not
+    tpu_address: Optional. String that represents TPU to connect to. Must not be
-      be None if `distribution_strategy` is set to `tpu`.
+      None if `distribution_strategy` is set to `tpu`.
  Returns:
    tf.distribute.DistributionStrategy object.
  Raises:
@@ -119,9 +120,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
  distribution_strategy = distribution_strategy.lower()
  if distribution_strategy == "off":
    if num_gpus > 1:
-      raise ValueError(
+      raise ValueError("When {} GPUs are specified, distribution_strategy "
-          "When {} GPUs are specified, distribution_strategy "
+                       "flag cannot be set to `off`.".format(num_gpus))
-          "flag cannot be set to `off`.".format(num_gpus))
    return None
  if distribution_strategy == "tpu":
@@ -153,8 +153,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
  if distribution_strategy == "parameter_server":
    return tf.distribute.experimental.ParameterServerStrategy()
-  raise ValueError(
+  raise ValueError("Unrecognized Distribution Strategy: %r" %
-      "Unrecognized Distribution Strategy: %r" % distribution_strategy)
+                   distribution_strategy)
 def configure_cluster(worker_hosts=None, task_index=-1):
@@ -168,8 +168,9 @@ def configure_cluster(worker_hosts=None, task_index=-1):
  """
  tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
  if tf_config:
-    num_workers = (len(tf_config["cluster"].get("chief", [])) +
+    num_workers = (
-                   len(tf_config["cluster"].get("worker", [])))
+        len(tf_config["cluster"].get("chief", [])) +
+        len(tf_config["cluster"].get("worker", [])))
  elif worker_hosts:
    workers = worker_hosts.split(",")
    num_workers = len(workers)
@@ -180,7 +181,10 @@ def configure_cluster(worker_hosts=None, task_index=-1):
        "cluster": {
            "worker": workers
        },
-        "task": {"type": "worker", "index": task_index}
+        "task": {
+            "type": "worker",
+            "index": task_index
+        }
    })
  else:
    num_workers = 1

--- a/official/utils/misc/distribution_utils_test.py
+++ b/official/utils/misc/distribution_utils_test.py
@@ -25,6 +25,7 @@ from official.utils.misc import distribution_utils
 class GetDistributionStrategyTest(tf.test.TestCase):
  """Tests for get_distribution_strategy."""
  def test_one_device_strategy_cpu(self):
    ds = distribution_utils.get_distribution_strategy(num_gpus=0)
    self.assertEquals(ds.num_replicas_in_sync, 1)
@@ -45,5 +46,5 @@ class GetDistributionStrategyTest(tf.test.TestCase):
      self.assertIn('GPU', device)
-if __name__ == "__main__":
+if __name__ == '__main__':
  tf.test.main()
--- a/official/utils/misc/keras_utils.py
+++ b/official/utils/misc/keras_utils.py
@@ -25,7 +25,6 @@ import time
 from absl import logging
 import tensorflow as tf
 from tensorflow.python.eager import monitoring
 global_batch_size_gauge = monitoring.IntGauge(
@@ -121,8 +120,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
    # Record the timestamp of the first global step
    if not self.timestamp_log:
-      self.timestamp_log.append(BatchTimestamp(self.global_steps,
+      self.timestamp_log.append(
-                                               self.start_time))
+          BatchTimestamp(self.global_steps, self.start_time))
  def on_batch_end(self, batch, logs=None):
    """Records elapsed time of the batch and calculates examples per second."""
@@ -175,12 +174,12 @@ def set_session_config(enable_xla=False):
  if enable_xla:
    tf.config.optimizer.set_jit(True)
 # TODO(hongkuny): remove set_config_v2 globally.
 set_config_v2 = set_session_config
-def set_gpu_thread_mode_and_count(gpu_thread_mode,
+def set_gpu_thread_mode_and_count(gpu_thread_mode, datasets_num_private_threads,
-                                  datasets_num_private_threads,
                                  num_gpus, per_gpu_thread_count):
  """Set GPU thread mode and count, and adjust dataset threads count."""
  cpu_count = multiprocessing.cpu_count()
@@ -190,10 +189,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
  per_gpu_thread_count = per_gpu_thread_count or 2
  os.environ['TF_GPU_THREAD_MODE'] = gpu_thread_mode
  os.environ['TF_GPU_THREAD_COUNT'] = str(per_gpu_thread_count)
-  logging.info('TF_GPU_THREAD_COUNT: %s',
+  logging.info('TF_GPU_THREAD_COUNT: %s', os.environ['TF_GPU_THREAD_COUNT'])
-               os.environ['TF_GPU_THREAD_COUNT'])
+  logging.info('TF_GPU_THREAD_MODE: %s', os.environ['TF_GPU_THREAD_MODE'])
-  logging.info('TF_GPU_THREAD_MODE: %s',
-               os.environ['TF_GPU_THREAD_MODE'])
  # Limit data preprocessing threadpool to CPU cores minus number of total GPU
  # private threads and memory copy threads.
@@ -201,7 +198,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
  num_runtime_threads = num_gpus
  if not datasets_num_private_threads:
    datasets_num_private_threads = min(
-        cpu_count - total_gpu_thread_count - num_runtime_threads,
+        cpu_count - total_gpu_thread_count - num_runtime_threads, num_gpus * 8)
-        num_gpus * 8)
    logging.info('Set datasets_num_private_threads to %s',
                 datasets_num_private_threads)
--- a/official/utils/misc/model_helpers.py
+++ b/official/utils/misc/model_helpers.py
@@ -58,9 +58,12 @@ def past_stop_threshold(stop_threshold, eval_metric):
  return False
-def generate_synthetic_data(
+def generate_synthetic_data(input_shape,
-    input_shape, input_value=0, input_dtype=None, label_shape=None,
+                            input_value=0,
-    label_value=0, label_dtype=None):
+                            input_dtype=None,
+                            label_shape=None,
+                            label_value=0,
+                            label_dtype=None):
  """Create a repeating dataset with constant values.
  Args:

--- a/official/utils/misc/model_helpers_test.py
+++ b/official/utils/misc/model_helpers_test.py
@@ -51,19 +51,19 @@ class PastStopThresholdTest(tf.test.TestCase):
  def test_past_stop_threshold_not_number(self):
    """Tests for error conditions."""
    with self.assertRaises(ValueError):
-      model_helpers.past_stop_threshold("str", 1)
+      model_helpers.past_stop_threshold('str', 1)
    with self.assertRaises(ValueError):
-      model_helpers.past_stop_threshold("str", tf.constant(5))
+      model_helpers.past_stop_threshold('str', tf.constant(5))
    with self.assertRaises(ValueError):
-      model_helpers.past_stop_threshold("str", "another")
+      model_helpers.past_stop_threshold('str', 'another')
    with self.assertRaises(ValueError):
      model_helpers.past_stop_threshold(0, None)
    with self.assertRaises(ValueError):
-      model_helpers.past_stop_threshold(0.7, "str")
+      model_helpers.past_stop_threshold(0.7, 'str')
    with self.assertRaises(ValueError):
      model_helpers.past_stop_threshold(tf.constant(4), None)
@@ -74,12 +74,13 @@ class SyntheticDataTest(tf.test.TestCase):
  def test_generate_synethetic_data(self):
    input_element, label_element = tf.compat.v1.data.make_one_shot_iterator(
-        model_helpers.generate_synthetic_data(input_shape=tf.TensorShape([5]),
+        model_helpers.generate_synthetic_data(
-                                              input_value=123,
+            input_shape=tf.TensorShape([5]),
-                                              input_dtype=tf.float32,
+            input_value=123,
-                                              label_shape=tf.TensorShape([]),
+            input_dtype=tf.float32,
-                                              label_value=456,
+            label_shape=tf.TensorShape([]),
-                                              label_dtype=tf.int32)).get_next()
+            label_value=456,
+            label_dtype=tf.int32)).get_next()
    with self.session() as sess:
      for n in range(5):
@@ -102,8 +103,13 @@ class SyntheticDataTest(tf.test.TestCase):
  def test_generate_nested_data(self):
    d = model_helpers.generate_synthetic_data(
-        input_shape={'a': tf.TensorShape([2]),
+        input_shape={
-                     'b': {'c': tf.TensorShape([3]), 'd': tf.TensorShape([])}},
+            'a': tf.TensorShape([2]),
+            'b': {
+                'c': tf.TensorShape([3]),
+                'd': tf.TensorShape([])
+            }
+        },
        input_value=1.1)
    element = tf.compat.v1.data.make_one_shot_iterator(d).get_next()
@@ -121,5 +127,5 @@ class SyntheticDataTest(tf.test.TestCase):
      self.assertAllClose(inp['b']['d'], 1.1)
-if __name__ == "__main__":
+if __name__ == '__main__':
  tf.test.main()
--- a/official/utils/registry_test.py
+++ b/official/utils/registry_test.py
@@ -31,18 +31,20 @@ class RegistryTest(tf.test.TestCase):
    @registry.register(collection, 'functions/func_0')
    def func_test():
      pass
-    self.assertEqual(
-        registry.lookup(collection, 'functions/func_0'), func_test)
+    self.assertEqual(registry.lookup(collection, 'functions/func_0'), func_test)
    @registry.register(collection, 'classes/cls_0')
    class ClassRegistryKey:
      pass
    self.assertEqual(
        registry.lookup(collection, 'classes/cls_0'), ClassRegistryKey)
    @registry.register(collection, ClassRegistryKey)
    class ClassRegistryValue:
      pass
    self.assertEqual(
        registry.lookup(collection, ClassRegistryKey), ClassRegistryValue)
@@ -52,12 +54,15 @@ class RegistryTest(tf.test.TestCase):
    @registry.register(collection, 'functions/func_0')
    def func_test0():
      pass
    @registry.register(collection, 'func_1')
    def func_test1():
      pass
    @registry.register(collection, func_test1)
    def func_test2():
      pass
    expected_collection = {
        'functions': {
            'func_0': func_test0,
@@ -73,10 +78,13 @@ class RegistryTest(tf.test.TestCase):
    @registry.register(collection, 'functions/func_0')
    def func_test0():  # pylint: disable=unused-variable
      pass
    with self.assertRaises(KeyError):
      @registry.register(collection, 'functions/func_0/sub_func')
      def func_test1():  # pylint: disable=unused-variable
        pass
    with self.assertRaises(LookupError):
      registry.lookup(collection, 'non-exist')

--- a/official/utils/testing/integration.py
+++ b/official/utils/testing/integration.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Helper code to run complete models from within python.
+"""Helper code to run complete models from within python."""
-"""
 from __future__ import absolute_import
 from __future__ import division
@@ -31,7 +30,11 @@ from official.utils.flags import core as flags_core
 @flagsaver.flagsaver
-def run_synthetic(main, tmp_root, extra_flags=None, synth=True, train_epochs=1,
+def run_synthetic(main,
+                  tmp_root,
+                  extra_flags=None,
+                  synth=True,
+                  train_epochs=1,
                  epochs_between_evals=1):
  """Performs a minimal run of a model.

--- a/official/vision/detection/dataloader/anchor.py
+++ b/official/vision/detection/dataloader/anchor.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 import collections
 import tensorflow as tf
 from official.vision.detection.utils.object_detection import argmax_matcher
 from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler
@@ -31,30 +32,25 @@ from official.vision.detection.utils.object_detection import target_assigner
 class Anchor(object):
  """Anchor class for anchor-based object detectors."""
-  def __init__(self,
+  def __init__(self, min_level, max_level, num_scales, aspect_ratios,
-               min_level,
+               anchor_size, image_size):
-               max_level,
-               num_scales,
-               aspect_ratios,
-               anchor_size,
-               image_size):
    """Constructs multiscale anchors.
    Args:
      min_level: integer number of minimum level of the output feature pyramid.
      max_level: integer number of maximum level of the output feature pyramid.
-      num_scales: integer number representing intermediate scales added
+      num_scales: integer number representing intermediate scales added on each
-        on each level. For instances, num_scales=2 adds one additional
+        level. For instance, num_scales=2 adds one additional intermediate
-        intermediate anchor scales [2^0, 2^0.5] on each level.
+        anchor scales [2^0, 2^0.5] on each level.
      aspect_ratios: list of float numbers representing the aspect ratio anchors
        added on each level. The number indicates the ratio of width to height.
        For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
        scale level.
      anchor_size: float number representing the scale of size of the base
        anchor to the feature stride 2^level.
-      image_size: a list of integer numbers or Tensors representing
+      image_size: a list of integer numbers or Tensors representing [height,
-        [height, width] of the input image size.The image_size should be
+        width] of the input image size.The image_size should be divisible by the
-        divisible by the largest feature stride 2^max_level.
+        largest feature stride 2^max_level.
    """
    self.min_level = min_level
    self.max_level = max_level
@@ -76,11 +72,11 @@ class Anchor(object):
      boxes_l = []
      for scale in range(self.num_scales):
        for aspect_ratio in self.aspect_ratios:
-          stride = 2 ** level
+          stride = 2**level
-          intermediate_scale = 2 ** (scale / float(self.num_scales))
+          intermediate_scale = 2**(scale / float(self.num_scales))
          base_anchor_size = self.anchor_size * stride * intermediate_scale
-          aspect_x = aspect_ratio ** 0.5
+          aspect_x = aspect_ratio**0.5
-          aspect_y = aspect_ratio ** -0.5
+          aspect_y = aspect_ratio**-0.5
          half_anchor_size_x = base_anchor_size * aspect_x / 2.0
          half_anchor_size_y = base_anchor_size * aspect_y / 2.0
          x = tf.range(stride / 2, self.image_size[1], stride)
@@ -89,8 +85,10 @@ class Anchor(object):
          xv = tf.cast(tf.reshape(xv, [-1]), dtype=tf.float32)
          yv = tf.cast(tf.reshape(yv, [-1]), dtype=tf.float32)
          # Tensor shape Nx4.
-          boxes = tf.stack([yv - half_anchor_size_y, xv - half_anchor_size_x,
+          boxes = tf.stack([
-                            yv + half_anchor_size_y, xv + half_anchor_size_x],
+              yv - half_anchor_size_y, xv - half_anchor_size_x,
+              yv + half_anchor_size_y, xv + half_anchor_size_x
+          ],
                           axis=1)
          boxes_l.append(boxes)
      # Concat anchors on the same level to tensor shape NxAx4.
@@ -104,11 +102,11 @@ class Anchor(object):
    unpacked_labels = collections.OrderedDict()
    count = 0
    for level in range(self.min_level, self.max_level + 1):
-      feat_size_y = tf.cast(self.image_size[0] / 2 ** level, tf.int32)
+      feat_size_y = tf.cast(self.image_size[0] / 2**level, tf.int32)
-      feat_size_x = tf.cast(self.image_size[1] / 2 ** level, tf.int32)
+      feat_size_x = tf.cast(self.image_size[1] / 2**level, tf.int32)
      steps = feat_size_y * feat_size_x * self.anchors_per_location
-      unpacked_labels[level] = tf.reshape(
+      unpacked_labels[level] = tf.reshape(labels[count:count + steps],
-          labels[count:count + steps], [feat_size_y, feat_size_x, -1])
+                                          [feat_size_y, feat_size_x, -1])
      count += steps
    return unpacked_labels
@@ -124,10 +122,7 @@ class Anchor(object):
 class AnchorLabeler(object):
  """Labeler for dense object detector."""
-  def __init__(self,
+  def __init__(self, anchor, match_threshold=0.5, unmatched_threshold=0.5):
-               anchor,
-               match_threshold=0.5,
-               unmatched_threshold=0.5):
    """Constructs anchor labeler to assign labels to anchors.
    Args:
@@ -161,6 +156,7 @@ class AnchorLabeler(object):
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.
    Returns:
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
@@ -205,11 +201,14 @@ class AnchorLabeler(object):
 class RpnAnchorLabeler(AnchorLabeler):
  """Labeler for Region Proposal Network."""
-  def __init__(self, anchor, match_threshold=0.7,
+  def __init__(self,
-               unmatched_threshold=0.3, rpn_batch_size_per_im=256,
+               anchor,
+               match_threshold=0.7,
+               unmatched_threshold=0.3,
+               rpn_batch_size_per_im=256,
               rpn_fg_fraction=0.5):
-    AnchorLabeler.__init__(self, anchor, match_threshold=0.7,
+    AnchorLabeler.__init__(
-                           unmatched_threshold=0.3)
+        self, anchor, match_threshold=0.7, unmatched_threshold=0.3)
    self._rpn_batch_size_per_im = rpn_batch_size_per_im
    self._rpn_fg_fraction = rpn_fg_fraction
@@ -219,11 +218,12 @@ class RpnAnchorLabeler(AnchorLabeler):
    This function performs subsampling for foreground (fg) and background (bg)
    anchors.
    Args:
-      match_results: A integer tensor with shape [N] representing the
+      match_results: A integer tensor with shape [N] representing the matching
-        matching results of anchors. (1) match_results[i]>=0,
+        results of anchors. (1) match_results[i]>=0, meaning that column i is
-        meaning that column i is matched with row match_results[i].
+        matched with row match_results[i]. (2) match_results[i]=-1, meaning that
-        (2) match_results[i]=-1, meaning that column i is not matched.
+        column i is not matched. (3) match_results[i]=-2, meaning that column i
-        (3) match_results[i]=-2, meaning that column i is ignored.
+        is ignored.
    Returns:
      score_targets: an integer tensor with a shape of [N].
        (1) score_targets[i]=1, the anchor is a positive sample.
@@ -241,8 +241,7 @@ class RpnAnchorLabeler(AnchorLabeler):
    indicator = tf.greater(match_results, -2)
    labels = tf.greater(match_results, -1)
-    samples = sampler.subsample(
+    samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels)
-        indicator, self._rpn_batch_size_per_im, labels)
    positive_labels = tf.where(
        tf.logical_and(samples, labels),
        tf.constant(2, dtype=tf.int32, shape=match_results.shape),
@@ -253,8 +252,8 @@ class RpnAnchorLabeler(AnchorLabeler):
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    ignore_labels = tf.fill(match_results.shape, -1)
-    return (ignore_labels + positive_labels + negative_labels,
+    return (ignore_labels + positive_labels + negative_labels, positive_labels,
-            positive_labels, negative_labels)
+            negative_labels)
  def label_anchors(self, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.
@@ -264,6 +263,7 @@ class RpnAnchorLabeler(AnchorLabeler):
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.
    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with

--- a/official/vision/detection/dataloader/input_reader.py
+++ b/official/vision/detection/dataloader/input_reader.py
@@ -91,7 +91,8 @@ class InputFn(object):
      dataset = dataset.repeat()
    dataset = dataset.interleave(
-        map_func=self._dataset_fn, cycle_length=32,
+        map_func=self._dataset_fn,
+        cycle_length=32,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    if self._is_training:

--- a/official/vision/detection/dataloader/retinanet_parser.py
+++ b/official/vision/detection/dataloader/retinanet_parser.py
@@ -79,9 +79,9 @@ class Parser(object):
        output_size should be divided by the largest feature stride 2^max_level.
      min_level: `int` number of minimum level of the output feature pyramid.
      max_level: `int` number of maximum level of the output feature pyramid.
-      num_scales: `int` number representing intermediate scales added
+      num_scales: `int` number representing intermediate scales added on each
-        on each level. For instances, num_scales=2 adds one additional
+        level. For instance, num_scales=2 adds one additional intermediate
-        intermediate anchor scales [2^0, 2^0.5] on each level.
+        anchor scales [2^0, 2^0.5] on each level.
      aspect_ratios: `list` of float numbers representing the aspect ratio
        anchors added on each level. The number indicates the ratio of width to
        height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
@@ -94,8 +94,8 @@ class Parser(object):
      unmatched_threshold: `float` number between 0 and 1 representing the
        upper-bound threshold to assign negative labels for anchors. An anchor
        with a score below the threshold is labeled negative.
-      aug_rand_hflip: `bool`, if True, augment training with random
+      aug_rand_hflip: `bool`, if True, augment training with random horizontal
-        horizontal flip.
+        flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
@@ -109,8 +109,8 @@ class Parser(object):
      max_num_instances: `int` number of maximum number of instances in an
        image. The groundtruth data will be padded to `max_num_instances`.
      use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
-      mode: a ModeKeys. Specifies if this is training, evaluation, prediction
+      mode: a ModeKeys. Specifies if this is training, evaluation, prediction or
-        or prediction with groundtruths in the outputs.
+        prediction with groundtruths in the outputs.
    """
    self._mode = mode
    self._max_num_instances = max_num_instances
@@ -232,8 +232,8 @@ class Parser(object):
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
-        padded_size=input_utils.compute_padded_size(
+        padded_size=input_utils.compute_padded_size(self._output_size,
-            self._output_size, 2 ** self._max_level),
+                                                    2**self._max_level),
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_height, image_width, _ = image.get_shape().as_list()
@@ -241,22 +241,21 @@ class Parser(object):
    # Resizes and crops boxes.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
-    boxes = input_utils.resize_and_crop_boxes(
+    boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
-        boxes, image_scale, image_info[1, :], offset)
+                                              image_info[1, :], offset)
    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    # Assigns anchors.
-    input_anchor = anchor.Anchor(
+    input_anchor = anchor.Anchor(self._min_level, self._max_level,
-        self._min_level, self._max_level, self._num_scales,
+                                 self._num_scales, self._aspect_ratios,
-        self._aspect_ratios, self._anchor_size, (image_height, image_width))
+                                 self._anchor_size, (image_height, image_width))
-    anchor_labeler = anchor.AnchorLabeler(
+    anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
-        input_anchor, self._match_threshold, self._unmatched_threshold)
+                                          self._unmatched_threshold)
    (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
-        boxes,
+        boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
-        tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
@@ -292,8 +291,8 @@ class Parser(object):
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
-        padded_size=input_utils.compute_padded_size(
+        padded_size=input_utils.compute_padded_size(self._output_size,
-            self._output_size, 2 ** self._max_level),
+                                                    2**self._max_level),
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image_height, image_width, _ = image.get_shape().as_list()
@@ -301,22 +300,21 @@ class Parser(object):
    # Resizes and crops boxes.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
-    boxes = input_utils.resize_and_crop_boxes(
+    boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
-        boxes, image_scale, image_info[1, :], offset)
+                                              image_info[1, :], offset)
    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    # Assigns anchors.
-    input_anchor = anchor.Anchor(
+    input_anchor = anchor.Anchor(self._min_level, self._max_level,
-        self._min_level, self._max_level, self._num_scales,
+                                 self._num_scales, self._aspect_ratios,
-        self._aspect_ratios, self._anchor_size, (image_height, image_width))
+                                 self._anchor_size, (image_height, image_width))
-    anchor_labeler = anchor.AnchorLabeler(
+    anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
-        input_anchor, self._match_threshold, self._unmatched_threshold)
+                                          self._unmatched_threshold)
    (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
-        boxes,
+        boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
-        tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
@@ -324,18 +322,24 @@ class Parser(object):
    # Sets up groundtruth data for evaluation.
    groundtruths = {
-        'source_id': data['source_id'],
+        'source_id':
-        'num_groundtrtuhs': tf.shape(data['groundtruth_classes']),
+            data['source_id'],
-        'image_info': image_info,
+        'num_groundtrtuhs':
-        'boxes': box_utils.denormalize_boxes(
+            tf.shape(data['groundtruth_classes']),
-            data['groundtruth_boxes'], image_shape),
+        'image_info':
-        'classes': data['groundtruth_classes'],
+            image_info,
-        'areas': data['groundtruth_area'],
+        'boxes':
-        'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
+            box_utils.denormalize_boxes(data['groundtruth_boxes'], image_shape),
+        'classes':
+            data['groundtruth_classes'],
+        'areas':
+            data['groundtruth_area'],
+        'is_crowds':
+            tf.cast(data['groundtruth_is_crowd'], tf.int32),
    }
    groundtruths['source_id'] = process_source_id(groundtruths['source_id'])
-    groundtruths = pad_groundtruths_to_fixed_size(
+    groundtruths = pad_groundtruths_to_fixed_size(groundtruths,
-        groundtruths, self._max_num_instances)
+                                                  self._max_num_instances)
    # Packs labels for model_fn outputs.
    labels = {
@@ -361,8 +365,8 @@ class Parser(object):
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
-        padded_size=input_utils.compute_padded_size(
+        padded_size=input_utils.compute_padded_size(self._output_size,
-            self._output_size, 2 ** self._max_level),
+                                                    2**self._max_level),
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image_height, image_width, _ = image.get_shape().as_list()
@@ -372,9 +376,9 @@ class Parser(object):
      image = tf.cast(image, dtype=tf.bfloat16)
    # Compute Anchor boxes.
-    input_anchor = anchor.Anchor(
+    input_anchor = anchor.Anchor(self._min_level, self._max_level,
-        self._min_level, self._max_level, self._num_scales,
+                                 self._num_scales, self._aspect_ratios,
-        self._aspect_ratios, self._anchor_size, (image_height, image_width))
+                                 self._anchor_size, (image_height, image_width))
    labels = {
        'anchor_boxes': input_anchor.multilevel_boxes,
@@ -384,8 +388,8 @@ class Parser(object):
    # in labels.
    if self._mode == ModeKeys.PREDICT_WITH_GT:
      # Converts boxes from normalized coordinates to pixel coordinates.
-      boxes = box_utils.denormalize_boxes(
+      boxes = box_utils.denormalize_boxes(data['groundtruth_boxes'],
-          data['groundtruth_boxes'], image_shape)
+                                          image_shape)
      groundtruths = {
          'source_id': data['source_id'],
          'num_detections': tf.shape(data['groundtruth_classes']),
@@ -395,8 +399,8 @@ class Parser(object):
          'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
      }
      groundtruths['source_id'] = process_source_id(groundtruths['source_id'])
-      groundtruths = pad_groundtruths_to_fixed_size(
+      groundtruths = pad_groundtruths_to_fixed_size(groundtruths,
-          groundtruths, self._max_num_instances)
+                                                    self._max_num_instances)
      labels['groundtruths'] = groundtruths
      # Computes training objective for evaluation loss.
@@ -404,18 +408,17 @@ class Parser(object):
      image_scale = image_info[2, :]
      offset = image_info[3, :]
-      boxes = input_utils.resize_and_crop_boxes(
+      boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
-          boxes, image_scale, image_info[1, :], offset)
+                                                image_info[1, :], offset)
      # Filters out ground truth boxes that are all zeros.
      indices = box_utils.get_non_empty_box_indices(boxes)
      boxes = tf.gather(boxes, indices)
      # Assigns anchors.
-      anchor_labeler = anchor.AnchorLabeler(
+      anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
-          input_anchor, self._match_threshold, self._unmatched_threshold)
+                                            self._unmatched_threshold)
      (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
-          boxes,
+          boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
-          tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
      labels['cls_targets'] = cls_targets
      labels['box_targets'] = box_targets
      labels['num_positives'] = num_positives

--- a/official/vision/detection/dataloader/shapemask_parser.py
+++ b/official/vision/detection/dataloader/shapemask_parser.py
@@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin
 ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors.
 arXiv:1904.03239.
 """
 import tensorflow as tf
 from official.vision.detection.dataloader import anchor

--- a/official/vision/detection/evaluation/coco_evaluator.py
+++ b/official/vision/detection/evaluation/coco_evaluator.py
@@ -32,6 +32,7 @@ from __future__ import print_function
 import atexit
 import tempfile
 import numpy as np
 from absl import logging
 from pycocotools import cocoeval
@@ -197,22 +198,21 @@ class COCOEvaluator(object):
    """Update and aggregate detection results and groundtruth data.
    Args:
-      predictions: a dictionary of numpy arrays including the fields below.
+      predictions: a dictionary of numpy arrays including the fields below. See
-        See different parsers under `../dataloader` for more details.
+        different parsers under `../dataloader` for more details.
        Required fields:
          - source_id: a numpy array of int or string of shape [batch_size].
          - image_info [if `need_rescale_bboxes` is True]: a numpy array of
            float of shape [batch_size, 4, 2].
-          - num_detections: a numpy array of
+          - num_detections: a numpy array of int of shape [batch_size].
-            int of shape [batch_size].
          - detection_boxes: a numpy array of float of shape [batch_size, K, 4].
          - detection_classes: a numpy array of int of shape [batch_size, K].
          - detection_scores: a numpy array of float of shape [batch_size, K].
        Optional fields:
-          - detection_masks: a numpy array of float of shape
+          - detection_masks: a numpy array of float of shape [batch_size, K,
-              [batch_size, K, mask_height, mask_width].
+            mask_height, mask_width].
-      groundtruths: a dictionary of numpy arrays including the fields below.
+      groundtruths: a dictionary of numpy arrays including the fields below. See
-        See also different parsers under `../dataloader` for more details.
+        also different parsers under `../dataloader` for more details.
        Required fields:
          - source_id: a numpy array of int or string of shape [batch_size].
          - height: a numpy array of int of shape [batch_size].
@@ -222,12 +222,12 @@ class COCOEvaluator(object):
          - classes: a numpy array of int of shape [batch_size, K].
        Optional fields:
          - is_crowds: a numpy array of int of shape [batch_size, K]. If the
-              field is absent, it is assumed that this instance is not crowd.
+            field is absent, it is assumed that this instance is not crowd.
-          - areas: a numpy array of float of shape [batch_size, K]. If the
+          - areas: a numpy array of float of shape [batch_size, K]. If the field
-              field is absent, the area is calculated using either boxes or
+            is absent, the area is calculated using either boxes or masks
-              masks depending on which one is available.
+            depending on which one is available.
-          - masks: a numpy array of float of shape
+          - masks: a numpy array of float of shape [batch_size, K, mask_height,
-              [batch_size, K, mask_height, mask_width],
+            mask_width],
    Raises:
      ValueError: if the required prediction or groundtruth fields are not
@@ -318,8 +318,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator):
        metrics = np.hstack((coco_metrics, mcoco_eval.stats))
      else:
        mask_coco_metrics = mcoco_eval.category_stats
-        val_catg_idx = np.isin(mcoco_eval.params.catIds,
+        val_catg_idx = np.isin(mcoco_eval.params.catIds, self._eval_categories)
-                               self._eval_categories)
        # Gather the valid evaluation of the eval categories.
        if np.any(val_catg_idx):
          mean_val_metrics = []

--- a/official/vision/detection/main.py
+++ b/official/vision/detection/main.py
@@ -23,6 +23,7 @@ import functools
 import pprint
 # pylint: disable=g-bad-import-order
+# Import libraries
 import tensorflow as tf
 from absl import app

--- a/official/vision/detection/modeling/architecture/heads.py
+++ b/official/vision/detection/modeling/architecture/heads.py
@@ -31,17 +31,17 @@ from official.vision.detection.ops import spatial_transform_ops
 class RpnHead(tf.keras.layers.Layer):
  """Region Proposal Network head."""
-  def __init__(self,
+  def __init__(
-               min_level,
+      self,
-               max_level,
+      min_level,
-               anchors_per_location,
+      max_level,
-               num_convs=2,
+      anchors_per_location,
-               num_filters=256,
+      num_convs=2,
-               use_separable_conv=False,
+      num_filters=256,
-               activation='relu',
+      use_separable_conv=False,
-               use_batch_norm=True,
+      activation='relu',
-               norm_activation=nn_ops.norm_activation_builder(
+      use_batch_norm=True,
-                   activation='relu')):
+      norm_activation=nn_ops.norm_activation_builder(activation='relu')):
    """Initialize params to build Region Proposal Network head.
    Args:
@@ -57,8 +57,8 @@ class RpnHead(tf.keras.layers.Layer):
        is used.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
-      norm_activation: an operation that includes a normalization layer
+      norm_activation: an operation that includes a normalization layer followed
-        followed by an optional activation layer.
+        by an optional activation layer.
    """
    self._min_level = min_level
    self._max_level = max_level
@@ -140,17 +140,17 @@ class RpnHead(tf.keras.layers.Layer):
 class FastrcnnHead(tf.keras.layers.Layer):
  """Fast R-CNN box head."""
-  def __init__(self,
+  def __init__(
-               num_classes,
+      self,
-               num_convs=0,
+      num_classes,
-               num_filters=256,
+      num_convs=0,
-               use_separable_conv=False,
+      num_filters=256,
-               num_fcs=2,
+      use_separable_conv=False,
-               fc_dims=1024,
+      num_fcs=2,
-               activation='relu',
+      fc_dims=1024,
-               use_batch_norm=True,
+      activation='relu',
-               norm_activation=nn_ops.norm_activation_builder(
+      use_batch_norm=True,
-                   activation='relu')):
+      norm_activation=nn_ops.norm_activation_builder(activation='relu')):
    """Initialize params to build Fast R-CNN box head.
    Args:
@@ -167,8 +167,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
        layers.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
-      norm_activation: an operation that includes a normalization layer
+      norm_activation: an operation that includes a normalization layer followed
-        followed by an optional activation layer.
+        by an optional activation layer.
    """
    self._num_classes = num_classes
@@ -207,7 +207,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
              strides=(1, 1),
              padding='same',
              dilation_rate=(1, 1),
-              activation=(None if self._use_batch_norm else self._activation_op),
+              activation=(None
+                          if self._use_batch_norm else self._activation_op),
              name='conv_{}'.format(i)))
      if self._use_batch_norm:
        self._conv_bn_ops.append(self._norm_activation())
@@ -218,7 +219,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
      self._fc_ops.append(
          tf.keras.layers.Dense(
              units=self._fc_dims,
-              activation=(None if self._use_batch_norm else self._activation_op),
+              activation=(None
+                          if self._use_batch_norm else self._activation_op),
              name='fc{}'.format(i)))
      if self._use_batch_norm:
        self._fc_bn_ops.append(self._norm_activation(fused=False))
@@ -238,8 +240,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
    """Box and class branches for the Mask-RCNN model.
    Args:
-      roi_features: A ROI feature tensor of shape
+      roi_features: A ROI feature tensor of shape [batch_size, num_rois,
-        [batch_size, num_rois, height_l, width_l, num_filters].
+        height_l, width_l, num_filters].
      is_training: `boolean`, True if the model is in training mode.
    Returns:
@@ -277,16 +279,16 @@ class FastrcnnHead(tf.keras.layers.Layer):
 class MaskrcnnHead(tf.keras.layers.Layer):
  """Mask R-CNN head."""
-  def __init__(self,
+  def __init__(
-               num_classes,
+      self,
-               mask_target_size,
+      num_classes,
-               num_convs=4,
+      mask_target_size,
-               num_filters=256,
+      num_convs=4,
-               use_separable_conv=False,
+      num_filters=256,
-               activation='relu',
+      use_separable_conv=False,
-               use_batch_norm=True,
+      activation='relu',
-               norm_activation=nn_ops.norm_activation_builder(
+      use_batch_norm=True,
-                   activation='relu')):
+      norm_activation=nn_ops.norm_activation_builder(activation='relu')):
    """Initialize params to build Mask R-CNN head.
    Args:
@@ -300,8 +302,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
        is used.
      activation: activation function. Support 'relu' and 'swish'.
      use_batch_norm: 'bool', indicating whether batchnorm layers are added.
-      norm_activation: an operation that includes a normalization layer
+      norm_activation: an operation that includes a normalization layer followed
-        followed by an optional activation layer.
+        by an optional activation layer.
    """
    self._num_classes = num_classes
    self._mask_target_size = mask_target_size
@@ -336,7 +338,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
              strides=(1, 1),
              padding='same',
              dilation_rate=(1, 1),
-              activation=(None if self._use_batch_norm else self._activation_op),
+              activation=(None
+                          if self._use_batch_norm else self._activation_op),
              name='mask-conv-l%d' % i))
    self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose(
        self._num_filters,
@@ -353,10 +356,10 @@ class MaskrcnnHead(tf.keras.layers.Layer):
    """Mask branch for the Mask-RCNN model.
    Args:
-      roi_features: A ROI feature tensor of shape
+      roi_features: A ROI feature tensor of shape [batch_size, num_rois,
-        [batch_size, num_rois, height_l, width_l, num_filters].
+        height_l, width_l, num_filters].
-      class_indices: a Tensor of shape [batch_size, num_rois], indicating
+      class_indices: a Tensor of shape [batch_size, num_rois], indicating which
-        which class the ROI is.
+        class the ROI is.
      is_training: `boolean`, True if the model is in training mode.
    Returns:
@@ -415,16 +418,16 @@ class MaskrcnnHead(tf.keras.layers.Layer):
 class RetinanetHead(object):
  """RetinaNet head."""
-  def __init__(self,
+  def __init__(
-               min_level,
+      self,
-               max_level,
+      min_level,
-               num_classes,
+      max_level,
-               anchors_per_location,
+      num_classes,
-               num_convs=4,
+      anchors_per_location,
-               num_filters=256,
+      num_convs=4,
-               use_separable_conv=False,
+      num_filters=256,
-               norm_activation=nn_ops.norm_activation_builder(
+      use_separable_conv=False,
-                   activation='relu')):
+      norm_activation=nn_ops.norm_activation_builder(activation='relu')):
    """Initialize params to build RetinaNet head.
    Args:
@@ -437,8 +440,8 @@ class RetinanetHead(object):
      num_filters: `int` number of filters used in the head architecture.
      use_separable_conv: `bool` to indicate whether to use separable
        convolution.
-      norm_activation: an operation that includes a normalization layer
+      norm_activation: an operation that includes a normalization layer followed
-        followed by an optional activation layer.
+        by an optional activation layer.
    """
    self._min_level = min_level
    self._max_level = max_level
@@ -600,12 +603,8 @@ class RetinanetHead(object):
 class ShapemaskPriorHead(object):
  """ShapeMask Prior head."""
-  def __init__(self,
+  def __init__(self, num_classes, num_downsample_channels, mask_crop_size,
-               num_classes,
+               use_category_for_mask, shape_prior_path):
-               num_downsample_channels,
-               mask_crop_size,
-               use_category_for_mask,
-               shape_prior_path):
    """Initialize params to build ShapeMask Prior head.
    Args:
@@ -632,12 +631,12 @@ class ShapemaskPriorHead(object):
    Args:
      fpn_features: a dictionary of FPN features.
-      boxes: a float tensor of shape [batch_size, num_instances, 4]
+      boxes: a float tensor of shape [batch_size, num_instances, 4] representing
-        representing the tight gt boxes from dataloader/detection.
+        the tight gt boxes from dataloader/detection.
      outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
        representing the loose gt boxes from dataloader/detection.
-      classes: a int Tensor of shape [batch_size, num_instances]
+      classes: a int Tensor of shape [batch_size, num_instances] of instance
-        of instance classes.
+        classes.
      is_training: training mode or not.
    Returns:
@@ -658,8 +657,9 @@ class ShapemaskPriorHead(object):
      shape_priors = self._get_priors()
      # Get uniform priors for each outer box.
-      uniform_priors = tf.ones([batch_size, num_instances, self._mask_crop_size,
+      uniform_priors = tf.ones([
-                                self._mask_crop_size])
+          batch_size, num_instances, self._mask_crop_size, self._mask_crop_size
+      ])
      uniform_priors = spatial_transform_ops.crop_mask_in_target_box(
          uniform_priors, boxes, outer_boxes, self._mask_crop_size)
@@ -668,8 +668,9 @@ class ShapemaskPriorHead(object):
          tf.cast(instance_features, tf.float32), uniform_priors, classes)
      instance_priors = tf.gather(shape_priors, classes)
-      instance_priors *= tf.expand_dims(tf.expand_dims(
+      instance_priors *= tf.expand_dims(
-          tf.cast(prior_distribution, tf.float32), axis=-1), axis=-1)
+          tf.expand_dims(tf.cast(prior_distribution, tf.float32), axis=-1),
+          axis=-1)
      instance_priors = tf.reduce_sum(instance_priors, axis=2)
      detection_priors = spatial_transform_ops.crop_mask_in_target_box(
          instance_priors, boxes, outer_boxes, self._mask_crop_size)
@@ -688,8 +689,10 @@ class ShapemaskPriorHead(object):
      # If prior path does not exist, do not use priors, i.e., priors equal to
      # uniform empty 32x32 patch.
      self._num_clusters = 1
-      priors = tf.zeros([self._mask_num_classes, self._num_clusters,
+      priors = tf.zeros([
-                         self._mask_crop_size, self._mask_crop_size])
+          self._mask_num_classes, self._num_clusters, self._mask_crop_size,
+          self._mask_crop_size
+      ])
    return priors
  def _classify_shape_priors(self, features, uniform_priors, classes):
@@ -699,12 +702,12 @@ class ShapemaskPriorHead(object):
    category.
    Args:
-      features: A float Tensor of shape [batch_size, num_instances,
+      features: A float Tensor of shape [batch_size, num_instances, mask_size,
-        mask_size, mask_size, num_channels].
+        mask_size, num_channels].
      uniform_priors: A float Tensor of shape [batch_size, num_instances,
        mask_size, mask_size] representing the uniform detection priors.
-      classes: A int Tensor of shape [batch_size, num_instances]
+      classes: A int Tensor of shape [batch_size, num_instances] of detection
-        of detection class ids.
+        class ids.
    Returns:
      prior_distribution: A float Tensor of shape
@@ -719,10 +722,11 @@ class ShapemaskPriorHead(object):
    features = tf.reduce_mean(features, axis=(2, 3))
    logits = tf.keras.layers.Dense(
        self._mask_num_classes * self._num_clusters,
-        kernel_initializer=tf.random_normal_initializer(stddev=0.01))(features)
+        kernel_initializer=tf.random_normal_initializer(stddev=0.01))(
-    logits = tf.reshape(logits,
+            features)
-                        [batch_size, num_instances,
+    logits = tf.reshape(
-                         self._mask_num_classes, self._num_clusters])
+        logits,
+        [batch_size, num_instances, self._mask_num_classes, self._num_clusters])
    if self._use_category_for_mask:
      logits = tf.gather(logits, tf.expand_dims(classes, axis=-1), batch_dims=2)
      logits = tf.squeeze(logits, axis=2)
@@ -752,8 +756,8 @@ class ShapemaskCoarsemaskHead(object):
      use_category_for_mask: use class information in mask branch.
      num_convs: `int` number of stacked convolution before the last prediction
        layer.
-      norm_activation: an operation that includes a normalization layer
+      norm_activation: an operation that includes a normalization layer followed
-        followed by an optional activation layer.
+        by an optional activation layer.
    """
    self._mask_num_classes = num_classes if use_category_for_mask else 1
    self._use_category_for_mask = use_category_for_mask
@@ -769,13 +773,15 @@ class ShapemaskCoarsemaskHead(object):
    self._class_norm_activation = []
    for i in range(self._num_convs):
-      self._class_conv.append(tf.keras.layers.Conv2D(
+      self._class_conv.append(
-          self._num_downsample_channels,
+          tf.keras.layers.Conv2D(
-          kernel_size=(3, 3),
+              self._num_downsample_channels,
-          bias_initializer=tf.zeros_initializer(),
+              kernel_size=(3, 3),
-          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
+              bias_initializer=tf.zeros_initializer(),
-          padding='same',
+              kernel_initializer=tf.keras.initializers.RandomNormal(
-          name='coarse-mask-class-%d' % i))
+                  stddev=0.01),
+              padding='same',
+              name='coarse-mask-class-%d' % i))
      self._class_norm_activation.append(
          norm_activation(name='coarse-mask-class-%d-bn' % i))
@@ -800,10 +806,10 @@ class ShapemaskCoarsemaskHead(object):
        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
        instance feature crop.
      detection_priors: a float Tensor of shape [batch_size, num_instances,
-        mask_crop_size, mask_crop_size, 1]. This is the detection prior for
+        mask_crop_size, mask_crop_size, 1]. This is the detection prior for the
-        the instance.
+        instance.
-      classes: a int Tensor of shape [batch_size, num_instances]
+      classes: a int Tensor of shape [batch_size, num_instances] of instance
-        of instance classes.
+        classes.
      is_training: a bool indicating whether in training mode.
    Returns:
@@ -820,8 +826,8 @@ class ShapemaskCoarsemaskHead(object):
      # Gather the logits with right input class.
      if self._use_category_for_mask:
        mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
-        mask_logits = tf.gather(mask_logits, tf.expand_dims(classes, -1),
+        mask_logits = tf.gather(
-                                batch_dims=2)
+            mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
        mask_logits = tf.squeeze(mask_logits, axis=2)
      else:
        mask_logits = mask_logits[..., 0]
@@ -841,16 +847,17 @@ class ShapemaskCoarsemaskHead(object):
    """
    (batch_size, num_instances, height, width,
     num_channels) = features.get_shape().as_list()
-    features = tf.reshape(features, [batch_size * num_instances, height, width,
+    features = tf.reshape(
-                                     num_channels])
+        features, [batch_size * num_instances, height, width, num_channels])
    for i in range(self._num_convs):
      features = self._class_conv[i](features)
-      features = self._class_norm_activation[i](features,
+      features = self._class_norm_activation[i](
-                                                is_training=is_training)
+          features, is_training=is_training)
    mask_logits = self._class_predict(features)
-    mask_logits = tf.reshape(mask_logits, [batch_size, num_instances, height,
+    mask_logits = tf.reshape(
-                                           width, self._mask_num_classes])
+        mask_logits,
+        [batch_size, num_instances, height, width, self._mask_num_classes])
    return mask_logits
@@ -907,8 +914,8 @@ class ShapemaskFinemaskHead(object):
              activation=None,
              padding='same',
              name='fine-mask-class-%d' % i))
-      self._fine_class_bn.append(norm_activation(
+      self._fine_class_bn.append(
-          name='fine-mask-class-%d-bn' % i))
+          norm_activation(name='fine-mask-class-%d-bn' % i))
    self._class_predict_conv = tf.keras.layers.Conv2D(
        self._mask_num_classes,
@@ -926,14 +933,13 @@ class ShapemaskFinemaskHead(object):
    https://arxiv.org/pdf/1904.03239.pdf
    Args:
-      features: a float Tensor of shape
+      features: a float Tensor of shape [batch_size, num_instances,
-        [batch_size, num_instances, mask_crop_size, mask_crop_size,
+        mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
-        num_downsample_channels]. This is the instance feature crop.
+        instance feature crop.
-      mask_logits: a float Tensor of shape
+      mask_logits: a float Tensor of shape [batch_size, num_instances,
-        [batch_size, num_instances, mask_crop_size, mask_crop_size] indicating
+        mask_crop_size, mask_crop_size] indicating predicted mask logits.
-        predicted mask logits.
+      classes: a int Tensor of shape [batch_size, num_instances] of instance
-      classes: a int Tensor of shape [batch_size, num_instances]
+        classes.
-        of instance classes.
      is_training: a bool indicating whether in training mode.
    Returns:
@@ -960,8 +966,8 @@ class ShapemaskFinemaskHead(object):
      mask_logits = self.decoder_net(features, is_training)
      if self._use_category_for_mask:
        mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
-        mask_logits = tf.gather(mask_logits,
+        mask_logits = tf.gather(
-                                tf.expand_dims(classes, -1), batch_dims=2)
+            mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
        mask_logits = tf.squeeze(mask_logits, axis=2)
      else:
        mask_logits = mask_logits[..., 0]
@@ -982,8 +988,8 @@ class ShapemaskFinemaskHead(object):
    """
    (batch_size, num_instances, height, width,
     num_channels) = features.get_shape().as_list()
-    features = tf.reshape(features, [batch_size * num_instances, height, width,
+    features = tf.reshape(
-                                     num_channels])
+        features, [batch_size * num_instances, height, width, num_channels])
    for i in range(self._num_convs):
      features = self._fine_class_conv[i](features)
      features = self._fine_class_bn[i](features, is_training=is_training)
@@ -994,9 +1000,8 @@ class ShapemaskFinemaskHead(object):
    # Predict per-class instance masks.
    mask_logits = self._class_predict_conv(features)
-    mask_logits = tf.reshape(mask_logits,
+    mask_logits = tf.reshape(mask_logits, [
-                             [batch_size, num_instances,
+        batch_size, num_instances, height * self.up_sample_factor,
-                              height * self.up_sample_factor,
+        width * self.up_sample_factor, self._mask_num_classes
-                              width * self.up_sample_factor,
+    ])
-                              self._mask_num_classes])
    return mask_logits
--- a/official/vision/detection/modeling/architecture/nn_ops.py
+++ b/official/vision/detection/modeling/architecture/nn_ops.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 import functools
 import tensorflow as tf
@@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer):
        GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
        layer.
      init_zero: `bool` if True, initializes scale parameter of batch
-          normalization with 0. If False, initialize it with 1.
+        normalization with 0. If False, initialize it with 1.
      fused: `bool` fused option in batch normalization.
      use_activation: `bool`, whether to add the optional activation layer after
        the batch normalization layer.

--- a/official/vision/detection/modeling/architecture/resnet.py
+++ b/official/vision/detection/modeling/architecture/resnet.py
@@ -28,22 +28,23 @@ import tensorflow as tf
 from official.vision.detection.modeling.architecture import keras_utils
 from official.vision.detection.modeling.architecture import nn_ops
 # TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
 class Resnet(object):
  """Class to build ResNet family model."""
-  def __init__(self,
+  def __init__(
-               resnet_depth,
+      self,
-               activation='relu',
+      resnet_depth,
-               norm_activation=nn_ops.norm_activation_builder(
+      activation='relu',
-                   activation='relu'),
+      norm_activation=nn_ops.norm_activation_builder(activation='relu'),
-               data_format='channels_last'):
+      data_format='channels_last'):
    """ResNet initialization function.
    Args:
      resnet_depth: `int` depth of ResNet backbone model.
-      norm_activation: an operation that includes a normalization layer
+      norm_activation: an operation that includes a normalization layer followed
-        followed by an optional activation layer.
+        by an optional activation layer.
      data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
    """
@@ -58,24 +59,45 @@ class Resnet(object):
    self._data_format = data_format
    model_params = {
-        10: {'block': self.residual_block, 'layers': [1, 1, 1, 1]},
+        10: {
-        18: {'block': self.residual_block, 'layers': [2, 2, 2, 2]},
+            'block': self.residual_block,
-        34: {'block': self.residual_block, 'layers': [3, 4, 6, 3]},
+            'layers': [1, 1, 1, 1]
-        50: {'block': self.bottleneck_block, 'layers': [3, 4, 6, 3]},
+        },
-        101: {'block': self.bottleneck_block, 'layers': [3, 4, 23, 3]},
+        18: {
-        152: {'block': self.bottleneck_block, 'layers': [3, 8, 36, 3]},
+            'block': self.residual_block,
-        200: {'block': self.bottleneck_block, 'layers': [3, 24, 36, 3]}
+            'layers': [2, 2, 2, 2]
+        },
+        34: {
+            'block': self.residual_block,
+            'layers': [3, 4, 6, 3]
+        },
+        50: {
+            'block': self.bottleneck_block,
+            'layers': [3, 4, 6, 3]
+        },
+        101: {
+            'block': self.bottleneck_block,
+            'layers': [3, 4, 23, 3]
+        },
+        152: {
+            'block': self.bottleneck_block,
+            'layers': [3, 8, 36, 3]
+        },
+        200: {
+            'block': self.bottleneck_block,
+            'layers': [3, 24, 36, 3]
+        }
    }
    if resnet_depth not in model_params:
      valid_resnet_depths = ', '.join(
          [str(depth) for depth in sorted(model_params.keys())])
      raise ValueError(
-          'The resnet_depth should be in [%s]. Not a valid resnet_depth:'%(
+          'The resnet_depth should be in [%s]. Not a valid resnet_depth:' %
-              valid_resnet_depths), self._resnet_depth)
+          (valid_resnet_depths), self._resnet_depth)
    params = model_params[resnet_depth]
-    self._resnet_fn = self.resnet_v1_generator(
+    self._resnet_fn = self.resnet_v1_generator(params['block'],
-        params['block'], params['layers'])
+                                               params['layers'])
  def __call__(self, inputs, is_training=None):
    """Returns the ResNet model for a given size and number of output classes.
@@ -98,10 +120,10 @@ class Resnet(object):
    """Pads the input along the spatial dimensions independently of input size.
    Args:
-      inputs: `Tensor` of size `[batch, channels, height, width]` or
+      inputs: `Tensor` of size `[batch, channels, height, width]` or `[batch,
-          `[batch, height, width, channels]` depending on `data_format`.
+        height, width, channels]` depending on `data_format`.
      kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
-          operations. Should be a positive integer.
+        operations. Should be a positive integer.
    Returns:
      A padded `Tensor` of the same `data_format` with size either intact
@@ -160,14 +182,15 @@ class Resnet(object):
    Args:
      inputs: `Tensor` of size `[batch, channels, height, width]`.
      filters: `int` number of filters for the first two convolutions. Note that
-          the third and final convolution will use 4 times as many filters.
+        the third and final convolution will use 4 times as many filters.
      strides: `int` block stride. If greater than 1, this block will ultimately
-          downsample the input.
+        downsample the input.
      use_projection: `bool` for whether this block should use a projection
-          shortcut (versus the default identity shortcut). This is usually
+        shortcut (versus the default identity shortcut). This is usually `True`
-          `True` for the first block of a block group, which may change the
+        for the first block of a block group, which may change the number of
-          number of filters and the resolution.
+        filters and the resolution.
      is_training: `bool` if True, the model is in training mode.
    Returns:
      The output `Tensor` of the block.
    """
@@ -185,8 +208,9 @@ class Resnet(object):
    inputs = self.conv2d_fixed_padding(
        inputs=inputs, filters=filters, kernel_size=3, strides=1)
-    inputs = self._norm_activation(use_activation=False, init_zero=True)(
+    inputs = self._norm_activation(
-        inputs, is_training=is_training)
+        use_activation=False, init_zero=True)(
+            inputs, is_training=is_training)
    return self._activation_op(inputs + shortcut)
@@ -201,13 +225,13 @@ class Resnet(object):
    Args:
      inputs: `Tensor` of size `[batch, channels, height, width]`.
      filters: `int` number of filters for the first two convolutions. Note that
-          the third and final convolution will use 4 times as many filters.
+        the third and final convolution will use 4 times as many filters.
      strides: `int` block stride. If greater than 1, this block will ultimately
-          downsample the input.
+        downsample the input.
      use_projection: `bool` for whether this block should use a projection
-          shortcut (versus the default identity shortcut). This is usually
+        shortcut (versus the default identity shortcut). This is usually `True`
-          `True` for the first block of a block group, which may change the
+        for the first block of a block group, which may change the number of
-          number of filters and the resolution.
+        filters and the resolution.
      is_training: `bool` if True, the model is in training mode.
    Returns:
@@ -233,8 +257,9 @@ class Resnet(object):
    inputs = self.conv2d_fixed_padding(
        inputs=inputs, filters=4 * filters, kernel_size=1, strides=1)
-    inputs = self._norm_activation(use_activation=False, init_zero=True)(
+    inputs = self._norm_activation(
-        inputs, is_training=is_training)
+        use_activation=False, init_zero=True)(
+            inputs, is_training=is_training)
    return self._activation_op(inputs + shortcut)
@@ -248,7 +273,7 @@ class Resnet(object):
      block_fn: `function` for the block to use within the model
      blocks: `int` number of blocks contained in the layer.
      strides: `int` stride to use for the first convolution of the layer. If
-          greater than 1, this layer will downsample the input.
+        greater than 1, this layer will downsample the input.
      name: `str`name for the Tensor output of the block layer.
      is_training: `bool` if True, the model is in training mode.
@@ -256,8 +281,8 @@ class Resnet(object):
      The output `Tensor` of the block layer.
    """
    # Only the first block per block_group uses projection shortcut and strides.
-    inputs = block_fn(inputs, filters, strides, use_projection=True,
+    inputs = block_fn(
-                      is_training=is_training)
+        inputs, filters, strides, use_projection=True, is_training=is_training)
    for _ in range(1, blocks):
      inputs = block_fn(inputs, filters, 1, is_training=is_training)
@@ -269,7 +294,7 @@ class Resnet(object):
    Args:
      block_fn: `function` for the block to use within the model. Either
-          `residual_block` or `bottleneck_block`.
+        `residual_block` or `bottleneck_block`.
      layers: list of 4 `int`s denoting the number of blocks to include in each
        of the 4 block groups. Each group consists of blocks that take inputs of
        the same resolution.
@@ -293,17 +318,37 @@ class Resnet(object):
      inputs = tf.identity(inputs, 'initial_max_pool')
      c2 = self.block_group(
-          inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
+          inputs=inputs,
-          strides=1, name='block_group1', is_training=is_training)
+          filters=64,
+          block_fn=block_fn,
+          blocks=layers[0],
+          strides=1,
+          name='block_group1',
+          is_training=is_training)
      c3 = self.block_group(
-          inputs=c2, filters=128, block_fn=block_fn, blocks=layers[1],
+          inputs=c2,
-          strides=2, name='block_group2', is_training=is_training)
+          filters=128,
+          block_fn=block_fn,
+          blocks=layers[1],
+          strides=2,
+          name='block_group2',
+          is_training=is_training)
      c4 = self.block_group(
-          inputs=c3, filters=256, block_fn=block_fn, blocks=layers[2],
+          inputs=c3,
-          strides=2, name='block_group3', is_training=is_training)
+          filters=256,
+          block_fn=block_fn,
+          blocks=layers[2],
+          strides=2,
+          name='block_group3',
+          is_training=is_training)
      c5 = self.block_group(
-          inputs=c4, filters=512, block_fn=block_fn, blocks=layers[3],
+          inputs=c4,
-          strides=2, name='block_group4', is_training=is_training)
+          filters=512,
+          block_fn=block_fn,
+          blocks=layers[3],
+          strides=2,
+          name='block_group4',
+          is_training=is_training)
      return {2: c2, 3: c3, 4: c4, 5: c5}
    return model
--- a/official/vision/detection/modeling/base_model.py
+++ b/official/vision/detection/modeling/base_model.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import abc
 import functools
 import re
 import tensorflow as tf
 from official.vision.detection.modeling import checkpoint_utils
 from official.vision.detection.modeling import learning_rates
@@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix):
    # frozen_variable_prefix: a regex string specifing the prefix pattern of
    # the frozen variables' names.
    filtered_variables = [
-        v for v in variables
+        v for v in variables if not frozen_variable_prefix or
-        if not frozen_variable_prefix or
        not re.match(frozen_variable_prefix, v.name)
    ]
    return filtered_variables
@@ -115,8 +115,8 @@ class Model(object):
  def weight_decay_loss(self, trainable_variables):
    reg_variables = [
        v for v in trainable_variables
-        if self._regularization_var_regex is None
+        if self._regularization_var_regex is None or
-        or re.match(self._regularization_var_regex, v.name)
+        re.match(self._regularization_var_regex, v.name)
    ]
    return self._l2_weight_decay * tf.add_n(

--- a/official/vision/detection/modeling/checkpoint_utils.py
+++ b/official/vision/detection/modeling/checkpoint_utils.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Util functions for loading checkpoints. Especially for loading Tensorflow 1.x
+"""Util functions for loading checkpoints.
+Especially for loading Tensorflow 1.x
 checkpoint to Tensorflow 2.x (keras) model.
 """
@@ -20,18 +22,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import re
 from absl import logging
 import tensorflow as tf
 def _build_assignment_map(keras_model,
-                         prefix='',
+                          prefix='',
-                         skip_variables_regex=None,
+                          skip_variables_regex=None,
-                         var_to_shape_map=None):
+                          var_to_shape_map=None):
  """Compute an assignment mapping for loading older checkpoints into a Keras
  model. Variable names are remapped from the original TPUEstimator model to
  the new Keras name.
@@ -48,12 +51,12 @@ def _build_assignment_map(keras_model,
  """
  assignment_map = {}
  checkpoint_names = None
  if var_to_shape_map:
-    checkpoint_names = list(filter(
+    checkpoint_names = list(
-        lambda x: not x.endswith('Momentum') and not x.endswith(
+        filter(
-            'global_step'), var_to_shape_map.keys()))
+            lambda x: not x.endswith('Momentum') and not x.endswith(
+                'global_step'), var_to_shape_map.keys()))
  for var in keras_model.variables:
    var_name = var.name
@@ -95,14 +98,15 @@ def _get_checkpoint_map(checkpoint_path):
 def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
  """Returns scaffold function to restore parameters from v1 checkpoint.
  Args:
    checkpoint_path: path of the checkpoint folder or file.
      Example 1: '/path/to/model_dir/'
      Example 2: '/path/to/model.ckpt-22500'
    prefix: prefix in the variable name to be remove for alignment with names in
      the checkpoint.
-    skip_regex: regular expression to math the names of variables that
+    skip_regex: regular expression to match the names of variables that do not
-      do not need to be assign.
+      need to be assigned.
  Returns:
    Callable[tf.kears.Model] -> void. Fn to load v1 checkpoint to keras model.
@@ -125,7 +129,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
        var_to_shape_map=var_to_shape_map)
    if not vars_to_load:
      raise ValueError('Variables to load is empty.')
-    tf.compat.v1.train.init_from_checkpoint(checkpoint_path,
+    tf.compat.v1.train.init_from_checkpoint(checkpoint_path, vars_to_load)
-                                            vars_to_load)
  return _restore_checkpoint_fn
--- a/official/vision/detection/modeling/learning_rates.py
+++ b/official/vision/detection/modeling/learning_rates.py
@@ -25,7 +25,8 @@ import tensorflow as tf
 from official.modeling.hyperparams import params_dict
-class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
+class StepLearningRateWithLinearWarmup(
+    tf.keras.optimizers.schedules.LearningRateSchedule):
  """Class to generate learning rate tensor."""
  def __init__(self, total_steps, params):
@@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat
    return {'_params': self._params.as_dict()}
-class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
+class CosineLearningRateWithLinearWarmup(
+    tf.keras.optimizers.schedules.LearningRateSchedule):
  """Class to generate learning rate tensor."""
  def __init__(self, total_steps, params):

--- a/official/vision/detection/modeling/maskrcnn_model.py
+++ b/official/vision/detection/modeling/maskrcnn_model.py
@@ -118,9 +118,7 @@ class MaskrcnnModel(base_model.Model):
      box_targets = tf.where(
          tf.tile(
              tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
-              [1, 1, 4]),
+              [1, 1, 4]), tf.zeros_like(box_targets), box_targets)
-          tf.zeros_like(box_targets),
-          box_targets)
      model_outputs.update({
          'class_targets': matched_gt_classes,
          'box_targets': box_targets,
@@ -183,9 +181,7 @@ class MaskrcnnModel(base_model.Model):
                                    mask_outputs),
      })
    else:
-      model_outputs.update({
+      model_outputs.update({'detection_masks': tf.nn.sigmoid(mask_outputs)})
-          'detection_masks': tf.nn.sigmoid(mask_outputs)
-      })
    return model_outputs
@@ -312,8 +308,8 @@ class MaskrcnnModel(base_model.Model):
    required_output_fields = ['class_outputs', 'box_outputs']
    for field in required_output_fields:
      if field not in outputs:
-        raise ValueError('"%s" is missing in outputs, requried %s found %s'
+        raise ValueError('"%s" is missing in outputs, requried %s found %s' %
-                         %(field, required_output_fields, outputs.keys()))
+                         (field, required_output_fields, outputs.keys()))
    predictions = {
        'image_info': labels['image_info'],
        'num_detections': outputs['num_detections'],