Commit 3ce2f61b authored by Kaushik Shivakumar

Merge branch 'master' of https://github.com/tensorflow/models into context_tf2

parents bb16d5ca 8e9296ff
@@ -166,6 +166,14 @@ implement one in Python or C++.
The recommended way is to use the [Serving infrastructure][serving].
To export to SavedModel format:
```
python model_export.py \
--checkpoint=model.ckpt-399731 \
--export_dir=/tmp/attention_ocr_export
```
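Once exported, the SavedModel can be loaded back for a quick local check. The sketch below is an assumption-laden example, not part of the export script: it presumes the model was exported with `--export_for_serving=false` and `--batch_size=1`, and it uses the tensor names that `model_export.py` assigns (`original_image` for the input placeholder, `AttentionOcr_v1/predicted_text` for the output). Adjust the paths and names to your own export:
```
import numpy as np
import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
  # Load the exported SavedModel; the loader also runs the table init op.
  tf.saved_model.loader.load(
      sess, [tf.saved_model.tag_constants.SERVING], '/tmp/attention_ocr_export')
  # One uint8 image with the dataset's shape (here FSNS: 150x600x3).
  images = np.zeros((1, 150, 600, 3), dtype='uint8')
  text = sess.run('AttentionOcr_v1/predicted_text:0',
                  feed_dict={'original_image:0': images})
  print(text)
```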
Alternatively you can:
1. define a placeholder for images (or directly use a numpy array)
2. [create a graph](https://github.com/tensorflow/models/blob/master/research/attention_ocr/python/eval.py#L60)
@@ -188,7 +196,7 @@ other than a one time experiment please use the [TensorFlow Serving][serving].
[1]: https://github.com/tensorflow/tensorflow/blob/aaf7adc/tensorflow/contrib/rnn/python/tools/checkpoint_convert.py
[2]: https://www.tensorflow.org/api_docs/python/tf/contrib/framework/assign_from_checkpoint_fn
[serving]: https://www.tensorflow.org/tfx/serving/serving_basic
## Disclaimer
...
@@ -14,10 +14,10 @@
# ==============================================================================
"""Defines flags common to both train.py and eval.py scripts."""
import logging
import sys
from tensorflow.python.platform import flags
import datasets
import model
@@ -35,9 +35,17 @@ logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S')
_common_flags_defined = False
def define():
  """Define common flags."""
  # yapf: disable
# common_flags.define() may be called multiple times in unit tests.
global _common_flags_defined
if _common_flags_defined:
return
_common_flags_defined = True
  flags.DEFINE_integer('batch_size', 32,
                       'Batch size.')
@@ -74,7 +82,7 @@ def define():
                     'the optimizer to use')
  flags.DEFINE_float('momentum', 0.9,
                     'momentum value for the momentum optimizer if used')
  flags.DEFINE_bool('use_augment_input', True,
                    'If True will use image augmentation')
...
@@ -144,9 +144,6 @@ def preprocess_image(image, augment=False, central_crop_size=None,
    images = [augment_image(img) for img in images]
    image = tf.concat(images, 1)
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.5)
  return image
...
@@ -177,6 +177,8 @@ def get_split(split_name, dataset_dir=None, config=None):
      items_to_descriptions=config['items_to_descriptions'],
      # additional parameters for convenience.
      charset=charset,
charset_file=charset_file,
image_shape=config['image_shape'],
      num_char_classes=len(charset),
      num_of_views=config['num_of_views'],
      max_sequence_length=config['max_sequence_length'],
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to build the Attention OCR model.
Usage example:
@@ -26,6 +25,7 @@ Usage example:
import sys
import collections
import logging
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim.nets import inception
@@ -35,29 +35,28 @@ import sequence_layers
import utils
OutputEndpoints = collections.namedtuple('OutputEndpoints', [
    'chars_logit', 'chars_log_prob', 'predicted_chars', 'predicted_scores',
    'predicted_text', 'predicted_length', 'predicted_conf',
'normalized_seq_conf'
])
# TODO(gorban): replace with tf.HParams when it is released.
ModelParams = collections.namedtuple(
    'ModelParams', ['num_char_classes', 'seq_length', 'num_views', 'null_code'])
ConvTowerParams = collections.namedtuple('ConvTowerParams', ['final_endpoint'])
SequenceLogitsParams = collections.namedtuple('SequenceLogitsParams', [
    'use_attention', 'use_autoregression', 'num_lstm_units', 'weight_decay',
    'lstm_state_clip_value'
])
SequenceLossParams = collections.namedtuple(
    'SequenceLossParams',
    ['label_smoothing', 'ignore_nulls', 'average_across_timesteps'])
EncodeCoordinatesParams = collections.namedtuple('EncodeCoordinatesParams',
                                                 ['enabled'])
def _dict_to_array(id_to_char, default_character):
@@ -85,16 +84,16 @@ class CharsetMapper(object):
    """
    mapping_strings = tf.constant(_dict_to_array(charset, default_character))
    self.table = tf.contrib.lookup.index_to_string_table_from_tensor(
        mapping=mapping_strings, default_value=default_character)
  def get_text(self, ids):
    """Returns a string corresponding to a sequence of character ids.
    Args:
      ids: a tensor with shape [batch_size, max_sequence_length]
    """
    return tf.reduce_join(
        self.table.lookup(tf.to_int64(ids)), reduction_indices=1)
def get_softmax_loss_fn(label_smoothing):
@@ -111,16 +110,152 @@ def get_softmax_loss_fn(label_smoothing):
    def loss_fn(labels, logits):
      return (tf.nn.softmax_cross_entropy_with_logits(
          logits=logits, labels=labels))
  else:
    def loss_fn(labels, logits):
      return tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=labels)
  return loss_fn
def get_tensor_dimensions(tensor):
"""Returns the shape components of a 4D tensor with variable batch size.
Args:
tensor : A 4D tensor, whose last 3 dimensions are known at graph
construction time.
Returns:
batch_size : The first dimension as a tensor object.
height : The second dimension as a scalar value.
width : The third dimension as a scalar value.
num_features : The fourth dimension as a scalar value.
Raises:
ValueError: if input tensor does not have 4 dimensions.
"""
if len(tensor.get_shape().dims) != 4:
raise ValueError(
'Incompatible shape: len(tensor.get_shape().dims) != 4 (%d != 4)' %
len(tensor.get_shape().dims))
batch_size = tf.shape(tensor)[0]
height = tensor.get_shape().dims[1].value
width = tensor.get_shape().dims[2].value
num_features = tensor.get_shape().dims[3].value
return batch_size, height, width, num_features
def lookup_indexed_value(indices, row_vecs):
"""Lookup values in each row of 'row_vecs' indexed by 'indices'.
For each sample in the batch, look up the element for the corresponding
index.
Args:
indices : A tensor of shape (batch, )
row_vecs : A tensor of shape [batch, depth]
Returns:
A tensor of shape (batch, ) formed by row_vecs[i, indices[i]].
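    For example (made-up values), indices=[2, 0] and
    row_vecs=[[1, 2, 3], [4, 5, 6]] returns [3, 4].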
"""
gather_indices = tf.stack((tf.range(
tf.shape(row_vecs)[0], dtype=tf.int32), tf.cast(indices, tf.int32)),
axis=1)
return tf.gather_nd(row_vecs, gather_indices)
@utils.ConvertAllInputsToTensors
def max_char_logprob_cumsum(char_log_prob):
"""Computes the cumulative sum of character logprob for all sequence lengths.
Args:
char_log_prob: A tensor of shape [batch x seq_length x num_char_classes]
with log probabilities of a character.
Returns:
A tensor of shape [batch x (seq_length+1)] where each element x[_, j] is
the sum of the max char logprob for all positions up to j.
Note this duplicates the final column and produces (seq_length+1) columns
so the same function can be used regardless of whether
use_length_predictions is true or false.
"""
max_char_log_prob = tf.reduce_max(char_log_prob, reduction_indices=2)
# For an input array [a, b, c]) tf.cumsum returns [a, a + b, a + b + c] if
# exclusive set to False (default).
return tf.cumsum(max_char_log_prob, axis=1, exclusive=False)
def find_length_by_null(predicted_chars, null_code):
"""Determine sequence length by finding null_code among predicted char IDs.
Given the char class ID for each position, compute the sequence length.
Note that this function computes the length from the number of non-null
characters, not from the position of the first null_code.
Args:
predicted_chars: A tensor of [batch x seq_length] where each element stores
the char class ID with max probability;
null_code: an int32, character id for the NULL.
Returns:
A [batch, ] tensor which stores the sequence length for each sample.
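    For example (made-up values), with null_code=0 the row [3, 0, 5, 0] yields
    length 2 (two non-null characters), even though the first null appears at
    position 1.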
"""
return tf.reduce_sum(
tf.cast(tf.not_equal(null_code, predicted_chars), tf.int32), axis=1)
def axis_pad(tensor, axis, before=0, after=0, constant_values=0.0):
"""Pad a tensor with the specified values along a single axis.
Args:
tensor: a Tensor;
axis: the dimension to pad along;
before: number of values to add before the contents of tensor in the
selected dimension;
after: number of values to add after the contents of tensor in the selected
dimension;
constant_values: the scalar pad value to use. Must be same type as tensor.
Returns:
A Tensor. Has the same type as the input tensor, but with a changed shape
along the specified dimension.
"""
if before == 0 and after == 0:
return tensor
ndims = tensor.shape.ndims
padding_size = np.zeros((ndims, 2), dtype='int32')
padding_size[axis] = before, after
return tf.pad(
tensor=tensor,
paddings=tf.constant(padding_size),
constant_values=constant_values)
def null_based_length_prediction(chars_log_prob, null_code):
"""Computes length and confidence of prediction based on positions of NULLs.
Args:
chars_log_prob: A tensor of shape [batch x seq_length x num_char_classes]
with log probabilities of a character;
null_code: an int32, character id for the NULL.
Returns:
A tuple (text_log_prob, predicted_length), where
text_log_prob is a tensor of shape [batch x (seq_length+1)].
Element #0 of the output corresponds to the probability of the empty string,
element #seq_length is the probability of length=seq_length.
predicted_length is a tensor with shape [batch].
"""
predicted_chars = tf.to_int32(tf.argmax(chars_log_prob, axis=2))
# We do right pad to support sequences with seq_length elements.
text_log_prob = max_char_logprob_cumsum(
axis_pad(chars_log_prob, axis=1, after=1))
predicted_length = find_length_by_null(predicted_chars, null_code)
return text_log_prob, predicted_length
class Model(object):
  """Class to create the Attention OCR Model."""
@@ -137,24 +272,24 @@ class Model(object):
      num_char_classes: size of character set.
      seq_length: number of characters in a sequence.
      num_views: Number of views (conv towers) to use.
      null_code: A character code corresponding to a character which indicates
        end of a sequence.
      mparams: a dictionary with hyper parameters for methods, keys - function
        names, values - corresponding namedtuples.
      charset: an optional dictionary with a mapping between character ids and
        utf8 strings. If specified, OutputEndpoints.predicted_text will contain
        utf8 encoded strings corresponding to the character ids returned by
        OutputEndpoints.predicted_chars (by default the predicted_text contains
        an empty vector).
        NOTE: Make sure you call tf.tables_initializer().run() if the charset
        is specified.
    """
    super(Model, self).__init__()
    self._params = ModelParams(
        num_char_classes=num_char_classes,
        seq_length=seq_length,
        num_views=num_views,
        null_code=null_code)
    self._mparams = self.default_mparams()
    if mparams:
      self._mparams.update(mparams)
@@ -162,21 +297,22 @@ class Model(object):
  def default_mparams(self):
    return {
        'conv_tower_fn':
            ConvTowerParams(final_endpoint='Mixed_5d'),
        'sequence_logit_fn':
            SequenceLogitsParams(
                use_attention=True,
                use_autoregression=True,
                num_lstm_units=256,
                weight_decay=0.00004,
                lstm_state_clip_value=10.0),
        'sequence_loss_fn':
            SequenceLossParams(
                label_smoothing=0.1,
                ignore_nulls=True,
                average_across_timesteps=False),
        'encode_coordinates_fn':
EncodeCoordinatesParams(enabled=False)
    }
  def set_mparam(self, function, **kwargs):
@@ -205,7 +341,7 @@ class Model(object):
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, _ = inception.inception_v3_base(
          images, final_endpoint=mparams.final_endpoint)
    return net
  def _create_lstm_inputs(self, net):
@@ -222,10 +358,10 @@ class Model(object):
    """
    num_features = net.get_shape().dims[1].value
    if num_features < self._params.seq_length:
      raise AssertionError(
          'Incorrect dimension #1 of input tensor'
          ' %d should be bigger than %d (shape=%s)' %
          (num_features, self._params.seq_length, net.get_shape()))
    elif num_features > self._params.seq_length:
      logging.warning('Ignoring some features: use %d of %d (shape=%s)',
                      self._params.seq_length, num_features, net.get_shape())
@@ -252,7 +388,7 @@ class Model(object):
      A tensor with the same size as any input tensors.
    """
    batch_size, height, width, num_features = [
        d.value for d in nets_list[0].get_shape().dims
    ]
    xy_flat_shape = (batch_size, 1, height * width, num_features)
    nets_for_merge = []
@@ -261,7 +397,7 @@ class Model(object):
      nets_for_merge.append(tf.reshape(net, xy_flat_shape))
    merged_net = tf.concat(nets_for_merge, 1)
    net = slim.max_pool2d(
        merged_net, kernel_size=[len(nets_list), 1], stride=1)
    net = tf.reshape(net, (batch_size, height, width, num_features))
    return net
@@ -279,16 +415,17 @@ class Model(object):
    """
    with tf.variable_scope('pool_views_fn/STCK'):
      net = tf.concat(nets, 1)
      batch_size = tf.shape(net)[0]
image_size = net.get_shape().dims[1].value * net.get_shape().dims[2].value
      feature_size = net.get_shape().dims[3].value
      return tf.reshape(net, tf.stack([batch_size, image_size, feature_size]))
  def char_predictions(self, chars_logit):
    """Returns confidence scores (softmax values) for predicted characters.
    Args:
      chars_logit: chars logits, a tensor with shape [batch_size x seq_length x
        num_char_classes]
    Returns:
      A tuple (ids, log_prob, scores), where:
@@ -303,10 +440,13 @@ class Model(object):
    log_prob = utils.logits_to_log_prob(chars_logit)
    ids = tf.to_int32(tf.argmax(log_prob, axis=2), name='predicted_chars')
    mask = tf.cast(
        slim.one_hot_encoding(ids, self._params.num_char_classes), tf.bool)
    all_scores = tf.nn.softmax(chars_logit)
    selected_scores = tf.boolean_mask(all_scores, mask, name='char_scores')
    scores = tf.reshape(
selected_scores,
shape=(-1, self._params.seq_length),
name='predicted_scores')
    return ids, log_prob, scores
  def encode_coordinates_fn(self, net):
@@ -323,12 +463,12 @@ class Model(object):
    """
    mparams = self._mparams['encode_coordinates_fn']
    if mparams.enabled:
      batch_size, h, w, _ = get_tensor_dimensions(net)
      x, y = tf.meshgrid(tf.range(w), tf.range(h))
      w_loc = slim.one_hot_encoding(x, num_classes=w)
      h_loc = slim.one_hot_encoding(y, num_classes=h)
      loc = tf.concat([h_loc, w_loc], 2)
      loc = tf.tile(tf.expand_dims(loc, 0), tf.stack([batch_size, 1, 1, 1]))
      return tf.concat([net, loc], 3)
    else:
      return net
@@ -341,7 +481,8 @@ class Model(object):
    """Creates a base part of the Model (no gradients, losses or summaries).
    Args:
      images: A tensor of shape [batch_size, height, width, channels] with pixel
values in the range [0.0, 1.0].
      labels_one_hot: Optional (can be None) one-hot encoding for ground truth
        labels. If provided the function will create a model for training.
      scope: Optional variable_scope.
@@ -353,14 +494,19 @@ class Model(object):
    """
    logging.debug('images: %s', images)
    is_training = labels_one_hot is not None
# Normalize image pixel values to have a symmetrical range around zero.
images = tf.subtract(images, 0.5)
images = tf.multiply(images, 2.5)
    with tf.variable_scope(scope, reuse=reuse):
      views = tf.split(
          value=images, num_or_size_splits=self._params.num_views, axis=2)
      logging.debug('Views=%d single view: %s', len(views), views[0])
      nets = [
          self.conv_tower_fn(v, is_training, reuse=(i != 0))
          for i, v in enumerate(views)
      ]
      logging.debug('Conv tower: %s', nets[0])
@@ -374,18 +520,34 @@ class Model(object):
      logging.debug('chars_logit: %s', chars_logit)
      predicted_chars, chars_log_prob, predicted_scores = (
          self.char_predictions(chars_logit))
      if self._charset:
        character_mapper = CharsetMapper(self._charset)
        predicted_text = character_mapper.get_text(predicted_chars)
      else:
        predicted_text = tf.constant([])
text_log_prob, predicted_length = null_based_length_prediction(
chars_log_prob, self._params.null_code)
predicted_conf = lookup_indexed_value(predicted_length, text_log_prob)
# Convert predicted confidence from sum of logs to geometric mean
normalized_seq_conf = tf.exp(
tf.divide(predicted_conf,
tf.cast(predicted_length + 1, predicted_conf.dtype)),
name='normalized_seq_conf')
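      # Equivalently (a sketch of the math): normalized_seq_conf =
      # exp(predicted_conf / (predicted_length + 1)), i.e. the geometric mean
      # of the per-position max character probabilities whose logs were summed
      # into predicted_conf above.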
predicted_conf = tf.identity(predicted_conf, name='predicted_conf')
predicted_text = tf.identity(predicted_text, name='predicted_text')
predicted_length = tf.identity(predicted_length, name='predicted_length')
      return OutputEndpoints(
          chars_logit=chars_logit,
          chars_log_prob=chars_log_prob,
          predicted_chars=predicted_chars,
          predicted_scores=predicted_scores,
          predicted_length=predicted_length,
predicted_text=predicted_text,
predicted_conf=predicted_conf,
normalized_seq_conf=normalized_seq_conf)
  def create_loss(self, data, endpoints):
    """Creates all losses required to train the model.
@@ -413,15 +575,15 @@ class Model(object):
    Uses the same method as in https://arxiv.org/abs/1512.00567.
    Args:
      chars_labels: ground truth ids of characters, shape=[batch_size,
        seq_length];
      weight: label-smoothing regularization weight.
    Returns:
      A tensor with the same shape as the input.
    """
    one_hot_labels = tf.one_hot(
        chars_labels, depth=self._params.num_char_classes, axis=-1)
    pos_weight = 1.0 - weight
    neg_weight = weight / self._params.num_char_classes
    return one_hot_labels * pos_weight + neg_weight
@@ -433,10 +595,10 @@ class Model(object):
    also ignore all null chars after the first one.
    Args:
      chars_logits: logits for predicted characters, shape=[batch_size,
        seq_length, num_char_classes];
      chars_labels: ground truth ids of characters, shape=[batch_size,
        seq_length];
      mparams: method hyper parameters.
    Returns:
@@ -446,7 +608,7 @@ class Model(object):
    with tf.variable_scope('sequence_loss_fn/SLF'):
      if mparams.label_smoothing > 0:
        smoothed_one_hot_labels = self.label_smoothing_regularization(
            chars_labels, mparams.label_smoothing)
        labels_list = tf.unstack(smoothed_one_hot_labels, axis=1)
      else:
        # NOTE: in case of sparse softmax we are not using one-hot
@@ -459,20 +621,20 @@ class Model(object):
      else:
        # Suppose that reject character is the last in the charset.
        reject_char = tf.constant(
            self._params.num_char_classes - 1,
            shape=(batch_size, seq_length),
            dtype=tf.int64)
        known_char = tf.not_equal(chars_labels, reject_char)
        weights = tf.to_float(known_char)
      logits_list = tf.unstack(chars_logits, axis=1)
      weights_list = tf.unstack(weights, axis=1)
      loss = tf.contrib.legacy_seq2seq.sequence_loss(
          logits_list,
          labels_list,
          weights_list,
          softmax_loss_function=get_softmax_loss_fn(mparams.label_smoothing),
          average_across_timesteps=mparams.average_across_timesteps)
      tf.losses.add_loss(loss)
      return loss
@@ -482,8 +644,8 @@ class Model(object):
    Args:
      data: InputEndpoints namedtuple.
      endpoints: OutputEndpoints namedtuple.
      charset: A dictionary with mapping between character codes and unicode
        characters. Use the one provided by a dataset.charset.
      is_training: If True will create summary prefixes for training job,
        otherwise - for evaluation.
@@ -507,7 +669,7 @@ class Model(object):
    if is_training:
      tf.summary.image(
          sname('image/orig'), data.images_orig, max_outputs=max_outputs)
    for var in tf.trainable_variables():
      tf.summary.histogram(var.op.name, var)
    return None
@@ -520,32 +682,35 @@ class Model(object):
      names_to_values[name] = value_update_tuple[0]
      names_to_updates[name] = value_update_tuple[1]
    use_metric(
        'CharacterAccuracy',
        metrics.char_accuracy(
            endpoints.predicted_chars,
            data.labels,
            streaming=True,
rej_char=self._params.null_code))
    # Sequence accuracy computed by cutting sequence at the first null char
    use_metric(
        'SequenceAccuracy',
        metrics.sequence_accuracy(
            endpoints.predicted_chars,
            data.labels,
            streaming=True,
rej_char=self._params.null_code))
    for name, value in names_to_values.items():
      summary_name = 'eval/' + name
      tf.summary.scalar(summary_name, tf.Print(value, [value], summary_name))
    return list(names_to_updates.values())
  def create_init_fn_to_restore(self,
master_checkpoint,
                                inception_checkpoint=None):
    """Creates init operations to restore weights from various checkpoints.
    Args:
      master_checkpoint: path to a checkpoint which contains all weights for the
        whole model.
      inception_checkpoint: path to a checkpoint which contains weights for the
        inception part only.
@@ -556,8 +721,8 @@ class Model(object):
    all_feed_dict = {}
    def assign_from_checkpoint(variables, checkpoint):
      logging.info('Request to re-store %d weights from %s', len(variables),
                   checkpoint)
      if not variables:
        logging.error('Can\'t find any variables to restore.')
        sys.exit(1)
@@ -565,15 +730,18 @@ class Model(object):
      all_assign_ops.append(assign_op)
      all_feed_dict.update(feed_dict)
    logging.info('variables_to_restore:\n%s',
                 utils.variables_to_restore().keys())
    logging.info('moving_average_variables:\n%s',
[v.op.name for v in tf.moving_average_variables()])
logging.info('trainable_variables:\n%s',
[v.op.name for v in tf.trainable_variables()])
    if master_checkpoint:
      assign_from_checkpoint(utils.variables_to_restore(), master_checkpoint)
    if inception_checkpoint:
      variables = utils.variables_to_restore(
          'AttentionOcr_v1/conv_tower_fn/INCE', strip_scope=True)
      assign_from_checkpoint(variables, inception_checkpoint)
    def init_assign_fn(sess):
...
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Converts existing checkpoint into a SavedModel.
Usage example:
python model_export.py \
--logtostderr --checkpoint=model.ckpt-399731 \
--export_dir=/tmp/attention_ocr_export
"""
import os
import tensorflow as tf
from tensorflow import app
from tensorflow.contrib import slim
from tensorflow.python.platform import flags
import common_flags
import model_export_lib
FLAGS = flags.FLAGS
common_flags.define()
flags.DEFINE_string('export_dir', None, 'Directory to export model files to.')
flags.DEFINE_integer(
'image_width', None,
'Image width used during training (or crop width if used)'
' If not set, the dataset default is used instead.')
flags.DEFINE_integer(
'image_height', None,
'Image height used during training (or crop height if used)'
' If not set, the dataset default is used instead.')
flags.DEFINE_string('work_dir', '/tmp', 'A directory to store temporary files.')
flags.DEFINE_integer('version_number', 1, 'Version number of the model')
flags.DEFINE_bool(
'export_for_serving', True,
'Whether the exported model accepts serialized tf.Example '
'protos as input')
def get_checkpoint_path():
"""Returns a path to a checkpoint based on specified commandline flags.
In order to specify a full path to a checkpoint use --checkpoint flag.
Alternatively, if --train_log_dir was specified it will return a path to the
most recent checkpoint.
Raises:
ValueError: in case it can't find a checkpoint.
Returns:
A string.
"""
if FLAGS.checkpoint:
return FLAGS.checkpoint
else:
model_save_path = tf.train.latest_checkpoint(FLAGS.train_log_dir)
if not model_save_path:
raise ValueError('Can\'t find a checkpoint in: %s' % FLAGS.train_log_dir)
return model_save_path
def export_model(export_dir,
export_for_serving,
batch_size=None,
crop_image_width=None,
crop_image_height=None):
"""Exports a model to the named directory.
Note that --dataset_name and --checkpoint are required and parsed by the
underlying module common_flags.
Args:
export_dir: The output dir where model is exported to.
export_for_serving: If True, expects serialized tf.Example protos as input and
attaches image normalization as part of the exported graph.
batch_size: For non-serving export, the input batch_size needs to be
specified.
crop_image_width: Width of the input image. Uses the dataset default if
None.
crop_image_height: Height of the input image. Uses the dataset default if
None.
Returns:
Returns the model signature_def.
"""
# Dataset object used only to get all parameters for the model.
dataset = common_flags.create_dataset(split_name='test')
model = common_flags.create_model(
dataset.num_char_classes,
dataset.max_sequence_length,
dataset.num_of_views,
dataset.null_code,
charset=dataset.charset)
dataset_image_height, dataset_image_width, image_depth = dataset.image_shape
# Add check for charmap file
if not os.path.exists(dataset.charset_file):
raise ValueError('No charset defined at {}: export will fail'.format(
dataset.charset_file))
# Default to dataset dimensions, otherwise use provided dimensions.
image_width = crop_image_width or dataset_image_width
image_height = crop_image_height or dataset_image_height
if export_for_serving:
images_orig = tf.placeholder(
tf.string, shape=[batch_size], name='tf_example')
images_orig_float = model_export_lib.generate_tfexample_image(
images_orig,
image_height,
image_width,
image_depth,
name='float_images')
else:
images_shape = (batch_size, image_height, image_width, image_depth)
images_orig = tf.placeholder(
tf.uint8, shape=images_shape, name='original_image')
images_orig_float = tf.image.convert_image_dtype(
images_orig, dtype=tf.float32, name='float_images')
endpoints = model.create_base(images_orig_float, labels_one_hot=None)
sess = tf.Session()
saver = tf.train.Saver(slim.get_variables_to_restore(), sharded=True)
saver.restore(sess, get_checkpoint_path())
tf.logging.info('Model restored successfully.')
# Create model signature.
if export_for_serving:
input_tensors = {
tf.saved_model.signature_constants.CLASSIFY_INPUTS: images_orig
}
else:
input_tensors = {'images': images_orig}
signature_inputs = model_export_lib.build_tensor_info(input_tensors)
# NOTE: Tensors 'images_float' and 'chars_logit' are used for inference
# or to compute saliency maps.
output_tensors = {
'images_float': images_orig_float,
'predictions': endpoints.predicted_chars,
'scores': endpoints.predicted_scores,
'chars_logit': endpoints.chars_logit,
'predicted_length': endpoints.predicted_length,
'predicted_text': endpoints.predicted_text,
'predicted_conf': endpoints.predicted_conf,
'normalized_seq_conf': endpoints.normalized_seq_conf
}
for i, t in enumerate(
model_export_lib.attention_ocr_attention_masks(
dataset.max_sequence_length)):
output_tensors['attention_mask_%d' % i] = t
signature_outputs = model_export_lib.build_tensor_info(output_tensors)
signature_def = tf.saved_model.signature_def_utils.build_signature_def(
signature_inputs, signature_outputs,
tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME)
# Save model.
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
builder.add_meta_graph_and_variables(
sess, [tf.saved_model.tag_constants.SERVING],
signature_def_map={
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
signature_def
},
main_op=tf.tables_initializer(),
strip_default_attrs=True)
builder.save()
tf.logging.info('Model has been exported to %s' % export_dir)
return signature_def
def main(unused_argv):
if os.path.exists(FLAGS.export_dir):
raise ValueError('export_dir already exists: exporting will fail')
export_model(FLAGS.export_dir, FLAGS.export_for_serving, FLAGS.batch_size,
FLAGS.image_width, FLAGS.image_height)
if __name__ == '__main__':
flags.mark_flag_as_required('dataset_name')
flags.mark_flag_as_required('export_dir')
app.run(main)
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for exporting Attention OCR model."""
import tensorflow as tf
# Function borrowed from research/object_detection/core/preprocessor.py
def normalize_image(image, original_minval, original_maxval, target_minval,
target_maxval):
"""Normalizes pixel values in the image.
Moves the pixel values from the current [original_minval, original_maxval]
range to the [target_minval, target_maxval] range.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width,
channels].
original_minval: current image minimum value.
original_maxval: current image maximum value.
target_minval: target image minimum value.
target_maxval: target image maximum value.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('NormalizeImage', values=[image]):
original_minval = float(original_minval)
original_maxval = float(original_maxval)
target_minval = float(target_minval)
target_maxval = float(target_maxval)
image = tf.cast(image, dtype=tf.float32)
image = tf.subtract(image, original_minval)
image = tf.multiply(image, (target_maxval - target_minval) /
(original_maxval - original_minval))
image = tf.add(image, target_minval)
return image
def generate_tfexample_image(input_example_strings,
image_height,
image_width,
image_channels,
name=None):
"""Parses a 1D tensor of serialized tf.Example protos and returns image batch.
Args:
input_example_strings: A 1-Dimensional tensor of size [batch_size] and type
tf.string containing a serialized Example proto per image.
image_height: First image dimension.
image_width: Second image dimension.
image_channels: Third image dimension.
name: optional tensor name.
Returns:
A tensor with shape [batch_size, height, width, channels] of type float32
with values in the range [0..1]
"""
batch_size = tf.shape(input_example_strings)[0]
images_shape = tf.stack(
[batch_size, image_height, image_width, image_channels])
tf_example_image_key = 'image/encoded'
feature_configs = {
tf_example_image_key:
tf.FixedLenFeature(
image_height * image_width * image_channels, dtype=tf.float32)
}
feature_tensors = tf.parse_example(input_example_strings, feature_configs)
float_images = tf.reshape(
normalize_image(
feature_tensors[tf_example_image_key],
original_minval=0.0,
original_maxval=255.0,
target_minval=0.0,
target_maxval=1.0),
images_shape,
name=name)
return float_images
def attention_ocr_attention_masks(num_characters):
# TODO(gorban): use tensors directly after replacing LSTM unroll methods.
prefix = ('AttentionOcr_v1/'
'sequence_logit_fn/SQLR/LSTM/attention_decoder/Attention_0')
names = ['%s/Softmax:0' % (prefix)]
for i in range(1, num_characters):
names += ['%s_%d/Softmax:0' % (prefix, i)]
return [tf.get_default_graph().get_tensor_by_name(n) for n in names]
def build_tensor_info(tensor_dict):
return {
k: tf.saved_model.utils.build_tensor_info(t)
for k, t in tensor_dict.items()
}
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for model_export."""
import os
import numpy as np
from absl.testing import flagsaver
import tensorflow as tf
import common_flags
import model_export
_CHECKPOINT = 'model.ckpt-399731'
_CHECKPOINT_URL = (
'http://download.tensorflow.org/models/attention_ocr_2017_08_09.tar.gz')
def _clean_up():
tf.gfile.DeleteRecursively(tf.test.get_temp_dir())
def _create_tf_example_string(image):
"""Create a serialized tf.Example proto for feeding the model."""
example = tf.train.Example()
example.features.feature['image/encoded'].float_list.value.extend(
list(np.reshape(image, (-1))))
return example.SerializeToString()
class AttentionOcrExportTest(tf.test.TestCase):
"""Tests for model_export.export_model."""
def setUp(self):
for suffix in ['.meta', '.index', '.data-00000-of-00001']:
filename = _CHECKPOINT + suffix
self.assertTrue(
tf.gfile.Exists(filename),
msg='Missing checkpoint file %s. '
'Please download and extract it from %s' %
(filename, _CHECKPOINT_URL))
tf.flags.FLAGS.dataset_name = 'fsns'
tf.flags.FLAGS.checkpoint = _CHECKPOINT
tf.flags.FLAGS.dataset_dir = os.path.join(
os.path.dirname(__file__), 'datasets/testdata/fsns')
tf.test.TestCase.setUp(self)
_clean_up()
self.export_dir = os.path.join(tf.test.get_temp_dir(), 'exported_model')
self.minimal_output_signature = {
'predictions': 'AttentionOcr_v1/predicted_chars:0',
'scores': 'AttentionOcr_v1/predicted_scores:0',
'predicted_length': 'AttentionOcr_v1/predicted_length:0',
'predicted_text': 'AttentionOcr_v1/predicted_text:0',
'predicted_conf': 'AttentionOcr_v1/predicted_conf:0',
'normalized_seq_conf': 'AttentionOcr_v1/normalized_seq_conf:0'
}
def create_input_feed(self, graph_def, serving):
"""Returns the input feed for the model.
Creates random images according to the size specified by dataset_name,
formats them in the correct way depending on whether the model was exported
for serving, and returns the correctly keyed feed_dict for inference.
Args:
graph_def: Graph definition of the loaded model.
serving: Whether the model was exported for Serving.
Returns:
The feed_dict suitable for model inference.
"""
# Creates a dataset based on FLAGS.dataset_name.
self.dataset = common_flags.create_dataset('test')
# Create some random images to test inference for any dataset.
self.images = {
'img1':
np.random.uniform(low=64, high=192,
size=self.dataset.image_shape).astype('uint8'),
'img2':
np.random.uniform(low=32, high=224,
size=self.dataset.image_shape).astype('uint8'),
}
signature_def = graph_def.signature_def[
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
if serving:
input_name = signature_def.inputs[
tf.saved_model.signature_constants.CLASSIFY_INPUTS].name
# Model for serving takes input: inputs['inputs'] = 'tf_example:0'
feed_dict = {
input_name: [
_create_tf_example_string(self.images['img1']),
_create_tf_example_string(self.images['img2'])
]
}
else:
input_name = signature_def.inputs['images'].name
# Model for direct use takes input: inputs['images'] = 'original_image:0'
feed_dict = {
input_name: np.stack([self.images['img1'], self.images['img2']])
}
return feed_dict
def verify_export_load_and_inference(self, export_for_serving=False):
"""Verify exported model can be loaded and inference can run successfully.
This function will load the exported model in self.export_dir, then create
some fake images according to the specification of FLAGS.dataset_name.
It then feeds the input through the model and verifies that the minimal set
of output signatures is present.
Note: Model and dataset creation in the underlying library depends on the
following commandline flags:
FLAGS.dataset_name
Args:
export_for_serving: True if the model was exported for Serving. This
affects how input is fed into the model.
"""
tf.reset_default_graph()
sess = tf.Session()
graph_def = tf.saved_model.loader.load(
sess=sess,
tags=[tf.saved_model.tag_constants.SERVING],
export_dir=self.export_dir)
feed_dict = self.create_input_feed(graph_def, export_for_serving)
results = sess.run(self.minimal_output_signature, feed_dict=feed_dict)
out_shape = (2,)
self.assertEqual(np.shape(results['predicted_conf']), out_shape)
self.assertEqual(np.shape(results['predicted_text']), out_shape)
self.assertEqual(np.shape(results['predicted_length']), out_shape)
self.assertEqual(np.shape(results['normalized_seq_conf']), out_shape)
out_shape = (2, self.dataset.max_sequence_length)
self.assertEqual(np.shape(results['scores']), out_shape)
self.assertEqual(np.shape(results['predictions']), out_shape)
@flagsaver.flagsaver
def test_fsns_export_for_serving_and_load_inference(self):
model_export.export_model(self.export_dir, True)
self.verify_export_load_and_inference(True)
@flagsaver.flagsaver
def test_fsns_export_and_load_inference(self):
model_export.export_model(self.export_dir, False, batch_size=2)
self.verify_export_load_and_inference(False)
if __name__ == '__main__':
tf.test.main()
@@ -12,11 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the model."""
import string
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim
@@ -32,6 +31,7 @@ def create_fake_charset(num_char_classes):
class ModelTest(tf.test.TestCase):
  def setUp(self):
    tf.test.TestCase.setUp(self)
@@ -51,18 +51,21 @@ class ModelTest(tf.test.TestCase):
    self.chars_logit_shape = (self.batch_size, self.seq_length,
                              self.num_char_classes)
    self.length_logit_shape = (self.batch_size, self.seq_length + 1)
# Placeholder knows image dimensions, but not batch size.
self.input_images = tf.placeholder(
tf.float32,
shape=(None, self.image_height, self.image_width, 3),
name='input_node')
    self.initialize_fakes()
  def initialize_fakes(self):
    self.images_shape = (self.batch_size, self.image_height, self.image_width,
                         3)
    self.fake_images = self.rng.randint(
        low=0, high=255, size=self.images_shape).astype('float32')
    self.fake_conv_tower_np = self.rng.randn(*self.conv_tower_shape).astype(
        'float32')
    self.fake_conv_tower = tf.constant(self.fake_conv_tower_np)
    self.fake_logits = tf.constant(
        self.rng.randn(*self.chars_logit_shape).astype('float32'))
@@ -74,33 +77,44 @@ class ModelTest(tf.test.TestCase):
  def create_model(self, charset=None):
    return model.Model(
        self.num_char_classes,
self.seq_length,
num_views=4,
null_code=62,
        charset=charset)
  def test_char_related_shapes(self):
    charset = create_fake_charset(self.num_char_classes)
ocr_model = self.create_model(charset=charset)
    with self.test_session() as sess:
      endpoints_tf = ocr_model.create_base(
          images=self.input_images, labels_one_hot=None)
      sess.run(tf.global_variables_initializer())
      tf.tables_initializer().run()
endpoints = sess.run(
          endpoints_tf, feed_dict={self.input_images: self.fake_images})
      self.assertEqual(
          (self.batch_size, self.seq_length, self.num_char_classes),
          endpoints.chars_logit.shape)
      self.assertEqual(
          (self.batch_size, self.seq_length, self.num_char_classes),
          endpoints.chars_log_prob.shape)
      self.assertEqual((self.batch_size, self.seq_length),
                       endpoints.predicted_chars.shape)
      self.assertEqual((self.batch_size, self.seq_length),
                       endpoints.predicted_scores.shape)
self.assertEqual((self.batch_size,), endpoints.predicted_text.shape)
self.assertEqual((self.batch_size,), endpoints.predicted_conf.shape)
self.assertEqual((self.batch_size,), endpoints.normalized_seq_conf.shape)
  def test_predicted_scores_are_within_range(self):
    ocr_model = self.create_model()
    _, _, scores = ocr_model.char_predictions(self.fake_logits)
    with self.test_session() as sess:
      scores_np = sess.run(
scores, feed_dict={self.input_images: self.fake_images})
    values_in_range = (scores_np >= 0.0) & (scores_np <= 1.0)
    self.assertTrue(
@@ -111,10 +125,11 @@ class ModelTest(tf.test.TestCase):
  def test_conv_tower_shape(self):
    with self.test_session() as sess:
      ocr_model = self.create_model()
      conv_tower = ocr_model.conv_tower_fn(self.input_images)
      sess.run(tf.global_variables_initializer())
      conv_tower_np = sess.run(
conv_tower, feed_dict={self.input_images: self.fake_images})
      self.assertEqual(self.conv_tower_shape, conv_tower_np.shape)
@@ -124,11 +139,12 @@ class ModelTest(tf.test.TestCase):
    # updates, gradients and variances. It also depends on the type of used
    # optimizer.
    ocr_model = self.create_model()
    ocr_model.create_base(images=self.input_images, labels_one_hot=None)
    with self.test_session() as sess:
      tfprof_root = tf.profiler.profile(
          sess.graph,
          options=tf.profiler.ProfileOptionBuilder
.trainable_variables_parameter())
model_size_bytes = 4 * tfprof_root.total_parameters model_size_bytes = 4 * tfprof_root.total_parameters
self.assertLess(model_size_bytes, 1 * 2**30) self.assertLess(model_size_bytes, 1 * 2**30)
...@@ -158,7 +174,7 @@ class ModelTest(tf.test.TestCase): ...@@ -158,7 +174,7 @@ class ModelTest(tf.test.TestCase):
loss = model.sequence_loss_fn(self.fake_logits, self.fake_labels) loss = model.sequence_loss_fn(self.fake_logits, self.fake_labels)
with self.test_session() as sess: with self.test_session() as sess:
loss_np = sess.run(loss) loss_np = sess.run(loss, feed_dict={self.input_images: self.fake_images})
# This test checks that the loss function is 'runnable'. # This test checks that the loss function is 'runnable'.
self.assertEqual(loss_np.shape, tuple()) self.assertEqual(loss_np.shape, tuple())
...@@ -172,19 +188,20 @@ class ModelTest(tf.test.TestCase): ...@@ -172,19 +188,20 @@ class ModelTest(tf.test.TestCase):
Returns: Returns:
a list of tensors with encoded image coordinates in them. a list of tensors with encoded image coordinates in them.
""" """
batch_size, h, w, _ = net.shape.as_list() batch_size = tf.shape(net)[0]
_, h, w, _ = net.shape.as_list()
h_loc = [ h_loc = [
tf.tile( tf.tile(
tf.reshape( tf.reshape(
tf.contrib.layers.one_hot_encoding( tf.contrib.layers.one_hot_encoding(
tf.constant([i]), num_classes=h), [h, 1]), [1, w]) tf.constant([i]), num_classes=h), [h, 1]), [1, w])
for i in range(h) for i in range(h)
] ]
h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2) h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
w_loc = [ w_loc = [
tf.tile( tf.tile(
tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w), tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
[h, 1]) for i in range(w) [h, 1]) for i in range(w)
] ]
w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2) w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
loc = tf.concat([h_loc, w_loc], 2) loc = tf.concat([h_loc, w_loc], 2)
...@@ -197,11 +214,12 @@ class ModelTest(tf.test.TestCase): ...@@ -197,11 +214,12 @@ class ModelTest(tf.test.TestCase):
conv_w_coords_tf = model.encode_coordinates_fn(self.fake_conv_tower) conv_w_coords_tf = model.encode_coordinates_fn(self.fake_conv_tower)
with self.test_session() as sess: with self.test_session() as sess:
conv_w_coords = sess.run(conv_w_coords_tf) conv_w_coords = sess.run(
conv_w_coords_tf, feed_dict={self.input_images: self.fake_images})
batch_size, height, width, feature_size = self.conv_tower_shape batch_size, height, width, feature_size = self.conv_tower_shape
self.assertEqual(conv_w_coords.shape, (batch_size, height, width, self.assertEqual(conv_w_coords.shape,
feature_size + height + width)) (batch_size, height, width, feature_size + height + width))
def test_disabled_coordinate_encoding_returns_features_unchanged(self): def test_disabled_coordinate_encoding_returns_features_unchanged(self):
model = self.create_model() model = self.create_model()
...@@ -209,7 +227,8 @@ class ModelTest(tf.test.TestCase): ...@@ -209,7 +227,8 @@ class ModelTest(tf.test.TestCase):
conv_w_coords_tf = model.encode_coordinates_fn(self.fake_conv_tower) conv_w_coords_tf = model.encode_coordinates_fn(self.fake_conv_tower)
with self.test_session() as sess: with self.test_session() as sess:
conv_w_coords = sess.run(conv_w_coords_tf) conv_w_coords = sess.run(
conv_w_coords_tf, feed_dict={self.input_images: self.fake_images})
self.assertAllEqual(conv_w_coords, self.fake_conv_tower_np) self.assertAllEqual(conv_w_coords, self.fake_conv_tower_np)
...@@ -221,7 +240,8 @@ class ModelTest(tf.test.TestCase): ...@@ -221,7 +240,8 @@ class ModelTest(tf.test.TestCase):
conv_w_coords_tf = model.encode_coordinates_fn(fake_conv_tower) conv_w_coords_tf = model.encode_coordinates_fn(fake_conv_tower)
with self.test_session() as sess: with self.test_session() as sess:
conv_w_coords = sess.run(conv_w_coords_tf) conv_w_coords = sess.run(
conv_w_coords_tf, feed_dict={self.input_images: self.fake_images})
# Original features # Original features
self.assertAllEqual(conv_w_coords[0, :, :, :4], self.assertAllEqual(conv_w_coords[0, :, :, :4],
...@@ -261,10 +281,11 @@ class ModelTest(tf.test.TestCase): ...@@ -261,10 +281,11 @@ class ModelTest(tf.test.TestCase):
class CharsetMapperTest(tf.test.TestCase): class CharsetMapperTest(tf.test.TestCase):
def test_text_corresponds_to_ids(self): def test_text_corresponds_to_ids(self):
charset = create_fake_charset(36) charset = create_fake_charset(36)
ids = tf.constant( ids = tf.constant([[17, 14, 21, 21, 24], [32, 24, 27, 21, 13]],
[[17, 14, 21, 21, 24], [32, 24, 27, 21, 13]], dtype=tf.int64) dtype=tf.int64)
charset_mapper = model.CharsetMapper(charset) charset_mapper = model.CharsetMapper(charset)
with self.test_session() as sess: with self.test_session() as sess:
......
...@@ -111,7 +111,7 @@ class SequenceLayerBase(object):
    self._mparams = method_params
    self._net = net
    self._labels_one_hot = labels_one_hot
    self._batch_size = tf.shape(net)[0]
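    # Reading the batch size with tf.shape() (rather than from the static
    # shape) keeps the layer usable when the batch dimension is unknown at
    # graph-construction time, e.g. for a placeholder shaped [None, h, w, c].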
    # Initialize parameters for char logits which will be computed on the fly
    # inside an LSTM decoder.
...@@ -275,7 +275,7 @@ class NetSlice(SequenceLayerBase):

  def __init__(self, *args, **kwargs):
    super(NetSlice, self).__init__(*args, **kwargs)
    self._zero_label = tf.zeros(
        tf.stack([self._batch_size, self._params.num_char_classes]))
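    # self._batch_size is now a runtime tensor, so the target shape is built
    # with tf.stack() to combine it with the static number of character classes.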
  def get_image_feature(self, char_index):
    """Returns a subset of image features for a character.
...@@ -352,7 +352,7 @@ class Attention(SequenceLayerBase):

  def __init__(self, *args, **kwargs):
    super(Attention, self).__init__(*args, **kwargs)
    self._zero_label = tf.zeros(
        tf.stack([self._batch_size, self._params.num_char_classes]))

  def get_eval_input(self, prev, i):
    """See SequenceLayerBase.get_eval_input for details."""
......
...@@ -78,3 +78,20 @@ def variables_to_restore(scope=None, strip_scope=False):
    return variable_map
  else:
    return {v.op.name: v for v in slim.get_variables_to_restore()}
def ConvertAllInputsToTensors(func):
  """A decorator to convert all function's inputs into tensors.

  Args:
    func: a function to decorate.

  Returns:
    A decorated function.
  """

  def FuncWrapper(*args):
    tensors = [tf.convert_to_tensor(a) for a in args]
    return func(*tensors)

  return FuncWrapper
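A minimal usage sketch for the decorator above (the decorated function
`scale_and_add` is hypothetical and only for illustration): positional
arguments that are not already tensors, such as numpy arrays or Python lists,
are converted before the wrapped function runs.

```python
import numpy as np
import tensorflow as tf

@ConvertAllInputsToTensors
def scale_and_add(a, b):
  # Both arguments arrive here as tf.Tensor objects.
  return 2.0 * a + b

result = scale_and_add(np.array([1.0, 2.0], dtype=np.float32), [3.0, 4.0])
```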
## DELF installation

### Installation script
We now have a script to do the entire installation in one shot. Navigate to the
directory `models/research/delf/delf/python/training`, then run:
```bash
# From models/research/delf/delf/python/training
bash install_delf.sh
```
If this works, you are done! If not, see below for detailed instructions for
installing this codebase and its dependencies.
*Please note that this installation script only works on 64-bit Linux
architectures due to the `protoc` binary that is automatically downloaded. If
you wish to install the DELF library on other architectures please update the
[`install_delf.sh`](delf/python/training/install_delf.sh) script by referencing
the desired `protoc`
[binary release](https://github.com/protocolbuffers/protobuf/releases).*
In more detail: the `install_delf.sh` script installs both the DELF library and
its dependencies in the following sequence (a quick sanity check is sketched
after this list):
* Install TensorFlow 2.2 and TensorFlow 2.2 for GPU.
* Install the [TF-Slim](https://github.com/google-research/tf-slim) library
from source.
* Download [protoc](https://github.com/protocolbuffers/protobuf) and compile
the DELF Protocol Buffers.
* Install the matplotlib, numpy, scikit-image, scipy and python3-tk Python
libraries.
* Install the
[TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection)
from the cloned TensorFlow Model Garden repository.
* Install the DELF package.
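After the steps above complete, a quick check along these lines can confirm the
library is importable (a sketch; the module name `delf` is assumed from the
final installation step):

```bash
python3 -c "import delf; print('DELF import OK')"
```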
### Tensorflow

[![TensorFlow 2.2](https://img.shields.io/badge/tensorflow-2.2-brightgreen)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/)

For detailed steps to install Tensorflow, follow the
...@@ -11,9 +46,9 @@ typical user can install Tensorflow using one of the following commands:

```bash
# For CPU:
pip3 install 'tensorflow>=2.2.0'
# For GPU:
pip3 install 'tensorflow-gpu>=2.2.0'
```

### TF-Slim
......
# Deep Local and Global Image Features

[![TensorFlow 2.2](https://img.shields.io/badge/tensorflow-2.2-brightgreen)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/)

This project presents code for deep local and global image feature methods,
...@@ -41,7 +41,7 @@ DELG:

```
"Unifying Deep Local and Global Features for Image Search",
B. Cao*, A. Araujo* and J. Sim,
Proc. ECCV'20
```

GLDv2:
...@@ -55,11 +55,11 @@ Proc. CVPR'20

## News

- [Jul'20] Check out our ECCV'20 paper:
  ["Unifying Deep Local and Global Features for Image Search"](https://arxiv.org/abs/2001.05027)
- [Apr'20] Check out our CVPR'20 paper: ["Google Landmarks Dataset v2 - A
  Large-Scale Benchmark for Instance-Level Recognition and
  Retrieval"](https://arxiv.org/abs/2004.01804)
- [Jun'19] DELF achieved 2nd place in
  [CVPR Visual Localization challenge (Local Features track)](https://sites.google.com/corp/view/ltvl2019).
  See our slides
...@@ -182,104 +182,55 @@ directories therein, `protos` and `python`.

### `delf/protos`

This directory contains protobufs for local feature aggregation
(`aggregation_config.proto`), serializing detected boxes (`box.proto`),
serializing float tensors (`datum.proto`), configuring DELF/DELG extraction
(`delf_config.proto`), serializing local features (`feature.proto`).
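As a small illustration of how these protos are typically consumed (a sketch
only; it assumes the generated bindings are importable as
`delf.delf_config_pb2` and uses the example config shipped under
`delf/python/examples`):

```python
from google.protobuf import text_format
from delf import delf_config_pb2

config = delf_config_pb2.DelfConfig()
with open('delf_config_example.pbtxt', 'r') as f:
  text_format.Parse(f.read(), config)
print(config)
```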
### `delf/python`

This directory contains files for several different purposes, such as:
reading/writing tensors/features (`box_io.py`, `datum_io.py`, `feature_io.py`),
local feature aggregation extraction and similarity computation
(`feature_aggregation_extractor.py`, `feature_aggregation_similarity.py`) and
helper functions for image/feature loading/processing (`utils.py`,
`feature_extractor.py`).

The subdirectory `delf/python/examples` contains sample scripts to run DELF/DELG
feature extraction/matching (`extractor.py`, `extract_features.py`,
`match_images.py`) and object detection (`detector.py`, `extract_boxes.py`).
`delf_config_example.pbtxt` shows an example instantiation of the DelfConfig
proto, used for DELF feature extraction.
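For orientation, typical invocations of these example scripts look roughly like
the sketch below; the flag names and paths are illustrative and should be
checked against each script's `--help` output:

```bash
# Extract DELF features for a list of images using the example config.
python3 extract_features.py \
  --config_path delf_config_example.pbtxt \
  --list_images_path list_images.txt \
  --output_dir data/features

# Match two images using their previously extracted features.
python3 match_images.py \
  --image_1_path image_1.jpg \
  --image_2_path image_2.jpg \
  --features_1_path data/features/image_1.delf \
  --features_2_path data/features/image_2.delf \
  --output_image matched_images.png
```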
The subdirectory `delf/python/delg` contains sample scripts/configs related to
the DELG paper: `extract_features.py` for local+global feature extraction (with
an example `delg_gld_config.pbtxt`) and `perform_retrieval.py` for performing
retrieval/scoring.
The subdirectory `delf/python/detect_to_retrieve` contains sample
scripts/configs related to the Detect-to-Retrieve paper, for feature/box
extraction/aggregation/clustering (`aggregation_extraction.py`,
`boxes_and_features_extraction.py`, `cluster_delf_features.py`,
`extract_aggregation.py`, `extract_index_boxes_and_features.py`,
`extract_query_features.py`), image retrieval/reranking (`perform_retrieval.py`,
`image_reranking.py`), along with configs used for feature
extraction/aggregation (`delf_gld_config.pbtxt`,
`index_aggregation_config.pbtxt`, `query_aggregation_config.pbtxt`) and
Revisited Oxford/Paris dataset parsing/evaluation (`dataset.py`).
The subdirectory `delf/python/google_landmarks_dataset` contains sample
scripts/modules for computing GLD metrics (`metrics.py`,
`compute_recognition_metrics.py`, `compute_retrieval_metrics.py`), GLD file IO
(`dataset_file_io.py`) / reproducing results from the GLDv2 paper
(`rn101_af_gldv2clean_config.pbtxt` and the instructions therein).
The subdirectory `delf/python/training` contains sample scripts/modules for
performing model training (`train.py`) based on a ResNet50 DELF model
(`model/resnet50.py`, `model/delf_model.py`), also presenting relevant model
exporting scripts and associated utils (`model/export_model.py`,
`model/export_global_model.py`, `model/export_model_utils.py`) and dataset
downloading/preprocessing (`download_dataset.sh`, `build_image_dataset.py`,
`datasets/googlelandmarks.py`).

Besides these, other files in the different subdirectories contain tests for the
various modules.
...@@ -290,6 +241,16 @@ Andr&eacute; Araujo (@andrefaraujo)

## Release history

### Jul, 2020
- Full TF2 support. Only one minor `compat.v1` usage left. Updated
instructions to require TF2.2
- Refactored / much improved training code, with very detailed, step-by-step
instructions
**Thanks to contributors**: Dan Anghel, Barbara Fusinska and Andr&eacute;
Araujo.
### May, 2020

- Codebase is now Python3-first
......
...@@ -24,34 +24,9 @@ cd models/research/delf/delf/python/training

## Install the DELF Library

To be able to use this code, please follow
[these instructions](../../../INSTALL_INSTRUCTIONS.md) to properly install the
DELF library.

## Download the GLDv2 Training Data
......
...@@ -22,7 +22,7 @@ install_requires = [
    'pandas >= 0.24.2',
    'numpy >= 1.16.1',
    'scipy >= 1.2.2',
    'tensorflow >= 2.2.0',
    'tf_slim >= 1.1',
    'tensorflow_probability >= 0.9.0',
]
......
# Contributing to the TensorFlow Object Detection API

Patches to TensorFlow Object Detection API are welcome!

We require contributors to fill out either the individual or corporate
Contributor License Agreement (CLA).
...@@ -9,5 +9,5 @@ Contributor License Agreement (CLA).
* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).

Please follow the
[TensorFlow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
when submitting pull requests.
# TensorFlow Object Detection API

[![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
[![TensorFlow 1.15](https://img.shields.io/badge/TensorFlow-1.15-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v1.15.0)
[![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/)

Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision.
...@@ -11,7 +11,7 @@ models. At Google we’ve certainly found this codebase to be useful for our
computer vision needs, and we hope that you will as well. <p align="center">
<img src="g3doc/img/kites_detections_output.jpg" width=676 height=450> </p>

Contributions to the codebase are welcome and we would love to hear back from
you if you find this API useful. Finally if you use the TensorFlow Object
Detection API for a research publication, please consider citing:

```
...@@ -26,91 +26,93 @@ Song Y, Guadarrama S, Murphy K, CVPR 2017
<img src="g3doc/img/tf-od-api-logo.png" width=140 height=195>
</p>

## Support for TensorFlow 2 and 1

The TensorFlow Object Detection API supports both TensorFlow 2 (TF2) and
TensorFlow 1 (TF1). A majority of the modules in the library are both TF1 and
TF2 compatible. In cases where they are not, we provide two versions.

Although we will continue to maintain the TF1 models and provide support, we
encourage users to try the Object Detection API with TF2 for the following
reasons:

* We provide new architectures supported in TF2 only and we will continue to
  develop in TF2 going forward.
* The popular models we ported from TF1 to TF2 achieve the same performance.
* A single training and evaluation binary now supports both GPU and TPU
  distribution strategies making it possible to train models with synchronous
  SGD by default.
* Eager execution with new binaries makes debugging easy!

Finally, if you are an existing user of the Object Detection API we have
retained the same config language you are familiar with and ensured that the
TF2 training/eval binary takes the same arguments as our TF1 binaries.

Note: The models we provide in [TF2 Zoo](g3doc/tf2_detection_zoo.md) and
[TF1 Zoo](g3doc/tf1_detection_zoo.md) are specific to the TensorFlow major
version and are not interoperable.

Please select one of the two links below for TensorFlow version specific
documentation of the Object Detection API:

<!-- mdlint off(WHITESPACE_LINE_LENGTH) -->
| [![Object Detection API TensorFlow 2](https://img.shields.io/badge/Object%20Detection%20API-TensorFlow%202-orange)](g3doc/tf2.md) | [![TensorFlow 2 Model Zoo](https://img.shields.io/badge/Model%20Zoo-TensorFlow%202-Orange)](g3doc/tf2_detection_zoo.md) |
|---|---|
| [![Object Detection API TensorFlow 1](https://img.shields.io/badge/Object%20Detection%20API-TensorFlow%201-orange)](g3doc/tf1.md) | [![TensorFlow 1 Model Zoo](https://img.shields.io/badge/Model%20Zoo-TensorFlow%201-Orange)](g3doc/tf1_detection_zoo.md) |
<!-- mdlint on -->
## What's New
### TensorFlow 2 Support
We are happy to announce that the TF OD API officially supports TF2! Our release
includes:
* New binaries for train/eval/export that are designed to run in eager mode
  (see the launch sketch below).
* A suite of TF2 compatible (Keras-based) models; this includes migrations of
our most popular TF1.x models (e.g., SSD with MobileNet, RetinaNet,
Faster R-CNN, Mask R-CNN), as well as a few new architectures for which we
will only maintain TF2 implementations:
1. CenterNet - a simple and effective anchor-free architecture based on
the recent [Objects as Points](https://arxiv.org/abs/1904.07850) paper by
Zhou et al.
2. [EfficientDet](https://arxiv.org/abs/1911.09070) - a recent family of
SOTA models discovered with the help of Neural Architecture Search.
* COCO pre-trained weights for all of the models provided as TF2 style
object-based checkpoints.
* Access to [Distribution Strategies](https://www.tensorflow.org/guide/distributed_training)
for distributed training --- our models are designed to be trainable using sync
multi-GPU and TPU platforms.
* Colabs demo’ing eager mode training and inference.
See our release blogpost [here](https://blog.tensorflow.org/2020/07/tensorflow-2-meets-object-detection-api.html).
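As a rough sketch of what launching the new TF2 training/eval binary looks like
(the paths are placeholders, and the exact flags should be checked against the
binary's `--help`):

```bash
# Hypothetical pipeline config and model directory.
python object_detection/model_main_tf2.py \
  --pipeline_config_path=path/to/pipeline.config \
  --model_dir=path/to/model_dir \
  --alsologtostderr
```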
If you are an existing user of the TF OD API using TF 1.x, don’t worry, we’ve
got you covered.
**Thanks to contributors**: Akhil Chinnakotla, Allen Lavoie, Anirudh Vegesana,
Anjali Sridhar, Austin Myers, Dan Kondratyuk, David Ross, Derek Chow, Jaeyoun
Kim, Jing Li, Jonathan Huang, Jordi Pont-Tuset, Karmel Allison, Kathy Ruan,
Kaushik Shivakumar, Lu He, Mingxing Tan, Pengchong Jin, Ronny Votel, Sara Beery,
Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### Context R-CNN
We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
uses attention to incorporate contextual information from images (e.g. from
temporally nearby frames taken by a static camera) in order to improve accuracy.
Importantly, these contextual images need not be labeled.

* When applied to a challenging wildlife detection dataset
  ([Snapshot Serengeti](http://lila.science/datasets/snapshot-serengeti)),
  Context R-CNN with context from up to a month of images outperforms a
  single-frame baseline by 17.9% mAP, and outperforms S3D (a 3d convolution
  based baseline) by 11.2% mAP.
...@@ -118,282 +120,48 @@ Importantly, these contextual images need not be labeled.
  novel camera deployment to improve performance at that camera, boosting
  model generalizability.

Read about Context R-CNN on the Google AI blog
[here](https://ai.googleblog.com/2020/06/leveraging-temporal-context-for-object.html).

We have provided code for generating data with associated context
[here](g3doc/context_rcnn.md), and a sample config for a Context R-CNN model
[here](samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config).

Snapshot Serengeti-trained Faster R-CNN and Context R-CNN models can be found in
the
[model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md#snapshot-serengeti-camera-trap-trained-models).

A colab demonstrating Context R-CNN is provided
[here](colab_tutorials/context_rcnn_tutorial.ipynb).

<b>Thanks to contributors</b>: Sara Beery, Jonathan Huang, Guanhang Wu, Vivek
Rathod, Ronny Votel, Zhichao Lu, David Ross, Pietro Perona, Tanya Birch, and the
Wildlife Insights AI Team.
## Release Notes

See [notes](g3doc/release_notes.md) for all past releases.

## Getting Help

To get help with issues you may encounter using the TensorFlow Object Detection
API, create a new question on [StackOverflow](https://stackoverflow.com/) with
the tags "tensorflow" and "object-detection".

Please report bugs (actually broken code, not usage questions) to the
tensorflow/models GitHub
[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
issue name with "object_detection".

Please check the [FAQ](g3doc/faq.md) for frequently asked questions before
reporting an issue.

## Maintainers

* Jonathan Huang ([@GitHub jch1](https://github.com/jch1))
* Vivek Rathod ([@GitHub tombstone](https://github.com/tombstone))
* Vighnesh Birodkar ([@GitHub vighneshbirodkar](https://github.com/vighneshbirodkar))
* Austin Myers ([@GitHub austin-myers](https://github.com/austin-myers))
* Zhichao Lu ([@GitHub pkulzc](https://github.com/pkulzc))
* Ronny Votel ([@GitHub ronnyvotel](https://github.com/ronnyvotel))
* Yu-hui Chen ([@GitHub yuhuichen1015](https://github.com/yuhuichen1015))
* Derek Chow ([@GitHub derekjchow](https://github.com/derekjchow))
...@@ -17,9 +17,8 @@
"""Tests for box_predictor_builder."""

import unittest
from unittest import mock  # pylint: disable=g-importing-member
import tensorflow.compat.v1 as tf

from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
......
...@@ -14,7 +14,7 @@
# ==============================================================================
"""Tests for graph_rewriter_builder."""

import unittest
from unittest import mock  # pylint: disable=g-importing-member
import tensorflow.compat.v1 as tf
import tf_slim as slim
......
...@@ -16,6 +16,7 @@
"""A function to build a DetectionModel from configuration."""

import functools
import sys

from object_detection.builders import anchor_generator_builder
from object_detection.builders import box_coder_builder
from object_detection.builders import box_predictor_builder
...@@ -58,6 +59,8 @@ if tf_version.is_tf2():
  from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor
  from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
  from object_detection.predictors import rfcn_keras_box_predictor
  if sys.version_info[0] >= 3:
    from object_detection.models import ssd_efficientnet_bifpn_feature_extractor as ssd_efficientnet_bifpn

if tf_version.is_tf1():
  from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
...@@ -99,6 +102,22 @@ if tf_version.is_tf2():
          ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor,
      'ssd_resnet152_v1_fpn_keras':
          ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor,
      'ssd_efficientnet-b0_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB0BiFPNKerasFeatureExtractor,
      'ssd_efficientnet-b1_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB1BiFPNKerasFeatureExtractor,
      'ssd_efficientnet-b2_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB2BiFPNKerasFeatureExtractor,
      'ssd_efficientnet-b3_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB3BiFPNKerasFeatureExtractor,
      'ssd_efficientnet-b4_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB4BiFPNKerasFeatureExtractor,
      'ssd_efficientnet-b5_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB5BiFPNKerasFeatureExtractor,
      'ssd_efficientnet-b6_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB6BiFPNKerasFeatureExtractor,
      'ssd_efficientnet-b7_bifpn_keras':
          ssd_efficientnet_bifpn.SSDEfficientNetB7BiFPNKerasFeatureExtractor,
  }

  FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = {
...@@ -110,11 +129,11 @@ if tf_version.is_tf2():
          frcnn_resnet_keras.FasterRCNNResnet152KerasFeatureExtractor,
      'faster_rcnn_inception_resnet_v2_keras':
          frcnn_inc_res_keras.FasterRCNNInceptionResnetV2KerasFeatureExtractor,
      'faster_rcnn_resnet50_fpn_keras':
          frcnn_resnet_fpn_keras.FasterRCNNResnet50FpnKerasFeatureExtractor,
      'faster_rcnn_resnet101_fpn_keras':
          frcnn_resnet_fpn_keras.FasterRCNNResnet101FpnKerasFeatureExtractor,
      'faster_rcnn_resnet152_fpn_keras':
          frcnn_resnet_fpn_keras.FasterRCNNResnet152FpnKerasFeatureExtractor,
  }

...@@ -310,6 +329,14 @@ def _build_ssd_feature_extractor(feature_extractor_config,
            feature_extractor_config.fpn.additional_layer_depth,
    })
  if feature_extractor_config.HasField('bifpn'):
    kwargs.update({
        'bifpn_min_level': feature_extractor_config.bifpn.min_level,
        'bifpn_max_level': feature_extractor_config.bifpn.max_level,
        'bifpn_num_iterations': feature_extractor_config.bifpn.num_iterations,
        'bifpn_num_filters': feature_extractor_config.bifpn.num_filters,
        'bifpn_combine_method': feature_extractor_config.bifpn.combine_method,
    })
  return feature_extractor_class(**kwargs)
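For orientation, the new `bifpn` branch above corresponds to a feature
extractor config fragment along these lines (a sketch only; the values are
illustrative and the exact message layout should be checked against the
feature extractor proto definitions):

```
feature_extractor {
  type: "ssd_efficientnet-b0_bifpn_keras"
  bifpn {
    min_level: 3
    max_level: 7
    num_iterations: 3
    num_filters: 64
    combine_method: "fast_attention"
  }
}
```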
...@@ -843,6 +870,22 @@ def mask_proto_to_params(mask_config):
      heatmap_bias_init=mask_config.heatmap_bias_init)
def densepose_proto_to_params(densepose_config):
"""Converts CenterNet.DensePoseEstimation proto to parameter namedtuple."""
classification_loss, localization_loss, _, _, _, _, _ = (
losses_builder.build(densepose_config.loss))
return center_net_meta_arch.DensePoseParams(
class_id=densepose_config.class_id,
classification_loss=classification_loss,
localization_loss=localization_loss,
part_loss_weight=densepose_config.part_loss_weight,
coordinate_loss_weight=densepose_config.coordinate_loss_weight,
num_parts=densepose_config.num_parts,
task_loss_weight=densepose_config.task_loss_weight,
upsample_to_input_res=densepose_config.upsample_to_input_res,
heatmap_bias_init=densepose_config.heatmap_bias_init)
def _build_center_net_model(center_net_config, is_training, add_summaries):
  """Build a CenterNet detection model.
...@@ -895,6 +938,11 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
  if center_net_config.HasField('mask_estimation_task'):
    mask_params = mask_proto_to_params(center_net_config.mask_estimation_task)
  densepose_params = None
  if center_net_config.HasField('densepose_estimation_task'):
    densepose_params = densepose_proto_to_params(
        center_net_config.densepose_estimation_task)
  return center_net_meta_arch.CenterNetMetaArch(
      is_training=is_training,
      add_summaries=add_summaries,
...@@ -904,7 +952,8 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
      object_center_params=object_center_params,
      object_detection_params=object_detection_params,
      keypoint_params_dict=keypoint_params_dict,
      mask_params=mask_params,
      densepose_params=densepose_params)


def _build_center_net_feature_extractor(
......