"include/composable_kernel/utility/config.hpp.in" did not exist on "121693b3d3b3148010f0756c5ab4741476620aba"
Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

parents d8611151 b035a227
@@ -23,6 +23,7 @@ from official.vision.detection.modeling.architecture import heads
from official.vision.detection.modeling.architecture import identity
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.modeling.architecture import resnet
from official.vision.detection.modeling.architecture import spinenet
def norm_activation_generator(params):
@@ -42,6 +43,9 @@ def backbone_generator(params):
activation=params.norm_activation.activation,
norm_activation=norm_activation_generator(
params.norm_activation))
elif params.architecture.backbone == 'spinenet':
spinenet_params = params.spinenet
backbone_fn = spinenet.SpineNetBuilder(model_id=spinenet_params.model_id)
else:
raise ValueError('Backbone model `{}` is not supported.'
.format(params.architecture.backbone))
......
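The new `spinenet` branch only forwards `params.spinenet.model_id` to the builder, so the experiment config needs a matching `spinenet` section. A minimal sketch of that lookup, using `SimpleNamespace` as a stand-in for the real attribute-style params object (the stand-in and the chosen model id are illustrative assumptions):

```python
from types import SimpleNamespace

# Hypothetical stand-in for the detection experiment params; the real object
# is built from the YAML/params_dict configs in this codebase.
params = SimpleNamespace(
    architecture=SimpleNamespace(backbone='spinenet'),
    spinenet=SimpleNamespace(model_id='49'),  # any key of SCALING_MAP below
)

if params.architecture.backbone == 'spinenet':
  spinenet_params = params.spinenet
  # Mirrors the new factory branch: only model_id is forwarded.
  print('SpineNetBuilder(model_id=%r)' % spinenet_params.model_id)
```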
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for neural networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualBlock(tf.keras.layers.Layer):
"""A residual block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A residual block with BN after convolutions.
Args:
filters: `int` number of filters for the block's convolutions (and for the
projection shortcut, if used).
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(ResidualBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(ResidualBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(ResidualBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
return self._activation_fn(x + shortcut)
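As a quick sanity check of the block's shape behavior (a sketch with arbitrary shapes): with `use_projection=True` and `strides=2`, the layer halves the spatial dimensions and projects the channel count to `filters`.

```python
import tensorflow as tf
from official.vision.detection.modeling.architecture import nn_blocks

block = nn_blocks.ResidualBlock(filters=64, strides=2, use_projection=True)
x = tf.random.normal([2, 32, 32, 16])  # NHWC; shapes chosen arbitrarily
y = block(x)
print(y.shape)  # (2, 16, 16, 64): stride 2 halves H and W, channels -> filters
```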
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock(tf.keras.layers.Layer):
"""A standard bottleneck block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A standard bottleneck block with BN after convolutions.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(BottleneckBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv3 = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm3 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(BottleneckBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(BottleneckBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
x = self._activation_fn(x)
x = self._conv3(x)
x = self._norm3(x)
return self._activation_fn(x + shortcut)
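The same kind of check applies to the bottleneck block; the difference to keep in mind is the 4x expansion of the final 1x1 convolution (again a sketch with arbitrary shapes):

```python
import tensorflow as tf
from official.vision.detection.modeling.architecture import nn_blocks

block = nn_blocks.BottleneckBlock(filters=64, strides=1, use_projection=True)
x = tf.random.normal([2, 16, 16, 128])  # NHWC; shapes chosen arbitrarily
y = block(x)
print(y.shape)  # (2, 16, 16, 256): output channels are filters * 4
```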
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
import math
from absl import logging
import tensorflow as tf
from tensorflow.python.keras import backend
from official.modeling import tf_utils
from official.vision.detection.modeling.architecture import nn_blocks
layers = tf.keras.layers
FILTER_SIZE_MAP = {
1: 32,
2: 64,
3: 128,
4: 256,
5: 256,
6: 256,
7: 256,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
(2, 'bottleneck', (0, 1), False),
(4, 'residual', (0, 1), False),
(3, 'bottleneck', (2, 3), False),
(4, 'bottleneck', (2, 4), False),
(6, 'residual', (3, 5), False),
(4, 'bottleneck', (3, 5), False),
(5, 'residual', (6, 7), False),
(7, 'residual', (6, 8), False),
(5, 'bottleneck', (8, 9), False),
(5, 'bottleneck', (8, 10), False),
(4, 'bottleneck', (5, 10), True),
(3, 'bottleneck', (4, 10), True),
(5, 'bottleneck', (7, 12), True),
(7, 'bottleneck', (5, 14), True),
(6, 'bottleneck', (12, 14), True),
]
SCALING_MAP = {
'49S': {
'endpoints_num_filters': 128,
'filter_size_scale': 0.65,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'49': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'96': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 2,
},
'143': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 1.0,
'block_repeats': 3,
},
'190': {
'endpoints_num_filters': 512,
'filter_size_scale': 1.3,
'resample_alpha': 1.0,
'block_repeats': 4,
},
}
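As a concrete reading of the scaling table: the '49S' variant shrinks every `FILTER_SIZE_MAP` entry by 0.65 and projects all endpoints to 128 channels, so a level-4 block ends up with int(256 * 0.65) = 166 filters. A small illustrative lookup:

```python
scaling = SCALING_MAP['49S']
level4_filters = int(FILTER_SIZE_MAP[4] * scaling['filter_size_scale'])
print(level4_filters, scaling['endpoints_num_filters'])  # -> 166 128
```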
class BlockSpec(object):
"""A container class that specifies the block configuration for SpineNet."""
def __init__(self, level, block_fn, input_offsets, is_output):
self.level = level
self.block_fn = block_fn
self.input_offsets = input_offsets
self.is_output = is_output
def build_block_specs(block_specs=None):
"""Builds the list of BlockSpec objects for SpineNet."""
if not block_specs:
block_specs = SPINENET_BLOCK_SPECS
logging.info('Building SpineNet block specs: %s', block_specs)
return [BlockSpec(*b) for b in block_specs]
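Following the tuple layout documented above `SPINENET_BLOCK_SPECS`, the first default spec describes a level-2 bottleneck block fed by the two stem blocks (input offsets 0 and 1) that is not an output block; `build_block_specs()` simply wraps each tuple in a `BlockSpec`:

```python
specs = build_block_specs()
first = specs[0]
print(first.level, first.block_fn, first.input_offsets, first.is_output)
# -> 2 bottleneck (0, 1) False
```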
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNet(tf.keras.Model):
"""Class to build SpineNet models."""
def __init__(self,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
endpoints_num_filters=256,
resample_alpha=0.5,
block_repeats=1,
filter_size_scale=1.0,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""SpineNet model."""
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = endpoints_num_filters
self._resample_alpha = resample_alpha
self._block_repeats = block_repeats
self._filter_size_scale = filter_size_scale
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation = tf.nn.relu
elif activation == 'swish':
self._activation = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._init_block_fn = 'bottleneck'
self._num_init_blocks = 2
if use_sync_bn:
self._norm = layers.experimental.SyncBatchNormalization
else:
self._norm = layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
# Build SpineNet.
inputs = tf.keras.Input(shape=input_specs.shape[1:])
net = self._build_stem(inputs=inputs)
net = self._build_scale_permuted_network(
net=net, input_width=input_specs.shape[1])
net = self._build_endpoints(net=net)
super(SpineNet, self).__init__(inputs=inputs, outputs=net)
def _block_group(self,
inputs,
filters,
strides,
block_fn_cand,
block_repeats=1,
name='block_group'):
"""Creates one group of blocks for the SpineNet model."""
block_fn_candidates = {
'bottleneck': nn_blocks.BottleneckBlock,
'residual': nn_blocks.ResidualBlock,
}
block_fn = block_fn_candidates[block_fn_cand]
_, _, _, num_filters = inputs.get_shape().as_list()
if block_fn_cand == 'bottleneck':
use_projection = not (num_filters == (filters * 4) and strides == 1)
else:
use_projection = not (num_filters == filters and strides == 1)
x = block_fn(
filters=filters,
strides=strides,
use_projection=use_projection,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
for _ in range(1, block_repeats):
x = block_fn(
filters=filters,
strides=1,
use_projection=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
x)
return tf.identity(x, name=name)
def _build_stem(self, inputs):
"""Build SpineNet stem."""
x = layers.Conv2D(
filters=64,
kernel_size=7,
strides=2,
use_bias=False,
padding='same',
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
net = []
# Build the initial level 2 blocks.
for i in range(self._num_init_blocks):
x = self._block_group(
inputs=x,
filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
strides=1,
block_fn_cand=self._init_block_fn,
block_repeats=self._block_repeats,
name='stem_block_{}'.format(i + 1))
net.append(x)
return net
def _build_scale_permuted_network(self,
net,
input_width,
weighted_fusion=False):
"""Build scale-permuted network."""
net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
net_block_fns = [self._init_block_fn] * len(net)
num_outgoing_connections = [0] * len(net)
endpoints = {}
for i, block_spec in enumerate(self._block_specs):
# Find out specs for the target block.
target_width = int(math.ceil(input_width / 2**block_spec.level))
target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
self._filter_size_scale)
target_block_fn = block_spec.block_fn
# Resample then merge input0 and input1.
parents = []
input0 = block_spec.input_offsets[0]
input1 = block_spec.input_offsets[1]
x0 = self._resample_with_alpha(
inputs=net[input0],
input_width=net_sizes[input0],
input_block_fn=net_block_fns[input0],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x0)
num_outgoing_connections[input0] += 1
x1 = self._resample_with_alpha(
inputs=net[input1],
input_width=net_sizes[input1],
input_block_fn=net_block_fns[input1],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x1)
num_outgoing_connections[input1] += 1
# Merge 0 outdegree blocks to the output block.
if block_spec.is_output:
for j, (j_feat,
j_connections) in enumerate(zip(net, num_outgoing_connections)):
if j_connections == 0 and (j_feat.shape[2] == target_width and
j_feat.shape[3] == x0.shape[3]):
parents.append(j_feat)
num_outgoing_connections[j] += 1
# pylint: disable=g-direct-tensorflow-import
if weighted_fusion:
dtype = parents[0].dtype
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation)(tf.add_n(parents))
x = self._block_group(
inputs=x,
filters=target_num_filters,
strides=1,
block_fn_cand=target_block_fn,
block_repeats=self._block_repeats,
name='scale_permuted_block_{}'.format(i + 1))
net.append(x)
net_sizes.append(target_width)
net_block_fns.append(target_block_fn)
num_outgoing_connections.append(0)
# Save output feats.
if block_spec.is_output:
if block_spec.level in endpoints:
raise ValueError('Duplicate feats found for output level {}.'.format(
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
endpoints[block_spec.level] = x
return endpoints
def _build_endpoints(self, net):
"""Match filter size for endpoints before sharing conv layers."""
endpoints = {}
for level in range(self._min_level, self._max_level + 1):
x = layers.Conv2D(
filters=self._endpoints_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
net[level])
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
endpoints[level] = x
return endpoints
def _resample_with_alpha(self,
inputs,
input_width,
input_block_fn,
target_width,
target_num_filters,
target_block_fn,
alpha=0.5):
"""Match resolution and feature dimension."""
_, _, _, input_num_filters = inputs.get_shape().as_list()
if input_block_fn == 'bottleneck':
input_num_filters /= 4
new_num_filters = int(input_num_filters * alpha)
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
# Spatial resampling.
if input_width > target_width:
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=3,
strides=2,
padding='SAME',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
input_width /= 2
while input_width > target_width:
x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = layers.UpSampling2D(size=(scale, scale))(x)
# Last 1x1 conv to match filter size.
if target_block_fn == 'bottleneck':
target_num_filters *= 4
x = layers.Conv2D(
filters=target_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
return x
class SpineNetBuilder(object):
"""SpineNet builder."""
def __init__(self,
model_id,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001):
if model_id not in SCALING_MAP:
raise ValueError(
'SpineNet {} is not a valid architecture.'.format(model_id))
scaling_params = SCALING_MAP[model_id]
self._input_specs = input_specs
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = scaling_params['endpoints_num_filters']
self._resample_alpha = scaling_params['resample_alpha']
self._block_repeats = scaling_params['block_repeats']
self._filter_size_scale = scaling_params['filter_size_scale']
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._activation = activation
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
def __call__(self, inputs, is_training=None):
with backend.get_graph().as_default():
model = SpineNet(
input_specs=self._input_specs,
min_level=self._min_level,
max_level=self._max_level,
block_specs=self._block_specs,
endpoints_num_filters=self._endpoints_num_filters,
resample_alpha=self._resample_alpha,
block_repeats=self._block_repeats,
filter_size_scale=self._filter_size_scale,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)
return model(inputs)
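A sketch of using the builder end to end (the model id and input size are illustrative choices): the call returns a dict of feature maps keyed by level, here levels 3 through 7 for the default 640x640 input.

```python
import tensorflow as tf
from official.vision.detection.modeling.architecture import spinenet

builder = spinenet.SpineNetBuilder(model_id='49')  # any key of SCALING_MAP
images = tf.keras.Input(shape=(640, 640, 3))
endpoints = builder(images, is_training=False)
for level in sorted(endpoints):
  print(level, endpoints[level].shape)
# Expected: levels 3..7 at strides 8..128, 256 channels each,
# e.g. 3 -> (None, 80, 80, 256), 7 -> (None, 5, 5, 256).
```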
@@ -119,6 +119,24 @@ python3 classifier_trainer.py \
--params_override='runtime.num_gpus=$NUM_GPUS'
```
To train on multiple hosts, each with GPUs attached, using
[MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy),
update the `runtime` section in gpu.yaml
(or override it using `--params_override`) with:
```YAML
# gpu.yaml
runtime:
distribution_strategy: 'multi_worker_mirrored'
worker_hosts: '$HOST1:port,$HOST2:port'
num_gpus: $NUM_GPUS
task_index: 0
```
Set `task_index: 0` on the first host, `task_index: 1` on the second host, and
so on. `$HOST1` and `$HOST2` are the IP addresses of the hosts, and `port` can
be any free port on the hosts. Only the first host will write TensorBoard
summaries and save checkpoints.
#### On TPU:
```bash
python3 classifier_trainer.py \
......
@@ -235,9 +235,6 @@ def initialize(params: base_configs.ExperimentConfig,
else:
data_format = 'channels_last'
tf.keras.backend.set_image_data_format(data_format)
distribution_utils.configure_cluster(
params.runtime.worker_hosts,
params.runtime.task_index)
if params.runtime.run_eagerly:
# Enable eager execution to allow step-by-step debugging
tf.config.experimental_run_functions_eagerly(True)
@@ -296,6 +293,10 @@ def train_and_eval(
"""Runs the train and eval path using compile/fit."""
logging.info('Running train and eval.')
distribution_utils.configure_cluster(
params.runtime.worker_hosts,
params.runtime.task_index)
# Note: for TPUs, strategy and scope should be created before the dataset
strategy = strategy_override or distribution_utils.get_distribution_strategy(
distribution_strategy=params.runtime.distribution_strategy,
@@ -338,7 +339,8 @@ def train_and_eval(
optimizer = optimizer_factory.build_optimizer(
optimizer_name=params.model.optimizer.name,
base_learning_rate=learning_rate,
params=params.model.optimizer.as_dict(),
model=model)
metrics_map = _get_metrics(one_hot)
metrics = [metrics_map[metric] for metric in params.train.metrics]
......
@@ -100,6 +100,9 @@ class DatasetConfig(base_config.Config):
skip_decoding: Whether to skip image decoding when loading from TFDS.
cache: whether to cache dataset examples. Can be used to avoid re-reading
from disk on the second epoch. Requires significant memory overhead.
tf_data_service: The URI of a tf.data service to offload preprocessing onto
during training. The URI should be in the format "protocol://address",
e.g. "grpc://tf-data-service:5050".
mean_subtract: whether or not to apply mean subtraction to the dataset.
standardize: whether or not to apply standardization to the dataset.
"""
@@ -123,6 +126,7 @@ class DatasetConfig(base_config.Config):
file_shuffle_buffer_size: int = 1024
skip_decoding: bool = True
cache: bool = False
tf_data_service: Optional[str] = None
mean_subtract: bool = False
standardize: bool = False
@@ -449,6 +453,18 @@ class DatasetBuilder:
# Prefetch overlaps in-feed with training
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
if self.config.tf_data_service:
if not hasattr(tf.data.experimental, 'service'):
raise ValueError('The tf_data_service flag requires Tensorflow version '
'>= 2.3.0, but the version is {}'.format(
tf.__version__))
dataset = dataset.apply(
tf.data.experimental.service.distribute(
processing_mode='parallel_epochs',
service=self.config.tf_data_service,
job_name='resnet_train'))
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
return dataset
def parse_record(self, record: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
......
@@ -18,11 +18,12 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
from typing import Any, Dict, Text, List
from absl import logging
import tensorflow as tf
import tensorflow_addons as tfa
from typing import Any, Dict, Text, List
from official.vision.image_classification import learning_rate
from official.vision.image_classification.configs import base_configs
@@ -250,7 +251,8 @@ class MovingAverage(tf.keras.optimizers.Optimizer):
def build_optimizer(
optimizer_name: Text,
base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
params: Dict[Text, Any],
model: tf.keras.Model = None):
"""Build the optimizer based on name. """Build the optimizer based on name.
Args: Args:
...@@ -261,6 +263,8 @@ def build_optimizer( ...@@ -261,6 +263,8 @@ def build_optimizer(
params: String -> Any dictionary representing the optimizer params. params: String -> Any dictionary representing the optimizer params.
This should contain optimizer specific parameters such as This should contain optimizer specific parameters such as
`base_learning_rate`, `decay`, etc. `base_learning_rate`, `decay`, etc.
model: The `tf.keras.Model`. This is used for the shadow copy if using
`MovingAverage`.
Returns:
A tf.keras.Optimizer.
@@ -322,10 +326,13 @@ def build_optimizer(
# Moving average should be applied last, as it's applied at test time
moving_average_decay = params.get('moving_average_decay', 0.)
if moving_average_decay is not None and moving_average_decay > 0.:
if model is None:
raise ValueError('`model` must be provided if using `MovingAverage`.')
logging.info('Including moving average decay.')
optimizer = MovingAverage(
optimizer=optimizer,
average_decay=moving_average_decay)
optimizer.shadow_copy(model)
return optimizer
......
@@ -19,15 +19,21 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import tensorflow as tf
from absl.testing import parameterized
import tensorflow as tf
from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs
class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
def build_toy_model(self) -> tf.keras.Model:
"""Creates a toy `tf.Keras.Model`."""
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=(1,)))
return model
@parameterized.named_parameters(
('sgd', 'sgd', 0., False),
('momentum', 'momentum', 0., False),
@@ -40,6 +46,7 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
('rmsprop_ema', 'rmsprop', 0.999, False))
def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
"""Smoke test to be sure no syntax errors."""
model = self.build_toy_model()
params = {
'learning_rate': 0.001,
'rho': 0.09,
@@ -51,7 +58,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
optimizer = optimizer_factory.build_optimizer(
optimizer_name=optimizer_name,
base_learning_rate=params['learning_rate'],
params=params,
model=model)
self.assertTrue(issubclass(type(optimizer), tf.keras.optimizers.Optimizer))
def test_unknown_optimizer(self):
......
@@ -20,49 +20,49 @@ The research models are maintained by their respective authors.
| Directory | Name | Description | Maintainer(s) |
|-----------|------|-------------|---------------|
| [object_detection](object_detection) | TensorFlow Object Detection API | A framework that makes it easy to construct, train and deploy object detection models<br /><br />A collection of object detection models pre-trained on the COCO dataset, the Kitti dataset, the Open Images dataset, the AVA v2.1 dataset, and the iNaturalist Species Detection Dataset| jch1, tombstone, pkulzc |
| [slim](slim) | TensorFlow-Slim Image Classification Model Library | A lightweight high-level API of TensorFlow for defining, training and evaluating image classification models <br />• Inception V1/V2/V3/V4<br />• Inception-ResNet-v2<br />• ResNet V1/V2<br />• VGG 16/19<br />• MobileNet V1/V2/V3<br />• NASNet-A_Mobile/Large<br />• PNASNet-5_Large/Mobile | sguada, marksandler2 |
## Models and Implementations
### Computer Vision
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [attention_ocr](attention_ocr) | [Attention-based Extraction of Structured Information from Street View Imagery](https://arxiv.org/abs/1704.03549) | ICDAR 2017 | xavigibert |
| [autoaugment](autoaugment) | [1] [AutoAugment](https://arxiv.org/abs/1805.09501)<br />[2] [Wide Residual Networks](https://arxiv.org/abs/1605.07146)<br />[3] [Shake-Shake regularization](https://arxiv.org/abs/1705.07485)<br />[4] [ShakeDrop Regularization for Deep Residual Learning](https://arxiv.org/abs/1802.02375) | [1] CVPR 2019<br />[2] BMVC 2016<br /> [3] ICLR 2017<br /> [4] ICLR 2018 | barretzoph |
| [deeplab](deeplab) | [1] [DeepLabv1: Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected CRFs](https://arxiv.org/abs/1412.7062)<br />[2] [DeepLabv2: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs](https://arxiv.org/abs/1606.00915)<br />[3] [DeepLabv3: Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587)<br />[4] [DeepLabv3+: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611)<br />| [1] ICLR 2015 <br />[2] TPAMI 2017 <br />[4] ECCV 2018 | aquariusjay, yknzhu |
| [delf](delf) | [1] DELF (DEep Local Features): [Large-Scale Image Retrieval with Attentive Deep Local Features](https://arxiv.org/abs/1612.06321)<br />[2] [Detect-to-Retrieve: Efficient Regional Aggregation for Image Search](https://arxiv.org/abs/1812.01584)<br />[3] DELG (DEep Local and Global features): [Unifying Deep Local and Global Features for Image Search](https://arxiv.org/abs/2001.05027)<br />[4] GLDv2: [Google Landmarks Dataset v2 -- A Large-Scale Benchmark for Instance-Level Recognition and Retrieval](https://arxiv.org/abs/2004.01804) | [1] ICCV 2017<br />[2] CVPR 2019<br />[4] CVPR 2020 | andrefaraujo |
| [lstm_object_detection](lstm_object_detection) | [Mobile Video Object Detection with Temporally-Aware Feature Maps](https://arxiv.org/abs/1711.06368) | CVPR 2018 | yinxiaoli, yongzhe2160, lzyuan |
| [marco](marco) | MARCO: [Classification of crystallization outcomes using deep convolutional neural networks](https://arxiv.org/abs/1803.10342) | | vincentvanhoucke |
| [vid2depth](vid2depth) | [Unsupervised Learning of Depth and Ego-Motion from Monocular Video Using 3D Geometric Constraints](https://arxiv.org/abs/1802.05522) | CVPR 2018 | rezama |
### Natural Language Processing
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [adversarial_text](adversarial_text) | [1] [Adversarial Training Methods for Semi-Supervised Text](https://arxiv.org/abs/1605.07725) Classification<br />[2] [Semi-supervised Sequence Learning](https://arxiv.org/abs/1511.01432) | [1] ICLR 2017<br />[2] NIPS 2015 | rsepassi, a-dai |
| [cvt_text](cvt_text) | [Semi-Supervised Sequence Modeling with Cross-View Training](https://arxiv.org/abs/1809.08370) | EMNLP 2018 | clarkkev, lmthang |
### Audio and Speech
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [audioset](audioset) | [1] [Audio Set: An ontology and human-labeled dataset for audio events](https://research.google/pubs/pub45857/)<br />[2] [CNN Architectures for Large-Scale Audio Classification](https://research.google/pubs/pub45611/) | ICASSP 2017 | plakal, dpwe |
### Reinforcement Learning
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [efficient-hrl](efficient-hrl) | [1] [Data-Efficient Hierarchical Reinforcement Learning](https://arxiv.org/abs/1805.08296)<br />[2] [Near-Optimal Representation Learning for Hierarchical Reinforcement Learning](https://arxiv.org/abs/1810.01257) | [1] NIPS 2018<br /> [2] ICLR 2019 | ofirnachum |
| [pcl_rl](pcl_rl) | [1] [Improving Policy Gradient by Exploring Under-appreciated Rewards](https://arxiv.org/abs/1611.09321)<br />[2] [Bridging the Gap Between Value and Policy Based Reinforcement Learning](https://arxiv.org/abs/1702.08892)<br />[3] [Trust-PCL: An Off-Policy Trust Region Method for Continuous Control](https://arxiv.org/abs/1707.01891) | [1] ICLR 2017<br />[2] NIPS 2017<br />[3] ICLR 2018 | ofirnachum |
### Others
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [lfads](lfads) | [LFADS - Latent Factor Analysis via Dynamical Systems](https://arxiv.org/abs/1608.06315) | | jazcollins, sussillo |
| [rebar](rebar) | [REBAR: Low-variance, unbiased gradient estimates for discrete latent variable models](https://arxiv.org/abs/1703.07370) | NIPS 2017 | gjtucker |
---
@@ -70,55 +70,55 @@ The research models are maintained by their respective authors.
The following research models are no longer maintained.
**Note**: We will remove archived models from the master branch in June, 2020.
After removal, you will still be able to access archived models in the archive branch.
| Directory | Paper(s) | Conference | Maintainer(s) |
|-----------|----------|------------|---------------|
| [adv_imagenet_models](adv_imagenet_models) | [1] [Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)<br />[2] [Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204) | [1] ICLR 2017<br /> [2] ICLR 2018 | alexeykurakin |
| [adversarial_crypto](adversarial_crypto) | [Learning to Protect Communications with Adversarial Neural Cryptography](https://arxiv.org/abs/1610.06918) | | dave-andersen |
| [adversarial_logit_pairing](adversarial_logit_pairing) | [Adversarial Logit Pairing](https://arxiv.org/abs/1803.06373) | | alexeykurakin |
| [autoencoder](autoencoder) | Various autoencoders | | snurkabill |
| [brain_coder](brain_coder) | [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526) | | danabo, mnorouzi |
| [cognitive_mapping_and_planning](cognitive_mapping_and_planning) | [Cognitive Mapping and Planning for Visual Navigation](https://arxiv.org/abs/1702.03920) | CVPR 2017 | s-gupta |
| [compression](compression) | [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148) | CVPR 2017 | nmjohn |
| [deep_contextual_bandits](deep_contextual_bandits) | [Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks for Thompson Sampling](https://arxiv.org/abs/1802.09127) | ICLR 2018 | rikel |
| [deep_speech](deep_speech) | [Deep Speech 2](https://arxiv.org/abs/1512.02595) | ICLR 2016 | yhliang2018 |
| [domain_adaptation](domain_adaptation) | [1] [Domain Separation Networks](https://arxiv.org/abs/1608.06019) <br />[2] [Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial Networks](https://arxiv.org/abs/1612.05424) | NIPS 2016 | bousmalis, dmrd |
| [feelvos](feelvos)| [FEELVOS](https://arxiv.org/abs/1902.09513) | CVPR 2019 | pvoigtlaender, yuningchai, aquariusjay |
| [fivo](fivo)| [Filtering variational objectives for training generative sequence models](https://arxiv.org/abs/1705.09279) | NIPS 2017 | dieterichlawson |
| [global_objectives](global_objectives) | [Scalable Learning of Non-Decomposable Objectives](https://arxiv.org/abs/1608.04802) | AISTATS 2017 | mackeya-google |
| [im2txt](im2txt) | [Show and Tell: Lessons learned from the 2015 MSCOCO Image Captioning Challenge](https://arxiv.org/abs/1609.06647) | TPAMI 2016 | cshallue |
| [inception](inception) | [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | CVPR 2016 | shlens, vincentvanhoucke |
| [keypointnet](keypointnet) | [KeypointNet](https://arxiv.org/abs/1807.03146) | | mnorouzi |
| [learned_optimizer](learned_optimizer) | [Learned Optimizers that Scale and Generalize](https://arxiv.org/abs/1703.04813) | ICML 2017 | olganw, nirum |
| [learning_to_remember_rare_events](learning_to_remember_rare_events) | [Learning to Remember Rare Events](https://arxiv.org/abs/1703.03129) | ICLR 2017| lukaszkaiser, ofirnachum |
| [learning_unsupervised_learning](learning_unsupervised_learning) | [Meta-Learning Update Rules for Unsupervised Representation Learning](https://arxiv.org/abs/1804.00222) | ICLR 2019 | lukemetz, nirum |
| [lexnet_nc](lexnet_nc) | [Olive Oil is Made of Olives, Baby Oil is Made for Babies: Interpreting Noun Compounds using Paraphrases in a Neural Model](https://arxiv.org/abs/1803.08073) | NAACL 2018 | vered1986, waterson |
| [lm_1b](lm_1b) | [Exploring the Limits of Language Modeling](https://arxiv.org/abs/1602.02410) | | oriolvinyals, panyx0718 |
| [lm_commonsense](lm_commonsense) | [A Simple Method for Commonsense Reasoning](https://arxiv.org/abs/1806.02847) | | thtrieu |
| [maskgan](maskgan)| [MaskGAN: Better Text Generation via Filling in the______](https://arxiv.org/abs/1801.07736) | ICLR 2018 | liamb315, a-dai |
| [namignizer](namignizer)| Namignizer | | knathanieltucker |
| [neural_gpu](neural_gpu)| [Neural GPUs Learn Algorithms](https://arxiv.org/abs/1511.08228) | | lukaszkaiser |
| [neural_programmer](neural_programmer) | [Learning a Natural Language Interface with Neural Programmer](https://arxiv.org/abs/1611.08945) | ICLR 2017 | arvind2505 |
| [next_frame_prediction](next_frame_prediction) | [Visual Dynamics: Probabilistic Future Frame Synthesis via Cross Convolutional Networks](https://arxiv.org/abs/1607.02586) | NIPS 2016 | panyx0718 |
| [ptn](ptn) | [Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision](https://arxiv.org/abs/1612.00814) | NIPS 2016 | xcyan, arkanath, hellojas, honglaklee |
| [qa_kg](qa_kg) | [Learning to Reason: End-to-End Module Networks for Visual Question Answering](https://arxiv.org/abs/1704.05526) | ICCV 2017 | yuyuz |
| [real_nvp](real_nvp) | [Density estimation using Real NVP](https://arxiv.org/abs/1605.08803) | ICLR 2017 | laurent-dinh |
| [sentiment_analysis](sentiment_analysis)| [Effective Use of Word Order for Text Categorization with Convolutional Neural Networks](https://arxiv.org/abs/1412.1058) | NAACL HLT 2015 | sculd |
| [seq2species](seq2species) | [Seq2Species: A deep learning approach to pattern recognition for short DNA sequences](https://doi.org/10.1101/353474) | | apbusia, depristo |
| [skip_thoughts](skip_thoughts) | [Skip-Thought Vectors](https://arxiv.org/abs/1506.06726) | | cshallue |
| [steve](steve) | [Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion](https://arxiv.org/abs/1807.01675) | NeurIPS 2018 | buckman-google |
| [street](street) | [End-to-End Interpretation of the French Street Name Signs Dataset](https://arxiv.org/abs/1702.03970) | ECCV 2016 | theraysmith |
| [struct2depth](struct2depth)| [Depth Prediction Without the Sensors: Leveraging Structure for Unsupervised Learning from Monocular Videos](https://arxiv.org/abs/1811.06152) | AAAI 2019 | aneliaangelova |
| [swivel](swivel) | [Swivel: Improving Embeddings by Noticing What's Missing](https://arxiv.org/abs/1602.02215) | | waterson |
| [tcn](tcn) | [Time-Contrastive Networks: Self-Supervised Learning from Video](https://arxiv.org/abs/1704.06888) | ICRA 2018 | coreylynch, sermanet |
| [textsum](textsum)| [A Neural Attention Model for Abstractive Sentence Summarization](https://arxiv.org/abs/1509.00685) | EMNLP 2015 | panyx0718, peterjliu |
| [transformer](transformer) | [Spatial Transformer Network](https://arxiv.org/abs/1506.02025) | NIPS 2015 | daviddao|
| [video_prediction](video_prediction) | [Unsupervised Learning for Physical Interaction through Video Prediction](https://arxiv.org/abs/1605.07157) | NIPS 2016 | cbfinn |
---
## Contributions
If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
...@@ -28,8 +28,6 @@ import data.dataset as dataset
import decoder
import deep_speech_model
from official.utils.flags import core as flags_core
from official.utils.logs import hooks_helper
from official.utils.logs import logger
from official.utils.misc import distribution_utils
from official.utils.misc import model_helpers
...@@ -276,16 +274,6 @@ def run_deep_speech(_):
"use_bias": flags_obj.use_bias
}
dataset_name = "LibriSpeech"
benchmark_logger = logger.get_benchmark_logger()
benchmark_logger.log_run_info("deep_speech", dataset_name, run_params,
test_id=flags_obj.benchmark_test_id)
train_hooks = hooks_helper.get_train_hooks(
flags_obj.hooks,
model_dir=flags_obj.model_dir,
batch_size=flags_obj.batch_size)
per_replica_batch_size = per_device_batch_size(flags_obj.batch_size, num_gpus)
def input_fn_train():
...@@ -307,7 +295,7 @@ def run_deep_speech(_):
train_speech_dataset.entries, cycle_index, flags_obj.sortagrad,
flags_obj.batch_size)
estimator.train(input_fn=input_fn_train, hooks=train_hooks)
estimator.train(input_fn=input_fn_train)
# Evaluation
tf.logging.info("Starting to evaluate...")
...@@ -433,8 +421,7 @@ def define_deep_speech_flags():
def main(_):
with logger.benchmark_context(flags_obj):
run_deep_speech(flags_obj)
if __name__ == "__main__":
...
...@@ -25,7 +25,6 @@ from delf.protos import delf_config_pb2
from delf.protos import feature_pb2
from delf.python import box_io
from delf.python import datum_io
from delf.python import delf_v1
from delf.python import feature_aggregation_extractor
from delf.python import feature_aggregation_similarity
from delf.python import feature_extractor
...
...@@ -86,6 +86,9 @@ message DelfConfig {
// Path to DELF model.
optional string model_path = 1; // Required.
// Whether model has been exported using TF version 2+.
optional bool is_tf2_exported = 10 [default = false];
// Image scales to be used.
repeated float image_scales = 2;
...
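The new `is_tf2_exported` flag can simply be toggled on an existing `DelfConfig`. Below is a minimal, hypothetical sketch (not part of this commit) that parses a text-proto config and sets the field, using only the field names shown in the hunk above; the pbtxt file name is illustrative.

from google.protobuf import text_format
from delf.protos import delf_config_pb2

config = delf_config_pb2.DelfConfig()
# 'delf_config_example.pbtxt' is a placeholder name for an existing config file.
with open('delf_config_example.pbtxt', 'r') as f:
  text_format.Parse(f.read(), config)
# Mark the SavedModel as a TF2-style export so downstream code can branch on it.
config.is_tf2_exported = True
print(config.model_path, list(config.image_scales), config.is_tf2_exported)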
...@@ -20,11 +20,14 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import box_io
FLAGS = flags.FLAGS
class BoxesIoTest(tf.test.TestCase):
...@@ -57,8 +60,7 @@ class BoxesIoTest(tf.test.TestCase):
def testWriteAndReadToFile(self):
boxes, scores, class_indices = self._create_data()
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.boxes')
filename = os.path.join(FLAGS.test_tmpdir, 'test.boxes')
box_io.WriteToFile(filename, boxes, scores, class_indices)
data_read = box_io.ReadFromFile(filename)
...@@ -67,8 +69,7 @@ class BoxesIoTest(tf.test.TestCase):
self.assertAllEqual(class_indices, data_read[2])
def testWriteAndReadToFileEmptyFile(self):
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.box')
filename = os.path.join(FLAGS.test_tmpdir, 'test.box')
box_io.WriteToFile(filename, np.array([]), np.array([]), np.array([]))
data_read = box_io.ReadFromFile(filename)
...
...@@ -20,11 +20,14 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import datum_io
FLAGS = flags.FLAGS
class DatumIoTest(tf.test.TestCase):
...@@ -69,8 +72,7 @@ class DatumIoTest(tf.test.TestCase):
def testWriteAndReadToFile(self):
data = np.array([[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
[[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]])
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.datum')
filename = os.path.join(FLAGS.test_tmpdir, 'test.datum')
datum_io.WriteToFile(data, filename)
data_read = datum_io.ReadFromFile(filename)
self.assertAllEqual(data_read, data)
...@@ -84,8 +86,7 @@ class DatumIoTest(tf.test.TestCase):
data_2 = np.array(
[[[255, 0, 5], [10, 300, 0]], [[20, 1, 100], [255, 360, 420]]],
dtype='uint32')
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.datum_pair')
filename = os.path.join(FLAGS.test_tmpdir, 'test.datum_pair')
datum_io.WritePairToFile(data_1, data_2, filename)
data_read_1, data_read_2 = datum_io.ReadPairFromFile(filename)
self.assertAllEqual(data_read_1, data_1)
...
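Both I/O tests above switch from `tf.compat.v1.test.get_temp_dir()` to absl's `FLAGS.test_tmpdir`. A small self-contained sketch of that pattern, with illustrative names only:

import os

from absl import flags
import tensorflow as tf

FLAGS = flags.FLAGS


class ExampleIoTest(tf.test.TestCase):

  def testWriteToTmpdir(self):
    # FLAGS.test_tmpdir is populated by the test runner; no TF1-specific
    # temp-dir helper is needed.
    filename = os.path.join(FLAGS.test_tmpdir, 'example.bin')
    with tf.io.gfile.GFile(filename, 'wb') as f:
      f.write(b'\x00\x01')
    self.assertTrue(tf.io.gfile.exists(filename))


if __name__ == '__main__':
  tf.test.main()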
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DELF model implementation based on the following paper.
Large-Scale Image Retrieval with Attentive Deep Local Features
https://arxiv.org/abs/1612.06321
Please refer to the README.md file for detailed explanations on using the DELF
model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tf_slim import layers
from tf_slim.nets import resnet_v1
from tf_slim.ops.arg_scope import arg_scope
_SUPPORTED_TARGET_LAYER = ['resnet_v1_50/block3', 'resnet_v1_50/block4']
# The variable scope for the attention portion of the model.
_ATTENTION_VARIABLE_SCOPE = 'attention_block'
# The attention_type determines whether the attention based feature aggregation
# is performed on the L2-normalized feature map or on the default feature map
# where L2-normalization is not applied. Note that in both cases, attention
# functions are built on the un-normalized feature map. This is only relevant
# for the training stage.
# Currently supported options are as follows:
# * use_l2_normalized_feature:
# The option use_l2_normalized_feature first applies L2-normalization on the
# feature map and then applies attention based feature aggregation. This
# option is used for the DELF+FT+Att model in the paper.
# * use_default_input_feature:
# The option use_default_input_feature aggregates unnormalized feature map
# directly.
_SUPPORTED_ATTENTION_TYPES = [
'use_l2_normalized_feature', 'use_default_input_feature'
]
# Supported types of non-linearity for the attention score function.
_SUPPORTED_ATTENTION_NONLINEARITY = ['softplus']
class DelfV1(object):
"""Creates a DELF model.
Args:
target_layer_type: The name of target CNN architecture and its layer.
Raises:
ValueError: If an unknown target_layer_type is provided.
"""
def __init__(self, target_layer_type=_SUPPORTED_TARGET_LAYER[0]):
print('Creating model %s ' % target_layer_type)
self._target_layer_type = target_layer_type
if self._target_layer_type not in _SUPPORTED_TARGET_LAYER:
raise ValueError('Unknown model type.')
@property
def target_layer_type(self):
return self._target_layer_type
def _PerformAttention(self,
attention_feature_map,
feature_map,
attention_nonlinear,
kernel=1):
"""Helper function to construct the attention part of the model.
Computes attention score map and aggregates the input feature map based on
the attention score map.
Args:
attention_feature_map: Potentially normalized feature map that will be
aggregated with attention score map.
feature_map: Unnormalized feature map that will be used to compute
attention score map.
attention_nonlinear: Type of non-linearity that will be applied to
attention value.
kernel: Convolutional kernel to use in attention layers (eg: 1, [3, 3]).
Returns:
attention_feat: Aggregated feature vector.
attention_prob: Attention score map after the non-linearity.
attention_score: Attention score map before the non-linearity.
Raises:
ValueError: If unknown attention non-linearity type is provided.
"""
with tf.compat.v1.variable_scope(
'attention', values=[attention_feature_map, feature_map]):
with tf.compat.v1.variable_scope('compute', values=[feature_map]):
activation_fn_conv1 = tf.nn.relu
feature_map_conv1 = layers.conv2d(
feature_map,
512,
kernel,
rate=1,
activation_fn=activation_fn_conv1,
scope='conv1')
attention_score = layers.conv2d(
feature_map_conv1,
1,
kernel,
rate=1,
activation_fn=None,
normalizer_fn=None,
scope='conv2')
# Set activation of conv2 layer of attention model.
with tf.compat.v1.variable_scope(
'merge', values=[attention_feature_map, attention_score]):
if attention_nonlinear not in _SUPPORTED_ATTENTION_NONLINEARITY:
raise ValueError('Unknown attention non-linearity.')
if attention_nonlinear == 'softplus':
with tf.compat.v1.variable_scope(
'softplus_attention',
values=[attention_feature_map, attention_score]):
attention_prob = tf.nn.softplus(attention_score)
attention_feat = tf.reduce_mean(
tf.multiply(attention_feature_map, attention_prob), [1, 2])
attention_feat = tf.expand_dims(tf.expand_dims(attention_feat, 1), 2)
return attention_feat, attention_prob, attention_score
def _GetAttentionSubnetwork(
self,
feature_map,
end_points,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
reuse=False):
"""Constructs the part of the model performing attention.
Args:
feature_map: A tensor of size [batch, height, width, channels]. Usually it
corresponds to the output feature map of a fully-convolutional network.
end_points: Set of activations of the network constructed so far.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
reuse: Whether or not the layer and its variables should be reused.
Returns:
prelogits: A tensor of size [batch, 1, 1, channels].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
end_points: Updated set of activations, for external use.
Raises:
ValueError: If unknown attention_type is provided.
"""
with tf.compat.v1.variable_scope(
_ATTENTION_VARIABLE_SCOPE,
values=[feature_map, end_points],
reuse=reuse):
if attention_type not in _SUPPORTED_ATTENTION_TYPES:
raise ValueError('Unknown attention_type.')
if attention_type == 'use_l2_normalized_feature':
attention_feature_map = tf.nn.l2_normalize(
feature_map, 3, name='l2_normalize')
elif attention_type == 'use_default_input_feature':
attention_feature_map = feature_map
end_points['attention_feature_map'] = attention_feature_map
attention_outputs = self._PerformAttention(attention_feature_map,
feature_map,
attention_nonlinear, kernel)
prelogits, attention_prob, attention_score = attention_outputs
end_points['prelogits'] = prelogits
end_points['attention_prob'] = attention_prob
end_points['attention_score'] = attention_score
return prelogits, attention_prob, attention_score, end_points
def GetResnet50Subnetwork(self,
images,
is_training=False,
global_pool=False,
reuse=None):
"""Constructs resnet_v1_50 part of the DELF model.
Args:
images: A tensor of size [batch, height, width, channels].
is_training: Whether or not the model is in training mode.
global_pool: If True, perform global average pooling after feature
extraction. This may be useful for DELF's descriptor fine-tuning stage.
reuse: Whether or not the layer and its variables should be reused.
Returns:
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
If global_pool is True, height_out = width_out = 1.
end_points: A set of activations for external use.
"""
block = resnet_v1.resnet_v1_block
blocks = [
block('block1', base_depth=64, num_units=3, stride=2),
block('block2', base_depth=128, num_units=4, stride=2),
block('block3', base_depth=256, num_units=6, stride=2),
]
if self._target_layer_type == 'resnet_v1_50/block4':
blocks.append(block('block4', base_depth=512, num_units=3, stride=1))
net, end_points = resnet_v1.resnet_v1(
images,
blocks,
is_training=is_training,
global_pool=global_pool,
reuse=reuse,
scope='resnet_v1_50')
return net, end_points
def GetAttentionPrelogit(
self,
images,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False,
use_batch_norm=True):
"""Constructs attention model on resnet_v1_50.
Args:
images: A tensor of size [batch, height, width, channels].
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the attention part of the model is in
training mode.
reuse: Whether or not the layer and its variables should be reused.
use_batch_norm: Whether or not to use batch normalization.
Returns:
prelogits: A tensor of size [batch, 1, 1, channels].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
end_points: Set of activations for external use.
"""
# Construct Resnet50 features.
with arg_scope(resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
_, end_points = self.GetResnet50Subnetwork(
images, is_training=training_resnet, reuse=reuse)
feature_map = end_points[self._target_layer_type]
# Construct attention subnetwork on top of features.
with arg_scope(
resnet_v1.resnet_arg_scope(
weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
with arg_scope([layers.batch_norm], is_training=training_attention):
(prelogits, attention_prob, attention_score,
end_points) = self._GetAttentionSubnetwork(
feature_map,
end_points,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
reuse=reuse)
return prelogits, attention_prob, attention_score, feature_map, end_points
def _GetAttentionModel(
self,
images,
num_classes,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False):
"""Constructs attention model on resnet_v1_50.
Args:
images: A tensor of size [batch, height, width, channels]
num_classes: The number of output classes.
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the attention part of the model is in
training mode.
reuse: Whether or not the layer and its variables should be reused.
Returns:
logits: A tensor of size [batch, num_classes].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
"""
attention_feat, attention_prob, attention_score, feature_map, _ = (
self.GetAttentionPrelogit(
images,
weight_decay,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
training_resnet=training_resnet,
training_attention=training_attention,
reuse=reuse))
with arg_scope(
resnet_v1.resnet_arg_scope(
weight_decay=weight_decay, batch_norm_scale=True)):
with arg_scope([layers.batch_norm], is_training=training_attention):
with tf.compat.v1.variable_scope(
_ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
logits = layers.conv2d(
attention_feat,
num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
scope='logits')
logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
return logits, attention_prob, attention_score, feature_map
def AttentionModel(self,
images,
num_classes,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False):
"""Constructs attention based classification model for training.
Args:
images: A tensor of size [batch, height, width, channels]
num_classes: The number of output classes.
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the model is in training mode. Note
that this function only supports training the attention part of the
model, ie, the feature extraction layers are not trained.
reuse: Whether or not the layer and its variables should be reused.
Returns:
logit: A tensor of size [batch, num_classes]
attention: Attention score after the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
Raises:
ValueError: If unknown target_layer_type is provided.
"""
if 'resnet_v1_50' in self._target_layer_type:
net_outputs = self._GetAttentionModel(
images,
num_classes,
weight_decay,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
training_resnet=training_resnet,
training_attention=training_attention,
reuse=reuse)
logits, attention, _, feature_map = net_outputs
else:
raise ValueError('Unknown target_layer_type.')
return logits, attention, feature_map
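The file above is the TF1/tf-slim `DelfV1` implementation whose `delf.python.delf_v1` import is dropped from the extraction script earlier in this commit. For reference, a minimal graph-mode sketch of how such a model was typically driven; the input shape, dummy batch, and lack of checkpoint restoration are illustrative assumptions, not part of the commit:

import numpy as np
import tensorflow as tf

from delf.python import delf_v1

tf.compat.v1.disable_eager_execution()

# Placeholder for a batch of RGB images; 321x321 is only an example size.
images = tf.compat.v1.placeholder(tf.float32, shape=[None, 321, 321, 3])
model = delf_v1.DelfV1(target_layer_type='resnet_v1_50/block3')

# Aggregated prelogits plus the attention maps and the raw feature map.
prelogits, attention_prob, attention_score, feature_map, _ = (
    model.GetAttentionPrelogit(
        images,
        attention_nonlinear='softplus',
        attention_type='use_l2_normalized_feature',
        training_resnet=False,
        training_attention=False))

with tf.compat.v1.Session() as sess:
  # Random initialization just to exercise the graph; real use would restore
  # a trained checkpoint instead.
  sess.run(tf.compat.v1.global_variables_initializer())
  out = sess.run(feature_map,
                 feed_dict={images: np.zeros([1, 321, 321, 3], np.float32)})
  print(out.shape)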
...@@ -124,71 +124,70 @@ def ExtractAggregatedRepresentationsToFiles(image_names, features_dir,
if not tf.io.gfile.exists(output_aggregation_dir):
tf.io.gfile.makedirs(output_aggregation_dir)
with tf.compat.v1.Session() as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
start = time.clock()
start = time.time()
for i in range(num_images):
if i == 0:
print('Starting to extract aggregation from images...')
elif i % _STATUS_CHECK_ITERATIONS == 0:
elapsed = (time.clock() - start)
elapsed = (time.time() - start)
print('Processing image %d out of %d, last %d '
'images took %f seconds' %
(i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
start = time.clock()
start = time.time()
image_name = image_names[i]
# Compose output file name, skip extraction for this image if it already
# exists.
output_aggregation_filename = os.path.join(output_aggregation_dir,
image_name + output_extension)
if tf.io.gfile.exists(output_aggregation_filename):
print('Skipping %s' % image_name)
continue
# Load DELF features.
if config.use_regional_aggregation:
if not mapping_path:
raise ValueError(
'Requested regional aggregation, but mapping_path was not '
'provided')
descriptors_list = []
num_features_per_box = []
for box_feature_file in images_to_box_feature_files[image_name]:
delf_filename = os.path.join(features_dir,
box_feature_file + _DELF_EXTENSION)
_, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
# If `box_descriptors` is empty, reshape it such that it can be
# concatenated with other descriptors.
if not box_descriptors.shape[0]:
box_descriptors = np.reshape(box_descriptors,
[0, config.feature_dimensionality])
descriptors_list.append(box_descriptors)
num_features_per_box.append(box_descriptors.shape[0])
descriptors = np.concatenate(descriptors_list)
else:
input_delf_filename = os.path.join(features_dir,
image_name + _DELF_EXTENSION)
_, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
# If `descriptors` is empty, reshape it to avoid extraction failure.
if not descriptors.shape[0]:
descriptors = np.reshape(descriptors,
[0, config.feature_dimensionality])
num_features_per_box = None
# Extract and save aggregation. If using VLAD, only
# `aggregated_descriptors` needs to be saved.
(aggregated_descriptors,
feature_visual_words) = extractor.Extract(descriptors,
num_features_per_box)
if config.aggregation_type == _VLAD:
datum_io.WriteToFile(aggregated_descriptors,
output_aggregation_filename)
else:
datum_io.WritePairToFile(aggregated_descriptors,
feature_visual_words.astype('uint32'),
output_aggregation_filename)
...@@ -131,7 +131,7 @@ def main(argv):
delf_dataset = tf.data.Dataset.from_tensor_slices((features_placeholder))
delf_dataset = delf_dataset.shuffle(1000).batch(
features_for_clustering.shape[0])
iterator = delf_dataset.make_initializable_iterator()
iterator = tf.compat.v1.data.make_initializable_iterator(delf_dataset)
def _initializer_fn(sess):
"""Initialize dataset iterator, feed in the data."""
...
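The one-line change above moves from the removed `Dataset.make_initializable_iterator()` instance method to the `tf.compat.v1.data` helper, which takes the dataset as an argument. A small self-contained sketch of the same pattern; values and shapes are illustrative:

import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

features = np.random.rand(8, 4).astype(np.float32)
features_placeholder = tf.placeholder(tf.float32, features.shape)
dataset = tf.data.Dataset.from_tensor_slices(features_placeholder).batch(4)

# Equivalent of tf.compat.v1.data.make_initializable_iterator(dataset).
iterator = tf.data.make_initializable_iterator(dataset)
next_batch = iterator.get_next()

with tf.Session() as sess:
  sess.run(iterator.initializer, feed_dict={features_placeholder: features})
  print(sess.run(next_batch).shape)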
...@@ -102,7 +102,15 @@ def MakeExtractor(config):
Returns:
Function that receives an image and returns features.
Raises:
ValueError: if config is invalid.
"""
# Assert the configuration
if config.use_global_features and hasattr(
config, 'is_tf2_exported') and config.is_tf2_exported:
raise ValueError('use_global_features is incompatible with is_tf2_exported')
# Load model.
model = tf.saved_model.load(config.model_path)
...@@ -178,7 +186,8 @@ def MakeExtractor(config):
else:
global_pca_parameters['variances'] = None
model = model.prune(feeds=feeds, fetches=fetches)
if not hasattr(config, 'is_tf2_exported') or not config.is_tf2_exported:
model = model.prune(feeds=feeds, fetches=fetches)
def ExtractorFn(image, resize_factor=1.0):
"""Receives an image and returns DELF global and/or local features.
...@@ -197,7 +206,6 @@ def MakeExtractor(config):
features (key 'local_features' mapping to a dict with keys 'locations',
'descriptors', 'scales', 'attention').
"""
resized_image, scale_factors = ResizeImage(
image, config, resize_factor=resize_factor)
...@@ -224,8 +232,20 @@ def MakeExtractor(config):
output = None
if config.use_local_features:
output = model(image_tensor, image_scales_tensor, score_threshold_tensor,
max_feature_num_tensor)
if hasattr(config, 'is_tf2_exported') and config.is_tf2_exported:
predict = model.signatures['serving_default']
output_dict = predict(
input_image=image_tensor,
input_scales=image_scales_tensor,
input_max_feature_num=max_feature_num_tensor,
input_abs_thres=score_threshold_tensor)
output = [
output_dict['boxes'], output_dict['features'],
output_dict['scales'], output_dict['scores']
]
else:
output = model(image_tensor, image_scales_tensor,
score_threshold_tensor, max_feature_num_tensor)
else:
output = model(image_tensor, image_scales_tensor)
...
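For TF2-exported models the extractor stops pruning the SavedModel and instead calls its `serving_default` signature directly. A hedged sketch of that call path, using the input/output keys from the hunk above; the model path, dtypes, and scale values are assumptions:

import tensorflow as tf

# Hypothetical path to a TF2-exported DELF SavedModel.
model = tf.saved_model.load('/tmp/delf_tf2_model')
predict = model.signatures['serving_default']

# Dummy image; real code would pass the resized input image tensor.
image_tensor = tf.zeros([480, 640, 3], dtype=tf.uint8)
output_dict = predict(
    input_image=image_tensor,
    input_scales=tf.constant([0.7071, 1.0, 1.4142], dtype=tf.float32),
    input_max_feature_num=tf.constant(1000, dtype=tf.int32),
    input_abs_thres=tf.constant(100.0, dtype=tf.float32))

# The same four outputs the extractor unpacks: boxes, features, scales, scores.
locations = output_dict['boxes'].numpy()
descriptors = output_dict['features'].numpy()
scales = output_dict['scales'].numpy()
scores = output_dict['scores'].numpy()
print(locations.shape, descriptors.shape, scales.shape, scores.shape)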
...@@ -40,7 +40,6 @@ class ExtractAggregatedRepresentation(object):
"""Class for extraction of aggregated local feature representation.
Args:
sess: TensorFlow session to use.
aggregation_config: AggregationConfig object defining type of aggregation to
use.
...@@ -48,65 +47,28 @@ class ExtractAggregatedRepresentation(object):
ValueError: If aggregation type is invalid.
"""
def __init__(self, sess, aggregation_config):
def __init__(self, aggregation_config):
self._sess = sess
self._codebook_size = aggregation_config.codebook_size
self._feature_dimensionality = aggregation_config.feature_dimensionality
self._aggregation_type = aggregation_config.aggregation_type
self._feature_batch_size = aggregation_config.feature_batch_size
self._codebook_path = aggregation_config.codebook_path
self._use_regional_aggregation = aggregation_config.use_regional_aggregation
self._use_l2_normalization = aggregation_config.use_l2_normalization
self._num_assignments = aggregation_config.num_assignments
# Inputs to extraction function.
self._features = tf.compat.v1.placeholder(tf.float32, [None, None])
self._num_features_per_region = tf.compat.v1.placeholder(tf.int32, [None])
# Load codebook into graph.
codebook = tf.compat.v1.get_variable(
"codebook",
shape=[
aggregation_config.codebook_size,
aggregation_config.feature_dimensionality
])
tf.compat.v1.train.init_from_checkpoint(
aggregation_config.codebook_path, {_CLUSTER_CENTERS_VAR_NAME: codebook})
# Construct extraction graph based on desired options.
if self._aggregation_type == _VLAD:
# Feature visual words are unused in the case of VLAD, so just return
# dummy constant.
self._feature_visual_words = tf.constant(-1, dtype=tf.int32)
if aggregation_config.use_regional_aggregation:
self._aggregated_descriptors = self._ComputeRvlad(
self._features,
self._num_features_per_region,
codebook,
use_l2_normalization=aggregation_config.use_l2_normalization,
num_assignments=aggregation_config.num_assignments)
else:
self._aggregated_descriptors = self._ComputeVlad(
self._features,
codebook,
use_l2_normalization=aggregation_config.use_l2_normalization,
num_assignments=aggregation_config.num_assignments)
elif (self._aggregation_type == _ASMK or
self._aggregation_type == _ASMK_STAR):
if aggregation_config.use_regional_aggregation:
(self._aggregated_descriptors,
self._feature_visual_words) = self._ComputeRasmk(
self._features,
self._num_features_per_region,
codebook,
num_assignments=aggregation_config.num_assignments)
else:
(self._aggregated_descriptors,
self._feature_visual_words) = self._ComputeAsmk(
self._features,
codebook,
num_assignments=aggregation_config.num_assignments)
else:
raise ValueError("Invalid aggregation type: %d" % self._aggregation_type)
# Initialize variables in the TF graph.
sess.run(tf.compat.v1.global_variables_initializer())
if self._aggregation_type not in [_VLAD, _ASMK, _ASMK_STAR]:
raise ValueError("Invalid aggregation type: %d" % self._aggregation_type)
# Load codebook
codebook = tf.Variable(
tf.zeros([self._codebook_size, self._feature_dimensionality],
dtype=tf.float32),
name=_CLUSTER_CENTERS_VAR_NAME)
ckpt = tf.train.Checkpoint(codebook=codebook)
ckpt.restore(self._codebook_path)
self._codebook = codebook
def Extract(self, features, num_features_per_region=None):
"""Extracts aggregated representation.
...@@ -127,10 +89,13 @@ class ExtractAggregatedRepresentation(object):
Raises:
ValueError: If inputs are misconfigured.
"""
features = tf.cast(features, dtype=tf.float32)
if num_features_per_region is None:
# Use dummy value since it is unused.
num_features_per_region = []
else:
num_features_per_region = tf.cast(num_features_per_region, dtype=tf.int32)
if len(num_features_per_region
) and sum(num_features_per_region) != features.shape[0]:
raise ValueError(
...@@ -138,12 +103,41 @@ class ExtractAggregatedRepresentation(object):
"features.shape[0] are different: %d vs %d" %
(sum(num_features_per_region), features.shape[0]))
aggregated_descriptors, feature_visual_words = self._sess.run(
[self._aggregated_descriptors, self._feature_visual_words],
feed_dict={
self._features: features,
self._num_features_per_region: num_features_per_region
})
# Extract features based on desired options.
if self._aggregation_type == _VLAD:
# Feature visual words are unused in the case of VLAD, so just return
# dummy constant.
feature_visual_words = tf.constant(-1, dtype=tf.int32)
if self._use_regional_aggregation:
aggregated_descriptors = self._ComputeRvlad(
features,
num_features_per_region,
self._codebook,
use_l2_normalization=self._use_l2_normalization,
num_assignments=self._num_assignments)
else:
aggregated_descriptors = self._ComputeVlad(
features,
self._codebook,
use_l2_normalization=self._use_l2_normalization,
num_assignments=self._num_assignments)
elif (self._aggregation_type == _ASMK or
self._aggregation_type == _ASMK_STAR):
if self._use_regional_aggregation:
(aggregated_descriptors,
feature_visual_words) = self._ComputeRasmk(
features,
num_features_per_region,
self._codebook,
num_assignments=self._num_assignments)
else:
(aggregated_descriptors,
feature_visual_words) = self._ComputeAsmk(
features,
self._codebook,
num_assignments=self._num_assignments)
feature_visual_words_output = feature_visual_words.numpy()
# If using ASMK*/RASMK*, binarize the aggregated descriptors.
if self._aggregation_type == _ASMK_STAR:
...@@ -151,9 +145,11 @@ class ExtractAggregatedRepresentation(object):
aggregated_descriptors, [-1, self._feature_dimensionality])
packed_descriptors = np.packbits(
reshaped_aggregated_descriptors > 0, axis=1)
aggregated_descriptors = np.reshape(packed_descriptors, [-1])
aggregated_descriptors_output = np.reshape(packed_descriptors, [-1])
else:
aggregated_descriptors_output = aggregated_descriptors.numpy()
return aggregated_descriptors, feature_visual_words
return aggregated_descriptors_output, feature_visual_words_output
def _ComputeVlad(self,
features,
...@@ -268,11 +264,12 @@ class ExtractAggregatedRepresentation(object):
output_vlad: VLAD descriptor updated to take into account contribution
from ind-th feature.
"""
return ind + 1, tf.tensor_scatter_nd_add(
vlad, tf.expand_dims(selected_visual_words[ind], axis=1),
tf.tile(
tf.expand_dims(features[ind], axis=0), [num_assignments, 1]) -
tf.gather(codebook, selected_visual_words[ind]))
diff = tf.tile(
tf.expand_dims(features[ind],
axis=0), [num_assignments, 1]) - tf.gather(
codebook, selected_visual_words[ind])
return ind + 1, tf.tensor_scatter_nd_add(
vlad, tf.expand_dims(selected_visual_words[ind], axis=1), diff)
ind_vlad = tf.constant(0, dtype=tf.int32)
keep_going = lambda j, vlad: tf.less(j, num_features)
...
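With the session removed, the aggregation extractor now restores the codebook with `tf.train.Checkpoint` and runs `Extract()` eagerly. A minimal usage sketch under assumed config values; the codebook checkpoint is presumed to exist, and `_VLAD` is the module-level constant referenced in the diff above:

import numpy as np

from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor

config = aggregation_config_pb2.AggregationConfig()
config.codebook_size = 5
config.feature_dimensionality = 2
# The VLAD enum value; accessed via the module constant used in the diff.
config.aggregation_type = feature_aggregation_extractor._VLAD
config.codebook_path = '/tmp/test_codebook'  # hypothetical checkpoint prefix
config.num_assignments = 1

# No tf.Session is needed any more; the codebook is restored inside __init__.
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(config)
features = np.array([[1.0, 0.0], [-1.0, 1.0]], dtype=np.float32)
vlad, _ = extractor.Extract(features)
print(vlad.shape)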
...@@ -20,12 +20,15 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor
FLAGS = flags.FLAGS
class FeatureAggregationTest(tf.test.TestCase):
...@@ -35,17 +38,15 @@ class FeatureAggregationTest(tf.test.TestCase):
Args:
checkpoint_path: Directory where codebook is saved to.
"""
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
codebook = tf.Variable(
[[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
name='clusters')
saver = tf.compat.v1.train.Saver([codebook])
sess.run(tf.compat.v1.global_variables_initializer())
saver.save(sess, checkpoint_path)
codebook = tf.Variable(
[[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
name='clusters',
dtype=tf.float32)
ckpt = tf.train.Checkpoint(codebook=codebook)
ckpt.write(checkpoint_path)
def setUp(self):
self._codebook_path = os.path.join(tf.compat.v1.test.get_temp_dir(),
'test_codebook')
self._codebook_path = os.path.join(FLAGS.test_tmpdir, 'test_codebook')
self._CreateCodebook(self._codebook_path)
def testComputeNormalizedVladWorks(self):
...@@ -61,10 +62,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [
...@@ -90,10 +90,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.feature_batch_size = 2
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [
...@@ -118,10 +117,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.5, 1.0, 1.0]
...@@ -144,10 +142,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 3
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, -0.5, 0.5, 0.0, 0.0]
...@@ -168,10 +165,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.codebook_path = self._codebook_path
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = np.zeros([10], dtype=float)
...@@ -197,10 +193,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -228,10 +223,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -256,10 +250,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = np.zeros([10], dtype=float)
...@@ -286,10 +279,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -318,10 +310,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
...@@ -349,14 +340,13 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
with self.assertRaisesRegex(
ValueError,
r'Incorrect arguments: sum\(num_features_per_region\) and '
r'features.shape\[0\] are different'):
extractor.Extract(features, num_features_per_region)
def testComputeAsmkWorks(self):
# Construct inputs.
...@@ -370,10 +360,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk = [-0.707107, 0.707107, 0.707107, 0.707107]
...@@ -395,10 +384,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk_star, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk_star = [64, 192]
...@@ -420,10 +408,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 3
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk = [0.707107, 0.707107, 0.0, 1.0, -0.707107, 0.707107]
...@@ -448,10 +435,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rasmk, visual_words = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rasmk = [-0.707107, 0.707107, 0.361261, 0.932465]
...@@ -476,11 +462,10 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rasmk_star, visual_words = extractor.Extract(features,
num_features_per_region)
# Define expected results.
exp_rasmk_star = [64, 192]
...@@ -500,10 +485,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
if __name__ == '__main__':
...