Commit 31ca3b97 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar

resolve merge conflicts

parents 3e9d886d 7fcd7cba
......@@ -269,8 +269,7 @@ class ExtractAggregatedRepresentation(object):
axis=0), [num_assignments, 1]) - tf.gather(
codebook, selected_visual_words[ind])
return ind + 1, tf.tensor_scatter_nd_add(
vlad, tf.expand_dims(selected_visual_words[ind], axis=1),
tf.cast(diff, dtype=tf.float32))
vlad, tf.expand_dims(selected_visual_words[ind], axis=1), diff)
ind_vlad = tf.constant(0, dtype=tf.int32)
keep_going = lambda j, vlad: tf.less(j, num_features)
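The hunk above accumulates per-feature residuals into the VLAD matrix with `tf.tensor_scatter_nd_add`. A minimal standalone sketch of that aggregation pattern (toy codebook, one hard assignment per feature, not the library's actual loop or shapes):
```python
import tensorflow as tf

# Toy VLAD-style residual aggregation: add each feature's residual
# (feature - assigned centroid) into the row of its visual word.
codebook = tf.constant([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])       # [num_centroids, depth]
features = tf.constant([[0.2, 0.1], [1.3, 0.9], [1.9, 2.2]])       # [num_features, depth]
assignments = tf.constant([0, 1, 2], dtype=tf.int32)               # nearest centroid per feature

residuals = features - tf.gather(codebook, assignments)            # [num_features, depth]
vlad = tf.zeros_like(codebook)
# Scatter-add each residual into the row of its assigned visual word.
vlad = tf.tensor_scatter_nd_add(
    vlad, tf.expand_dims(assignments, axis=1), residuals)
print(vlad.numpy())
```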
......@@ -396,9 +395,7 @@ class ExtractAggregatedRepresentation(object):
visual_words = tf.reshape(
tf.where(
tf.greater(
per_centroid_norms,
tf.cast(tf.sqrt(_NORM_SQUARED_TOLERANCE), dtype=tf.float32))),
tf.greater(per_centroid_norms, tf.sqrt(_NORM_SQUARED_TOLERANCE))),
[-1])
per_centroid_normalized_vector = tf.math.l2_normalize(
......
# DELF Training Instructions
This README documents the end-to-end process for training a landmark detection
and retrieval model using the DELF library on the
[Google Landmarks Dataset v2](https://github.com/cvdfoundation/google-landmark)
(GLDv2). This can be achieved by following these steps:
1. Install the DELF Python library.
2. Download the raw images of the GLDv2 dataset.
3. Prepare the training data.
4. Run the training.
The next sections will cover each of these steps in greater detail.
## Prerequisites
Clone the [TensorFlow Model Garden](https://github.com/tensorflow/models)
repository and move into the `models/research/delf/delf/python/training` folder.
```
git clone https://github.com/tensorflow/models.git
cd models/research/delf/delf/python/training
......@@ -20,109 +24,245 @@ cd models/research/delf/delf/python/training
## Install the DELF Library
The DELF Python library can be installed by running the [`install_delf.sh`](./install_delf.sh)
script using the command:
```
bash install_delf.sh
```
The script installs both the DELF library and its dependencies in the following sequence:
* Install TensorFlow 2.2 and TensorFlow 2.2 for GPU.
* Install the [TF-Slim](https://github.com/google-research/tf-slim) library from source.
* Download [protoc](https://github.com/protocolbuffers/protobuf) and compile the DELF Protocol
Buffers.
* Install the matplotlib, numpy, scikit-image, scipy and python3-tk Python libraries.
* Install the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) from the cloned TensorFlow Model Garden repository.
* Install the DELF package.
*Please note that the current installation only works on 64-bit Linux architectures due to the
`protoc` binary downloaded by the installation script. If you wish to install the DELF library on
other architectures, please update the [`install_delf.sh`](./install_delf.sh) script by referencing
the desired `protoc` [binary release](https://github.com/protocolbuffers/protobuf/releases).*
To be able to use this code, please follow
[these instructions](../../../INSTALL_INSTRUCTIONS.md) to properly install the
DELF library.
## Download the GLDv2 Training Data
The [GLDv2](https://github.com/cvdfoundation/google-landmark) images are grouped
in 3 datasets: TRAIN, INDEX, TEST. Images in each dataset are grouped into
`*.tar` files and individually referenced in `*.csv` files containing training
metadata and licensing information. The number of `*.tar` files per dataset is
as follows:
* TRAIN: 500 files.
* INDEX: 100 files.
* TEST: 20 files.
To download the GLDv2 images, run the
[`download_dataset.sh`](./download_dataset.sh) script like in the following
example:
```
bash download_dataset.sh 500 100 20
```
The script takes the following parameters, in order:
* The number of image files from the TRAIN dataset to download (maximum 500).
* The number of image files from the INDEX dataset to download (maximum 100).
* The number of image files from the TEST dataset to download (maximum 20).
The script downloads the GLDv2 images under the following directory structure:
* gldv2_dataset/
* train/ - Contains raw images from the TRAIN dataset.
* index/ - Contains raw images from the INDEX dataset.
* test/ - Contains raw images from the TEST dataset.
Each of the three folders `gldv2_dataset/train/`, `gldv2_dataset/index/` and
`gldv2_dataset/test/` contains the following:
* The downloaded `*.tar` files.
* The corresponding MD5 checksum files, `*.txt`.
* The unpacked content of the downloaded files. (*Images are organized in
folders and subfolders based on the first, second and third character in
their file name.*)
* The CSV files containing training and licensing metadata of the downloaded
images.
*Please note that due to the large size of the GLDv2 dataset, the download can
take up to 12 hours and up to 1 TB of disk space. In order to save bandwidth and
disk space, you may want to start by downloading only the TRAIN dataset, the
only one required for the training, thus saving approximately 95 GB, the
equivalent of the INDEX and TEST datasets. To further save disk space, the
`*.tar` files can be deleted after downloading and unpacking them.*
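As noted above, unpacked images are nested in folders named after the first, second and third character of the file name. A small sketch of how a path can be resolved under that layout; the `.jpg` extension and the image id are assumptions used only for illustration:
```python
import os


def gldv2_image_path(dataset_dir, split, image_id):
  """Resolve the expected path of an unpacked GLDv2 image.

  Images live in nested folders named after the first, second and third
  character of the file name, e.g. <dataset_dir>/train/a/b/c/abc....jpg.
  The '.jpg' extension is an assumption for this sketch.
  """
  return os.path.join(dataset_dir, split,
                      image_id[0], image_id[1], image_id[2],
                      image_id + '.jpg')


# Hypothetical image id, shown only to illustrate the layout.
print(gldv2_image_path('gldv2_dataset', 'train', '6e158a47eb2ca3f6'))
# -> gldv2_dataset/train/6/e/1/6e158a47eb2ca3f6.jpg
```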
## Prepare the Data for Training
Preparing the data for training consists of creating
[TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) files from
the raw GLDv2 images grouped into TRAIN and VALIDATION splits. The training set
produced contains only the *clean* subset of the GLDv2 dataset. The
[CVPR'20 paper](https://arxiv.org/abs/2004.01804) introducing the GLDv2 dataset
contains a detailed description of the *clean* subset.
Generating the TFRecord files containing the TRAIN and VALIDATION splits of the
*clean* GLDv2 subset can be achieved by running the
[`build_image_dataset.py`](./build_image_dataset.py) script. Assuming that the
GLDv2 images have been downloaded to the `gldv2_dataset` folder, the script can
be run as follows:
```
python3 build_image_dataset.py \
--train_csv_path=gldv2_dataset/train/train.csv \
--train_clean_csv_path=gldv2_dataset/train/train_clean.csv \
--train_directory=gldv2_dataset/train/*/*/*/ \
--output_directory=gldv2_dataset/tfrecord/ \
--num_shards=128 \
--generate_train_validation_splits \
--validation_split_size=0.2
```
*Please refer to the source code of the
[`build_image_dataset.py`](./build_image_dataset.py) script for a detailed
description of its parameters.*
The TFRecord files written in the `OUTPUT_DIRECTORY` will be prefixed as
follows:
* TRAIN split: `train-*`
* VALIDATION split: `validation-*`
The same script can be used to generate TFRecord files for the TEST split for
post-training evaluation purposes. This can be achieved by adding the
parameters:
```
--test_csv_path=gldv2_dataset/train/test.csv \
--test_directory=gldv2_dataset/test/*/*/*/ \
```
In this scenario, the TFRecord files of the TEST split written in the
`OUTPUT_DIRECTORY` will be named according to the pattern `test-*`.
*Please note that due to the large size of the GLDv2 dataset, the generation of
the TFRecord files can take up to 12 hours and up to 500 GB of disk space.*
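Once the script finishes, a quick sanity check of the generated shards can be done with a few lines of TensorFlow; this sketch only assumes the `train-*` and `validation-*` prefixes described above, and counting records may take a while for full-size shards:
```python
import tensorflow as tf

# Count the TFRecord shards and records produced by build_image_dataset.py.
output_dir = 'gldv2_dataset/tfrecord'
for prefix in ('train', 'validation'):
  shards = tf.io.gfile.glob('%s/%s-*' % (output_dir, prefix))
  num_records = sum(1 for _ in tf.data.TFRecordDataset(shards))
  print('%s: %d shards, %d records' % (prefix, len(shards), num_records))
```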
## Running the Training
For the training to converge faster, it is possible to initialize the ResNet
backbone with the weights of a pretrained ImageNet model. The ImageNet
checkpoint is available at the following location:
[`http://storage.googleapis.com/delf/resnet50_imagenet_weights.tar.gz`](http://storage.googleapis.com/delf/resnet50_imagenet_weights.tar.gz).
To download and unpack it run the following commands on a Linux box:
```
curl -Os http://storage.googleapis.com/delf/resnet50_imagenet_weights.tar.gz
tar -xzvf resnet50_imagenet_weights.tar.gz
```
Assuming the TFRecord files were generated in the `gldv2_dataset/tfrecord/`
directory, running the following command should start training a model and
output the results in the `gldv2_training` directory:
```
python3 train.py \
--train_file_pattern=gldv2_dataset/tfrecord/train* \
--validation_file_pattern=gldv2_dataset/tfrecord/validation* \
--imagenet_checkpoint=resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5 \
--dataset_version=gld_v2_clean \
--logdir=gldv2_training/
```
On a multi-GPU machine, the batch size can be increased to speed up training
using the `--batch_size` parameter. On a machine with 8 Tesla P100 GPUs, you can
set the batch size to `256`:
```
--batch_size=256
```
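The training script distributes the dataset with a `tf.distribute` strategy (see the `experimental_distribute_dataset` calls later in this diff); the concrete strategy type is not shown in this excerpt, so the mirrored strategy below is an assumption used only to illustrate how a global batch size maps to per-replica batches:
```python
import tensorflow as tf

# Assumes a MirroredStrategy; the actual strategy used by train.py is not
# shown in this excerpt.
strategy = tf.distribute.MirroredStrategy()
global_batch_size = 256
per_replica_batch_size = global_batch_size // strategy.num_replicas_in_sync
print('replicas: %d, per-replica batch size: %d' %
      (strategy.num_replicas_in_sync, per_replica_batch_size))
```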
## Exporting the Trained Model
Assuming the training output (the TensorFlow checkpoint) is in the
`gldv2_training` directory, the following commands export the model.
### DELF local feature model
```
python3 model/export_model.py \
--ckpt_path=gldv2_training/delf_weights \
--export_path=gldv2_model_local \
--block3_strides
```
### Kaggle-compatible global feature model
To export a global feature model in the format required by the
[2020 Landmark Retrieval challenge](https://www.kaggle.com/c/landmark-retrieval-2020),
you can use the following command:
```
python3 model/export_global_model.py \
--ckpt_path=gldv2_training/delf_weights \
--export_path=gldv2_model_global \
--input_scales_list=0.70710677,1.0,1.4142135 \
--multi_scale_pool_type=sum \
--normalize_global_descriptor
```
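Since `export_global_model.py` saves the module with a `serving_default` signature (see the script later in this diff), the exported global-feature model can be queried roughly as in the sketch below. The zero image is a placeholder, and with `--multi_scale_pool_type=sum` the output key is `global_descriptor`:
```python
import tensorflow as tf

# Load the SavedModel exported by model/export_global_model.py.
model = tf.saved_model.load('gldv2_model_global')
serving_fn = model.signatures['serving_default']

# The exported signature expects a single uint8 image of shape [H, W, 3].
image = tf.zeros([321, 321, 3], dtype=tf.uint8)  # placeholder image
outputs = serving_fn(input_image=image)

# With --multi_scale_pool_type=sum, a pooled 1D descriptor is returned.
print(outputs['global_descriptor'].shape)
```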
## Testing the Trained Model
After the trained model has been exported, it can be used to extract DELF
features from two images of the same landmark and to match the two images based
on the extracted features, validating that they depict the same landmark.
Start by downloading the Oxford buildings dataset:
```
mkdir data && cd data
wget http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/oxbuild_images.tgz
mkdir oxford5k_images oxford5k_features
tar -xvzf oxbuild_images.tgz -C oxford5k_images/
cd ../
echo data/oxford5k_images/hertford_000056.jpg >> list_images.txt
echo data/oxford5k_images/oxford_000317.jpg >> list_images.txt
```
Make a copy of the
[`delf_config_example.pbtxt`](../examples/delf_config_example.pbtxt) protobuf
file, which configures the DELF feature extraction. Update the file by making
the following changes:
* set the `model_path` attribute to the directory containing the exported
  model, `gldv2_model_local` in this example
* add the attribute `is_tf2_exported` with the value `true` at the root level
* set the `use_pca` attribute inside `delf_local_config` to `false`
The resulting file should resemble the following:
```
model_path: "gldv2_model_local"
image_scales: .25
image_scales: .3536
image_scales: .5
image_scales: .7071
image_scales: 1.0
image_scales: 1.4142
image_scales: 2.0
is_tf2_exported: true
delf_local_config {
use_pca: false
max_feature_num: 1000
score_threshold: 100.0
}
```
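For reference, `extract_features.py` parses this text-format file into a `DelfConfig` protocol buffer. A sketch of that parsing step, using the module and message names as they appear in the DELF examples (treat them as assumptions if your install differs):
```python
from google.protobuf import text_format
from delf import delf_config_pb2

# Parse the text-format DELF configuration edited above.
config = delf_config_pb2.DelfConfig()
with open('delf_config_example.pbtxt', 'r') as f:
  text_format.Parse(f.read(), config)

print(config.model_path)                 # gldv2_model_local
print(config.delf_local_config.use_pca)  # False
```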
Run the following command to extract DELF features for the images
`hertford_000056.jpg` and `oxford_000317.jpg`:
```
python3 ../examples/extract_features.py \
--config_path delf_config_example.pbtxt \
--list_images_path list_images.txt \
--output_dir data/oxford5k_features
```
Run the following command to perform feature matching between the images
`hertford_000056.jpg` and `oxford_000317.jpg`:
```
python3 ../examples/match_images.py \
--image_1_path data/oxford5k_images/hertford_000056.jpg \
--image_2_path data/oxford5k_images/oxford_000317.jpg \
--features_1_path data/oxford5k_features/hertford_000056.delf \
--features_2_path data/oxford5k_features/oxford_000317.delf \
--output_image matched_images.png
```
The generated image `matched_images.png` should look similar to this one:
![MatchedImagesDemo](./matched_images_demo.png)
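If you want to inspect the extracted `.delf` files directly, the delf package provides a `feature_io` helper; the call below follows the API used in the DELF examples and should be treated as an assumption if your version differs:
```python
from delf import feature_io

# Read back the features extracted for one of the test images.
locations, scales, descriptors, attention, orientations = feature_io.ReadFromFile(
    'data/oxford5k_features/hertford_000056.delf')

print('num features:', descriptors.shape[0])
print('descriptor dimensionality:', descriptors.shape[1])
```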
......@@ -302,6 +302,21 @@ def _write_relabeling_rules(relabeling_rules):
csv_writer.writerow([new_label, old_label])
def _shuffle_by_columns(np_array, random_state):
"""Shuffle the columns of a 2D numpy array.
Args:
np_array: array to shuffle.
random_state: numpy RandomState to be used for shuffling.
Returns:
The shuffled array.
"""
columns = np_array.shape[1]
columns_indices = np.arange(columns)
random_state.shuffle(columns_indices)
return np_array[:, columns_indices]
def _build_train_and_validation_splits(image_paths, file_ids, labels,
validation_split_size, seed):
"""Create TRAIN and VALIDATION splits containg all labels in equal proportion.
......@@ -353,19 +368,21 @@ def _build_train_and_validation_splits(image_paths, file_ids, labels,
for label, indexes in image_attrs_idx_by_label.items():
# Create the subset for the current label.
image_attrs_label = image_attrs[:, indexes]
images_per_label = image_attrs_label.shape[1]
# Shuffle the current label subset.
columns_indices = np.arange(images_per_label)
rs.shuffle(columns_indices)
image_attrs_label = image_attrs_label[:, columns_indices]
image_attrs_label = _shuffle_by_columns(image_attrs_label, rs)
# Split the current label subset into TRAIN and VALIDATION splits and add
# each split to the list of all splits.
images_per_label = image_attrs_label.shape[1]
cutoff_idx = max(1, int(validation_split_size * images_per_label))
splits[_VALIDATION_SPLIT].append(image_attrs_label[:, 0 : cutoff_idx])
splits[_TRAIN_SPLIT].append(image_attrs_label[:, cutoff_idx : ])
validation_split = np.concatenate(splits[_VALIDATION_SPLIT], axis=1)
train_split = np.concatenate(splits[_TRAIN_SPLIT], axis=1)
# Concatenate all subsets of image attributes into TRAIN and VALIDATION splits
# and reshuffle them again to ensure variance of labels across batches.
validation_split = _shuffle_by_columns(
np.concatenate(splits[_VALIDATION_SPLIT], axis=1), rs)
train_split = _shuffle_by_columns(
np.concatenate(splits[_TRAIN_SPLIT], axis=1), rs)
# Unstack the image attribute arrays in the TRAIN and VALIDATION splits and
# convert them back to lists. Convert labels back to 'int' from 'str'
......
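The hunk above performs a per-label split: each label's shuffled columns are cut at `cutoff_idx`, the first part going to VALIDATION and the rest to TRAIN. A minimal standalone sketch of that idea with NumPy (toy labels, not the script's actual attribute arrays):
```python
import numpy as np


def split_per_label(labels, validation_split_size, seed=0):
  """Toy per-label split mirroring the logic above: for each label, shuffle
  its indices and send the first `validation_split_size` fraction (at least
  one element) to VALIDATION and the rest to TRAIN."""
  rs = np.random.RandomState(seed)
  train_idx, val_idx = [], []
  for label in np.unique(labels):
    indices = np.flatnonzero(labels == label)
    rs.shuffle(indices)
    cutoff = max(1, int(validation_split_size * len(indices)))
    val_idx.extend(indices[:cutoff])
    train_idx.extend(indices[cutoff:])
  return np.array(train_idx), np.array(val_idx)


labels = np.array([0, 0, 0, 1, 1, 2, 2, 2, 2])
train_idx, val_idx = split_per_label(labels, validation_split_size=0.2)
print('train:', train_idx, 'validation:', val_idx)
```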
......@@ -29,11 +29,7 @@ import tensorflow as tf
class _GoogleLandmarksInfo(object):
"""Metadata about the Google Landmarks dataset."""
num_classes = {
'gld_v1': 14951,
'gld_v2': 203094,
'gld_v2_clean': 81313
}
num_classes = {'gld_v1': 14951, 'gld_v2': 203094, 'gld_v2_clean': 81313}
class _DataAugmentationParams(object):
......@@ -123,6 +119,8 @@ def _ParseFunction(example, name_to_features, image_size, augmentation):
# Parse to get image.
image = parsed_example['image/encoded']
image = tf.io.decode_jpeg(image)
image = NormalizeImages(
image, pixel_value_scale=128.0, pixel_value_offset=128.0)
if augmentation:
image = _ImageNetCrop(image)
else:
......@@ -130,6 +128,7 @@ def _ParseFunction(example, name_to_features, image_size, augmentation):
image.set_shape([image_size, image_size, 3])
# Parse to get label.
label = parsed_example['image/class/label']
return image, label
......@@ -162,6 +161,7 @@ def CreateDataset(file_pattern,
'image/width': tf.io.FixedLenFeature([], tf.int64, default_value=0),
'image/channels': tf.io.FixedLenFeature([], tf.int64, default_value=0),
'image/format': tf.io.FixedLenFeature([], tf.string, default_value=''),
'image/id': tf.io.FixedLenFeature([], tf.string, default_value=''),
'image/filename': tf.io.FixedLenFeature([], tf.string, default_value=''),
'image/encoded': tf.io.FixedLenFeature([], tf.string, default_value=''),
'image/class/label': tf.io.FixedLenFeature([], tf.int64, default_value=0),
......
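Given the feature spec in the hunk above and the `NormalizeImages(..., pixel_value_scale=128.0, pixel_value_offset=128.0)` call added to `_ParseFunction`, a generated shard can be parsed roughly as in this sketch. The normalization is written out as `(x - offset) / scale`, which is the assumed behavior of `NormalizeImages`, and only the two keys needed for training are read:
```python
import tensorflow as tf

feature_spec = {
    'image/encoded': tf.io.FixedLenFeature([], tf.string, default_value=''),
    'image/class/label': tf.io.FixedLenFeature([], tf.int64, default_value=0),
}


def parse_example(serialized):
  parsed = tf.io.parse_single_example(serialized, feature_spec)
  image = tf.io.decode_jpeg(parsed['image/encoded'])
  # Same effect as the NormalizeImages call: map pixel values to roughly [-1, 1].
  image = (tf.cast(image, tf.float32) - 128.0) / 128.0
  return image, parsed['image/class/label']


shards = tf.io.gfile.glob('gldv2_dataset/tfrecord/train-*')
dataset = tf.data.TFRecordDataset(shards).map(parse_example)
for image, label in dataset.take(1):
  print(image.shape, label.numpy())
```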
......@@ -132,10 +132,12 @@ class Delf(tf.keras.Model):
self.attn_classification.trainable_weights)
def call(self, input_image, training=True):
blocks = {'block3': None}
self.backbone(input_image, intermediates_dict=blocks, training=training)
blocks = {}
features = blocks['block3']
self.backbone.build_call(
input_image, intermediates_dict=blocks, training=training)
features = blocks['block3'] # pytype: disable=key-error
_, probs, _ = self.attention(features, training=training)
return probs, features
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Export global feature tensorflow inference model.
This model includes image pyramids for multi-scale processing.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app
from absl import flags
import tensorflow as tf
from delf.python.training.model import delf_model
from delf.python.training.model import export_model_utils
FLAGS = flags.FLAGS
flags.DEFINE_string('ckpt_path', '/tmp/delf-logdir/delf-weights',
'Path to saved checkpoint.')
flags.DEFINE_string('export_path', None, 'Path where model will be exported.')
flags.DEFINE_list(
'input_scales_list', None,
'Optional input image scales to use. If None (default), an input end-point '
'"input_scales" is added for the exported model. If not None, the '
'specified list of floats will be hard-coded as the desired input scales.')
flags.DEFINE_enum(
'multi_scale_pool_type', 'None', ['None', 'average', 'sum'],
"If 'None' (default), the model is exported with an output end-point "
"'global_descriptors', where the global descriptor for each scale is "
"returned separately. If not 'None', the global descriptor of each scale is"
' pooled and a 1D global descriptor is returned, with output end-point '
"'global_descriptor'.")
flags.DEFINE_boolean('normalize_global_descriptor', False,
'If True, L2-normalizes global descriptor.')
class _ExtractModule(tf.Module):
"""Helper module to build and save global feature model."""
def __init__(self,
multi_scale_pool_type='None',
normalize_global_descriptor=False,
input_scales_tensor=None):
"""Initialization of global feature model.
Args:
multi_scale_pool_type: Type of multi-scale pooling to perform.
normalize_global_descriptor: Whether to L2-normalize global descriptor.
input_scales_tensor: If None, the exported function to be used should be
ExtractFeatures, where an input end-point "input_scales" is added for
the exported model. If not None, the specified 1D tensor of floats will
be hard-coded as the desired input scales, in conjunction with
ExtractFeaturesFixedScales.
"""
self._multi_scale_pool_type = multi_scale_pool_type
self._normalize_global_descriptor = normalize_global_descriptor
if input_scales_tensor is None:
self._input_scales_tensor = []
else:
self._input_scales_tensor = input_scales_tensor
# Setup the DELF model for extraction.
self._model = delf_model.Delf(block3_strides=False, name='DELF')
def LoadWeights(self, checkpoint_path):
self._model.load_weights(checkpoint_path)
@tf.function(input_signature=[
tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image'),
tf.TensorSpec(shape=[None], dtype=tf.float32, name='input_scales'),
tf.TensorSpec(
shape=[None], dtype=tf.int32, name='input_global_scales_ind')
])
def ExtractFeatures(self, input_image, input_scales, input_global_scales_ind):
extracted_features = export_model_utils.ExtractGlobalFeatures(
input_image,
input_scales,
input_global_scales_ind,
lambda x: self._model.backbone.build_call(x, training=False),
multi_scale_pool_type=self._multi_scale_pool_type,
normalize_global_descriptor=self._normalize_global_descriptor)
named_output_tensors = {}
if self._multi_scale_pool_type == 'None':
named_output_tensors['global_descriptors'] = tf.identity(
extracted_features, name='global_descriptors')
else:
named_output_tensors['global_descriptor'] = tf.identity(
extracted_features, name='global_descriptor')
return named_output_tensors
@tf.function(input_signature=[
tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image')
])
def ExtractFeaturesFixedScales(self, input_image):
return self.ExtractFeatures(input_image, self._input_scales_tensor,
tf.range(tf.size(self._input_scales_tensor)))
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
export_path = FLAGS.export_path
if os.path.exists(export_path):
raise ValueError('export_path %s already exists.' % export_path)
if FLAGS.input_scales_list is None:
input_scales_tensor = None
else:
input_scales_tensor = tf.constant(
[float(s) for s in FLAGS.input_scales_list],
dtype=tf.float32,
shape=[len(FLAGS.input_scales_list)],
name='input_scales')
module = _ExtractModule(FLAGS.multi_scale_pool_type,
FLAGS.normalize_global_descriptor,
input_scales_tensor)
# Load the weights.
checkpoint_path = FLAGS.ckpt_path
module.LoadWeights(checkpoint_path)
print('Checkpoint loaded from ', checkpoint_path)
# Save the module
if FLAGS.input_scales_list is None:
served_function = module.ExtractFeatures
else:
served_function = module.ExtractFeaturesFixedScales
tf.saved_model.save(
module, export_path, signatures={'serving_default': served_function})
if __name__ == '__main__':
app.run(main)
......@@ -42,67 +42,39 @@ flags.DEFINE_boolean('block3_strides', False,
flags.DEFINE_float('iou', 1.0, 'IOU for non-max suppression.')
def _build_tensor_info(tensor_dict):
"""Replace the dict's value by the tensor info.
Args:
tensor_dict: A dictionary contains <string, tensor>.
Returns:
dict: New dictionary contains <string, tensor_info>.
"""
return {
k: tf.compat.v1.saved_model.utils.build_tensor_info(t)
for k, t in tensor_dict.items()
}
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
export_path = FLAGS.export_path
if os.path.exists(export_path):
raise ValueError('Export_path already exists.')
with tf.Graph().as_default() as g, tf.compat.v1.Session(graph=g) as sess:
class _ExtractModule(tf.Module):
"""Helper module to build and save DELF model."""
def __init__(self, block3_strides, iou):
"""Initialization of DELF model.
Args:
block3_strides: bool, whether to add strides to the output of block3.
iou: IOU for non-max suppression.
"""
self._stride_factor = 2.0 if block3_strides else 1.0
self._iou = iou
# Setup the DELF model for extraction.
model = delf_model.Delf(block3_strides=FLAGS.block3_strides, name='DELF')
# Initial forward pass to build model.
images = tf.zeros((1, 321, 321, 3), dtype=tf.float32)
model(images)
self._model = delf_model.Delf(
block3_strides=block3_strides, name='DELF')
stride_factor = 2.0 if FLAGS.block3_strides else 1.0
def LoadWeights(self, checkpoint_path):
self._model.load_weights(checkpoint_path)
# Setup the multiscale keypoint extraction.
input_image = tf.compat.v1.placeholder(
tf.uint8, shape=(None, None, 3), name='input_image')
input_abs_thres = tf.compat.v1.placeholder(
tf.float32, shape=(), name='input_abs_thres')
input_scales = tf.compat.v1.placeholder(
tf.float32, shape=[None], name='input_scales')
input_max_feature_num = tf.compat.v1.placeholder(
tf.int32, shape=(), name='input_max_feature_num')
@tf.function(input_signature=[
tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image'),
tf.TensorSpec(shape=[None], dtype=tf.float32, name='input_scales'),
tf.TensorSpec(shape=(), dtype=tf.int32, name='input_max_feature_num'),
tf.TensorSpec(shape=(), dtype=tf.float32, name='input_abs_thres')
])
def ExtractFeatures(self, input_image, input_scales, input_max_feature_num,
input_abs_thres):
extracted_features = export_model_utils.ExtractLocalFeatures(
input_image, input_scales, input_max_feature_num, input_abs_thres,
FLAGS.iou, lambda x: model(x, training=False), stride_factor)
self._iou, lambda x: self._model(x, training=False),
self._stride_factor)
# Load the weights.
checkpoint_path = FLAGS.ckpt_path
model.load_weights(checkpoint_path)
print('Checkpoint loaded from ', checkpoint_path)
named_input_tensors = {
'input_image': input_image,
'input_scales': input_scales,
'input_abs_thres': input_abs_thres,
'input_max_feature_num': input_max_feature_num,
}
# Outputs to the exported model.
named_output_tensors = {}
named_output_tensors['boxes'] = tf.identity(
extracted_features[0], name='boxes')
......@@ -112,25 +84,27 @@ def main(argv):
extracted_features[2], name='scales')
named_output_tensors['scores'] = tf.identity(
extracted_features[3], name='scores')
return named_output_tensors
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
export_path = FLAGS.export_path
if os.path.exists(export_path):
raise ValueError(f'Export_path {export_path} already exists. Please '
'specify a different path or delete the existing one.')
module = _ExtractModule(FLAGS.block3_strides, FLAGS.iou)
# Load the weights.
checkpoint_path = FLAGS.ckpt_path
module.LoadWeights(checkpoint_path)
print('Checkpoint loaded from ', checkpoint_path)
# Export the model.
signature_def = tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
inputs=_build_tensor_info(named_input_tensors),
outputs=_build_tensor_info(named_output_tensors))
print('Exporting trained model to:', export_path)
builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)
init_op = None
builder.add_meta_graph_and_variables(
sess, [tf.compat.v1.saved_model.tag_constants.SERVING],
signature_def_map={
tf.compat.v1.saved_model.signature_constants
.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
signature_def
},
main_op=init_op)
builder.save()
# Save the module
tf.saved_model.save(module, export_path)
if __name__ == '__main__':
......
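Because the refactored `export_model.py` saves a `tf.Module` whose `ExtractFeatures` `tf.function` has the input signature shown above, and calls `tf.saved_model.save(module, export_path)` without an explicit signature map, the exported local-feature model can be exercised roughly as in this sketch. The image, scales and thresholds are illustrative values only:
```python
import tensorflow as tf

# Load the local-feature SavedModel produced by model/export_model.py.
model = tf.saved_model.load('gldv2_model_local')

image = tf.zeros([321, 321, 3], dtype=tf.uint8)                 # placeholder image
scales = tf.constant([0.7071, 1.0, 1.4142], dtype=tf.float32)   # illustrative scales
max_features = tf.constant(1000, dtype=tf.int32)
abs_threshold = tf.constant(100.0, dtype=tf.float32)            # matches score_threshold above

outputs = model.ExtractFeatures(image, scales, max_features, abs_threshold)
# Expected keys include 'boxes', 'scales' and 'scores' (see the script above).
print(sorted(outputs.keys()))
```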
......@@ -142,20 +142,21 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
keep_going = lambda j, b, f, scales, scores: tf.less(j, num_scales)
(_, output_boxes, output_features, output_scales,
output_scores) = tf.while_loop(
cond=keep_going,
body=_ProcessSingleScale,
loop_vars=[
i, output_boxes, output_features, output_scales, output_scores
],
shape_invariants=[
i.get_shape(),
tf.TensorShape([None, 4]),
tf.TensorShape([None, feature_depth]),
tf.TensorShape([None]),
tf.TensorShape([None])
],
back_prop=False)
output_scores) = tf.nest.map_structure(
tf.stop_gradient,
tf.while_loop(
cond=keep_going,
body=_ProcessSingleScale,
loop_vars=[
i, output_boxes, output_features, output_scales, output_scores
],
shape_invariants=[
i.get_shape(),
tf.TensorShape([None, 4]),
tf.TensorShape([None, feature_depth]),
tf.TensorShape([None]),
tf.TensorShape([None])
]))
feature_boxes = box_list.BoxList(output_boxes)
feature_boxes.add_field('features', output_features)
......@@ -169,3 +170,99 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
return final_boxes.get(), final_boxes.get_field(
'features'), final_boxes.get_field('scales'), tf.expand_dims(
final_boxes.get_field('scores'), 1)
@tf.function
def ExtractGlobalFeatures(image,
image_scales,
global_scales_ind,
model_fn,
multi_scale_pool_type='None',
normalize_global_descriptor=False):
"""Extract global features for input image.
Args:
image: image tensor of type tf.uint8 with shape [h, w, channels].
image_scales: 1D float tensor which contains float scales used for image
pyramid construction.
global_scales_ind: Feature extraction happens only for a subset of
`image_scales`, those with corresponding indices from this tensor.
model_fn: model function. Follows the signature:
* Args:
* `images`: Image tensor which is re-scaled.
* Returns:
* `global_descriptors`: Global descriptors for input images.
multi_scale_pool_type: If set, the global descriptor of each scale is pooled
and a 1D global descriptor is returned.
normalize_global_descriptor: If True, output global descriptors are
L2-normalized.
Returns:
global_descriptors: If `multi_scale_pool_type` is 'None', returns a [S, D]
float tensor. S is the number of scales, and D the global descriptor
dimensionality. Each D-dimensional entry is a global descriptor, which may
be L2-normalized depending on `normalize_global_descriptor`. If
`multi_scale_pool_type` is not 'None', returns a [D] float tensor with the
pooled global descriptor.
"""
original_image_shape_float = tf.gather(
tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])
image_tensor = gld.NormalizeImages(
image, pixel_value_offset=128.0, pixel_value_scale=128.0)
image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')
def _ResizeAndExtract(scale_index):
"""Helper function to resize image then extract global feature.
Args:
scale_index: A valid index in image_scales.
Returns:
global_descriptor: [1,D] tensor denoting the extracted global descriptor.
"""
scale = tf.gather(image_scales, scale_index)
new_image_size = tf.dtypes.cast(
tf.round(original_image_shape_float * scale), tf.int32)
resized_image = tf.image.resize(image_tensor, new_image_size)
global_descriptor = model_fn(resized_image)
return global_descriptor
# First loop to find initial scale to be used.
num_scales = tf.shape(image_scales)[0]
initial_scale_index = tf.constant(-1, dtype=tf.int32)
for scale_index in tf.range(num_scales):
if tf.reduce_any(tf.equal(global_scales_ind, scale_index)):
initial_scale_index = scale_index
break
output_global = _ResizeAndExtract(initial_scale_index)
# Loop over subsequent scales.
for scale_index in tf.range(initial_scale_index + 1, num_scales):
# Allow an undefined number of global feature scales to be extracted.
tf.autograph.experimental.set_loop_options(
shape_invariants=[(output_global, tf.TensorShape([None, None]))])
if tf.reduce_any(tf.equal(global_scales_ind, scale_index)):
global_descriptor = _ResizeAndExtract(scale_index)
output_global = tf.concat([output_global, global_descriptor], 0)
normalization_axis = 1
if multi_scale_pool_type == 'average':
output_global = tf.reduce_mean(
output_global,
axis=0,
keepdims=False,
name='multi_scale_average_pooling')
normalization_axis = 0
elif multi_scale_pool_type == 'sum':
output_global = tf.reduce_sum(
output_global, axis=0, keepdims=False, name='multi_scale_sum_pooling')
normalization_axis = 0
if normalize_global_descriptor:
output_global = tf.nn.l2_normalize(
output_global, axis=normalization_axis, name='l2_normalization')
return output_global
......@@ -22,9 +22,14 @@ from __future__ import division
from __future__ import print_function
import functools
import os
import tempfile
from absl import logging
import h5py
import tensorflow as tf
layers = tf.keras.layers
......@@ -284,8 +289,8 @@ class ResNet50(tf.keras.Model):
else:
self.global_pooling = None
def call(self, inputs, training=True, intermediates_dict=None):
"""Call the ResNet50 model.
def build_call(self, inputs, training=True, intermediates_dict=None):
"""Building the ResNet50 model.
Args:
inputs: Images to compute features for.
......@@ -356,3 +361,79 @@ class ResNet50(tf.keras.Model):
return self.global_pooling(x)
else:
return x
def call(self, inputs, training=True, intermediates_dict=None):
"""Call the ResNet50 model.
Args:
inputs: Images to compute features for.
training: Whether model is in training phase.
intermediates_dict: `None` or dictionary. If not None, accumulate feature
maps from intermediate blocks into the dictionary.
Returns:
Tensor with featuremap.
"""
return self.build_call(inputs, training, intermediates_dict)
def restore_weights(self, filepath):
"""Load pretrained weights.
This function loads a .h5 file from the filepath with saved model weights
and assigns them to the model.
Args:
filepath: String, path to the .h5 file
Raises:
ValueError: if the file referenced by `filepath` does not exist.
"""
if not tf.io.gfile.exists(filepath):
raise ValueError('Unable to load weights from %s. You must provide a '
'valid file.' % (filepath))
# Create a local copy of the weights file for h5py to be able to read it.
local_filename = os.path.basename(filepath)
tmp_filename = os.path.join(tempfile.gettempdir(), local_filename)
tf.io.gfile.copy(filepath, tmp_filename, overwrite=True)
# Load the content of the weights file.
f = h5py.File(tmp_filename, mode='r')
saved_layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
try:
# Iterate through all the layers assuming the max `depth` is 2.
for layer in self.layers:
if hasattr(layer, 'layers'):
for inlayer in layer.layers:
# Make sure the weights are in the saved model, and that we are in
# the innermost layer.
if inlayer.name not in saved_layer_names:
raise ValueError('Layer %s absent from the pretrained weights. '
'Unable to load its weights.' % (inlayer.name))
if hasattr(inlayer, 'layers'):
raise ValueError('Layer %s is not a depth 2 layer. Unable to load '
'its weights.' % (inlayer.name))
# Assign the weights in the current layer.
g = f[inlayer.name]
weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
weight_values = [g[weight_name] for weight_name in weight_names]
print('Setting the weights for layer %s' % (inlayer.name))
inlayer.set_weights(weight_values)
finally:
# Clean up the temporary file.
tf.io.gfile.remove(tmp_filename)
def log_weights(self):
"""Log backbone weights."""
logging.info('Logging backbone weights')
logging.info('------------------------')
for layer in self.layers:
if hasattr(layer, 'layers'):
for inlayer in layer.layers:
logging.info('Weights for layer: %s, inlayer % s', layer.name,
inlayer.name)
weights = inlayer.get_weights()
logging.info(weights)
else:
logging.info('Layer %s does not have inner layers.',
layer.name)
......@@ -43,17 +43,20 @@ flags.DEFINE_string('train_file_pattern', '/tmp/data/train*',
'File pattern of training dataset files.')
flags.DEFINE_string('validation_file_pattern', '/tmp/data/validation*',
'File pattern of validation dataset files.')
flags.DEFINE_enum('dataset_version', 'gld_v1',
['gld_v1', 'gld_v2', 'gld_v2_clean'],
'Google Landmarks dataset version, used to determine the'
'number of classes.')
flags.DEFINE_enum(
'dataset_version', 'gld_v1', ['gld_v1', 'gld_v2', 'gld_v2_clean'],
'Google Landmarks dataset version, used to determine the '
'number of classes.')
flags.DEFINE_integer('seed', 0, 'Seed to training dataset.')
flags.DEFINE_float('initial_lr', 0.001, 'Initial learning rate.')
flags.DEFINE_float('initial_lr', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 32, 'Global batch size.')
flags.DEFINE_integer('max_iters', 500000, 'Maximum iterations.')
flags.DEFINE_boolean('block3_strides', False, 'Whether to use block3_strides.')
flags.DEFINE_boolean('block3_strides', True, 'Whether to use block3_strides.')
flags.DEFINE_boolean('use_augmentation', True,
'Whether to use ImageNet style augmentation.')
flags.DEFINE_string(
'imagenet_checkpoint', None,
'ImageNet checkpoint for ResNet backbone. If None, no checkpoint is used.')
def _record_accuracy(metric, logits, labels):
......@@ -64,6 +67,10 @@ def _record_accuracy(metric, logits, labels):
def _attention_summaries(scores, global_step):
"""Record statistics of the attention score."""
tf.summary.image(
'batch_attention',
scores / tf.reduce_max(scores + 1e-3),
step=global_step)
tf.summary.scalar('attention/max', tf.reduce_max(scores), step=global_step)
tf.summary.scalar('attention/min', tf.reduce_min(scores), step=global_step)
tf.summary.scalar('attention/mean', tf.reduce_mean(scores), step=global_step)
......@@ -124,7 +131,7 @@ def main(argv):
max_iters = FLAGS.max_iters
global_batch_size = FLAGS.batch_size
image_size = 321
num_eval = 1000
num_eval_batches = int(50000 / global_batch_size)
report_interval = 100
eval_interval = 1000
save_interval = 20000
......@@ -134,9 +141,10 @@ def main(argv):
clip_val = tf.constant(10.0)
if FLAGS.debug:
tf.config.run_functions_eagerly(True)
global_batch_size = 4
max_iters = 4
num_eval = 1
max_iters = 100
num_eval_batches = 1
save_interval = 1
report_interval = 1
......@@ -159,11 +167,12 @@ def main(argv):
augmentation=False,
seed=FLAGS.seed)
train_iterator = strategy.make_dataset_iterator(train_dataset)
validation_iterator = strategy.make_dataset_iterator(validation_dataset)
train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
validation_dist_dataset = strategy.experimental_distribute_dataset(
validation_dataset)
train_iterator.initialize()
validation_iterator.initialize()
train_iter = iter(train_dist_dataset)
validation_iter = iter(validation_dist_dataset)
# Create a checkpoint directory to store the checkpoints.
checkpoint_prefix = os.path.join(FLAGS.logdir, 'delf_tf2-ckpt')
......@@ -219,11 +228,14 @@ def main(argv):
labels = tf.clip_by_value(labels, 0, model.num_classes)
global_step = optimizer.iterations
tf.summary.image('batch_images', (images + 1.0) / 2.0, step=global_step)
tf.summary.scalar(
'image_range/max', tf.reduce_max(images), step=global_step)
tf.summary.scalar(
'image_range/min', tf.reduce_min(images), step=global_step)
# TODO(andrearaujo): we should try to unify the backprop into a single
# function, instead of applying once to descriptor then to attention.
def _backprop_loss(tape, loss, weights):
"""Backpropogate losses using clipped gradients.
......@@ -344,12 +356,25 @@ def main(argv):
with tf.summary.record_if(
tf.math.equal(0, optimizer.iterations % report_interval)):
# TODO(dananghel): try to load pretrained weights at backbone creation.
# Load pretrained weights for ResNet50 trained on ImageNet.
if FLAGS.imagenet_checkpoint is not None:
logging.info('Attempting to load ImageNet pretrained weights.')
input_batch = next(train_iter)
_, _ = distributed_train_step(input_batch)
model.backbone.restore_weights(FLAGS.imagenet_checkpoint)
logging.info('Done.')
else:
logging.info('Skip loading ImageNet pretrained weights.')
if FLAGS.debug:
model.backbone.log_weights()
global_step_value = optimizer.iterations.numpy()
while global_step_value < max_iters:
# input_batch : images(b, h, w, c), labels(b,).
try:
input_batch = train_iterator.get_next()
input_batch = next(train_iter)
except tf.errors.OutOfRangeError:
# Break if we run out of data in the dataset.
logging.info('Stopping training at global step %d, no more data',
......@@ -392,9 +417,9 @@ def main(argv):
# Validate once in {eval_interval*n, n \in N} steps.
if global_step_value % eval_interval == 0:
for i in range(num_eval):
for i in range(num_eval_batches):
try:
validation_batch = validation_iterator.get_next()
validation_batch = next(validation_iter)
desc_validation_result, attn_validation_result = (
distributed_validation_step(validation_batch))
except tf.errors.OutOfRangeError:
......@@ -416,13 +441,17 @@ def main(argv):
print(' : attn:', attn_validation_result.numpy())
# Save checkpoint once (each save_interval*n, n \in N) steps.
# TODO(andrearaujo): save only in one of the two ways. They are
# identical, the only difference is that the manager adds some extra
# prefixes and variables (eg, optimizer variables).
if global_step_value % save_interval == 0:
save_path = manager.save()
logging.info('Saved({global_step_value}) at %s', save_path)
logging.info('Saved (%d) at %s', global_step_value, save_path)
file_path = '%s/delf_weights' % FLAGS.logdir
model.save_weights(file_path, save_format='tf')
logging.info('Saved weights({global_step_value}) at %s', file_path)
logging.info('Saved weights (%d) at %s', global_step_value,
file_path)
# Reset metrics for next step.
desc_train_accuracy.reset_states()
......
......@@ -22,7 +22,7 @@ install_requires = [
'pandas >= 0.24.2',
'numpy >= 1.16.1',
'scipy >= 1.2.2',
'tensorflow >= 2.0.0b1',
'tensorflow >= 2.2.0',
'tf_slim >= 1.1',
'tensorflow_probability >= 0.9.0',
]
......
# Contributing to the Tensorflow Object Detection API
# Contributing to the TensorFlow Object Detection API
Patches to Tensorflow Object Detection API are welcome!
Patches to TensorFlow Object Detection API are welcome!
We require contributors to fill out either the individual or corporate
Contributor License Agreement (CLA).
......@@ -9,5 +9,5 @@ Contributor License Agreement (CLA).
* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
Please follow the
[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
[TensorFlow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
when submitting pull requests.
![TensorFlow Requirement: 1.15](https://img.shields.io/badge/TensorFlow%20Requirement-1.15-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Tensorflow Object Detection API
# TensorFlow Object Detection API
[![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
[![TensorFlow 1.15](https://img.shields.io/badge/TensorFlow-1.15-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v1.15.0)
[![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/)
Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision.
......@@ -11,7 +11,7 @@ models. At Google we’ve certainly found this codebase to be useful for our
computer vision needs, and we hope that you will as well. <p align="center">
<img src="g3doc/img/kites_detections_output.jpg" width=676 height=450> </p>
Contributions to the codebase are welcome and we would love to hear back from
you if you find this API useful. Finally if you use the Tensorflow Object
you if you find this API useful. Finally if you use the TensorFlow Object
Detection API for a research publication, please consider citing:
```
......@@ -26,91 +26,110 @@ Song Y, Guadarrama S, Murphy K, CVPR 2017
<img src="g3doc/img/tf-od-api-logo.png" width=140 height=195>
</p>
## Maintainers
## Support for TensorFlow 2 and 1
The TensorFlow Object Detection API supports both TensorFlow 2 (TF2) and
TensorFlow 1 (TF1). A majority of the modules in the library are both TF1 and
TF2 compatible. In cases where they are not, we provide two versions.
Name | GitHub
-------------- | ---------------------------------------------
Jonathan Huang | [jch1](https://github.com/jch1)
Vivek Rathod | [tombstone](https://github.com/tombstone)
Ronny Votel | [ronnyvotel](https://github.com/ronnyvotel)
Derek Chow | [derekjchow](https://github.com/derekjchow)
Chen Sun | [jesu9](https://github.com/jesu9)
Menglong Zhu | [dreamdragon](https://github.com/dreamdragon)
Alireza Fathi | [afathi3](https://github.com/afathi3)
Zhichao Lu | [pkulzc](https://github.com/pkulzc)
## Table of contents
Setup:
* <a href='g3doc/installation.md'>Installation</a><br>
Quick Start:
* <a href='object_detection_tutorial.ipynb'>
Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
* <a href="g3doc/running_pets.md">Quick Start: Training a pet detector</a><br>
Customizing a Pipeline:
* <a href='g3doc/configuring_jobs.md'>
Configuring an object detection pipeline</a><br>
* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>
Running:
* <a href='g3doc/running_locally.md'>Running locally</a><br>
* <a href='g3doc/running_on_cloud.md'>Running on the cloud</a><br>
Extras:
* <a href='g3doc/detection_model_zoo.md'>Tensorflow detection model zoo</a><br>
* <a href='g3doc/exporting_models.md'>
Exporting a trained model for inference</a><br>
* <a href='g3doc/tpu_exporters.md'>
Exporting a trained model for TPU inference</a><br>
* <a href='g3doc/defining_your_own_model.md'>
Defining your own model architecture</a><br>
* <a href='g3doc/using_your_own_dataset.md'>
Bringing in your own dataset</a><br>
* <a href='g3doc/evaluation_protocols.md'>
Supported object detection evaluation protocols</a><br>
* <a href='g3doc/oid_inference_and_evaluation.md'>
Inference and evaluation on the Open Images dataset</a><br>
* <a href='g3doc/instance_segmentation.md'>
Run an instance segmentation model</a><br>
* <a href='g3doc/challenge_evaluation.md'>
Run the evaluation for the Open Images Challenge 2018/2019</a><br>
* <a href='g3doc/tpu_compatibility.md'>
TPU compatible detection pipelines</a><br>
* <a href='g3doc/running_on_mobile_tensorflowlite.md'>
Running object detection on mobile devices with TensorFlow Lite</a><br>
* <a href='g3doc/context_rcnn.md'>
Context R-CNN documentation for data preparation, training, and export</a><br>
Although we will continue to maintain the TF1 models and provide support, we
encourage users to try the Object Detection API with TF2 for the following
reasons:
## Getting Help
* We provide new architectures supported in TF2 only and we will continue to
develop in TF2 going forward.
To get help with issues you may encounter using the Tensorflow Object Detection
API, create a new question on [StackOverflow](https://stackoverflow.com/) with
the tags "tensorflow" and "object-detection".
* The popular models we ported from TF1 to TF2 achieve the same performance.
Please report bugs (actually broken code, not usage questions) to the
tensorflow/models GitHub
[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
issue name with "object_detection".
* A single training and evaluation binary now supports both GPU and TPU
distribution strategies making it possible to train models with synchronous
SGD by default.
* Eager execution with new binaries makes debugging easy!
Finally, if are an existing user of the Object Detection API we have retained
the same config language you are familiar with and ensured that the
TF2 training/eval binary takes the same arguments as our TF1 binaries.
Note: The models we provide in [TF2 Zoo](g3doc/tf2_detection_zoo.md) and
[TF1 Zoo](g3doc/tf1_detection_zoo.md) are specific to the TensorFlow major
version and are not interoperable.
Please select one of the links below for TensorFlow version-specific
documentation of the Object Detection API:
Please check [FAQ](g3doc/faq.md) for frequently asked questions before reporting
an issue.
<!-- mdlint off(WHITESPACE_LINE_LENGTH) -->
### Tensorflow 2.x
* <a href='g3doc/tf2.md'>
Object Detection API TensorFlow 2</a><br>
* <a href='g3doc/tf2_detection_zoo.md'>
TensorFlow 2 Model Zoo</a><br>
## Release information
### June 17th, 2020
### Tensorflow 1.x
* <a href='g3doc/tf1.md'>
Object Detection API TensorFlow 1</a><br>
* <a href='g3doc/tf1_detection_zoo.md'>
TensorFlow 1 Model Zoo</a><br>
<!-- mdlint on -->
## What's New
### TensorFlow 2 Support
We are happy to announce that the TF OD API officially supports TF2! Our release
includes:
* New binaries for train/eval/export that are designed to run in eager mode.
* A suite of TF2 compatible (Keras-based) models; this includes migrations of
our most popular TF1.x models (e.g., SSD with MobileNet, RetinaNet,
Faster R-CNN, Mask R-CNN), as well as a few new architectures for which we
will only maintain TF2 implementations:
1. CenterNet - a simple and effective anchor-free architecture based on
the recent [Objects as Points](https://arxiv.org/abs/1904.07850) paper by
Zhou et al.
2. [EfficientDet](https://arxiv.org/abs/1911.09070) - a recent family of
SOTA models discovered with the help of Neural Architecture Search.
* COCO pre-trained weights for all of the models provided as TF2 style
object-based checkpoints.
* Access to [Distribution Strategies](https://www.tensorflow.org/guide/distributed_training)
for distributed training --- our models are designed to be trainable using sync
multi-GPU and TPU platforms.
* Colabs demo’ing eager mode training and inference.
See our release blogpost [here](https://blog.tensorflow.org/2020/07/tensorflow-2-meets-object-detection-api.html).
If you are an existing user of the TF OD API using TF 1.x, don’t worry, we’ve
got you covered.
**Thanks to contributors**: Akhil Chinnakotla, Allen Lavoie, Anirudh Vegesana,
Anjali Sridhar, Austin Myers, Dan Kondratyuk, David Ross, Derek Chow, Jaeyoun
Kim, Jing Li, Jonathan Huang, Jordi Pont-Tuset, Karmel Allison, Kathy Ruan,
Kaushik Shivakumar, Lu He, Mingxing Tan, Pengchong Jin, Ronny Votel, Sara Beery,
Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### MobileDet GPU
We have released SSDLite with MobileDet GPU backbone, which achieves 17% mAP
higher than the MobileNetV2 SSDLite (27.5 mAP vs 23.5 mAP) on a NVIDIA Jetson
Xavier at comparable latency (3.2ms vs 3.3ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An
(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA).
### Context R-CNN
We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
uses attention to incorporate contextual information from images (e.g. from
temporally nearby frames taken by a static camera) in order to improve accuracy.
Importantly, these contextual images need not be labeled.
* When applied to a challenging wildlife detection dataset ([Snapshot Serengeti](http://lila.science/datasets/snapshot-serengeti)),
* When applied to a challenging wildlife detection dataset
([Snapshot Serengeti](http://lila.science/datasets/snapshot-serengeti)),
Context R-CNN with context from up to a month of images outperforms a
single-frame baseline by 17.9% mAP, and outperforms S3D (a 3d convolution
based baseline) by 11.2% mAP.
......@@ -118,280 +137,48 @@ Importantly, these contextual images need not be labeled.
novel camera deployment to improve performance at that camera, boosting
model generalizability.
Read about Context R-CNN on the Google AI blog
[here](https://ai.googleblog.com/2020/06/leveraging-temporal-context-for-object.html).
We have provided code for generating data with associated context
[here](g3doc/context_rcnn.md), and a sample config for a Context R-CNN
model [here](samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config).
[here](g3doc/context_rcnn.md), and a sample config for a Context R-CNN model
[here](samples/configs/context_rcnn_resnet101_snapshot_serengeti_sync.config).
Snapshot Serengeti-trained Faster R-CNN and Context R-CNN models can be found in
the [model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md#snapshot-serengeti-camera-trap-trained-models).
the
[model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md#snapshot-serengeti-camera-trap-trained-models).
A colab demonstrating Context R-CNN is provided
[here](colab_tutorials/context_rcnn_tutorial.ipynb).
<b>Thanks to contributors</b>: Sara Beery, Jonathan Huang, Guanhang Wu, Vivek
Rathod, Ronny Votel, Zhichao Lu, David Ross, Pietro Perona, Tanya Birch, and
the Wildlife Insights AI Team.
### May 19th, 2020
We have released [MobileDets](https://arxiv.org/abs/2004.14525), a set of
high-performance models for mobile CPUs, DSPs and EdgeTPUs.
* MobileDets outperform MobileNetV3+SSDLite by 1.7 mAP at comparable mobile
CPU inference latencies. MobileDets also outperform MobileNetV2+SSDLite by
1.9 mAP on mobile CPUs, 3.7 mAP on EdgeTPUs and 3.4 mAP on DSPs while
running equally fast. MobileDets also offer up to 2x speedup over MnasFPN on
EdgeTPUs and DSPs.
For each of the three hardware platforms we have released model definition,
model checkpoints trained on the COCO14 dataset and converted TFLite models in
fp32 and/or uint8.
<b>Thanks to contributors</b>: Yunyang Xiong, Hanxiao Liu, Suyog Gupta, Berkin
Akin, Gabriel Bender, Pieter-Jan Kindermans, Mingxing Tan, Vikas Singh, Bo Chen,
Quoc Le, Zhichao Lu.
### May 7th, 2020
We have released a mobile model with the
[MnasFPN head](https://arxiv.org/abs/1912.01106).
* MnasFPN with MobileNet-V2 backbone is the most accurate (26.6 mAP at 183ms
on Pixel 1) mobile detection model we have released to date. With
depth-multiplier, MnasFPN with MobileNet-V2 backbone is 1.8 mAP higher than
MobileNet-V3-Large with SSDLite (23.8 mAP vs 22.0 mAP) at similar latency
(120ms) on Pixel 1.
We have released model definition, model checkpoints trained on the COCO14
dataset and a converted TFLite model.
<b>Thanks to contributors</b>: Bo Chen, Golnaz Ghiasi, Hanxiao Liu, Tsung-Yi
Lin, Dmitry Kalenichenko, Hartwig Adam, Quoc Le, Zhichao Lu, Jonathan Huang, Hao
Xu.
### Nov 13th, 2019
We have released MobileNetEdgeTPU SSDLite model.
* SSDLite with MobileNetEdgeTPU backbone, which achieves 10% mAP higher than
MobileNetV2 SSDLite (24.3 mAP vs 22 mAP) on a Google Pixel4 at comparable
latency (6.6ms vs 6.8ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yunyang Xiong, Bo Chen, Suyog Gupta, Hanxiao Liu,
Gabriel Bender, Mingxing Tan, Berkin Akin, Zhichao Lu, Quoc Le
### Oct 15th, 2019
We have released two MobileNet V3 SSDLite models (presented in
[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244)).
* SSDLite with MobileNet-V3-Large backbone, which is 27% faster than Mobilenet
V2 SSDLite (119ms vs 162ms) on a Google Pixel phone CPU at the same mAP.
* SSDLite with MobileNet-V3-Small backbone, which is 37% faster than MnasNet
SSDLite reduced with depth-multiplier (43ms vs 68ms) at the same mAP.
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Bo Chen, Zhichao Lu, Vivek Rathod, Jonathan Huang
### July 1st, 2019
We have released an updated set of utils and an updated
[tutorial](g3doc/challenge_evaluation.md) for all three tracks of the
[Open Images Challenge 2019](https://storage.googleapis.com/openimages/web/challenge2019.html)!
The Instance Segmentation metric for
[Open Images V5](https://storage.googleapis.com/openimages/web/index.html) and
[Challenge 2019](https://storage.googleapis.com/openimages/web/challenge2019.html)
is part of this release. Check out
[the metric description](https://storage.googleapis.com/openimages/web/evaluation.html#instance_segmentation_eval)
on the Open Images website.
<b>Thanks to contributors</b>: Alina Kuznetsova, Rodrigo Benenson
## Release Notes
See [notes](g3doc/release_notes.md) for all past releases.
### Feb 11, 2019
We have released detection models trained on the Open Images Dataset V4 in our
detection model zoo, including
* Faster R-CNN detector with Inception Resnet V2 feature extractor
* SSD detector with MobileNet V2 feature extractor
* SSD detector with ResNet 101 FPN feature extractor (aka RetinaNet-101)
<b>Thanks to contributors</b>: Alina Kuznetsova, Yinxiao Li
### Sep 17, 2018
We have released Faster R-CNN detectors with ResNet-50 / ResNet-101 feature
extractors trained on the
[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
The models are trained on the training split of the iNaturalist data for 4M
iterations and achieve 55% and 58% mean AP@.5 over 2854 classes, respectively.
For more details please refer to this [paper](https://arxiv.org/abs/1707.06642).
<b>Thanks to contributors</b>: Chen Sun
### July 13, 2018
There are many new updates in this release, extending the functionality and
capability of the API:
* Moving from slim-based training to
[Estimator](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator)-based
training.
* Support for [RetinaNet](https://arxiv.org/abs/1708.02002), and a
[MobileNet](https://ai.googleblog.com/2017/06/mobilenets-open-source-models-for.html)
adaptation of RetinaNet.
* A novel SSD-based architecture called the
[Pooling Pyramid Network](https://arxiv.org/abs/1807.03284) (PPN).
* Releasing several [TPU](https://cloud.google.com/tpu/)-compatible models.
These can be found in the `samples/configs/` directory with a comment in the
pipeline configuration files indicating TPU compatibility.
* Support for quantized training.
* Updated documentation for new binaries, Cloud training, and
[Tensorflow Lite](https://www.tensorflow.org/mobile/tflite/).
See also our
[expanded announcement blogpost](https://ai.googleblog.com/2018/07/accelerated-training-and-inference-with.html)
and accompanying tutorial at the
[TensorFlow blog](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193).
<b>Thanks to contributors</b>: Sara Robinson, Aakanksha Chowdhery, Derek Chow,
Pengchong Jin, Jonathan Huang, Vivek Rathod, Zhichao Lu, Ronny Votel
### June 25, 2018
Additional evaluation tools for the
[Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html)
are out. Check out our short tutorial on data preparation and running evaluation
[here](g3doc/challenge_evaluation.md)!
<b>Thanks to contributors</b>: Alina Kuznetsova
### June 5, 2018
We have released the implementation of evaluation metrics for both tracks of the
[Open Images Challenge 2018](https://storage.googleapis.com/openimages/web/challenge.html)
as a part of the Object Detection API - see the
[evaluation protocols](g3doc/evaluation_protocols.md) for more details.
Additionally, we have released a tool for hierarchical labels expansion for the
Open Images Challenge: check out
[oid_hierarchical_labels_expansion.py](dataset_tools/oid_hierarchical_labels_expansion.py).
<b>Thanks to contributors</b>: Alina Kuznetsova, Vittorio Ferrari, Jasper
Uijlings
### April 30, 2018
We have released a Faster R-CNN detector with ResNet-101 feature extractor
trained on [AVA](https://research.google.com/ava/) v2.1. Compared with other
commonly used object detectors, it changes the action classification loss
function to per-class Sigmoid loss to handle boxes with multiple labels. The
model is trained on the training split of AVA v2.1 for 1.5M iterations and
achieves a mean AP of 11.25% over 60 classes on the validation split of AVA v2.1.
For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).
<b>Thanks to contributors</b>: Chen Sun, David Ross
### April 2, 2018
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU
(200ms vs. 270ms) at the same accuracy. Along with the model definition, we are
also releasing a model checkpoint trained on the COCO dataset.
<b>Thanks to contributors</b>: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek
Rathod, Jonathan Huang
### February 9, 2018
We now support instance segmentation! This API update adds a number of
instance segmentation models similar to those discussed in the
[Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer
to [our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the
2017 Coco + Places Workshop. Refer to the section on
[Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for
instructions on how to configure a model that predicts masks in addition to
object bounding boxes.
<b>Thanks to contributors</b>: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny
Votel, Jonathan Huang
### November 17, 2017
As a part of the Open Images V3 release we have released:
* An implementation of the Open Images evaluation metric and the
[protocol](g3doc/evaluation_protocols.md#open-images).
* Additional tools to separate inference of detection and evaluation (see
[this tutorial](g3doc/oid_inference_and_evaluation.md)).
* A new detection model trained on the Open Images V2 data release (see
[Open Images model](g3doc/detection_model_zoo.md#open-images-models)).
See more information on the
[Open Images website](https://github.com/openimages/dataset)!
<b>Thanks to contributors</b>: Stefan Popov, Alina Kuznetsova
### November 6, 2017
We have re-released faster versions of our (pre-trained) models in the
<a href='g3doc/detection_model_zoo.md'>model zoo</a>. In addition to what was
available before, we are also adding Faster R-CNN models trained on COCO with
Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN with
Resnet-101 model trained on the KITTI dataset.
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow, Tal
Remez, Chen Sun.
### October 31, 2017
We have released a new state-of-the-art model for object detection using
Faster R-CNN with
[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This model
achieves an mAP of 43.1% on the COCO test-dev set, improving on the best
available model in the zoo by 6% absolute mAP.
<b>Thanks to contributors</b>: Barret Zoph, Vijay Vasudevan, Jonathon Shlens,
Quoc Le
### August 11, 2017
We have released an update to the
[Android Detect demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
which will now run models trained using the Tensorflow Object Detection API on
an Android device. By default, it currently runs a frozen SSD w/Mobilenet
detector trained on COCO, but we encourage you to try out other detection
models!
<b>Thanks to contributors</b>: Jonathan Huang, Andrew Harp
### June 15, 2017
In addition to our base Tensorflow detection model definitions, this release
includes:
* A selection of trainable detection models, including:
  * Single Shot Multibox Detector (SSD) with MobileNet,
  * SSD with Inception V2,
  * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
  * Faster RCNN with Resnet 101,
  * Faster RCNN with Inception Resnet v2
* Frozen weights (trained on the COCO dataset) for each of the above models to
  be used for out-of-the-box inference purposes.
* A [Jupyter notebook](colab_tutorials/object_detection_tutorial.ipynb) for
  performing out-of-the-box inference with one of our released models
* Convenient [local training](g3doc/running_locally.md) scripts as well as
  distributed training and evaluation pipelines via
  [Google Cloud](g3doc/running_on_cloud.md).
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow, Chen
Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer,
Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings, Viacheslav
Kovalevskyi, Kevin Murphy
## Getting Help
To get help with issues you may encounter using the TensorFlow Object Detection
API, create a new question on [StackOverflow](https://stackoverflow.com/) with
the tags "tensorflow" and "object-detection".
Please report bugs (actually broken code, not usage questions) to the
tensorflow/models GitHub
[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
issue name with "object_detection".
Please check the [FAQ](g3doc/faq.md) for frequently asked questions before
reporting an issue.
## Maintainers
* Jonathan Huang ([@GitHub jch1](https://github.com/jch1))
* Vivek Rathod ([@GitHub tombstone](https://github.com/tombstone))
* Vighnesh Birodkar ([@GitHub vighneshbirodkar](https://github.com/vighneshbirodkar))
* Austin Myers ([@GitHub austin-myers](https://github.com/austin-myers))
* Zhichao Lu ([@GitHub pkulzc](https://github.com/pkulzc))
* Ronny Votel ([@GitHub ronnyvotel](https://github.com/ronnyvotel))
* Yu-hui Chen ([@GitHub yuhuichen1015](https://github.com/yuhuichen1015))
* Derek Chow ([@GitHub derekjchow](https://github.com/derekjchow))
......@@ -17,9 +17,8 @@
"""Tests for box_predictor_builder."""
import unittest
import mock
from unittest import mock # pylint: disable=g-importing-member
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
......
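For reference, the hunk above swaps the third-party `mock` package for the
standard-library `unittest.mock` module. Below is a minimal, self-contained
sketch of a test using the new import style; the patched target is arbitrary
and not taken from the repository.
```python
import os
import unittest
from unittest import mock  # standard library; no external `mock` dependency


class MockImportStyleTest(unittest.TestCase):

  def test_patching_with_unittest_mock(self):
    # The mock API is unchanged; only the import location differs.
    with mock.patch.object(os, 'getcwd', return_value='/tmp/fake') as fake_cwd:
      self.assertEqual(os.getcwd(), '/tmp/fake')
      fake_cwd.assert_called_once()


if __name__ == '__main__':
  unittest.main()
```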
......@@ -390,7 +390,7 @@ class DatasetBuilderTest(test_case.TestCase):
return iter1.get_next(), iter2.get_next()
output_dict1, output_dict2 = self.execute(graph_fn, [])
self.assertAllEqual(['0'], output_dict1[fields.InputDataFields.source_id])
self.assertAllEqual([b'0'], output_dict1[fields.InputDataFields.source_id])
self.assertEqual([b'1'], output_dict2[fields.InputDataFields.source_id])
def test_sample_one_of_n_shards(self):
......
......@@ -58,7 +58,8 @@ def build(input_reader_config):
use_display_name=input_reader_config.use_display_name,
num_additional_channels=input_reader_config.num_additional_channels,
num_keypoints=input_reader_config.num_keypoints,
expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy)
expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy,
load_dense_pose=input_reader_config.load_dense_pose)
return decoder
elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'):
decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
......
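The hunk above threads a new `load_dense_pose` option from the input reader
config through to the TF Example decoder. A hedged sketch of setting that field
on an `InputReader` proto follows; the value is illustrative, and only fields
referenced in the hunk are used.
```python
from google.protobuf import text_format
from object_detection.protos import input_reader_pb2

# Illustrative config: `load_dense_pose` is the field the builder above now
# forwards to the decoder; all other fields keep their defaults.
input_reader_config = text_format.Parse(
    'load_dense_pose: true', input_reader_pb2.InputReader())

# The build() function shown above passes this through as
# load_dense_pose=input_reader_config.load_dense_pose.
assert input_reader_config.load_dense_pose
```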
......@@ -14,7 +14,7 @@
# ==============================================================================
"""Tests for graph_rewriter_builder."""
import unittest
import mock
from unittest import mock # pylint: disable=g-importing-member
import tensorflow.compat.v1 as tf
import tf_slim as slim
......
......@@ -16,6 +16,7 @@
"""A function to build a DetectionModel from configuration."""
import functools
import sys
from object_detection.builders import anchor_generator_builder
from object_detection.builders import box_coder_builder
from object_detection.builders import box_predictor_builder
......@@ -38,6 +39,7 @@ from object_detection.protos import losses_pb2
from object_detection.protos import model_pb2
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import tf_version
## Feature Extractors for TF
......@@ -47,16 +49,20 @@ from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
if tf_version.is_tf2():
from object_detection.models import center_net_hourglass_feature_extractor
from object_detection.models import center_net_mobilenet_v2_feature_extractor
from object_detection.models import center_net_resnet_feature_extractor
from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras
from object_detection.models import faster_rcnn_resnet_keras_feature_extractor as frcnn_resnet_keras
from object_detection.models import ssd_resnet_v1_fpn_keras_feature_extractor as ssd_resnet_v1_fpn_keras
from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_resnet_fpn_keras
from object_detection.models.ssd_mobilenet_v1_fpn_keras_feature_extractor import SSDMobileNetV1FpnKerasFeatureExtractor
from object_detection.models.ssd_mobilenet_v1_keras_feature_extractor import SSDMobileNetV1KerasFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_fpn_keras_feature_extractor import SSDMobileNetV2FpnKerasFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
from object_detection.predictors import rfcn_keras_box_predictor
if sys.version_info[0] >= 3:
from object_detection.models import ssd_efficientnet_bifpn_feature_extractor as ssd_efficientnet_bifpn
if tf_version.is_tf1():
from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
......@@ -98,6 +104,22 @@ if tf_version.is_tf2():
ssd_resnet_v1_fpn_keras.SSDResNet101V1FpnKerasFeatureExtractor,
'ssd_resnet152_v1_fpn_keras':
ssd_resnet_v1_fpn_keras.SSDResNet152V1FpnKerasFeatureExtractor,
'ssd_efficientnet-b0_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB0BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b1_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB1BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b2_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB2BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b3_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB3BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b4_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB4BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b5_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB5BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b6_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB6BiFPNKerasFeatureExtractor,
'ssd_efficientnet-b7_bifpn_keras':
ssd_efficientnet_bifpn.SSDEfficientNetB7BiFPNKerasFeatureExtractor,
}
FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = {
......@@ -109,16 +131,29 @@ if tf_version.is_tf2():
frcnn_resnet_keras.FasterRCNNResnet152KerasFeatureExtractor,
'faster_rcnn_inception_resnet_v2_keras':
frcnn_inc_res_keras.FasterRCNNInceptionResnetV2KerasFeatureExtractor,
'faster_rcnn_resnet50_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet50FpnKerasFeatureExtractor,
'faster_rcnn_resnet101_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet101FpnKerasFeatureExtractor,
'faster_rcnn_resnet152_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet152FpnKerasFeatureExtractor,
}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {
'resnet_v2_50': center_net_resnet_feature_extractor.resnet_v2_50,
'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101,
'resnet_v1_18_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_18_fpn,
'resnet_v1_34_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_34_fpn,
'resnet_v1_50_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_50_fpn,
'resnet_v1_101_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_101_fpn,
'hourglass_104': center_net_hourglass_feature_extractor.hourglass_104,
'hourglass_104':
center_net_hourglass_feature_extractor.hourglass_104,
'mobilenet_v2':
center_net_mobilenet_v2_feature_extractor.mobilenet_v2,
}
FEATURE_EXTRACTOR_MAPS = [
......@@ -303,6 +338,14 @@ def _build_ssd_feature_extractor(feature_extractor_config,
feature_extractor_config.fpn.additional_layer_depth,
})
if feature_extractor_config.HasField('bifpn'):
kwargs.update({
'bifpn_min_level': feature_extractor_config.bifpn.min_level,
'bifpn_max_level': feature_extractor_config.bifpn.max_level,
'bifpn_num_iterations': feature_extractor_config.bifpn.num_iterations,
'bifpn_num_filters': feature_extractor_config.bifpn.num_filters,
'bifpn_combine_method': feature_extractor_config.bifpn.combine_method,
})
return feature_extractor_class(**kwargs)
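Together with the `ssd_efficientnet-bX_bifpn_keras` entries registered above,
the new `bifpn` branch suggests roughly how an EfficientNet-BiFPN extractor
could be selected in a pipeline config. This is a hedged sketch, not a complete
config: the numeric values are illustrative assumptions, and only field names
visible in the diff are used.
```python
from google.protobuf import text_format
from object_detection.protos import model_pb2

# Partial SSD config exercising the bifpn fields mapped into `kwargs` above.
# A real pipeline config would also need num_classes, box_predictor,
# anchor_generator, and the other SSD fields.
ssd_config_text = """
ssd {
  feature_extractor {
    type: 'ssd_efficientnet-b0_bifpn_keras'
    bifpn {
      min_level: 3
      max_level: 7
      num_iterations: 3
      num_filters: 64
    }
  }
}
"""
model_proto = text_format.Parse(ssd_config_text, model_pb2.DetectionModel())
# model_builder.build(model_proto, is_training=True) would then resolve the
# extractor class from the SSD Keras feature-extractor map and pass the bifpn
# values through _build_ssd_feature_extractor as shown above.
```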
......@@ -614,8 +657,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
second_stage_localization_loss_weight)
crop_and_resize_fn = (
ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if frcnn_config.use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
clip_anchors_to_image = (
frcnn_config.clip_anchors_to_image)
......@@ -836,6 +880,22 @@ def mask_proto_to_params(mask_config):
heatmap_bias_init=mask_config.heatmap_bias_init)
def densepose_proto_to_params(densepose_config):
"""Converts CenterNet.DensePoseEstimation proto to parameter namedtuple."""
classification_loss, localization_loss, _, _, _, _, _ = (
losses_builder.build(densepose_config.loss))
return center_net_meta_arch.DensePoseParams(
class_id=densepose_config.class_id,
classification_loss=classification_loss,
localization_loss=localization_loss,
part_loss_weight=densepose_config.part_loss_weight,
coordinate_loss_weight=densepose_config.coordinate_loss_weight,
num_parts=densepose_config.num_parts,
task_loss_weight=densepose_config.task_loss_weight,
upsample_to_input_res=densepose_config.upsample_to_input_res,
heatmap_bias_init=densepose_config.heatmap_bias_init)
def _build_center_net_model(center_net_config, is_training, add_summaries):
"""Build a CenterNet detection model.
......@@ -888,6 +948,11 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
if center_net_config.HasField('mask_estimation_task'):
mask_params = mask_proto_to_params(center_net_config.mask_estimation_task)
densepose_params = None
if center_net_config.HasField('densepose_estimation_task'):
densepose_params = densepose_proto_to_params(
center_net_config.densepose_estimation_task)
return center_net_meta_arch.CenterNetMetaArch(
is_training=is_training,
add_summaries=add_summaries,
......@@ -897,7 +962,8 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
object_center_params=object_center_params,
object_detection_params=object_detection_params,
keypoint_params_dict=keypoint_params_dict,
mask_params=mask_params)
mask_params=mask_params,
densepose_params=densepose_params)
def _build_center_net_feature_extractor(
......
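The new DensePose plumbing above implies a `densepose_estimation_task` block
inside the CenterNet model config. A hedged sketch using only the fields read
by `densepose_proto_to_params` follows; the values are illustrative, and a real
config would also set the feature extractor, the loss, and the other CenterNet
tasks.
```python
from google.protobuf import text_format
from object_detection.protos import model_pb2

# Partial CenterNet config; field names mirror densepose_proto_to_params above.
center_net_text = """
center_net {
  densepose_estimation_task {
    class_id: 0
    num_parts: 24
    part_loss_weight: 1.0
    coordinate_loss_weight: 0.1
    task_loss_weight: 0.5
    upsample_to_input_res: true
    heatmap_bias_init: -2.19
  }
}
"""
model_proto = text_format.Parse(center_net_text, model_pb2.DetectionModel())
# _build_center_net_model() checks HasField('densepose_estimation_task') and
# converts this block into a DensePoseParams namedtuple, which is then passed
# to CenterNetMetaArch as densepose_params.
```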
......@@ -39,6 +39,9 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
def ssd_feature_extractors(self):
raise NotImplementedError
def get_override_base_feature_extractor_hyperparams(self, extractor_type):
raise NotImplementedError
def faster_rcnn_feature_extractors(self):
raise NotImplementedError
......@@ -70,7 +73,6 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -205,6 +207,8 @@ class ModelBuilderTest(test_case.TestCase, parameterized.TestCase):
for extractor_type, extractor_class in self.ssd_feature_extractors().items(
):
model_proto.ssd.feature_extractor.type = extractor_type
model_proto.ssd.feature_extractor.override_base_feature_extractor_hyperparams = (
self.get_override_base_feature_extractor_hyperparams(extractor_type))
model = model_builder.build(model_proto, is_training=True)
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor, extractor_class)
......