Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

parents d8611151 b035a227
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Beam pipeline to create COCO Camera Traps Object Detection TFRecords.
Please note that this tool creates sharded output files.
This tool assumes the input annotations are in the COCO Camera Traps json
format, specified here:
https://github.com/Microsoft/CameraTraps/blob/master/data_management/README.md
Example usage:
python create_cococameratraps_tfexample_main.py \
--alsologtostderr \
--output_tfrecord_prefix="/path/to/output/tfrecord/location/prefix" \
--image_directory="/path/to/image/folder/" \
--input_annotations_file="path/to/annotations.json"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import logging
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from apache_beam import runners
from object_detection.utils import dataset_util
flags.DEFINE_string('image_directory', None, 'Directory where images are '
'stored')
flags.DEFINE_string('output_tfrecord_prefix', None,
'TFRecord containing images in tf.Example format.')
flags.DEFINE_string('input_annotations_file', None, 'Path to COCO-CameraTraps '
'style annotations file.')
flags.DEFINE_integer('num_images_per_shard',
200,
'The number of images to be stored in each shard.')
FLAGS = flags.FLAGS
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
def __init__(self, image_directory, images, annotations, categories,
keep_bboxes):
"""Initialization function.
Args:
image_directory: Path to image directory
images: list of COCO Camera Traps style image dictionaries
annotations: list of COCO Camera Traps style annotation dictionaries
categories: list of COCO Camera Traps style category dictionaries
keep_bboxes: Whether to keep any bounding boxes that exist in the
annotations
"""
self._image_directory = image_directory
self._image_dict = {im['id']: im for im in images}
self._annotation_dict = {im['id']: [] for im in images}
self._category_dict = {int(cat['id']): cat for cat in categories}
for ann in annotations:
self._annotation_dict[ann['image_id']].append(ann)
self._images = images
self._keep_bboxes = keep_bboxes
self._num_examples_processed = beam.metrics.Metrics.counter(
'cococameratraps_data_generation', 'num_tf_examples_processed')
def process(self, image_id):
"""Builds a tf.Example given an image id.
Args:
image_id: the image id of the associated image
Returns:
List of tf.Examples.
"""
image = self._image_dict[image_id]
annotations = self._annotation_dict[image_id]
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
image_location_id = image['location']
image_datetime = str(image['date_captured'])
image_sequence_id = str(image['seq_id'])
image_sequence_num_frames = int(image['seq_num_frames'])
image_sequence_frame_num = int(image['frame_num'])
full_path = os.path.join(self._image_directory, filename)
try:
# Ensure the image exists and is not corrupted
with tf.io.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
# Ensure the image can be read by tf
with tf.Graph().as_default():
image = tf.image.decode_jpeg(encoded_jpg, channels=3)
init_op = tf.tables_initializer()
with tf.Session() as sess:
sess.run(init_op)
sess.run(image)
except Exception as e: # pylint: disable=broad-except
# The image file is missing or corrupt
tf.logging.error(str(e))
return []
key = hashlib.sha256(encoded_jpg).hexdigest()
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
'image/width':
dataset_util.int64_feature(image_width),
'image/filename':
dataset_util.bytes_feature(filename.encode('utf8')),
'image/source_id':
dataset_util.bytes_feature(str(image_id).encode('utf8')),
'image/key/sha256':
dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded':
dataset_util.bytes_feature(encoded_jpg),
'image/format':
dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/location':
dataset_util.bytes_feature(str(image_location_id).encode('utf8')),
'image/seq_num_frames':
dataset_util.int64_feature(image_sequence_num_frames),
'image/seq_frame_num':
dataset_util.int64_feature(image_sequence_frame_num),
'image/seq_id':
dataset_util.bytes_feature(image_sequence_id.encode('utf8')),
'image/date_captured':
dataset_util.bytes_feature(image_datetime.encode('utf8'))
}
num_annotations_skipped = 0
if annotations:
xmin = []
xmax = []
ymin = []
ymax = []
category_names = []
category_ids = []
area = []
for object_annotations in annotations:
if 'bbox' in object_annotations and self._keep_bboxes:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
num_annotations_skipped += 1
continue
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
if 'area' in object_annotations:
area.append(object_annotations['area'])
else:
# Approximate the object area as half the box area (w * h / 2).
area.append(width*height/2.0)
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(
self._category_dict[category_id]['name'].encode('utf8'))
feature_dict.update({
'image/object/bbox/xmin':
dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax':
dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin':
dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_util.float_list_feature(ymax),
'image/object/class/text':
dataset_util.bytes_list_feature(category_names),
'image/object/class/label':
dataset_util.int64_list_feature(category_ids),
'image/object/area':
dataset_util.float_list_feature(area),
})
# For classification, add the first category to image/class/label and
# image/class/text
if not category_ids:
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([0]),
'image/class/text':
dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
})
else:
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([category_ids[0]]),
'image/class/text':
dataset_util.bytes_list_feature([category_names[0]]),
})
else:
# Add empty class if there are no annotations
feature_dict.update({
'image/class/label':
dataset_util.int64_list_feature([0]),
'image/class/text':
dataset_util.bytes_list_feature(['empty'.encode('utf8')]),
})
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
self._num_examples_processed.inc(1)
return [example]
def _load_json_data(data_file):
with tf.io.gfile.GFile(data_file, 'r') as fid:
data_dict = json.load(fid)
return data_dict
def create_pipeline(image_directory,
input_annotations_file,
output_tfrecord_prefix=None,
num_images_per_shard=200,
keep_bboxes=True):
"""Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.
Args:
image_directory: Path to image directory
input_annotations_file: Path to a coco-cameratraps annotation file
output_tfrecord_prefix: Absolute path for tfrecord outputs. Final shards will
be named {output_tfrecord_prefix}-?????-of-?????.
num_images_per_shard: The number of images to store in each shard
keep_bboxes: Whether to keep any bounding boxes that exist in the json file
Returns:
A Beam pipeline.
"""
logging.info('Reading data from COCO-CameraTraps Dataset.')
data = _load_json_data(input_annotations_file)
num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
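# E.g. 1001 images at 200 images per shard yields ceil(1001/200) = 6 shards.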
def pipeline(root):
"""Builds beam pipeline."""
image_examples = (
root
| ('CreateCollections') >> beam.Create(
[im['id'] for im in data['images']])
| ('ParseImage') >> beam.ParDo(ParseImage(
image_directory, data['images'], data['annotations'],
data['categories'], keep_bboxes=keep_bboxes)))
_ = (image_examples
| ('Reshuffle') >> beam.Reshuffle()
| ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord_prefix,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example)))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.output_tfrecord_prefix)
tf.io.gfile.makedirs(dirname)
runner.run(
create_pipeline(
image_directory=FLAGS.image_directory,
input_annotations_file=FLAGS.input_annotations_file,
output_tfrecord_prefix=FLAGS.output_tfrecord_prefix,
num_images_per_shard=FLAGS.num_images_per_shard))
if __name__ == '__main__':
flags.mark_flags_as_required([
'image_directory',
'input_annotations_file',
'output_tfrecord_prefix'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for create_cococameratraps_tfexample_main."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import json
import os
import tempfile
import unittest
import numpy as np
from PIL import Image
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
from apache_beam import runners
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
def _write_random_images_to_directory(self, directory, num_frames):
for frame_num in range(num_frames):
img = np.random.randint(0, high=256,
size=(self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 3),
dtype=np.uint8)
pil_image = Image.fromarray(img)
fname = 'im_' + str(frame_num) + '.jpg'
pil_image.save(os.path.join(directory, fname), 'JPEG')
def _create_json_file(self, directory, num_frames, keep_bboxes=False):
json_dict = {'images': [], 'annotations': []}
json_dict['categories'] = [{'id': 0, 'name': 'empty'},
{'id': 1, 'name': 'animal'}]
for idx in range(num_frames):
im = {'id': 'im_' + str(idx),
'file_name': 'im_' + str(idx) + '.jpg',
'height': self.IMAGE_HEIGHT,
'width': self.IMAGE_WIDTH,
'seq_id': 'seq_1',
'seq_num_frames': num_frames,
'frame_num': idx,
'location': 'loc_' + str(idx),
'date_captured': str(datetime.datetime.now())
}
json_dict['images'].append(im)
ann = {'id': 'ann' + str(idx),
'image_id': 'im_' + str(idx),
'category_id': 1,
}
if keep_bboxes:
ann['bbox'] = [0.0 * self.IMAGE_WIDTH,
0.1 * self.IMAGE_HEIGHT,
0.5 * self.IMAGE_WIDTH,
0.5 * self.IMAGE_HEIGHT]
json_dict['annotations'].append(ann)
json_path = os.path.join(directory, 'test_file.json')
with tf.io.gfile.GFile(json_path, 'w') as f:
json.dump(json_dict, f)
return json_path
def assert_expected_example_bbox(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value,
[self.IMAGE_HEIGHT])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value,
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def assert_expected_example(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value,
[self.IMAGE_HEIGHT])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value,
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_beam_pipeline(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), num_frames)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
def test_beam_pipeline_bbox(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline = create_cococameratraps_tfexample_main.create_pipeline(
temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord,
keep_bboxes=True)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), num_frames)
self.assert_expected_example_bbox(tf.train.Example.FromString(
actual_output[0]))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate detection data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw detection boxes for images in
tf.Examples, under the assumption that each bounding box class label will
match the image-level class label in the tf.Example.
Steps to generate a detection dataset:
1. Use object_detection/export_inference_graph.py to get a `saved_model` for
inference. The input node must accept a tf.Example proto.
2. Run this tool with the `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/detection_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
python generate_detection_data.py \
--alsologtostderr \
--input_tfrecord path/to/input_tfrecord@X \
--output_tfrecord path/to/output_tfrecord@X \
--model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('detection_output_tfrecord', None,
'TFRecord containing detections in tf.Example format.')
flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing '
'an object detection SavedModel.')
flags.DEFINE_float('confidence_threshold', 0.9,
'Min confidence to keep bounding boxes')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateDetectionDataFn(beam.DoFn):
"""Generates detection data for camera trap images.
This Beam DoFn performs inference with an object detection `saved_model` and
produces detection boxes for camera trap data, matched to the
object class.
"""
session_lock = threading.Lock()
def __init__(self, model_dir, confidence_threshold):
"""Initialization function.
Args:
model_dir: A directory containing the exported SavedModel.
confidence_threshold: The confidence threshold for boxes to keep.
"""
self._model_dir = model_dir
self._confidence_threshold = confidence_threshold
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'detection_data_generation', 'num_tf_examples_processed')
def start_bundle(self):
self._load_inference_model()
def _load_inference_model(self):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
with self.session_lock:
if self._session is None:
graph = tf.Graph()
self._session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
self._session, [tf.saved_model.tag_constants.SERVING],
self._model_dir)
signature = meta_graph.signature_def['serving_default']
input_tensor_name = signature.inputs['inputs'].name
self._input = graph.get_tensor_by_name(input_tensor_name)
self._boxes_node = graph.get_tensor_by_name(
signature.outputs['detection_boxes'].name)
self._scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
self._num_detections_node = graph.get_tensor_by_name(
signature.outputs['num_detections'].name)
def process(self, tfrecord_entry):
return self._run_inference_and_generate_detections(tfrecord_entry)
def _run_inference_and_generate_detections(self, tfrecord_entry):
input_example = tf.train.Example.FromString(tfrecord_entry)
if input_example.features.feature[
'image/object/bbox/ymin'].float_list.value:
# There are already ground truth boxes for this image, just keep them.
return [input_example]
detection_boxes, detection_scores, num_detections = self._session.run(
[self._boxes_node, self._scores_node, self._num_detections_node],
feed_dict={self._input: [tfrecord_entry]})
example = tf.train.Example()
num_detections = int(num_detections[0])
image_class_labels = input_example.features.feature[
'image/object/class/label'].int64_list.value
image_class_texts = input_example.features.feature[
'image/object/class/text'].bytes_list.value
# Ignore any images with multiple classes, since we can't match the class
# to a specific box.
if len(image_class_labels) > 1:
return []
# Don't add boxes for images already labeled empty (for now)
if len(image_class_labels) == 1:
# Add boxes over confidence threshold.
for idx, score in enumerate(detection_scores[0]):
if score >= self._confidence_threshold and idx < num_detections:
example.features.feature[
'image/object/bbox/ymin'].float_list.value.extend([
detection_boxes[0, idx, 0]])
example.features.feature[
'image/object/bbox/xmin'].float_list.value.extend([
detection_boxes[0, idx, 1]])
example.features.feature[
'image/object/bbox/ymax'].float_list.value.extend([
detection_boxes[0, idx, 2]])
example.features.feature[
'image/object/bbox/xmax'].float_list.value.extend([
detection_boxes[0, idx, 3]])
# Add box scores and class texts and labels.
example.features.feature[
'image/object/class/score'].float_list.value.extend(
[score])
example.features.feature[
'image/object/class/label'].int64_list.value.extend(
[image_class_labels[0]])
example.features.feature[
'image/object/class/text'].bytes_list.value.extend(
[image_class_texts[0]])
# Add other essential example attributes
example.features.feature['image/encoded'].bytes_list.value.extend(
input_example.features.feature['image/encoded'].bytes_list.value)
example.features.feature['image/height'].int64_list.value.extend(
input_example.features.feature['image/height'].int64_list.value)
example.features.feature['image/width'].int64_list.value.extend(
input_example.features.feature['image/width'].int64_list.value)
example.features.feature['image/source_id'].bytes_list.value.extend(
input_example.features.feature['image/source_id'].bytes_list.value)
example.features.feature['image/location'].bytes_list.value.extend(
input_example.features.feature['image/location'].bytes_list.value)
example.features.feature['image/date_captured'].bytes_list.value.extend(
input_example.features.feature['image/date_captured'].bytes_list.value)
example.features.feature['image/class/text'].bytes_list.value.extend(
input_example.features.feature['image/class/text'].bytes_list.value)
example.features.feature['image/class/label'].int64_list.value.extend(
input_example.features.feature['image/class/label'].int64_list.value)
example.features.feature['image/seq_id'].bytes_list.value.extend(
input_example.features.feature['image/seq_id'].bytes_list.value)
example.features.feature['image/seq_num_frames'].int64_list.value.extend(
input_example.features.feature['image/seq_num_frames'].int64_list.value)
example.features.feature['image/seq_frame_num'].int64_list.value.extend(
input_example.features.feature['image/seq_frame_num'].int64_list.value)
self._num_examples_processed.inc(1)
return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
confidence_threshold, num_shards):
"""Returns a Beam pipeline to run object detection inference.
Args:
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
confidence_threshold: Threshold to use when keeping detection results.
num_shards: The number of output shards.
Returns:
pipeline: A Beam pipeline.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'RunInference' >> beam.ParDo(
GenerateDetectionDataFn(model_dir, confidence_threshold))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.detection_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.detection_input_tfrecord,
FLAGS.detection_output_tfrecord,
FLAGS.detection_model_dir,
FLAGS.confidence_threshold,
FLAGS.num_shards))
if __name__ == '__main__':
flags.mark_flags_as_required([
'detection_input_tfrecord',
'detection_output_tfrecord',
'detection_model_dir'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for generate_detection_data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_detection_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
mock = unittest.mock
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
def preprocess(self, inputs):
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(list(prediction_dict.values())):
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6],
[0.5, 0.5, 0.8, 0.8]]], tf.float32),
'detection_scores': tf.constant([[0.95, 0.6]], tf.float32),
'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2],
[0.3, 0.1, 0.6]]],
tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32)
}
return postprocessed_tensors
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
filename = temp.name
try:
with tf.python_io.TFRecordWriter(filename) as writer:
for value in entries:
writer.write(value)
yield filename
finally:
os.unlink(filename)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GenerateDetectionDataTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path):
"""A function to save checkpoint from a fake Detection Model.
Args:
checkpoint_path: Path to save checkpoint from Fake model.
"""
g = tf.Graph()
with g.as_default():
mock_model = FakeModel(num_classes=5)
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
tf.train.get_or_create_global_step()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session(graph=g) as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _export_saved_model(self):
tmp_dir = self.get_temp_dir()
checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path)
output_directory = os.path.join(tmp_dir, 'output')
saved_model_path = os.path.join(output_directory, 'saved_model')
tf.io.gfile.makedirs(output_directory)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=5)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
outputs, placeholder_tensor = exporter.build_detection_graph(
input_type='tf_example',
detection_model=detection_model,
input_shape=None,
output_collection_name='inference_op',
graph_hook_fn=None)
output_node_names = ','.join(outputs.keys())
saver = tf.train.Saver()
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph='',
clear_devices=True,
initializer_nodes='')
exporter.write_saved_model(
saved_model_path=saved_model_path,
frozen_graph_def=frozen_graph_def,
inputs=placeholder_tensor,
outputs=outputs)
return saved_model_path
def _create_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 6, 3)).astype(np.uint8))).eval()
def BytesFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def Int64Feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(b'image_id'),
'image/height': Int64Feature(4),
'image/width': Int64Feature(6),
'image/object/class/label': Int64Feature(5),
'image/object/class/text': BytesFeature(b'hyena'),
'image/class/label': Int64Feature(5),
'image/class/text': BytesFeature(b'hyena'),
}))
return example.SerializeToString()
def assert_expected_example(self, example):
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/class/score']
.float_list.value, [0.95])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'hyena'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value, [4])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value, [6])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'image_id'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_generate_detection_data_fn(self):
saved_model_path = self._export_saved_model()
confidence_threshold = 0.8
inference_fn = generate_detection_data.GenerateDetectionDataFn(
saved_model_path, confidence_threshold)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assertAllEqual(
output_example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(output_example.features.feature['image/width']
.int64_list.value, [6])
self.assert_expected_example(output_example)
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
confidence_threshold = 0.8
num_shards = 1
pipeline = generate_detection_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
confidence_threshold, num_shards)
runner.run(pipeline)
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 1)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""A Beam job to generate embedding data for camera trap images.
This tool runs inference with an exported Object Detection model in
`saved_model` format and produces raw embeddings for camera trap data. These
embeddings contain an object-centric feature embedding from Faster R-CNN, the
datetime that the image was taken (encoded as normalized year, month, day,
hour, and minute values), and the position of the object of interest. By
default, only the highest-scoring object embedding is included.
Steps to generate an embedding dataset:
1. Use object_detection/export_inference_graph.py to get a Faster R-CNN
`saved_model` for inference. The input node must accept a tf.Example proto.
2. Run this tool with the `saved_model` from step 1 and a TFRecord of tf.Example
protos containing images for inference.
Example Usage:
--------------
python tensorflow_models/object_detection/export_inference_graph.py \
--alsologtostderr \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
python generate_embedding_data.py \
--alsologtostderr \
--embedding_input_tfrecord path/to/input_tfrecords* \
--embedding_output_tfrecord path/to/output_tfrecords \
--embedding_model_dir path/to/exported_model_directory/saved_model
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('embedding_output_tfrecord', None,
'TFRecord containing embeddings in tf.Example format.')
flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing '
'an object detection SavedModel with '
'detection_box_classifier_features in the output.')
flags.DEFINE_integer('top_k_embedding_count', 1,
'The number of top k embeddings to add to the memory bank.'
)
flags.DEFINE_integer('bottom_k_embedding_count', 0,
'The number of bottom k embeddings to add to the memory '
'bank.')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateEmbeddingDataFn(beam.DoFn):
"""Generates embedding data for camera trap images.
This Beam DoFn performs inference with an object detection `saved_model` and
produces contextual embedding vectors.
"""
session_lock = threading.Lock()
def __init__(self, model_dir, top_k_embedding_count,
bottom_k_embedding_count):
"""Initialization function.
Args:
model_dir: A directory containing the exported SavedModel.
top_k_embedding_count: the number of high-confidence embeddings to store
bottom_k_embedding_count: the number of low-confidence embeddings to store
"""
self._model_dir = model_dir
self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter(
'embedding_data_generation', 'num_tf_examples_processed')
self._top_k_embedding_count = top_k_embedding_count
self._bottom_k_embedding_count = bottom_k_embedding_count
def start_bundle(self):
self._load_inference_model()
def _load_inference_model(self):
# Because initialization of the tf.Session is expensive we share
# one instance across all threads in the worker. This is possible since
# tf.Session.run() is thread safe.
with self.session_lock:
if self._session is None:
graph = tf.Graph()
self._session = tf.Session(graph=graph)
with graph.as_default():
meta_graph = tf.saved_model.loader.load(
self._session, [tf.saved_model.tag_constants.SERVING],
self._model_dir)
signature = meta_graph.signature_def['serving_default']
input_tensor_name = signature.inputs['inputs'].name
detection_features_name = signature.outputs['detection_features'].name
detection_boxes_name = signature.outputs['detection_boxes'].name
num_detections_name = signature.outputs['num_detections'].name
self._input = graph.get_tensor_by_name(input_tensor_name)
self._embedding_node = graph.get_tensor_by_name(detection_features_name)
self._box_node = graph.get_tensor_by_name(detection_boxes_name)
self._scores_node = graph.get_tensor_by_name(
signature.outputs['detection_scores'].name)
self._num_detections = graph.get_tensor_by_name(num_detections_name)
tf.logging.info(signature.outputs['detection_features'].name)
tf.logging.info(signature.outputs['detection_boxes'].name)
tf.logging.info(signature.outputs['num_detections'].name)
def process(self, tfrecord_entry):
return self._run_inference_and_generate_embedding(tfrecord_entry)
def _run_inference_and_generate_embedding(self, tfrecord_entry):
input_example = tf.train.Example.FromString(tfrecord_entry)
# Convert the date_captured datetime string to Unix time and store it.
def get_date_captured(example):
date_captured = datetime.datetime.strptime(
six.ensure_str(
example.features.feature[
'image/date_captured'].bytes_list.value[0]),
'%Y-%m-%d %H:%M:%S')
return date_captured
try:
date_captured = get_date_captured(input_example)
except Exception: # pylint: disable=broad-except
# we require date_captured to be available for all images
return []
def embed_date_captured(date_captured):
"""Encodes the datetime of the image."""
embedded_date_captured = []
month_max = 12.0
day_max = 31.0
hour_max = 24.0
minute_max = 60.0
min_year = 1990.0
max_year = 2030.0
year = (date_captured.year-min_year)/float(max_year-min_year)
embedded_date_captured.append(year)
month = (date_captured.month-1)/month_max
embedded_date_captured.append(month)
day = (date_captured.day-1)/day_max
embedded_date_captured.append(day)
hour = date_captured.hour/hour_max
embedded_date_captured.append(hour)
minute = date_captured.minute/minute_max
embedded_date_captured.append(minute)
return np.asarray(embedded_date_captured)
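# Worked example for embed_date_captured above: 2019-10-20 12:12:12 is
# encoded (seconds are dropped) as roughly
# [(2019-1990)/40, (10-1)/12, (20-1)/31, 12/24, 12/60]
# = [0.725, 0.75, 0.613, 0.5, 0.2].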
def embed_position_and_size(box):
"""Encodes the bounding box of the object of interest."""
ymin = box[0]
xmin = box[1]
ymax = box[2]
xmax = box[3]
w = xmax - xmin
h = ymax - ymin
x = xmin + w / 2.0
y = ymin + h / 2.0
return np.asarray([x, y, w, h])
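# Worked example for embed_position_and_size above: the box
# [ymin, xmin, ymax, xmax] = [0.0, 0.1, 0.5, 0.6] maps to
# [x, y, w, h] = [0.35, 0.25, 0.5, 0.5].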
unix_time = (
(date_captured - datetime.datetime.fromtimestamp(0)).total_seconds())
example = tf.train.Example()
example.features.feature['image/unix_time'].float_list.value.extend(
[unix_time])
(detection_features, detection_boxes, num_detections,
detection_scores) = self._session.run(
[
self._embedding_node, self._box_node, self._num_detections[0],
self._scores_node
],
feed_dict={self._input: [tfrecord_entry]})
num_detections = int(num_detections)
embed_all = []
score_all = []
detection_features = np.asarray(detection_features)
def get_bb_embedding(detection_features, detection_boxes, detection_scores,
index):
embedding = detection_features[0][index]
pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0)
box = detection_boxes[0][index]
position_embedding = embed_position_and_size(box)
score = detection_scores[0][index]
return np.concatenate((pooled_embedding, position_embedding)), score
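# get_bb_embedding above returns (num feature channels + 4) values: the
# detection feature map for the box is mean-pooled over its spatial grid
# and then concatenated with the box position/size encoding.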
temporal_embedding = embed_date_captured(date_captured)
embedding_count = 0
for index in range(min(num_detections, self._top_k_embedding_count)):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
for index in range(
max(0, num_detections - 1),
max(-1, num_detections - 1 - self._bottom_k_embedding_count), -1):
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, index)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
embedding_count += 1
if embedding_count == 0:
bb_embedding, score = get_bb_embedding(
detection_features, detection_boxes, detection_scores, 0)
embed_all.extend(bb_embedding)
embed_all.extend(temporal_embedding)
score_all.append(score)
# Takes max in case embedding_count is 0.
embedding_length = len(embed_all) // max(1, embedding_count)
embed_all = np.asarray(embed_all)
example.features.feature['image/embedding'].float_list.value.extend(
embed_all)
example.features.feature['image/embedding_score'].float_list.value.extend(
score_all)
example.features.feature['image/embedding_length'].int64_list.value.append(
embedding_length)
example.features.feature['image/embedding_count'].int64_list.value.append(
embedding_count)
# Add other essential example attributes
example.features.feature['image/encoded'].bytes_list.value.extend(
input_example.features.feature['image/encoded'].bytes_list.value)
example.features.feature['image/height'].int64_list.value.extend(
input_example.features.feature['image/height'].int64_list.value)
example.features.feature['image/width'].int64_list.value.extend(
input_example.features.feature['image/width'].int64_list.value)
example.features.feature['image/source_id'].bytes_list.value.extend(
input_example.features.feature['image/source_id'].bytes_list.value)
example.features.feature['image/location'].bytes_list.value.extend(
input_example.features.feature['image/location'].bytes_list.value)
example.features.feature['image/date_captured'].bytes_list.value.extend(
input_example.features.feature['image/date_captured'].bytes_list.value)
example.features.feature['image/class/text'].bytes_list.value.extend(
input_example.features.feature['image/class/text'].bytes_list.value)
example.features.feature['image/class/label'].int64_list.value.extend(
input_example.features.feature['image/class/label'].int64_list.value)
example.features.feature['image/seq_id'].bytes_list.value.extend(
input_example.features.feature['image/seq_id'].bytes_list.value)
example.features.feature['image/seq_num_frames'].int64_list.value.extend(
input_example.features.feature['image/seq_num_frames'].int64_list.value)
example.features.feature['image/seq_frame_num'].int64_list.value.extend(
input_example.features.feature['image/seq_frame_num'].int64_list.value)
example.features.feature['image/object/bbox/ymax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymax'].float_list.value)
example.features.feature['image/object/bbox/ymin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/ymin'].float_list.value)
example.features.feature['image/object/bbox/xmax'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmax'].float_list.value)
example.features.feature['image/object/bbox/xmin'].float_list.value.extend(
input_example.features.feature[
'image/object/bbox/xmin'].float_list.value)
example.features.feature[
'image/object/class/score'].float_list.value.extend(
input_example.features.feature[
'image/object/class/score'].float_list.value)
example.features.feature[
'image/object/class/label'].int64_list.value.extend(
input_example.features.feature[
'image/object/class/label'].int64_list.value)
example.features.feature[
'image/object/class/text'].bytes_list.value.extend(
input_example.features.feature[
'image/object/class/text'].bytes_list.value)
self._num_examples_processed.inc(1)
return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
top_k_embedding_count, bottom_k_embedding_count,
num_shards):
"""Returns a beam pipeline to run object detection inference.
Args:
input_tfrecord: An TFRecord of tf.train.Example protos containing images.
output_tfrecord: An TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
top_k_embedding_count: The number of high-confidence embeddings to store.
bottom_k_embedding_count: The number of low-confidence embeddings to store.
num_shards: The number of output shards.
"""
def pipeline(root):
input_collection = (
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
bottom_k_embedding_count))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def main(_):
"""Runs the Beam pipeline that performs inference.
Args:
_: unused
"""
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.embedding_input_tfrecord,
FLAGS.embedding_output_tfrecord,
FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count,
FLAGS.bottom_k_embedding_count, FLAGS.num_shards))
if __name__ == '__main__':
flags.mark_flags_as_required([
'embedding_input_tfrecord',
'embedding_output_tfrecord',
'embedding_model_dir'
])
app.run(main)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for generate_embedding_data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_embedding_data
from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
mock = unittest.mock
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
def preprocess(self, inputs):
true_image_shapes = [] # Doesn't matter for the fake model.
return tf.identity(inputs), true_image_shapes
def predict(self, preprocessed_inputs, true_image_shapes):
return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)}
def postprocess(self, prediction_dict, true_image_shapes):
with tf.control_dependencies(list(prediction_dict.values())):
num_features = 100
feature_dims = 10
classifier_feature = np.ones(
(2, feature_dims, feature_dims, num_features),
dtype=np.float32).tolist()
postprocessed_tensors = {
'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6],
[0.5, 0.5, 0.8, 0.8]]], tf.float32),
'detection_scores': tf.constant([[0.95, 0.6]], tf.float32),
'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2],
[0.3, 0.1, 0.6]]],
tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32),
'detection_features':
tf.constant([classifier_feature],
tf.float32)
}
return postprocessed_tensors
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
def regularization_losses(self):
pass
def updates(self):
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
filename = temp.name
try:
with tf.python_io.TFRecordWriter(filename) as writer:
for value in entries:
writer.write(value)
yield filename
finally:
os.unlink(temp.name)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class GenerateEmbeddingData(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path):
"""A function to save checkpoint from a fake Detection Model.
Args:
checkpoint_path: Path to save checkpoint from Fake model.
"""
g = tf.Graph()
with g.as_default():
mock_model = FakeModel(num_classes=5)
preprocessed_inputs, true_image_shapes = mock_model.preprocess(
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs, true_image_shapes)
mock_model.postprocess(predictions, true_image_shapes)
tf.train.get_or_create_global_step()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session(graph=g) as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _export_saved_model(self):
tmp_dir = self.get_temp_dir()
checkpoint_path = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path)
output_directory = os.path.join(tmp_dir, 'output')
saved_model_path = os.path.join(output_directory, 'saved_model')
tf.io.gfile.makedirs(output_directory)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(num_classes=5)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
outputs, placeholder_tensor = exporter.build_detection_graph(
input_type='tf_example',
detection_model=detection_model,
input_shape=None,
output_collection_name='inference_op',
graph_hook_fn=None)
output_node_names = ','.join(outputs.keys())
saver = tf.train.Saver()
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
output_graph='',
clear_devices=True,
initializer_nodes='')
exporter.write_saved_model(
saved_model_path=saved_model_path,
frozen_graph_def=frozen_graph_def,
inputs=placeholder_tensor,
outputs=outputs)
return saved_model_path
def _create_tf_example(self):
with self.test_session():
encoded_image = tf.image.encode_jpeg(
tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval()
def BytesFeature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def Int64Feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def FloatFeature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': BytesFeature(encoded_image),
'image/source_id': BytesFeature(b'image_id'),
'image/height': Int64Feature(400),
'image/width': Int64Feature(600),
'image/class/label': Int64Feature(5),
'image/class/text': BytesFeature(b'hyena'),
'image/object/bbox/xmin': FloatFeature(0.1),
'image/object/bbox/xmax': FloatFeature(0.6),
'image/object/bbox/ymin': FloatFeature(0.0),
'image/object/bbox/ymax': FloatFeature(0.5),
'image/object/class/score': FloatFeature(0.95),
'image/object/class/label': Int64Feature(5),
'image/object/class/text': BytesFeature(b'hyena'),
'image/date_captured': BytesFeature(b'2019-10-20 12:12:12')
}))
return example.SerializeToString()
def assert_expected_example(self, example, topk=False, botk=False):
# Check embeddings
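# With the FakeModel's 100-channel detection features, each per-box
# embedding is 100 pooled values + 4 position values + 5 date values
# = 109 floats, so the top-k/bottom-k cases (2 boxes) expect 2 * 109 = 218.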
if topk or botk:
self.assertEqual(len(
example.features.feature['image/embedding'].float_list.value),
218)
self.assertAllEqual(
example.features.feature['image/embedding_count'].int64_list.value,
[2])
else:
self.assertEqual(len(
example.features.feature['image/embedding'].float_list.value),
109)
self.assertAllEqual(
example.features.feature['image/embedding_count'].int64_list.value,
[1])
self.assertAllEqual(
example.features.feature['image/embedding_length'].int64_list.value,
[109])
# Check annotations
self.assertAllClose(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.0])
self.assertAllClose(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.1])
self.assertAllClose(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.5])
self.assertAllClose(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.6])
self.assertAllClose(
example.features.feature['image/object/class/score']
.float_list.value, [0.95])
self.assertAllClose(
example.features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [5])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'hyena'])
# Check other essential attributes.
self.assertAllEqual(
example.features.feature['image/height'].int64_list.value, [400])
self.assertAllEqual(
example.features.feature['image/width'].int64_list.value, [600])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'image_id'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_generate_embedding_data_fn(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 1
bottom_k_embedding_count = 0
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/label']
.int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text']
.bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example)
def test_generate_embedding_data_with_top_k_boxes(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 2
bottom_k_embedding_count = 0
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example, topk=True)
def test_generate_embedding_data_with_bottom_k_boxes(self):
saved_model_path = self._export_saved_model()
top_k_embedding_count = 0
bottom_k_embedding_count = 2
inference_fn = generate_embedding_data.GenerateEmbeddingDataFn(
saved_model_path, top_k_embedding_count, bottom_k_embedding_count)
inference_fn.start_bundle()
generated_example = self._create_tf_example()
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual(
tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example)
output_example = output[0]
self.assert_expected_example(output_example, botk=True)
def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model()
top_k_embedding_count = 1
bottom_k_embedding_count = 0
num_shards = 1
pipeline = generate_embedding_data.construct_pipeline(
input_tfrecord, output_tfrecord, saved_model_path,
top_k_embedding_count, bottom_k_embedding_count, num_shards)
runner.run(pipeline)
filenames = tf.io.gfile.glob(
output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
for record in record_iterator:
actual_output.append(record)
self.assertEqual(len(actual_output), 1)
self.assert_expected_example(tf.train.Example.FromString(
actual_output[0]))
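The glob pattern `'-?????-of-?????'` above matches TensorFlow's standard sharded-file suffix. As a rough sketch of reading the shards back outside the test (the output path is a hypothetical placeholder), one might do:

```python
# Sketch: reading back sharded TFRecord output. The path is hypothetical;
# '-?????-of-?????' is the standard TensorFlow sharding suffix.
import tensorflow.compat.v1 as tf

filenames = tf.io.gfile.glob('/tmp/output_tfrecord-?????-of-?????')
examples = []
for shard in filenames:
  for record in tf.python_io.tf_record_iterator(path=shard):
    examples.append(tf.train.Example.FromString(record))
print('Read %d examples from %d shards' % (len(examples), len(filenames)))
```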
if __name__ == '__main__':
tf.test.main()
......@@ -14,6 +14,9 @@
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files.
Example usage:
......@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
# DensePose annotations are only available for COCO 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to keep only images/annotations for the person class (for the
# keypoint / DensePose tasks).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS
......@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
_COCO_PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
_DP_PART_ID_OFFSET = 1
def clip_to_unit(x):
return min(max(x, 0.0), 1.0)
def create_tf_example(image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=None):
keypoint_annotations_dict=None,
densepose_annotations_dict=None,
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto.
Args:
......@@ -108,10 +142,23 @@ def create_tf_example(image,
      dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
      dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', u'dp_U', u'dp_V']
representing part surface coordinates. For more information see
http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns:
key: SHA256 hash of the image.
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
......@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
......@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8'))
category_names.append(category_name)
area.append(object_annotations['area'])
if include_masks:
......@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0)
if include_densepose:
annotation_id = object_annotations['id']
if (annotation_id in densepose_annotations_dict and
all(key in densepose_annotations_dict[annotation_id]
for key in densepose_keys)):
dp_annotations = densepose_annotations_dict[annotation_id]
num_densepose_annotation_used += 1
dp_num_points.append(len(dp_annotations['dp_I']))
dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
for i in dp_annotations['dp_I']])
# DensePose surface coordinates are defined on a [256, 256] grid
# relative to each instance box (i.e. absolute coordinates in range
# [0., 256.]). The following converts the coordinates
# so that they are expressed in normalized image coordinates.
dp_x_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
dp_x_norm = [(float(x) + x_box_rel * width) / image_width
for x_box_rel in dp_x_box_rel]
dp_y_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
dp_y_norm = [(float(y) + y_box_rel * height) / image_height
for y_box_rel in dp_y_box_rel]
dp_x.extend(dp_x_norm)
dp_y.extend(dp_y_norm)
dp_u.extend(dp_annotations['dp_U'])
dp_v.extend(dp_annotations['dp_V'])
else:
dp_num_points.append(0)
if (remove_non_person_images and
not any(name == b'person' for name in category_names)):
return (key, None, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
......@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used)
if include_densepose:
feature_dict['image/object/densepose/num'] = (
dataset_util.int64_list_feature(dp_num_points))
feature_dict['image/object/densepose/part_index'] = (
dataset_util.int64_list_feature(dp_part_index))
feature_dict['image/object/densepose/x'] = (
dataset_util.float_list_feature(dp_x))
feature_dict['image/object/densepose/y'] = (
dataset_util.float_list_feature(dp_y))
feature_dict['image/object/densepose/u'] = (
dataset_util.float_list_feature(dp_u))
feature_dict['image/object/densepose/v'] = (
dataset_util.float_list_feature(dp_v))
num_densepose_annotation_skipped = (
len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
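The DensePose surface-coordinate conversion above (together with `clip_to_unit`) can be checked by hand. A minimal standalone sketch, with made-up numbers mirroring the unit test (a 128-wide box at x=64 in a 256-pixel image); the helper name `densepose_x_to_image_norm` is illustrative, not part of this module:

```python
# Sketch of the DensePose x-coordinate conversion performed above.
# dp_x lives on a [0, 256] grid relative to the instance box, so the
# normalized image coordinate is
#   (box_x + clip(dp_x / 256) * box_width) / image_width.
def clip_to_unit(x):
  return min(max(x, 0.0), 1.0)

def densepose_x_to_image_norm(dp_x, box_x, box_width, image_width):
  x_box_rel = clip_to_unit(dp_x / 256.)
  return (float(box_x) + x_box_rel * box_width) / image_width

# A point at the center of the 256-unit grid inside a [64, 64, 128, 128]
# box of a 256x256 image lands at the box center: 0.5 in image coordinates.
assert densepose_x_to_image_norm(128., 64, 128, 256) == 0.5
```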
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks,
num_shards,
keypoint_annotations_file=''):
keypoint_annotations_file='',
densepose_annotations_file='',
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format.
Args:
......@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
......@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count)
logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {}
if keypoint_annotations_file:
......@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images))
......@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped,
num_keypoint_annotations_skipped) = create_tf_example(
densepose_annotations_dict = None
if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict)
keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
if tf_example:
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
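For reference, a hedged sketch of invoking the converter above with DensePose annotations enabled; all paths are hypothetical placeholders:

```python
# Sketch: converting COCO 2014 with DensePose annotations, keeping only
# person images/annotations. Paths are illustrative placeholders.
_create_tf_record_from_coco_annotations(
    annotations_file='/data/annotations/instances_train2014.json',
    image_dir='/data/train2014',
    output_path='/data/tfrecords/coco_train.record',
    include_masks=False,
    num_shards=100,
    densepose_annotations_file='/data/annotations/densepose_train2014.json',
    remove_non_person_annotations=True,
    remove_non_person_images=True)
```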
def main(_):
......@@ -347,20 +492,26 @@ def main(_):
train_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
densepose_annotations_file=FLAGS.train_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file,
FLAGS.val_image_dir,
val_output_path,
FLAGS.include_masks,
num_shards=100,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
num_shards=50,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
densepose_annotations_file=FLAGS.val_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file,
FLAGS.test_image_dir,
testdev_output_path,
FLAGS.include_masks,
num_shards=100)
num_shards=50)
if __name__ == '__main__':
......
......@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0)
......@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
(_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0)
......@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
}
}
(_, example, _,
num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict)
_, example, _, num_keypoint_annotation_skipped, _ = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual(
......@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
......
......@@ -24,10 +24,18 @@ import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import seq_example_util
from object_detection.utils import tf_version
class SeqExampleUtilTest(tf.test.TestCase):
def materialize_tensors(self, list_of_tensors):
if tf_version.is_tf2():
return [tensor.numpy() for tensor in list_of_tensors]
else:
with self.cached_session() as sess:
return sess.run(list_of_tensors)
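The helper above lets one test body run under both TF1 graph mode and TF2 eager mode. A minimal standalone sketch of the same bridging idea, assuming nothing beyond public TensorFlow APIs:

```python
# Sketch: materializing a tensor under either execution mode. Under TF2
# eager execution tensors expose .numpy(); under TF1 they must be
# evaluated inside a session.
import numpy as np
import tensorflow as tf

image = tf.constant(np.zeros((4, 4, 3), dtype=np.uint8))
encoded = tf.io.encode_jpeg(image)
if tf.executing_eagerly():
  encoded_bytes = encoded.numpy()
else:
  with tf.compat.v1.Session() as sess:
    encoded_bytes = sess.run(encoded)
assert isinstance(encoded_bytes, bytes)
```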
def test_make_unlabeled_example(self):
num_frames = 5
image_height = 100
......@@ -41,8 +49,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
image_source_ids = [str(idx) for idx in range(num_frames)]
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
seq_example = seq_example_util.make_sequence_example(
dataset_name=dataset_name,
video_id=video_id,
......@@ -109,8 +116,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
timestamps = [100000, 110000]
is_annotated = [1, 0]
bboxes = [
......@@ -208,8 +214,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
encoded_images = self.materialize_tensors(encoded_images_list)
bboxes = [
np.array([[0., 0., 0.75, 0.75],
[0., 0., 1., 1.]], dtype=np.float32),
......@@ -283,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75, 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
['cat', 'frog'],
[b'cat', b'frog'],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
self.assertAllClose(
[0.],
......@@ -327,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
['cat'],
[b'cat'],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
self.assertAllClose(
[],
......
......@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)])
self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
if __name__ == '__main__':
......
FROM tensorflow/tensorflow:1.15.2-gpu-py3
ARG DEBIAN_FRONTEND=noninteractive
# Install apt dependencies
RUN apt-get update && apt-get install -y \
git \
gpg-agent \
python3-cairocffi \
protobuf-compiler \
python3-pil \
python3-lxml \
python3-tk \
wget
# Install gcloud and gsutil commands
# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
apt-get update -y && apt-get install google-cloud-sdk -y
# Add new user to avoid running as root
RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
These instructions are experimental.
## Building and running:
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/1.15/Dockerfile -t od .
docker run -it od
```
FROM tensorflow/tensorflow:2.2.0-gpu
ARG DEBIAN_FRONTEND=noninteractive
# Install apt dependencies
RUN apt-get update && apt-get install -y \
git \
gpg-agent \
python3-cairocffi \
protobuf-compiler \
python3-pil \
python3-lxml \
python3-tk \
wget
# Install gcloud and gsutil commands
# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
apt-get update -y && apt-get install google-cloud-sdk -y
# Add new user to avoid running as root
RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
These instructions are experimental.
## Building and running:
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/2.2/Dockerfile -t od .
docker run -it od
```
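Once inside the container, a quick sanity check might look like the following sketch (assumes the protos were compiled by the Dockerfile's `protoc` step):

```python
# Quick sanity check inside the container: TensorFlow and the compiled
# object_detection protos should both import cleanly.
import tensorflow as tf
from object_detection.protos import pipeline_pb2
from object_detection.utils import dataset_util

print(tf.__version__)  # expected: 2.2.0 for this image
config = pipeline_pb2.TrainEvalPipelineConfig()  # proto compiled by protoc
print(type(config).__name__)
```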
......@@ -52,6 +52,8 @@ EVAL_METRICS_CLASS_DICT = {
coco_evaluation.CocoKeypointEvaluator,
'coco_mask_metrics':
coco_evaluation.CocoMaskEvaluator,
'coco_panoptic_metrics':
coco_evaluation.CocoPanopticSegmentationEvaluator,
'oid_challenge_detection_metrics':
object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
'oid_challenge_segmentation_metrics':
......
......@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import numpy as np
......@@ -30,6 +31,7 @@ from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import test_case
from object_detection.utils import tf_version
class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
......@@ -127,6 +129,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections(self, batch_size=1,
max_gt_boxes=None,
scale_to_absolute=False):
......@@ -155,6 +158,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
......@@ -185,6 +189,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
{'batch_size': 1, 'max_gt_boxes': None, 'scale_to_absolute': False},
{'batch_size': 8, 'max_gt_boxes': [1], 'scale_to_absolute': False}
)
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_for_coco_detections_and_resized_masks(
self, batch_size=1, max_gt_boxes=None, scale_to_absolute=False):
eval_config = eval_pb2.EvalConfig()
......@@ -210,6 +215,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])
@unittest.skipIf(tf_version.is_tf2(), 'Only compatible with TF1.X')
def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.extend(['unsupported_metric'])
......@@ -334,63 +340,67 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
dtype=np.float32)
detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
dtype=np.float32)
detections = {
detection_fields.detection_boxes:
tf.constant(detection_boxes),
detection_fields.detection_scores:
tf.constant([[1.], [1.]]),
detection_fields.detection_classes:
tf.constant([[1], [2]]),
detection_fields.num_detections:
tf.constant([1, 1]),
detection_fields.detection_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
gt_boxes = detection_boxes
groundtruth = {
input_data_fields.groundtruth_boxes:
tf.constant(gt_boxes),
input_data_fields.groundtruth_classes:
tf.constant([[1.], [1.]]),
input_data_fields.groundtruth_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
result = eval_util.result_dict_for_batched_example(
image, key, detections, groundtruth,
scale_to_absolute=True,
true_image_shapes=true_image_shapes,
original_image_spatial_shapes=original_image_spatial_shapes,
max_gt_boxes=tf.constant(1))
with self.test_session() as sess:
result = sess.run(result)
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
result[input_data_fields.groundtruth_boxes])
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [150., 150.], [300., 300.]]]],
result[input_data_fields.groundtruth_keypoints])
# Predictions from the model are not scaled.
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
result[detection_fields.detection_boxes])
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [75., 150.], [150., 300.]]]],
result[detection_fields.detection_keypoints])
def graph_fn():
detections = {
detection_fields.detection_boxes:
tf.constant(detection_boxes),
detection_fields.detection_scores:
tf.constant([[1.], [1.]]),
detection_fields.detection_classes:
tf.constant([[1], [2]]),
detection_fields.num_detections:
tf.constant([1, 1]),
detection_fields.detection_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
gt_boxes = detection_boxes
groundtruth = {
input_data_fields.groundtruth_boxes:
tf.constant(gt_boxes),
input_data_fields.groundtruth_classes:
tf.constant([[1.], [1.]]),
input_data_fields.groundtruth_keypoints:
tf.tile(
tf.reshape(
tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
multiples=[2, 1, 1, 1])
}
image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])
result = eval_util.result_dict_for_batched_example(
image, key, detections, groundtruth,
scale_to_absolute=True,
true_image_shapes=true_image_shapes,
original_image_spatial_shapes=original_image_spatial_shapes,
max_gt_boxes=tf.constant(1))
return (result[input_data_fields.groundtruth_boxes],
result[input_data_fields.groundtruth_keypoints],
result[detection_fields.detection_boxes],
result[detection_fields.detection_keypoints])
(gt_boxes, gt_keypoints, detection_boxes,
detection_keypoints) = self.execute_cpu(graph_fn, [])
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
gt_boxes)
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [150., 150.], [300., 300.]]]],
gt_keypoints)
# Predictions from the model are not scaled.
self.assertAllEqual(
[[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
detection_boxes)
self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
[[[0., 0.], [75., 150.], [150., 300.]]]],
detection_keypoints)
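The refactor above wraps all TF ops in a `graph_fn` closure and evaluates it with `self.execute_cpu`, which runs the same test body under TF1 graph mode and TF2 eager mode. A minimal sketch of the pattern; `ScaleBoxesTest` and its contents are illustrative, not part of this change:

```python
# Sketch of the graph_fn / execute_cpu test pattern used above.
import tensorflow.compat.v1 as tf
from object_detection.utils import test_case

class ScaleBoxesTest(test_case.TestCase):

  def test_scale(self):
    def graph_fn():
      boxes = tf.constant([[0., 0., 0.5, 0.5]])
      return boxes * 2.0  # all TF ops live inside graph_fn
    scaled = self.execute_cpu(graph_fn, [])  # [] -> no placeholder inputs
    self.assertAllClose([[0., 0., 1., 1.]], scaled)
```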
if __name__ == '__main__':
......
......@@ -134,6 +134,30 @@ flags.DEFINE_string('config_override', '',
'text proto to override pipeline_config_path.')
flags.DEFINE_boolean('write_inference_graph', False,
'If true, writes inference graph to disk.')
flags.DEFINE_string('additional_output_tensor_names', None,
'Additional Tensors to output, to be specified as a comma '
'separated list of tensor names.')
flags.DEFINE_boolean('use_side_inputs', False,
'If True, uses side inputs as well as image inputs.')
flags.DEFINE_string('side_input_shapes', None,
'If use_side_inputs is True, this explicitly sets '
'the shape of the side input tensors to a fixed size. The '
'dimensions are to be provided as a comma-separated list '
'of integers. A value of -1 can be used for unknown '
'dimensions. A `/` denotes a break, starting the shape of '
'the next side input tensor. This flag is required if '
'using side inputs.')
flags.DEFINE_string('side_input_types', None,
'If use_side_inputs is True, this explicitly sets '
'the type of the side input tensors. The '
'dimensions are to be provided as a comma-separated list '
'of types, each of `string`, `integer`, or `float`. '
'This flag is required if using side inputs.')
flags.DEFINE_string('side_input_names', None,
'If use_side_inputs is True, this explicitly sets '
'the names of the side input tensors required by the model '
'assuming the names will be a comma-separated list of '
'strings. This flag is required if using side inputs.')
tf.app.flags.mark_flag_as_required('pipeline_config_path')
tf.app.flags.mark_flag_as_required('trained_checkpoint_prefix')
tf.app.flags.mark_flag_as_required('output_directory')
......@@ -152,10 +176,30 @@ def main(_):
]
else:
input_shape = None
if FLAGS.use_side_inputs:
side_input_shapes, side_input_names, side_input_types = (
exporter.parse_side_inputs(
FLAGS.side_input_shapes,
FLAGS.side_input_names,
FLAGS.side_input_types))
else:
side_input_shapes = None
side_input_names = None
side_input_types = None
if FLAGS.additional_output_tensor_names:
additional_output_tensor_names = list(
FLAGS.additional_output_tensor_names.split(','))
else:
additional_output_tensor_names = None
exporter.export_inference_graph(
FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_prefix,
FLAGS.output_directory, input_shape=input_shape,
write_inference_graph=FLAGS.write_inference_graph)
write_inference_graph=FLAGS.write_inference_graph,
additional_output_tensor_names=additional_output_tensor_names,
use_side_inputs=FLAGS.use_side_inputs,
side_input_shapes=side_input_shapes,
side_input_names=side_input_names,
side_input_types=side_input_types)
if __name__ == '__main__':
......
......@@ -24,16 +24,19 @@ import tensorflow.compat.v1 as tf
from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import types_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.builders import post_processing_builder
from object_detection.core import box_list
from object_detection.utils import tf_version
_DEFAULT_NUM_CHANNELS = 3
_DEFAULT_NUM_COORD_BOX = 4
if tf_version.is_tf1():
from tensorflow.tools.graph_transforms import TransformGraph # pylint: disable=g-import-not-at-top
def get_const_center_size_encoded_anchors(anchors):
"""Exports center-size encoded anchors as a constant tensor.
......
......@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
......@@ -32,6 +33,7 @@ from object_detection.core import model
from object_detection.protos import graph_rewriter_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
......@@ -72,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes):
pass
......@@ -82,6 +87,7 @@ class FakeModel(model.DetectionModel):
pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ExportTfliteGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self,
......@@ -413,7 +419,7 @@ class ExportTfliteGraphTest(tf.test.TestCase):
tflite_graph_file = self._export_graph_with_postprocessing_op(
pipeline_config)
self.assertTrue(os.path.exists(tflite_graph_file))
mock_get.assert_called_once()
self.assertEqual(1, mock_get.call_count)
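Replacing `mock_get.assert_called_once()` with an explicit `call_count` check is likely a portability fix: on some older Python/mock versions `assert_called_once` did not exist, and an unknown `assert_*` attribute on a `Mock` could pass vacuously. A minimal sketch of the portable form:

```python
# Sketch: counting calls explicitly behaves the same on every mock version.
from unittest import mock

m = mock.Mock()
m('some-arg')
assert m.call_count == 1  # portable across mock versions
```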
if __name__ == '__main__':
......
......@@ -39,6 +39,54 @@ except ImportError:
freeze_graph_with_def_protos = freeze_graph.freeze_graph_with_def_protos
def parse_side_inputs(side_input_shapes_string, side_input_names_string,
side_input_types_string):
"""Parses side input flags.
Args:
side_input_shapes_string: The shape of the side input tensors, provided as a
comma-separated list of integers. A value of -1 is used for unknown
dimensions. A `/` denotes a break, starting the shape of the next side
input tensor.
side_input_names_string: The names of the side input tensors, provided as a
comma-separated list of strings.
    side_input_types_string: The types of the side input tensors, provided as
      a comma-separated list, each one of `string`, `int`, or `float`.
Returns:
side_input_shapes: A list of shapes.
side_input_names: A list of strings.
side_input_types: A list of tensorflow dtypes.
"""
if side_input_shapes_string:
side_input_shapes = []
for side_input_shape_list in side_input_shapes_string.split('/'):
side_input_shape = [
int(dim) if dim != '-1' else None
for dim in side_input_shape_list.split(',')
]
side_input_shapes.append(side_input_shape)
else:
raise ValueError('When using side_inputs, side_input_shapes must be '
'specified in the input flags.')
if side_input_names_string:
side_input_names = list(side_input_names_string.split(','))
else:
raise ValueError('When using side_inputs, side_input_names must be '
'specified in the input flags.')
if side_input_types_string:
typelookup = {'float': tf.float32, 'int': tf.int32, 'string': tf.string}
side_input_types = [
typelookup[side_input_type]
for side_input_type in side_input_types_string.split(',')
]
else:
raise ValueError('When using side_inputs, side_input_types must be '
'specified in the input flags.')
return side_input_shapes, side_input_names, side_input_types
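A short usage sketch for the parser above (assuming this module's `parse_side_inputs` is in scope; the names and shapes are illustrative):

```python
# Sketch: parsing two side inputs -- a float matrix with an unknown first
# dimension and a single-element string tensor.
shapes, names, types = parse_side_inputs(
    side_input_shapes_string='-1,4/1',
    side_input_names_string='extra_features,camera_id',
    side_input_types_string='float,string')
# shapes == [[None, 4], [1]]
# names  == ['extra_features', 'camera_id']
# types  == [tf.float32, tf.string]
```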
def rewrite_nn_resize_op(is_quantized=False):
"""Replaces a custom nearest-neighbor resize op with the Tensorflow version.
......@@ -140,6 +188,14 @@ def _image_tensor_input_placeholder(input_shape=None):
return input_tensor, input_tensor
def _side_input_tensor_placeholder(side_input_shape, side_input_name,
side_input_type):
"""Returns side input placeholder and side input tensor."""
side_input_tensor = tf.placeholder(
dtype=side_input_type, shape=side_input_shape, name=side_input_name)
return side_input_tensor, side_input_tensor
def _tf_example_input_placeholder(input_shape=None):
"""Returns input that accepts a batch of strings with tf examples.
......@@ -200,7 +256,7 @@ input_placeholder_fn_map = {
'image_tensor': _image_tensor_input_placeholder,
'encoded_image_string_tensor':
_encoded_image_string_tensor_input_placeholder,
'tf_example': _tf_example_input_placeholder,
'tf_example': _tf_example_input_placeholder
}
......@@ -312,7 +368,7 @@ def write_saved_model(saved_model_path,
Args:
saved_model_path: Path to write SavedModel.
frozen_graph_def: tf.GraphDef holding frozen graph.
inputs: The input placeholder tensor.
inputs: A tensor dictionary containing the inputs to a DetectionModel.
outputs: A tensor dictionary containing the outputs of a DetectionModel.
"""
with tf.Graph().as_default():
......@@ -322,8 +378,13 @@ def write_saved_model(saved_model_path,
builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path)
tensor_info_inputs = {
'inputs': tf.saved_model.utils.build_tensor_info(inputs)}
tensor_info_inputs = {}
if isinstance(inputs, dict):
for k, v in inputs.items():
tensor_info_inputs[k] = tf.saved_model.utils.build_tensor_info(v)
else:
tensor_info_inputs['inputs'] = tf.saved_model.utils.build_tensor_info(
inputs)
tensor_info_outputs = {}
for k, v in outputs.items():
tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v)
......@@ -364,11 +425,11 @@ def write_graph_and_checkpoint(inference_graph_def,
def _get_outputs_from_inputs(input_tensors, detection_model,
output_collection_name):
output_collection_name, **side_inputs):
inputs = tf.cast(input_tensors, dtype=tf.float32)
preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(
preprocessed_inputs, true_image_shapes)
preprocessed_inputs, true_image_shapes, **side_inputs)
postprocessed_tensors = detection_model.postprocess(
output_tensors, true_image_shapes)
return add_output_tensor_nodes(postprocessed_tensors,
......@@ -376,32 +437,45 @@ def _get_outputs_from_inputs(input_tensors, detection_model,
def build_detection_graph(input_type, detection_model, input_shape,
output_collection_name, graph_hook_fn):
output_collection_name, graph_hook_fn,
use_side_inputs=False, side_input_shapes=None,
side_input_names=None, side_input_types=None):
"""Build the detection graph."""
if input_type not in input_placeholder_fn_map:
raise ValueError('Unknown input type: {}'.format(input_type))
placeholder_args = {}
side_inputs = {}
if input_shape is not None:
if (input_type != 'image_tensor' and
input_type != 'encoded_image_string_tensor' and
input_type != 'tf_example'):
input_type != 'tf_example' and
input_type != 'tf_sequence_example'):
raise ValueError('Can only specify input shape for `image_tensor`, '
'`encoded_image_string_tensor`, or `tf_example` '
'inputs.')
'`encoded_image_string_tensor`, `tf_example`, '
' or `tf_sequence_example` inputs.')
placeholder_args['input_shape'] = input_shape
placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type](
**placeholder_args)
placeholder_tensors = {'inputs': placeholder_tensor}
if use_side_inputs:
for idx, side_input_name in enumerate(side_input_names):
side_input_placeholder, side_input = _side_input_tensor_placeholder(
side_input_shapes[idx], side_input_name, side_input_types[idx])
side_inputs[side_input_name] = side_input
placeholder_tensors[side_input_name] = side_input_placeholder
outputs = _get_outputs_from_inputs(
input_tensors=input_tensors,
detection_model=detection_model,
output_collection_name=output_collection_name)
output_collection_name=output_collection_name,
**side_inputs)
# Add global step to the graph.
slim.get_or_create_global_step()
if graph_hook_fn: graph_hook_fn()
return outputs, placeholder_tensor
return outputs, placeholder_tensors
def _export_inference_graph(input_type,
......@@ -414,7 +488,11 @@ def _export_inference_graph(input_type,
output_collection_name='inference_op',
graph_hook_fn=None,
write_inference_graph=False,
temp_checkpoint_prefix=''):
temp_checkpoint_prefix='',
use_side_inputs=False,
side_input_shapes=None,
side_input_names=None,
side_input_types=None):
"""Export helper."""
tf.gfile.MakeDirs(output_directory)
frozen_graph_path = os.path.join(output_directory,
......@@ -422,12 +500,16 @@ def _export_inference_graph(input_type,
saved_model_path = os.path.join(output_directory, 'saved_model')
model_path = os.path.join(output_directory, 'model.ckpt')
outputs, placeholder_tensor = build_detection_graph(
outputs, placeholder_tensor_dict = build_detection_graph(
input_type=input_type,
detection_model=detection_model,
input_shape=input_shape,
output_collection_name=output_collection_name,
graph_hook_fn=graph_hook_fn)
graph_hook_fn=graph_hook_fn,
use_side_inputs=use_side_inputs,
side_input_shapes=side_input_shapes,
side_input_names=side_input_names,
side_input_types=side_input_types)
profile_inference_graph(tf.get_default_graph())
saver_kwargs = {}
......@@ -464,7 +546,8 @@ def _export_inference_graph(input_type,
f.write(str(inference_graph_def))
if additional_output_tensor_names is not None:
output_node_names = ','.join(outputs.keys()+additional_output_tensor_names)
output_node_names = ','.join(list(outputs.keys())+(
additional_output_tensor_names))
else:
output_node_names = ','.join(outputs.keys())
......@@ -480,7 +563,7 @@ def _export_inference_graph(input_type,
initializer_nodes='')
write_saved_model(saved_model_path, frozen_graph_def,
placeholder_tensor, outputs)
placeholder_tensor_dict, outputs)
def export_inference_graph(input_type,
......@@ -490,7 +573,11 @@ def export_inference_graph(input_type,
input_shape=None,
output_collection_name='inference_op',
additional_output_tensor_names=None,
write_inference_graph=False):
write_inference_graph=False,
use_side_inputs=False,
side_input_shapes=None,
side_input_names=None,
side_input_types=None):
"""Exports inference graph for the model specified in the pipeline config.
Args:
......@@ -506,6 +593,13 @@ def export_inference_graph(input_type,
additional_output_tensor_names: list of additional output
tensors to include in the frozen graph.
write_inference_graph: If true, writes inference graph to disk.
use_side_inputs: If True, the model requires side_inputs.
side_input_shapes: List of shapes of the side input tensors,
required if use_side_inputs is True.
side_input_names: List of names of the side input tensors,
required if use_side_inputs is True.
side_input_types: List of types of the side input tensors,
required if use_side_inputs is True.
"""
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
......@@ -524,7 +618,11 @@ def export_inference_graph(input_type,
input_shape,
output_collection_name,
graph_hook_fn=graph_rewriter_fn,
write_inference_graph=write_inference_graph)
write_inference_graph=write_inference_graph,
use_side_inputs=use_side_inputs,
side_input_shapes=side_input_shapes,
side_input_names=side_input_names,
side_input_types=side_input_types)
pipeline_config.eval_config.use_moving_averages = False
config_util.save_pipeline_config(pipeline_config, output_directory)
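Putting the side-input plumbing together, a hedged sketch of exporting a model that takes one extra float side input of shape `[1, 128]`; the paths and the side input name are hypothetical:

```python
# Sketch: exporting an inference graph with one side input.
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('/models/pipeline.config', 'r') as f:  # hypothetical path
  text_format.Merge(f.read(), pipeline_config)

exporter.export_inference_graph(
    input_type='image_tensor',
    pipeline_config=pipeline_config,
    trained_checkpoint_prefix='/models/model.ckpt-10000',  # hypothetical
    output_directory='/models/export',
    use_side_inputs=True,
    side_input_shapes=[[1, 128]],
    side_input_names=['extra_embedding'],  # hypothetical side input name
    side_input_types=[tf.float32])
```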
......