Commit 657dcda5 authored by Kaushik Shivakumar

pull latest

parents 26e24e21 e6017471
@@ -66,6 +66,11 @@ class InputDataFields(object):
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
groundtruth_label_weights: groundtruth label weights.
groundtruth_weights: groundtruth weight factor for bounding boxes.
groundtruth_dp_num_points: The number of DensePose sampled points for each
instance.
groundtruth_dp_part_ids: Part indices for DensePose points.
groundtruth_dp_surface_coords: Image locations and UV coordinates for
DensePose points.
num_groundtruth_boxes: number of groundtruth boxes.
is_annotated: whether an image has been labeled or not.
true_image_shapes: true shapes of images in the resized images, as resized
@@ -108,6 +113,9 @@ class InputDataFields(object):
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
groundtruth_label_weights = 'groundtruth_label_weights'
groundtruth_weights = 'groundtruth_weights'
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
num_groundtruth_boxes = 'num_groundtruth_boxes'
is_annotated = 'is_annotated'
true_image_shape = 'true_image_shape'
...
@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import label_map_util
from object_detection.utils import shape_utils
# pylint: disable=g-import-not-at-top
try:
@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False,
expand_hierarchy_labels=False,
load_dense_pose=False):
"""Constructor sets keys_to_features and items_to_handlers. """Constructor sets keys_to_features and items_to_handlers.
Args: Args:
...@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder): ...@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
account the provided hierarchy in the label_map_proto_file. For positive account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes, classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants. the labels are expanded to descendants.
load_dense_pose: Whether to load DensePose annotations.
Raises:
ValueError: If `instance_mask_type` option is not one of
@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self._decode_png_instance_masks))
else:
raise ValueError('Did not recognize the `instance_mask_type` option.')
if load_dense_pose:
self.keys_to_features['image/object/densepose/num'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/part_index'] = (
tf.VarLenFeature(tf.int64))
self.keys_to_features['image/object/densepose/x'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/y'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/u'] = (
tf.VarLenFeature(tf.float32))
self.keys_to_features['image/object/densepose/v'] = (
tf.VarLenFeature(tf.float32))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_num_points] = (
slim_example_decoder.Tensor('image/object/densepose/num'))
self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/part_index',
'image/object/densepose/num'], self._dense_pose_part_indices))
self.items_to_handlers[
fields.InputDataFields.groundtruth_dp_surface_coords] = (
slim_example_decoder.ItemHandlerCallback(
['image/object/densepose/x', 'image/object/densepose/y',
'image/object/densepose/u', 'image/object/densepose/v',
'image/object/densepose/num'],
self._dense_pose_surface_coordinates))
if label_map_proto_file:
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
dtype=tf.int32)
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
dtype=tf.int32)
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _dense_pose_part_indices(self, keys_to_tensors):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
in the image. Note that instances with fewer sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
part_index = keys_to_tensors['image/object/densepose/part_index']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(part_index, tf.SparseTensor):
part_index = tf.sparse_tensor_to_dense(part_index)
part_index = tf.cast(part_index, dtype=tf.int32)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_parts_tensor(instance_ind):
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
part_inds = part_index[points_range_start:points_range_end]
return shape_utils.pad_or_clip_nd(part_inds,
output_shape=[max_points_per_instance])
return tf.map_fn(pad_parts_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.int32)
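For reference, a minimal standalone sketch (not part of this commit) of the padding scheme this handler implements, using the same toy data as the new test further down; it assumes TF1-style graph execution:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

num_points = tf.constant([0, 4, 2])            # sampled points per instance
part_index = tf.constant([2, 2, 3, 4, 2, 9])   # flat part ids across instances

max_points = tf.reduce_max(num_points)
cumulative = tf.concat([[0], tf.cumsum(num_points)], axis=0)

def pad_parts(i):
  # Slice this instance's ids out of the flat tensor, then zero-pad on the
  # right up to the longest instance in the image.
  ids = part_index[cumulative[i]:cumulative[i + 1]]
  return tf.pad(ids, [[0, max_points - tf.size(ids)]])

padded = tf.map_fn(pad_parts, tf.range(tf.size(num_points)), dtype=tf.int32)
with tf.Session() as sess:
  print(sess.run(padded))  # [[0 0 0 0] [2 2 3 4] [2 9 0 0]]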
def _dense_pose_surface_coordinates(self, keys_to_tensors):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
point contains (y, x, v, u) data for each sampled DensePose point. The
(y, x) coordinate has normalized image locations for the point, and (v, u)
contains the surface coordinate (also normalized) for the part. The value
`num_points` corresponds to the maximum number of sampled points across
all instances in the image. Note that instances with fewer sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances = keys_to_tensors['image/object/densepose/num']
dp_y = keys_to_tensors['image/object/densepose/y']
dp_x = keys_to_tensors['image/object/densepose/x']
dp_v = keys_to_tensors['image/object/densepose/v']
dp_u = keys_to_tensors['image/object/densepose/u']
if isinstance(num_points_per_instances, tf.SparseTensor):
num_points_per_instances = tf.sparse_tensor_to_dense(
num_points_per_instances)
if isinstance(dp_y, tf.SparseTensor):
dp_y = tf.sparse_tensor_to_dense(dp_y)
if isinstance(dp_x, tf.SparseTensor):
dp_x = tf.sparse_tensor_to_dense(dp_x)
if isinstance(dp_v, tf.SparseTensor):
dp_v = tf.sparse_tensor_to_dense(dp_v)
if isinstance(dp_u, tf.SparseTensor):
dp_u = tf.sparse_tensor_to_dense(dp_u)
max_points_per_instance = tf.cast(
tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
num_points_cumulative = tf.concat([
[0], tf.math.cumsum(num_points_per_instances)], axis=0)
def pad_surface_coordinates_tensor(instance_ind):
"""Pads DensePose surface coordinates for each instance."""
points_range_start = num_points_cumulative[instance_ind]
points_range_end = num_points_cumulative[instance_ind + 1]
y = dp_y[points_range_start:points_range_end]
x = dp_x[points_range_start:points_range_end]
v = dp_v[points_range_start:points_range_end]
u = dp_u[points_range_start:points_range_end]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor = tf.stack([y, x, v, u], axis=1)
return shape_utils.pad_or_clip_nd(
unpadded_tensor, output_shape=[max_points_per_instance, 4])
return tf.map_fn(pad_surface_coordinates_tensor,
tf.range(tf.size(num_points_per_instances)),
dtype=tf.float32)
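As a quick NumPy analogue (an illustration, not the decoder code itself): stacking one instance's (y, x, v, u) values column-wise and zero-padding to a per-image maximum of four points looks like this:

import numpy as np

# Instance with 2 sampled points (the third instance in the test below).
y, x = [0.5, 0.4], [0.5, 0.6]
v, u = [0.95, 0.94], [0.05, 0.06]
stacked = np.stack([y, x, v, u], axis=1)          # shape (2, 4)
padded = np.pad(stacked, ((0, 4 - stacked.shape[0]), (0, 0)))
# rows: [0.5 0.5 0.95 0.05], [0.4 0.6 0.94 0.06], then two all-zero rows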
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
"""Expand image level labels according to the hierarchy.
...
@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn, [])
self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
tensor_dict)
def testDecodeImageLabels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_1, [])
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 2]))
@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
return example_decoder.decode(tf.convert_to_tensor(example))
tensor_dict = self.execute_cpu(graph_fn_2, [])
self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
self.assertAllEqual(
tensor_dict[fields.InputDataFields.groundtruth_image_classes],
np.array([1, 3]))
@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
expected_image_confidence,
tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
def testDecodeDensePose(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg, _ = self._create_encoded_and_decoded_data(
image_tensor, 'jpeg')
bbox_ymins = [0.0, 4.0, 2.0]
bbox_xmins = [1.0, 5.0, 8.0]
bbox_ymaxs = [2.0, 6.0, 1.0]
bbox_xmaxs = [3.0, 7.0, 3.3]
densepose_num = [0, 4, 2]
densepose_part_index = [2, 2, 3, 4, 2, 9]
densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
def graph_fn():
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
dataset_util.bytes_feature(encoded_jpeg),
'image/format':
dataset_util.bytes_feature(six.b('jpeg')),
'image/object/bbox/ymin':
dataset_util.float_list_feature(bbox_ymins),
'image/object/bbox/xmin':
dataset_util.float_list_feature(bbox_xmins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(bbox_ymaxs),
'image/object/bbox/xmax':
dataset_util.float_list_feature(bbox_xmaxs),
'image/object/densepose/num':
dataset_util.int64_list_feature(densepose_num),
'image/object/densepose/part_index':
dataset_util.int64_list_feature(densepose_part_index),
'image/object/densepose/x':
dataset_util.float_list_feature(densepose_x),
'image/object/densepose/y':
dataset_util.float_list_feature(densepose_y),
'image/object/densepose/u':
dataset_util.float_list_feature(densepose_u),
'image/object/densepose/v':
dataset_util.float_list_feature(densepose_v),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
load_dense_pose=True)
output = example_decoder.decode(tf.convert_to_tensor(example))
dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = output[
fields.InputDataFields.groundtruth_dp_surface_coords]
return dp_num_points, dp_part_ids, dp_surface_coords
dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
graph_fn, [])
expected_dp_num_points = [0, 4, 2]
expected_dp_part_ids = [
[0, 0, 0, 0],
[2, 2, 3, 4],
[2, 9, 0, 0]
]
expected_dp_surface_coords = np.array(
[
# Instance 0 (no points).
[[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
# Instance 1 (4 points).
[[0.9, 0.1, 0.99, 0.01],
[0.8, 0.2, 0.98, 0.02],
[0.7, 0.3, 0.97, 0.03],
[0.6, 0.4, 0.96, 0.04]],
# Instance 2 (2 points).
[[0.5, 0.5, 0.95, 0.05],
[0.4, 0.6, 0.94, 0.06],
[0., 0., 0., 0.],
[0., 0., 0., 0.]],
], dtype=np.float32)
self.assertAllEqual(dp_num_points, expected_dp_num_points)
self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)
if __name__ == '__main__':
tf.test.main()
@@ -43,6 +43,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import copy
import datetime
import io
@@ -51,62 +52,11 @@ import json
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import six
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('input_tfrecord', None, 'TFRecord containing images in '
'tf.Example format for object detection, with bounding'
'boxes and contextual feature embeddings.')
flags.DEFINE_string('output_tfrecord', None,
'TFRecord containing images in tf.Example format, with '
'added contextual memory banks.')
flags.DEFINE_string('sequence_key', None, 'Key to use when grouping sequences: '
'so far supports `image/seq_id` and `image/location`.')
flags.DEFINE_string('time_horizon', None, 'What time horizon to use when '
'splitting the data, if any. Options are: `year`, `month`,'
' `week`, `day `, `hour`, `minute`, `None`.')
flags.DEFINE_integer('subsample_context_features_rate', 0, 'Whether to '
'subsample the context_features, and if so how many to '
'sample. If the rate is set to X, it will sample context '
'from 1 out of every X images. Default is sampling from '
'every image, which is X=0.')
flags.DEFINE_boolean('reduce_image_size', True, 'downsamples images to'
'have longest side max_image_dimension, maintaining aspect'
' ratio')
flags.DEFINE_integer('max_image_dimension', 1024, 'sets max image dimension')
flags.DEFINE_boolean('add_context_features', True, 'adds a memory bank of'
'embeddings to each clip')
flags.DEFINE_boolean('sorted_image_ids', True, 'whether the image source_ids '
'are sortable to deal with date_captured tie-breaks')
flags.DEFINE_string('image_ids_to_keep', 'All', 'path to .json list of image'
'ids to keep, used for ground truth eval creation')
flags.DEFINE_boolean('keep_context_features_image_id_list', False, 'Whether or '
'not to keep a list of the image_ids corresponding to the '
'memory bank')
flags.DEFINE_boolean('keep_only_positives', False, 'Whether or not to '
'keep only positive boxes based on score')
flags.DEFINE_boolean('keep_only_positives_gt', False, 'Whether or not to '
'keep only positive boxes based on gt class')
flags.DEFINE_float('context_features_score_threshold', 0.7, 'What score '
'threshold to use for boxes in context_features')
flags.DEFINE_integer('max_num_elements_in_context_features', 2000, 'Sets max '
'num elements per memory bank')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
flags.DEFINE_string('output_type', 'tf_sequence_example', 'Output type, one of '
'`tf_example`, `tf_sequence_example`')
flags.DEFINE_integer('max_clip_length', None, 'Max length for sequence '
'example outputs.')
FLAGS = flags.FLAGS
DEFAULT_FEATURE_LENGTH = 2057
class ReKeyDataFn(beam.DoFn):
@@ -406,7 +356,8 @@ class GenerateContextFn(beam.DoFn):
keep_only_positives_gt=False,
max_num_elements_in_context_features=5000,
pad_context_features=False,
output_type='tf_example', max_clip_length=None,
context_feature_length=2057):
"""Initialization function. """Initialization function.
Args: Args:
...@@ -432,6 +383,8 @@ class GenerateContextFn(beam.DoFn): ...@@ -432,6 +383,8 @@ class GenerateContextFn(beam.DoFn):
output_type: What type of output, tf_example of tf_sequence_example output_type: What type of output, tf_example of tf_sequence_example
max_clip_length: The maximum length of a sequence example, before max_clip_length: The maximum length of a sequence example, before
splitting into multiple splitting into multiple
context_feature_length: The length of the context feature embeddings
stored in the input data.
""" """
self._session = None self._session = None
self._num_examples_processed = beam.metrics.Metrics.counter( self._num_examples_processed = beam.metrics.Metrics.counter(
...@@ -456,6 +409,7 @@ class GenerateContextFn(beam.DoFn): ...@@ -456,6 +409,7 @@ class GenerateContextFn(beam.DoFn):
self._context_features_score_threshold = context_features_score_threshold self._context_features_score_threshold = context_features_score_threshold
self._max_num_elements_in_context_features = ( self._max_num_elements_in_context_features = (
max_num_elements_in_context_features) max_num_elements_in_context_features)
self._context_feature_length = context_feature_length
self._images_kept = beam.metrics.Metrics.counter(
'sequence_data_generation', 'images_kept')
@@ -506,9 +460,9 @@ class GenerateContextFn(beam.DoFn):
context_features_image_id_list.append(example_image_id)
if not example_embedding:
example_embedding.append(np.zeros(self._context_feature_length))
feature_length = self._context_feature_length
# If the example_list is not empty and image/embedding_length is in the
# feature dict, feature_length will be assigned to that. Otherwise, it will
@@ -703,7 +657,8 @@ class GenerateContextFn(beam.DoFn):
return list_of_examples
def construct_pipeline(pipeline,
input_tfrecord,
output_tfrecord,
sequence_key,
time_horizon=None,
@@ -720,10 +675,12 @@ def construct_pipeline(input_tfrecord,
max_num_elements_in_context_features=5000,
num_shards=0,
output_type='tf_example',
max_clip_length=None,
context_feature_length=2057):
"""Returns a beam pipeline to run object detection inference. """Returns a beam pipeline to run object detection inference.
Args: Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
@@ -755,91 +712,224 @@ def construct_pipeline(input_tfrecord,
output_type: What type of output, tf_example or tf_sequence_example
max_clip_length: The maximum length of a sequence example, before
splitting into multiple
context_feature_length: The length of the context feature embeddings stored
in the input data.
""" """
if output_type == 'tf_example':
coder = beam.coders.ProtoCoder(tf.train.Example)
elif output_type == 'tf_sequence_example':
coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
else:
raise ValueError('Unsupported output type.')
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
ReKeyDataFn(sequence_key, time_horizon,
reduce_image_size, max_image_dimension))
grouped_collection = (
rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
grouped_collection = (
grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
ordered_collection = (
grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
SortGroupedDataFn(sequence_key, sorted_image_ids,
max_num_elements_in_context_features)))
ordered_collection = (
ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
output_collection = (
ordered_collection | 'AddContextToExamples' >> beam.ParDo(
GenerateContextFn(
sequence_key, add_context_features, image_ids_to_keep,
keep_context_features_image_id_list=(
keep_context_features_image_id_list),
subsample_context_features_rate=subsample_context_features_rate,
keep_only_positives=keep_only_positives,
keep_only_positives_gt=keep_only_positives_gt,
context_features_score_threshold=(
context_features_score_threshold),
max_num_elements_in_context_features=(
max_num_elements_in_context_features),
output_type=output_type,
max_clip_length=max_clip_length,
context_feature_length=context_feature_length)))
output_collection = (
output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=coder)
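A minimal usage sketch of the refactored entry point (hypothetical paths; under the new signature the caller owns the pipeline object, mirroring `main` below):

import apache_beam as beam

options = beam.options.pipeline_options.PipelineOptions(runner='DirectRunner')
p = beam.Pipeline(options=options)
construct_pipeline(
    p,
    input_tfrecord='/tmp/input.tfrecord',           # hypothetical path
    output_tfrecord='/tmp/with_context.tfrecord',   # hypothetical path
    sequence_key=b'image/seq_id')
p.run()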
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--input_tfrecord',
dest='input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection, with bounding boxes and contextual feature embeddings.')
parser.add_argument(
'--output_tfrecord',
dest='output_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format, with added '
'contextual memory banks.')
parser.add_argument(
'--sequence_key',
dest='sequence_key',
default='image/location',
help='Key to use when grouping sequences: so far supports `image/seq_id` '
'and `image/location`.')
parser.add_argument(
'--context_feature_length',
dest='context_feature_length',
default=2057,
help='The length of the context feature embeddings stored in the input '
'data.')
parser.add_argument(
'--time_horizon',
dest='time_horizon',
default=None,
help='What time horizon to use when splitting the data, if any. Options '
'are: `year`, `month`, `week`, `day `, `hour`, `minute`, `None`.')
parser.add_argument(
'--subsample_context_features_rate',
dest='subsample_context_features_rate',
default=0,
help='Whether to subsample the context_features, and if so how many to '
'sample. If the rate is set to X, it will sample context from 1 out of '
'every X images. Default is sampling from every image, which is X=0.')
parser.add_argument(
'--reduce_image_size',
dest='reduce_image_size',
default=True,
help='downsamples images to have longest side max_image_dimension, '
'maintaining aspect ratio')
parser.add_argument(
'--max_image_dimension',
dest='max_image_dimension',
default=1024,
help='Sets max image dimension for resizing.')
parser.add_argument(
'--add_context_features',
dest='add_context_features',
default=True,
help='Adds a memory bank of embeddings to each clip')
parser.add_argument(
'--sorted_image_ids',
dest='sorted_image_ids',
default=True,
help='Whether the image source_ids are sortable to deal with '
'date_captured tie-breaks.')
parser.add_argument(
'--image_ids_to_keep',
dest='image_ids_to_keep',
default='All',
help='Path to .json list of image ids to keep, used for ground truth '
'eval creation.')
parser.add_argument(
'--keep_context_features_image_id_list',
dest='keep_context_features_image_id_list',
default=False,
help='Whether or not to keep a list of the image_ids corresponding to '
'the memory bank.')
parser.add_argument(
'--keep_only_positives',
dest='keep_only_positives',
default=False,
help='Whether or not to keep only positive boxes based on score.')
parser.add_argument(
'--context_features_score_threshold',
dest='context_features_score_threshold',
default=0.7,
help='What score threshold to use for boxes in context_features, when '
'`keep_only_positives` is set to `True`.')
parser.add_argument(
'--keep_only_positives_gt',
dest='keep_only_positives_gt',
default=False,
help='Whether or not to keep only positive boxes based on gt class.')
parser.add_argument(
'--max_num_elements_in_context_features',
dest='max_num_elements_in_context_features',
default=2000,
help='Sets max number of context feature elements per memory bank. '
'If the number of images in the context group is greater than '
'`max_num_elements_in_context_features`, the context group will be split.'
)
parser.add_argument(
'--output_type',
dest='output_type',
default='tf_example',
help='Output type, one of `tf_example`, `tf_sequence_example`.')
parser.add_argument(
'--max_clip_length',
dest='max_clip_length',
default=None,
help='Max length for sequence example outputs.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
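For example (hypothetical values), only the flags declared above are consumed, and anything unrecognized passes through to the Beam pipeline options:

beam_args, pipeline_args = parse_args([
    '--input_tfrecord=/tmp/in.tfrecord',    # hypothetical path
    '--output_tfrecord=/tmp/out.tfrecord',  # hypothetical path
    '--runner=DirectRunner',
])
# beam_args.input_tfrecord -> '/tmp/in.tfrecord'
# pipeline_args            -> ['--runner=DirectRunner']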
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference.
dirname = os.path.dirname(FLAGS.output_tfrecord) Args:
argv: Command line arguments.
save_main_session: Whether to save the main session.
"""
args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.input_tfrecord, p = beam.Pipeline(options=pipeline_options)
FLAGS.output_tfrecord,
FLAGS.sequence_key, construct_pipeline(
FLAGS.time_horizon, p,
FLAGS.subsample_context_features_rate, args.input_tfrecord,
FLAGS.reduce_image_size, args.output_tfrecord,
FLAGS.max_image_dimension, args.sequence_key,
FLAGS.add_context_features, args.time_horizon,
FLAGS.sorted_image_ids, args.subsample_context_features_rate,
FLAGS.image_ids_to_keep, args.reduce_image_size,
FLAGS.keep_context_features_image_id_list, args.max_image_dimension,
FLAGS.keep_only_positives, args.add_context_features,
FLAGS.context_features_score_threshold, args.sorted_image_ids,
FLAGS.keep_only_positives_gt, args.image_ids_to_keep,
FLAGS.max_num_elements_in_context_features, args.keep_context_features_image_id_list,
FLAGS.num_shards, args.keep_only_positives,
FLAGS.output_type, args.context_features_score_threshold,
FLAGS.max_clip_length)) args.keep_only_positives_gt,
args.max_num_elements_in_context_features,
args.output_type,
args.max_clip_length,
args.context_feature_length)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'input_tfrecord',
'output_tfrecord'
])
app.run(main)
@@ -22,13 +22,13 @@ import datetime
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version
from apache_beam import runners
@contextlib.contextmanager
@@ -200,7 +200,7 @@ class GenerateContextDataTest(tf.test.TestCase):
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
def assert_expected_key(self, key):
self.assertAllEqual(key, b'01')
def assert_sorted(self, example_collection):
example_list = list(example_collection)
@@ -329,19 +329,22 @@ class GenerateContextDataTest(tf.test.TestCase):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
add_context_to_examples.construct_pipeline(
p,
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
@@ -355,20 +358,23 @@ class GenerateContextDataTest(tf.test.TestCase):
with InMemoryTFRecord(
[self._create_first_tf_example(),
self._create_second_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
sequence_key = six.ensure_binary('image/seq_id')
max_num_elements = 10
num_shards = 1
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
add_context_to_examples.construct_pipeline(
p,
input_tfrecord,
output_tfrecord,
sequence_key,
max_num_elements_in_context_features=max_num_elements,
num_shards=num_shards,
output_type='tf_sequence_example')
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(
...
@@ -33,32 +33,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import hashlib
import io
import json
import logging
import os
from absl import app
from absl import flags
import apache_beam as beam
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from apache_beam import runners
from object_detection.utils import dataset_util
flags.DEFINE_string('image_directory', None, 'Directory where images are '
'stored')
flags.DEFINE_string('output_tfrecord_prefix', None,
'TFRecord containing images in tf.Example format.')
flags.DEFINE_string('input_annotations_file', None, 'Path to Coco-CameraTraps'
'style annotations file')
flags.DEFINE_integer('num_images_per_shard',
200,
'The number of images to be stored in each shard.')
FLAGS = flags.FLAGS
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
@@ -243,13 +230,14 @@ class ParseImage(beam.DoFn):
return [(example)]
def load_json_data(data_file):
with tf.io.gfile.GFile(data_file, 'r') as fid:
data_dict = json.load(fid)
return data_dict
def create_pipeline(pipeline,
image_directory,
input_annotations_file,
output_tfrecord_prefix=None,
num_images_per_shard=200,
@@ -257,68 +245,97 @@ def create_pipeline(image_directory,
"""Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.
Args:
pipeline: Initialized beam pipeline.
image_directory: Path to image directory
input_annotations_file: Path to a coco-cameratraps annotation file
output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
be named {output_tfrecord_prefix}@N.
num_images_per_shard: The number of images to store in each shard
keep_bboxes: Whether to keep any bounding boxes that exist in the json file
"""
logging.info('Reading data from COCO-CameraTraps Dataset.')
data = load_json_data(input_annotations_file)
num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard))
image_examples = (
pipeline | ('CreateCollections') >> beam.Create(
[im['id'] for im in data['images']])
| ('ParseImage') >> beam.ParDo(ParseImage(
image_directory, data['images'], data['annotations'],
data['categories'], keep_bboxes=keep_bboxes)))
_ = (image_examples
| ('Reshuffle') >> beam.Reshuffle()
| ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord_prefix,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example)))
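The shard count computed above is a plain ceiling division; with hypothetical numbers:

import numpy as np

num_images = 450              # hypothetical dataset size
num_images_per_shard = 200    # the default above
num_shards = int(np.ceil(float(num_images) / num_images_per_shard))
assert num_shards == 3        # three output shard files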
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--image_directory',
dest='image_directory',
required=True,
help='Path to the directory where the images are stored.')
parser.add_argument(
'--output_tfrecord_prefix',
dest='output_tfrecord_prefix',
required=True,
help='Path and prefix to store TFRecords containing images in tf.Example '
'format.')
parser.add_argument(
'--input_annotations_file',
dest='input_annotations_file',
required=True,
help='Path to Coco-CameraTraps style annotations file.')
parser.add_argument(
'--num_images_per_shard',
dest='num_images_per_shard',
default=200,
help='The number of images to be stored in each output shard.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
args, pipeline_args = parse_args(argv)
# must create before flags are used pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner = runners.DirectRunner() pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(args.output_tfrecord_prefix)
tf.io.gfile.makedirs(dirname)
p = beam.Pipeline(options=pipeline_options)
create_pipeline(
pipeline=p,
image_directory=args.image_directory,
input_annotations_file=args.input_annotations_file,
output_tfrecord_prefix=args.output_tfrecord_prefix,
num_images_per_shard=args.num_images_per_shard)
p.run()
if __name__ == '__main__':
flags.mark_flags_as_required([
'image_directory',
'input_annotations_file',
'output_tfrecord_prefix'
])
app.run(main)
@@ -21,13 +21,14 @@ import json
import os
import tempfile
import unittest
import apache_beam as beam
import numpy as np
from PIL import Image
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
from apache_beam import runners
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
@@ -95,13 +96,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
@@ -112,7 +113,7 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
@@ -134,13 +135,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/object/class/text']
.bytes_list.value, [b'animal'])
self.assertAllClose(
example.features.feature['image/class/label']
.int64_list.value, [1])
self.assertAllEqual(
example.features.feature['image/class/text']
.bytes_list.value, [b'animal'])
# Check other essential attributes.
self.assertAllEqual(
@@ -151,21 +152,23 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
[self.IMAGE_WIDTH])
self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value,
[b'im_0'])
self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value)
def test_beam_pipeline(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
create_cococameratraps_tfexample_main.create_pipeline(
p, temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
@@ -176,17 +179,19 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
actual_output[0]))
def test_beam_pipeline_bbox(self):
runner = runners.DirectRunner()
num_frames = 1
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True)
output_tfrecord = temp_dir+'/output'
self._write_random_images_to_directory(temp_dir, num_frames)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
create_cococameratraps_tfexample_main.create_pipeline(
p, temp_dir, json_path,
output_tfrecord_prefix=output_tfrecord,
keep_bboxes=True)
p.run()
filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????')
actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
@@ -45,26 +45,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import threading
from absl import app
from absl import flags
import apache_beam as beam
import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
'images in tf.Example format for object detection.')
flags.DEFINE_string('detection_output_tfrecord', None,
'TFRecord containing detections in tf.Example format.')
flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing'
'an object detection SavedModel.')
flags.DEFINE_float('confidence_threshold', 0.9,
'Min confidence to keep bounding boxes')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateDetectionDataFn(beam.DoFn):
@@ -205,58 +191,103 @@ class GenerateDetectionDataFn(beam.DoFn):
return [example]
def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
confidence_threshold, num_shards):
"""Builds a Beam pipeline to run object detection inference.
Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model.
model_dir: Path to `saved_model` to use for inference.
confidence_threshold: Threshold to use when keeping detection results.
num_shards: The number of output shards.
"""
input_collection = (
pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
input_tfrecord,
coder=beam.coders.BytesCoder()))
output_collection = input_collection | 'RunInference' >> beam.ParDo(
GenerateDetectionDataFn(model_dir, confidence_threshold))
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
output_tfrecord,
num_shards=num_shards,
coder=beam.coders.ProtoCoder(tf.train.Example))
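A minimal end-to-end sketch (hypothetical paths and model directory) of driving this pipeline with the DirectRunner:

import apache_beam as beam

options = beam.options.pipeline_options.PipelineOptions(runner='DirectRunner')
p = beam.Pipeline(options=options)
construct_pipeline(
    p,
    input_tfrecord='/tmp/images.tfrecord',         # hypothetical path
    output_tfrecord='/tmp/detections.tfrecord',    # hypothetical path
    model_dir='/tmp/exported/saved_model',         # hypothetical path
    confidence_threshold=0.9,
    num_shards=1)
result = p.run()
result.wait_until_finish()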
def parse_args(argv):
"""Command-line argument parser.
Args:
argv: command line arguments
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--detection_input_tfrecord',
dest='detection_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--detection_output_tfrecord',
dest='detection_output_tfrecord',
required=True,
help='TFRecord containing detections in tf.Example format.')
parser.add_argument(
'--detection_model_dir',
dest='detection_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel.')
parser.add_argument(
'--confidence_threshold',
dest='confidence_threshold',
default=0.9,
help='Min confidence to keep bounding boxes.')
parser.add_argument(
'--num_shards',
dest='num_shards',
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
# must create before flags are used
runner = runners.DirectRunner()
dirname = os.path.dirname(FLAGS.detection_output_tfrecord) args, pipeline_args = parse_args(argv)
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
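# save_main_session pickles the state of __main__ so that module-level
# imports used inside DoFns are available on remote workers.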
dirname = os.path.dirname(args.detection_output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.detection_input_tfrecord, p = beam.Pipeline(options=pipeline_options)
FLAGS.detection_output_tfrecord,
FLAGS.detection_model_dir, construct_pipeline(
FLAGS.confidence_threshold, p,
FLAGS.num_shards)) args.detection_input_tfrecord,
args.detection_output_tfrecord,
args.detection_model_dir,
args.confidence_threshold,
args.num_shards)
p.run()
if __name__ == '__main__': if __name__ == '__main__':
flags.mark_flags_as_required([
'detection_input_tfrecord',
'detection_output_tfrecord',
'detection_model_dir'
])
app.run(main) app.run(main)
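For reference, a minimal invocation sketch of this script (paths are placeholders; flags that parse_args does not consume, such as --runner, fall through to Beam's PipelineOptions):

python generate_detection_data.py \
  --detection_input_tfrecord=/path/to/images.tfrecord \
  --detection_output_tfrecord=/path/to/detections.tfrecord \
  --detection_model_dir=/path/to/saved_model \
  --confidence_threshold=0.9 \
  --num_shards=1 \
  --runner=DirectRunner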
...@@ -22,6 +22,7 @@ import contextlib ...@@ -22,6 +22,7 @@ import contextlib
import os import os
import tempfile import tempfile
import unittest import unittest
import apache_beam as beam
import numpy as np import numpy as np
import six import six
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
...@@ -32,7 +33,6 @@ from object_detection.core import model ...@@ -32,7 +33,6 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_detection_data from object_detection.dataset_tools.context_rcnn import generate_detection_data
from object_detection.protos import pipeline_pb2 from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2: if six.PY2:
import mock # pylint: disable=g-import-not-at-top import mock # pylint: disable=g-import-not-at-top
...@@ -67,6 +67,9 @@ class FakeModel(model.DetectionModel): ...@@ -67,6 +67,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes): def loss(self, prediction_dict, true_image_shapes):
pass pass
...@@ -243,16 +246,18 @@ class GenerateDetectionDataTest(tf.test.TestCase): ...@@ -243,16 +246,18 @@ class GenerateDetectionDataTest(tf.test.TestCase):
def test_beam_pipeline(self): def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model() saved_model_path = self._export_saved_model()
confidence_threshold = 0.8 confidence_threshold = 0.8
num_shards = 1 num_shards = 1
pipeline = generate_detection_data.construct_pipeline( pipeline_options = beam.options.pipeline_options.PipelineOptions(
input_tfrecord, output_tfrecord, saved_model_path, runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
generate_detection_data.construct_pipeline(
p, input_tfrecord, output_tfrecord, saved_model_path,
confidence_threshold, num_shards) confidence_threshold, num_shards)
runner.run(pipeline) p.run()
filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
actual_output = [] actual_output = []
record_iterator = tf.python_io.tf_record_iterator(path=filenames[0]) record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
......
...@@ -47,34 +47,17 @@ from __future__ import absolute_import ...@@ -47,34 +47,17 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse
import datetime import datetime
import os import os
import threading import threading
from absl import app from absl import app
from absl import flags
import apache_beam as beam import apache_beam as beam
import numpy as np import numpy as np
import six import six
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from apache_beam import runners
flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing'
'images in tf.Example format for object detection.')
flags.DEFINE_string('embedding_output_tfrecord', None,
'TFRecord containing embeddings in tf.Example format.')
flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing'
'an object detection SavedModel with'
'detection_box_classifier_features in the output.')
flags.DEFINE_integer('top_k_embedding_count', 1,
'The number of top k embeddings to add to the memory bank.'
)
flags.DEFINE_integer('bottom_k_embedding_count', 0,
'The number of bottom k embeddings to add to the memory '
'bank.')
flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
FLAGS = flags.FLAGS
class GenerateEmbeddingDataFn(beam.DoFn): class GenerateEmbeddingDataFn(beam.DoFn):
...@@ -321,12 +304,13 @@ class GenerateEmbeddingDataFn(beam.DoFn): ...@@ -321,12 +304,13 @@ class GenerateEmbeddingDataFn(beam.DoFn):
return [example] return [example]
def construct_pipeline(input_tfrecord, output_tfrecord, model_dir, def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
top_k_embedding_count, bottom_k_embedding_count, top_k_embedding_count, bottom_k_embedding_count,
num_shards): num_shards):
"""Returns a beam pipeline to run object detection inference. """Returns a beam pipeline to run object detection inference.
Args: Args:
pipeline: Initialized beam pipeline.
input_tfrecord: A TFRecord of tf.train.Example protos containing images. input_tfrecord: A TFRecord of tf.train.Example protos containing images.
output_tfrecord: A TFRecord of tf.train.Example protos that contain images output_tfrecord: A TFRecord of tf.train.Example protos that contain images
in the input TFRecord and the detections from the model. in the input TFRecord and the detections from the model.
...@@ -335,44 +319,96 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir, ...@@ -335,44 +319,96 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
bottom_k_embedding_count: The number of low-confidence embeddings to store. bottom_k_embedding_count: The number of low-confidence embeddings to store.
num_shards: The number of output shards. num_shards: The number of output shards.
""" """
def pipeline(root): input_collection = (
input_collection = ( pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord( input_tfrecord,
input_tfrecord, coder=beam.coders.BytesCoder()))
coder=beam.coders.BytesCoder())) output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo( GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
GenerateEmbeddingDataFn(model_dir, top_k_embedding_count, bottom_k_embedding_count))
bottom_k_embedding_count)) output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle() _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
_ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord( output_tfrecord,
output_tfrecord, num_shards=num_shards,
num_shards=num_shards, coder=beam.coders.ProtoCoder(tf.train.Example))
coder=beam.coders.ProtoCoder(tf.train.Example))
return pipeline
def parse_args(argv):
"""Command-line argument parser.
def main(_):
Args:
argv: Command line arguments.
Returns:
beam_args: Arguments for the beam pipeline.
pipeline_args: Arguments for the pipeline options, such as runner type.
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'--embedding_input_tfrecord',
dest='embedding_input_tfrecord',
required=True,
help='TFRecord containing images in tf.Example format for object '
'detection.')
parser.add_argument(
'--embedding_output_tfrecord',
dest='embedding_output_tfrecord',
required=True,
help='TFRecord containing embeddings in tf.Example format.')
parser.add_argument(
'--embedding_model_dir',
dest='embedding_model_dir',
required=True,
help='Path to directory containing an object detection SavedModel with '
'detection_box_classifier_features in the output.')
parser.add_argument(
'--top_k_embedding_count',
dest='top_k_embedding_count',
type=int,
default=1,
help='The number of top k embeddings to add to the memory bank.')
parser.add_argument(
'--bottom_k_embedding_count',
dest='bottom_k_embedding_count',
type=int,
default=0,
help='The number of bottom k embeddings to add to the memory bank.')
parser.add_argument(
'--num_shards',
dest='num_shards',
type=int,
default=0,
help='Number of output shards.')
beam_args, pipeline_args = parser.parse_known_args(argv)
return beam_args, pipeline_args
def main(argv=None, save_main_session=True):
"""Runs the Beam pipeline that performs inference. """Runs the Beam pipeline that performs inference.
Args: Args:
_: unused argv: Command line arguments.
save_main_session: Whether to save the main session.
""" """
# must create before flags are used args, pipeline_args = parse_args(argv)
runner = runners.DirectRunner()
pipeline_options = beam.options.pipeline_options.PipelineOptions(
pipeline_args)
pipeline_options.view_as(
beam.options.pipeline_options.SetupOptions).save_main_session = (
save_main_session)
dirname = os.path.dirname(FLAGS.embedding_output_tfrecord) dirname = os.path.dirname(args.embedding_output_tfrecord)
tf.io.gfile.makedirs(dirname) tf.io.gfile.makedirs(dirname)
runner.run(
construct_pipeline(FLAGS.embedding_input_tfrecord,
FLAGS.embedding_output_tfrecord,
FLAGS.embedding_model_dir, FLAGS.top_k_embedding_count,
FLAGS.bottom_k_embedding_count, FLAGS.num_shards))
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
p,
args.embedding_input_tfrecord,
args.embedding_output_tfrecord,
args.embedding_model_dir,
args.top_k_embedding_count,
args.bottom_k_embedding_count,
args.num_shards)
p.run()
if __name__ == '__main__': if __name__ == '__main__':
flags.mark_flags_as_required([
'embedding_input_tfrecord',
'embedding_output_tfrecord',
'embedding_model_dir'
])
app.run(main) app.run(main)
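Likewise, a sketch for invoking this embedding script (paths are placeholders):

python generate_embedding_data.py \
  --embedding_input_tfrecord=/path/to/images.tfrecord \
  --embedding_output_tfrecord=/path/to/embeddings.tfrecord \
  --embedding_model_dir=/path/to/saved_model \
  --top_k_embedding_count=1 \
  --bottom_k_embedding_count=0 \
  --num_shards=1 \
  --runner=DirectRunner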
...@@ -21,6 +21,7 @@ import contextlib ...@@ -21,6 +21,7 @@ import contextlib
import os import os
import tempfile import tempfile
import unittest import unittest
import apache_beam as beam
import numpy as np import numpy as np
import six import six
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
...@@ -30,7 +31,7 @@ from object_detection.core import model ...@@ -30,7 +31,7 @@ from object_detection.core import model
from object_detection.dataset_tools.context_rcnn import generate_embedding_data from object_detection.dataset_tools.context_rcnn import generate_embedding_data
from object_detection.protos import pipeline_pb2 from object_detection.protos import pipeline_pb2
from object_detection.utils import tf_version from object_detection.utils import tf_version
from apache_beam import runners
if six.PY2: if six.PY2:
import mock # pylint: disable=g-import-not-at-top import mock # pylint: disable=g-import-not-at-top
...@@ -73,6 +74,9 @@ class FakeModel(model.DetectionModel): ...@@ -73,6 +74,9 @@ class FakeModel(model.DetectionModel):
def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
pass pass
def restore_from_objects(self, fine_tune_checkpoint_type):
pass
def loss(self, prediction_dict, true_image_shapes): def loss(self, prediction_dict, true_image_shapes):
pass pass
...@@ -236,13 +240,13 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -236,13 +240,13 @@ class GenerateEmbeddingData(tf.test.TestCase):
.int64_list.value, [5]) .int64_list.value, [5])
self.assertAllEqual( self.assertAllEqual(
example.features.feature['image/object/class/text'] example.features.feature['image/object/class/text']
.bytes_list.value, ['hyena']) .bytes_list.value, [b'hyena'])
self.assertAllClose( self.assertAllClose(
example.features.feature['image/class/label'] example.features.feature['image/class/label']
.int64_list.value, [5]) .int64_list.value, [5])
self.assertAllEqual( self.assertAllEqual(
example.features.feature['image/class/text'] example.features.feature['image/class/text']
.bytes_list.value, ['hyena']) .bytes_list.value, [b'hyena'])
# Check other essential attributes. # Check other essential attributes.
self.assertAllEqual( self.assertAllEqual(
...@@ -251,7 +255,7 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -251,7 +255,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
example.features.feature['image/width'].int64_list.value, [600]) example.features.feature['image/width'].int64_list.value, [600])
self.assertAllEqual( self.assertAllEqual(
example.features.feature['image/source_id'].bytes_list.value, example.features.feature['image/source_id'].bytes_list.value,
['image_id']) [b'image_id'])
self.assertTrue( self.assertTrue(
example.features.feature['image/encoded'].bytes_list.value) example.features.feature['image/encoded'].bytes_list.value)
...@@ -268,7 +272,7 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -268,7 +272,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
.int64_list.value, [5]) .int64_list.value, [5])
self.assertAllEqual(tf.train.Example.FromString( self.assertAllEqual(tf.train.Example.FromString(
generated_example).features.feature['image/object/class/text'] generated_example).features.feature['image/object/class/text']
.bytes_list.value, ['hyena']) .bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example) output = inference_fn.process(generated_example)
output_example = output[0] output_example = output[0]
self.assert_expected_example(output_example) self.assert_expected_example(output_example)
...@@ -304,24 +308,26 @@ class GenerateEmbeddingData(tf.test.TestCase): ...@@ -304,24 +308,26 @@ class GenerateEmbeddingData(tf.test.TestCase):
.feature['image/object/class/label'].int64_list.value, [5]) .feature['image/object/class/label'].int64_list.value, [5])
self.assertAllEqual( self.assertAllEqual(
tf.train.Example.FromString(generated_example).features tf.train.Example.FromString(generated_example).features
.feature['image/object/class/text'].bytes_list.value, ['hyena']) .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
output = inference_fn.process(generated_example) output = inference_fn.process(generated_example)
output_example = output[0] output_example = output[0]
self.assert_expected_example(output_example, botk=True) self.assert_expected_example(output_example, botk=True)
def test_beam_pipeline(self): def test_beam_pipeline(self):
with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
runner = runners.DirectRunner()
temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
saved_model_path = self._export_saved_model() saved_model_path = self._export_saved_model()
top_k_embedding_count = 1 top_k_embedding_count = 1
bottom_k_embedding_count = 0 bottom_k_embedding_count = 0
num_shards = 1 num_shards = 1
pipeline = generate_embedding_data.construct_pipeline( pipeline_options = beam.options.pipeline_options.PipelineOptions(
input_tfrecord, output_tfrecord, saved_model_path, runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
generate_embedding_data.construct_pipeline(
p, input_tfrecord, output_tfrecord, saved_model_path,
top_k_embedding_count, bottom_k_embedding_count, num_shards) top_k_embedding_count, bottom_k_embedding_count, num_shards)
runner.run(pipeline) p.run()
filenames = tf.io.gfile.glob( filenames = tf.io.gfile.glob(
output_tfrecord + '-?????-of-?????') output_tfrecord + '-?????-of-?????')
actual_output = [] actual_output = []
......
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
# ============================================================================== # ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection. r"""Convert raw COCO dataset to TFRecord for object_detection.
This tool supports data generation for object detection (boxes, masks),
keypoint detection, and DensePose.
Please note that this tool creates sharded output files. Please note that this tool creates sharded output files.
Example usage: Example usage:
...@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '', ...@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.') 'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '', tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.') 'Validation annotations JSON file.')
# DensePose annotations are only available for COCO 2014.
tf.flags.DEFINE_string('train_densepose_annotations_file', '',
'Training annotations JSON file for DensePose.')
tf.flags.DEFINE_string('val_densepose_annotations_file', '',
'Validation annotations JSON file for DensePose.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.') tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
# Whether to produce images/annotations only for the person class (for the
# keypoint / DensePose tasks).
tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
'remove all annotations for non-person objects.')
tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
'remove all examples that do not contain a person.')
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
...@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [ ...@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle' b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
] ]
_COCO_PART_NAMES = [
b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
b'left_face',
]
_DP_PART_ID_OFFSET = 1
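# dp_I part ids in the raw DensePose annotations are 1-indexed; subtracting
# _DP_PART_ID_OFFSET below makes them 0-indexed into _COCO_PART_NAMES.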
def clip_to_unit(x):
return min(max(x, 0.0), 1.0)
def create_tf_example(image, def create_tf_example(image,
annotations_list, annotations_list,
image_dir, image_dir,
category_index, category_index,
include_masks=False, include_masks=False,
keypoint_annotations_dict=None): keypoint_annotations_dict=None,
densepose_annotations_dict=None,
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Converts image and annotations to a tf.Example proto. """Converts image and annotations to a tf.Example proto.
Args: Args:
...@@ -108,10 +142,23 @@ def create_tf_example(image, ...@@ -108,10 +142,23 @@ def create_tf_example(image,
dictionary with keys: [u'keypoints', u'num_keypoints'] representing the dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
keypoint information for this person object annotation. If None, then keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated. no keypoint annotations will be populated.
densepose_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
representing part surface coordinates. For more information see
http://densepose.org/.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
Returns: Returns:
key: SHA256 hash of the image.
example: The converted tf.Example example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored. num_annotations_skipped: Number of (invalid) annotations that were ignored.
num_keypoint_annotation_skipped: Number of keypoint annotations that were
skipped.
num_densepose_annotation_skipped: Number of DensePose annotations that were
skipped.
Raises: Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG ValueError: if the image pointed to by data['filename'] is not a valid JPEG
...@@ -146,6 +193,16 @@ def create_tf_example(image, ...@@ -146,6 +193,16 @@ def create_tf_example(image,
num_annotations_skipped = 0 num_annotations_skipped = 0
num_keypoint_annotation_used = 0 num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0 num_keypoint_annotation_skipped = 0
dp_part_index = []
dp_x = []
dp_y = []
dp_u = []
dp_v = []
dp_num_points = []
densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
include_densepose = densepose_annotations_dict is not None
num_densepose_annotation_used = 0
num_densepose_annotation_skipped = 0
for object_annotations in annotations_list: for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox']) (x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0: if width <= 0 or height <= 0:
...@@ -154,14 +211,18 @@ def create_tf_example(image, ...@@ -154,14 +211,18 @@ def create_tf_example(image,
if x + width > image_width or y + height > image_height: if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1 num_annotations_skipped += 1
continue continue
category_id = int(object_annotations['category_id'])
category_name = category_index[category_id]['name'].encode('utf8')
if remove_non_person_annotations and category_name != b'person':
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width) xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width) xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height) ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height) ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd']) is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id) category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8')) category_names.append(category_name)
area.append(object_annotations['area']) area.append(object_annotations['area'])
if include_masks: if include_masks:
...@@ -197,6 +258,40 @@ def create_tf_example(image, ...@@ -197,6 +258,40 @@ def create_tf_example(image,
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES)) keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES) keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0) num_keypoints.append(0)
if include_densepose:
annotation_id = object_annotations['id']
if (annotation_id in densepose_annotations_dict and
all(key in densepose_annotations_dict[annotation_id]
for key in densepose_keys)):
dp_annotations = densepose_annotations_dict[annotation_id]
num_densepose_annotation_used += 1
dp_num_points.append(len(dp_annotations['dp_I']))
dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
for i in dp_annotations['dp_I']])
# DensePose surface coordinates are defined on a [256, 256] grid
# relative to each instance box (i.e. absolute coordinates in range
# [0., 256.]). The following converts the coordinates
# so that they are expressed in normalized image coordinates.
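# Worked example (illustrative numbers): for a bbox of
# [x, y, width, height] = [64, 64, 128, 128] in a 256x256 image,
# dp_x = 128. gives x_box_rel = 128. / 256. = 0.5 and
# x_norm = (64 + 0.5 * 128) / 256 = 0.5.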
dp_x_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
dp_x_norm = [(float(x) + x_box_rel * width) / image_width
for x_box_rel in dp_x_box_rel]
dp_y_box_rel = [
clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
dp_y_norm = [(float(y) + y_box_rel * height) / image_height
for y_box_rel in dp_y_box_rel]
dp_x.extend(dp_x_norm)
dp_y.extend(dp_y_norm)
dp_u.extend(dp_annotations['dp_U'])
dp_v.extend(dp_annotations['dp_V'])
else:
dp_num_points.append(0)
if (remove_non_person_images and
not any(name == b'person' for name in category_names)):
return (key, None, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
feature_dict = { feature_dict = {
'image/height': 'image/height':
dataset_util.int64_feature(image_height), dataset_util.int64_feature(image_height),
...@@ -243,15 +338,34 @@ def create_tf_example(image, ...@@ -243,15 +338,34 @@ def create_tf_example(image,
dataset_util.bytes_list_feature(keypoints_name)) dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = ( num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used) len(keypoint_annotations_dict) - num_keypoint_annotation_used)
if include_densepose:
feature_dict['image/object/densepose/num'] = (
dataset_util.int64_list_feature(dp_num_points))
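# The remaining DensePose features are flat lists over all instances in the
# image; dp_num_points above lets consumers split them back per instance.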
feature_dict['image/object/densepose/part_index'] = (
dataset_util.int64_list_feature(dp_part_index))
feature_dict['image/object/densepose/x'] = (
dataset_util.float_list_feature(dp_x))
feature_dict['image/object/densepose/y'] = (
dataset_util.float_list_feature(dp_y))
feature_dict['image/object/densepose/u'] = (
dataset_util.float_list_feature(dp_u))
feature_dict['image/object/densepose/v'] = (
dataset_util.float_list_feature(dp_v))
num_densepose_annotation_skipped = (
len(densepose_annotations_dict) - num_densepose_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped, num_keypoint_annotation_skipped return (key, example, num_annotations_skipped,
num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir, def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
output_path, include_masks, output_path, include_masks,
num_shards, num_shards,
keypoint_annotations_file=''): keypoint_annotations_file='',
densepose_annotations_file='',
remove_non_person_annotations=False,
remove_non_person_images=False):
"""Loads COCO annotation json files and converts to tf.Record format. """Loads COCO annotation json files and converts to tf.Record format.
Args: Args:
...@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_file: JSON file containing the person keypoint keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be annotations. If empty, then no person keypoint annotations will be
generated. generated.
densepose_annotations_file: JSON file containing the DensePose annotations.
If empty, then no DensePose annotations will be generated.
remove_non_person_annotations: Whether to remove any annotations that are
not the "person" class.
remove_non_person_images: Whether to remove any images that do not contain
at least one "person" annotation.
""" """
with contextlib2.ExitStack() as tf_record_close_stack, \ with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid: tf.gfile.GFile(annotations_file, 'r') as fid:
...@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
if image_id not in annotations_index: if image_id not in annotations_index:
missing_annotation_count += 1 missing_annotation_count += 1
annotations_index[image_id] = [] annotations_index[image_id] = []
logging.info('%d images are missing annotations.', missing_annotation_count) logging.info('%d images are missing annotations.',
missing_annotation_count)
keypoint_annotations_index = {} keypoint_annotations_index = {}
if keypoint_annotations_file: if keypoint_annotations_file:
...@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_index[image_id] = {} keypoint_annotations_index[image_id] = {}
keypoint_annotations_index[image_id][annotation['id']] = annotation keypoint_annotations_index[image_id][annotation['id']] = annotation
densepose_annotations_index = {}
if densepose_annotations_file:
with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
densepose_groundtruth_data = json.load(fid)
if 'annotations' in densepose_groundtruth_data:
for annotation in densepose_groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in densepose_annotations_index:
densepose_annotations_index[image_id] = {}
densepose_annotations_index[image_id][annotation['id']] = annotation
total_num_annotations_skipped = 0 total_num_annotations_skipped = 0
total_num_keypoint_annotations_skipped = 0 total_num_keypoint_annotations_skipped = 0
total_num_densepose_annotations_skipped = 0
for idx, image in enumerate(images): for idx, image in enumerate(images):
if idx % 100 == 0: if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(images)) logging.info('On image %d of %d', idx, len(images))
...@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir, ...@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
keypoint_annotations_dict = {} keypoint_annotations_dict = {}
if image['id'] in keypoint_annotations_index: if image['id'] in keypoint_annotations_index:
keypoint_annotations_dict = keypoint_annotations_index[image['id']] keypoint_annotations_dict = keypoint_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, densepose_annotations_dict = None
num_keypoint_annotations_skipped) = create_tf_example( if densepose_annotations_file:
densepose_annotations_dict = {}
if image['id'] in densepose_annotations_index:
densepose_annotations_dict = densepose_annotations_index[image['id']]
(_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
num_densepose_annotations_skipped) = create_tf_example(
image, annotations_list, image_dir, category_index, include_masks, image, annotations_list, image_dir, category_index, include_masks,
keypoint_annotations_dict) keypoint_annotations_dict, densepose_annotations_dict,
remove_non_person_annotations, remove_non_person_images)
total_num_annotations_skipped += num_annotations_skipped total_num_annotations_skipped += num_annotations_skipped
total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
total_num_densepose_annotations_skipped += (
num_densepose_annotations_skipped)
shard_idx = idx % num_shards shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString()) if tf_example:
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
logging.info('Finished writing, skipped %d annotations.', logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped) total_num_annotations_skipped)
if keypoint_annotations_file: if keypoint_annotations_file:
logging.info('Finished writing, skipped %d keypoint annotations.', logging.info('Finished writing, skipped %d keypoint annotations.',
total_num_keypoint_annotations_skipped) total_num_keypoint_annotations_skipped)
if densepose_annotations_file:
logging.info('Finished writing, skipped %d DensePose annotations.',
total_num_densepose_annotations_skipped)
def main(_): def main(_):
...@@ -347,20 +492,26 @@ def main(_): ...@@ -347,20 +492,26 @@ def main(_):
train_output_path, train_output_path,
FLAGS.include_masks, FLAGS.include_masks,
num_shards=100, num_shards=100,
keypoint_annotations_file=FLAGS.train_keypoint_annotations_file) keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
densepose_annotations_file=FLAGS.train_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations( _create_tf_record_from_coco_annotations(
FLAGS.val_annotations_file, FLAGS.val_annotations_file,
FLAGS.val_image_dir, FLAGS.val_image_dir,
val_output_path, val_output_path,
FLAGS.include_masks, FLAGS.include_masks,
num_shards=100, num_shards=50,
keypoint_annotations_file=FLAGS.val_keypoint_annotations_file) keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
densepose_annotations_file=FLAGS.val_densepose_annotations_file,
remove_non_person_annotations=FLAGS.remove_non_person_annotations,
remove_non_person_images=FLAGS.remove_non_person_images)
_create_tf_record_from_coco_annotations( _create_tf_record_from_coco_annotations(
FLAGS.testdev_annotations_file, FLAGS.testdev_annotations_file,
FLAGS.test_image_dir, FLAGS.test_image_dir,
testdev_output_path, testdev_output_path,
FLAGS.include_masks, FLAGS.include_masks,
num_shards=100) num_shards=50)
if __name__ == '__main__': if __name__ == '__main__':
......
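As a quick cross-check of the DensePose coordinate conversion above, a self-contained sketch (the helper name and numbers are ours, not part of the tool):

def densepose_to_normalized(dp_vals, box_origin, box_size, image_size):
  """Maps DensePose points from the [0, 256] box grid to normalized coords."""
  normalized = []
  for v in dp_vals:
    box_rel = min(max(v / 256., 0.), 1.)  # same clipping as clip_to_unit
    normalized.append((box_origin + box_rel * box_size) / image_size)
  return normalized

# For a [64, 64, 128, 128] box in a 256x256 image, dp_x = 128. maps to 0.5,
# matching the expected_dp_x computation in the test below.
assert densepose_to_normalized([128.], 64, 128, 256) == [0.5]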
...@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
} }
(_, example, (_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example( num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index) image, annotations_list, image_dir, category_index)
self.assertEqual(num_annotations_skipped, 0) self.assertEqual(num_annotations_skipped, 0)
...@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
} }
(_, example, (_, example,
num_annotations_skipped, _) = create_coco_tf_record.create_tf_example( num_annotations_skipped, _, _) = create_coco_tf_record.create_tf_example(
image, annotations_list, image_dir, category_index, include_masks=True) image, annotations_list, image_dir, category_index, include_masks=True)
self.assertEqual(num_annotations_skipped, 0) self.assertEqual(num_annotations_skipped, 0)
...@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
} }
} }
(_, example, _, _, example, _, num_keypoint_annotation_skipped, _ = (
num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example( create_coco_tf_record.create_tf_example(
image, image,
annotations_list, annotations_list,
image_dir, image_dir,
category_index, category_index,
include_masks=False, include_masks=False,
keypoint_annotations_dict=keypoint_annotations_dict) keypoint_annotations_dict=keypoint_annotations_dict))
self.assertEqual(num_keypoint_annotation_skipped, 0) self.assertEqual(num_keypoint_annotation_skipped, 0)
self._assertProtoEqual( self._assertProtoEqual(
...@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase): ...@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
example.features.feature[ example.features.feature[
'image/object/keypoint/visibility'].int64_list.value, vv) 'image/object/keypoint/visibility'].int64_list.value, vv)
def test_create_tf_example_with_dense_pose(self):
image_dir = self.get_temp_dir()
image_file_name = 'tmp_image.jpg'
image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
np.uint8)
save_path = os.path.join(image_dir, image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
image = {
'file_name': image_file_name,
'height': 256,
'width': 256,
'id': 11,
}
min_x, min_y = 64, 64
max_x, max_y = 128, 128
keypoints = []
num_visible_keypoints = 0
xv = []
yv = []
vv = []
for _ in range(17):
xc = min_x + int(np.random.rand()*(max_x - min_x))
yc = min_y + int(np.random.rand()*(max_y - min_y))
vis = np.random.randint(0, 3)
xv.append(xc)
yv.append(yc)
vv.append(vis)
keypoints.extend([xc, yc, vis])
num_visible_keypoints += (vis > 0)
annotations_list = [{
'area': 0.5,
'iscrowd': False,
'image_id': 11,
'bbox': [64, 64, 128, 128],
'category_id': 1,
'id': 1000
}]
num_points = 45
dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
dp_u = np.random.randn(num_points)
dp_v = np.random.randn(num_points)
dp_x = np.random.rand(num_points)*256.
dp_y = np.random.rand(num_points)*256.
densepose_annotations_dict = {
1000: {
'dp_I': dp_i,
'dp_U': dp_u,
'dp_V': dp_v,
'dp_x': dp_x,
'dp_y': dp_y,
'bbox': [64, 64, 128, 128],
}
}
category_index = {
1: {
'name': 'person',
'id': 1
}
}
_, example, _, _, num_densepose_annotation_skipped = (
create_coco_tf_record.create_tf_example(
image,
annotations_list,
image_dir,
category_index,
include_masks=False,
densepose_annotations_dict=densepose_annotations_dict))
self.assertEqual(num_densepose_annotation_skipped, 0)
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[six.b(image_file_name)])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[six.b(str(image['id']))])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value,
[six.b('jpeg')])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
[six.b('person')])
self._assertProtoEqual(
example.features.feature['image/object/densepose/num'].int64_list.value,
[num_points])
self.assertAllEqual(
example.features.feature[
'image/object/densepose/part_index'].int64_list.value,
dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
self.assertAllClose(
example.features.feature['image/object/densepose/u'].float_list.value,
dp_u)
self.assertAllClose(
example.features.feature['image/object/densepose/v'].float_list.value,
dp_v)
expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
self.assertAllClose(
example.features.feature['image/object/densepose/x'].float_list.value,
expected_dp_x)
self.assertAllClose(
example.features.feature['image/object/densepose/y'].float_list.value,
expected_dp_y)
def test_create_sharded_tf_record(self): def test_create_sharded_tf_record(self):
tmp_dir = self.get_temp_dir() tmp_dir = self.get_temp_dir()
image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg'] image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
......
...@@ -288,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase): ...@@ -288,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75, 1.], [0.75, 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:]) seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual( self.assertAllEqual(
['cat', 'frog'], [b'cat', b'frog'],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:]) seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
self.assertAllClose( self.assertAllClose(
[0.], [0.],
...@@ -332,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase): ...@@ -332,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
[0.75], [0.75],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:]) seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual( self.assertAllEqual(
['cat'], [b'cat'],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:]) seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
self.assertAllClose( self.assertAllClose(
[], [],
......
...@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase): ...@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
tf_record_path = '{}-{:05d}-of-00010'.format( tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx) os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path)) records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)]) self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
if __name__ == '__main__': if __name__ == '__main__':
......