Object detection changes: (#7208)

257914648 by lzc: Internal changes -- 257525973 by Zhichao Lu: Fixes bug that silently prevents checkpoints from loading when training w/ eager + functions. Also sets up scripts to run training. -- 257296614 by Zhichao Lu: Adding detection_features to model outputs -- 257234565 by Zhichao Lu: Fix wrong order of `classes_with_max_scores` in class-agnostic NMS caused by sorting in partitioned-NMS. -- 257232002 by ronnyvotel: Supporting `filter_nonoverlapping` option in np_box_list_ops.clip_to_window(). -- 257198282 by Zhichao Lu: Adding the focal loss and l1 loss from the Objects as Points paper. -- 257089535 by Zhichao Lu: Create Keras based ssd + resnetv1 + fpn. -- 257087407 by Zhichao Lu: Make object_detection/data_decoders Python3-compatible. -- 257004582 by Zhichao Lu: Updates _decode_raw_data_into_masks_and_boxes to the latest binary masks-to-string encoding fo...

Object detection changes: (#7208)
257914648 by lzc: Internal changes -- 257525973 by Zhichao Lu: Fixes bug that silently prevents checkpoints from loading when training w/ eager + functions. Also sets up scripts to run training. -- 257296614 by Zhichao Lu: Adding detection_features to model outputs -- 257234565 by Zhichao Lu: Fix wrong order of `classes_with_max_scores` in class-agnostic NMS caused by sorting in partitioned-NMS. -- 257232002 by ronnyvotel: Supporting `filter_nonoverlapping` option in np_box_list_ops.clip_to_window(). -- 257198282 by Zhichao Lu: Adding the focal loss and l1 loss from the Objects as Points paper. -- 257089535 by Zhichao Lu: Create Keras based ssd + resnetv1 + fpn. -- 257087407 by Zhichao Lu: Make object_detection/data_decoders Python3-compatible. -- 257004582 by Zhichao Lu: Updates _decode_raw_data_into_masks_and_boxes to the latest binary masks-to-string encoding fo...
fe748d4a · pkulzc · GitHub · 81123ebf · fe748d4a · fe748d4a
Unverified Commit fe748d4a authored Jul 15, 2019 by pkulzc Committed by GitHub Jul 15, 2019
20 changed files
--- a/research/object_detection/core/matcher.py
+++ b/research/object_detection/core/matcher.py
@@ -31,7 +31,12 @@ consider this box a positive example (match) nor a negative example (no match).
 The Match class is used to store the match results and it provides simple apis
 to query the results.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import abc
+import six
 import tensorflow as tf
 from object_detection.utils import ops
@@ -210,10 +215,9 @@ class Match(object):
    return gathered_tensor
-class Matcher(object):
+class Matcher(six.with_metaclass(abc.ABCMeta, object)):
  """Abstract base class for matcher.
  """
-  __metaclass__ = abc.ABCMeta
  def __init__(self, use_matmul_gather=False):
    """Constructs a Matcher.

--- a/research/object_detection/core/minibatch_sampler.py
+++ b/research/object_detection/core/minibatch_sampler.py
@@ -28,17 +28,21 @@ Subclasses should implement the Subsample function and can make use of the
 @staticmethod SubsampleIndicator.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 from abc import ABCMeta
 from abc import abstractmethod
+import six
 import tensorflow as tf
 from object_detection.utils import ops
-class MinibatchSampler(object):
+class MinibatchSampler(six.with_metaclass(ABCMeta, object)):
  """Abstract base class for subsampling minibatches."""
-  __metaclass__ = ABCMeta
  def __init__(self):
    """Constructs a minibatch sampler."""

--- a/research/object_detection/core/model.py
+++ b/research/object_detection/core/model.py
@@ -54,7 +54,12 @@ By default, DetectionModels produce bounding box detections; However, we support
 a handful of auxiliary annotations associated with each bounding box, namely,
 instance masks and keypoints.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import abc
+import six
 import tensorflow as tf
 from object_detection.core import standard_fields as fields
@@ -68,12 +73,11 @@ except AttributeError:
  _BaseClass = object
-class DetectionModel(_BaseClass):
+class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
  """Abstract base class for detection models.
  Extends tf.Module to guarantee variable tracking.
  """
-  __metaclass__ = abc.ABCMeta
  def __init__(self, num_classes):
    """Constructor.

--- a/research/object_detection/core/multiclass_nms_test.py
+++ b/research/object_detection/core/multiclass_nms_test.py
@@ -521,5 +521,6 @@ class MulticlassNonMaxSuppressionTest(test_case.TestCase):
      self.assertAllClose(nms_classes_output, exp_nms_classes)
 if __name__ == '__main__':
  tf.test.main()
--- a/research/object_detection/core/post_processing.py
+++ b/research/object_detection/core/post_processing.py
--- a/research/object_detection/core/prefetcher_test.py
+++ b/research/object_detection/core/prefetcher_test.py
@@ -14,6 +14,11 @@
 # ==============================================================================
 """Tests for object_detection.core.prefetcher."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six.moves import range
 import tensorflow as tf
 from object_detection.core import prefetcher

--- a/research/object_detection/core/preprocessor.py
+++ b/research/object_detection/core/preprocessor.py
@@ -63,9 +63,16 @@ we pass it to the functions. At the end of the preprocess we expand the image
 back to rank 4.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import functools
 import inspect
 import sys
+import six
+from six.moves import range
+from six.moves import zip
 import tensorflow as tf
 from tensorflow.python.ops import control_flow_ops
@@ -75,6 +82,7 @@ from object_detection.core import box_list_ops
 from object_detection.core import keypoint_ops
 from object_detection.core import preprocessor_cache
 from object_detection.core import standard_fields as fields
+from object_detection.utils import autoaugment_utils
 from object_detection.utils import shape_utils
@@ -344,6 +352,112 @@ def retain_boxes_above_threshold(boxes,
    return result
+def drop_label_probabilistically(boxes,
+                                 labels,
+                                 label_weights,
+                                 label_confidences=None,
+                                 multiclass_scores=None,
+                                 masks=None,
+                                 keypoints=None,
+                                 dropped_label=None,
+                                 drop_probability=0.0,
+                                 seed=None):
+  """Drops boxes of a certain label with probability drop_probability.
+  Each box whose label equals dropped_label is removed independently with
+  probability drop_probability; boxes with any other label are always kept.
+  Args:
+    boxes: float32 tensor of shape [num_instance, 4] representing boxes
+      location in normalized coordinates.
+    labels: rank 1 int32 tensor of shape [num_instance] containing the object
+      classes.
+    label_weights: float32 tensor of shape [num_instance] representing the
+      weight for each box.
+    label_confidences: (optional) float32 tensor of shape [num_instance]
+      representing the confidence for each box.
+    multiclass_scores: (optional) float32 tensor of shape
+      [num_instances, num_classes] representing the score for each box for each
+      class.
+    masks: (optional) rank 3 float32 tensor with shape
+      [num_instances, height, width] containing instance masks. The masks are of
+      the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+    dropped_label: int32 id of label to drop.
+    drop_probability: float32 probability of dropping a label.
+    seed: random seed.
+  Returns:
+    retained_boxes: [num_retained_instance, 4]
+    retained_labels: [num_retained_instance]
+    retained_label_weights: [num_retained_instance]
+    If label_confidences, multiclass_scores, masks, or keypoints are not None,
+      the function also returns, in this order:
+    retained_label_confidences: [num_retained_instance]
+    retained_multiclass_scores: [num_retained_instance, num_classes]
+    retained_masks: [num_retained_instance, height, width]
+    retained_keypoints: [num_retained_instance, num_keypoints, 2]
+  """
+  with tf.name_scope('DropLabelProbabilistically',
+                     values=[boxes, labels]):
+    indices = tf.where(
+        tf.logical_or(
+            tf.random_uniform(tf.shape(labels), seed=seed) > drop_probability,
+            tf.not_equal(labels, dropped_label)))
+    indices = tf.squeeze(indices, axis=1)
+    retained_boxes = tf.gather(boxes, indices)
+    retained_labels = tf.gather(labels, indices)
+    retained_label_weights = tf.gather(label_weights, indices)
+    result = [retained_boxes, retained_labels, retained_label_weights]
+    if label_confidences is not None:
+      retained_label_confidences = tf.gather(label_confidences, indices)
+      result.append(retained_label_confidences)
+    if multiclass_scores is not None:
+      retained_multiclass_scores = tf.gather(multiclass_scores, indices)
+      result.append(retained_multiclass_scores)
+    if masks is not None:
+      retained_masks = tf.gather(masks, indices)
+      result.append(retained_masks)
+    if keypoints is not None:
+      retained_keypoints = tf.gather(keypoints, indices)
+      result.append(retained_keypoints)
+    return result
+def remap_labels(labels,
+                 original_labels=None,
+                 new_label=None):
+  """Remaps labels that have an id in original_labels to new_label.
+  Args:
+    labels: rank 1 int32 tensor of shape [num_instance] containing the object
+      classes.
+    original_labels: int list of original labels that should be mapped from.
+    new_label: int label to map to.
+  Returns:
+    Remapped labels
+  """
+  new_labels = labels
+  for original_label in original_labels:
+    change = tf.where(
+        tf.equal(new_labels, original_label),
+        tf.add(tf.zeros_like(new_labels), new_label - original_label),
+        tf.zeros_like(new_labels))
+    new_labels = tf.add(
+        new_labels,
+        change)
+  new_labels = tf.reshape(new_labels, tf.shape(labels))
+  return new_labels
 def _flip_boxes_left_right(boxes):
  """Left-right flip the boxes.
@@ -2170,6 +2284,37 @@ def random_black_patches(image,
    return image
+# TODO(barretzoph): Put in AutoAugment Paper link when paper is live.
+def autoaugment_image(image, boxes, policy_name='v0'):
+  """Apply an autoaugment policy to the image and boxes.
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 255].
+    boxes: rank 2 float32 tensor containing the bounding boxes with shape
+           [num_instances, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    policy_name: The name of the AutoAugment policy to use. The available
+      options are `v0`, `v1`, `v2`, `v3` and `test`. `v0` is the policy used for
+      all of the results in the paper and was found to achieve the best results
+      on the COCO dataset. `v1`, `v2` and `v3` are additional good policies
+      found on the COCO dataset that have slight variation in what operations
+      were used during the search procedure along with how many operations are
+      applied in parallel to a single image (2 vs 3).
+  Returns:
+    image: the augmented image.
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+           form. boxes will have been augmented along with image.
+  """
+  return autoaugment_utils.distort_image_with_autoaugment(
+      image, boxes, policy_name)
 def image_to_float(image):
  """Used in Faster R-CNN. Casts image pixel values to float.
@@ -3393,6 +3538,8 @@ def get_default_func_arg_map(include_label_weights=True,
          groundtruth_keypoints,
      ),
      random_black_patches: (fields.InputDataFields.image,),
+      autoaugment_image: (fields.InputDataFields.image,
+                          fields.InputDataFields.groundtruth_boxes,),
      retain_boxes_above_threshold: (
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
@@ -3402,6 +3549,16 @@ def get_default_func_arg_map(include_label_weights=True,
          groundtruth_instance_masks,
          groundtruth_keypoints,
      ),
+      drop_label_probabilistically: (
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_weights,
+          groundtruth_label_confidences,
+          multiclass_scores,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,
+      ),
+      remap_labels: (fields.InputDataFields.groundtruth_classes,),
      image_to_float: (fields.InputDataFields.image,),
      random_resize_method: (fields.InputDataFields.image,),
      resize_to_range: (
@@ -3540,9 +3697,15 @@ def preprocess(tensor_dict,
      return tensor_dict[key] if key is not None else None
    args = [get_arg(a) for a in arg_names]
-    if (preprocess_vars_cache is not None and
+    if preprocess_vars_cache is not None:
-        'preprocess_vars_cache' in inspect.getargspec(func).args):
+      if six.PY2:
-      params['preprocess_vars_cache'] = preprocess_vars_cache
+        # pylint: disable=deprecated-method
+        arg_spec = inspect.getargspec(func)
+        # pylint: enable=deprecated-method
+      else:
+        arg_spec = inspect.getfullargspec(func)
+      if 'preprocess_vars_cache' in arg_spec.args:
+        params['preprocess_vars_cache'] = preprocess_vars_cache
    results = func(*args, **params)
    if not isinstance(results, (list, tuple)):

--- a/research/object_detection/core/preprocessor_test.py
+++ b/research/object_detection/core/preprocessor_test.py
@@ -15,9 +15,15 @@
 """Tests for object_detection.core.preprocessor."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import numpy as np
 import six
+from six.moves import range
+from six.moves import zip
 import tensorflow as tf
 from object_detection.core import preprocessor
@@ -111,6 +117,10 @@ class PreprocessorTest(tf.test.TestCase):
    labels = tf.constant([1, 2], dtype=tf.int32)
    return labels
+  def createTestLabelsLong(self):
+    labels = tf.constant([1, 2, 4], dtype=tf.int32)
+    return labels
  def createTestBoxesOutOfImage(self):
    boxes = tf.constant(
        [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
@@ -294,6 +304,30 @@ class PreprocessorTest(tf.test.TestCase):
  def expectedLabelsAfterThresholdingWithMissingScore(self):
    return tf.constant([2], dtype=tf.float32)
+  def expectedLabelScoresAfterDropping(self):
+    return tf.constant([0.5], dtype=tf.float32)
+  def expectedBoxesAfterDropping(self):
+    return tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
+  def expectedLabelsAfterDropping(self):
+    return tf.constant([2], dtype=tf.float32)
+  def expectedMultiClassScoresAfterDropping(self):
+    return tf.constant([[0.5, 0.5]], dtype=tf.float32)
+  def expectedMasksAfterDropping(self):
+    masks = np.array([[[255.0, 255.0, 0.0], [255.0, 255.0, 0.0],
+                       [255.0, 255.0, 0.0]]])
+    return tf.constant(masks, dtype=tf.float32)
+  def expectedKeypointsAfterDropping(self):
+    keypoints = np.array([[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]])
+    return tf.constant(keypoints, dtype=tf.float32)
+  def expectedLabelsAfterRemapping(self):
+    return tf.constant([3, 3, 4], dtype=tf.float32)
  def testRgbToGrayscale(self):
    images = self.createTestImages()
    grayscale_images = preprocessor._rgb_to_grayscale(images)
@@ -400,6 +434,110 @@ class PreprocessorTest(tf.test.TestCase):
      self.assertAllClose(
          retained_keypoints_, expected_retained_keypoints_)
+  def testDropLabelProbabilistically(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    (retained_boxes, retained_labels,
+     retained_weights) = preprocessor.drop_label_probabilistically(
+         boxes, labels, weights, dropped_label=1, drop_probability=1.0)
+    with self.test_session() as sess:
+      (retained_boxes_, retained_labels_, retained_weights_,
+       expected_retained_boxes_, expected_retained_labels_,
+       expected_retained_weights_) = sess.run([
+           retained_boxes, retained_labels, retained_weights,
+           self.expectedBoxesAfterDropping(),
+           self.expectedLabelsAfterDropping(),
+           self.expectedLabelScoresAfterDropping()
+       ])
+      self.assertAllClose(retained_boxes_, expected_retained_boxes_)
+      self.assertAllClose(retained_labels_, expected_retained_labels_)
+      self.assertAllClose(retained_weights_, expected_retained_weights_)
+  def testDropLabelProbabilisticallyWithProbabilityHalf(self):
+    # Boxes contain one box of label 2 and one box of label 1 which should be
+    # dropped ~50% of the time.
+    num_tests = 100
+    total = 0
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    (_, retained_labels, _) = preprocessor.drop_label_probabilistically(
+        boxes, labels, weights, dropped_label=1, drop_probability=0.5)
+    for _ in range(num_tests):
+      with self.test_session() as sess:
+        retained_labels_ = sess.run(retained_labels)
+        total += len(retained_labels_)
+        self.assertIn(2, retained_labels_)
+    av = total * 1.0 / num_tests
+    self.assertGreater(av, 1.40)
+    self.assertLess(av, 1.50)
+  def testDropLabelProbabilisticallyWithMultiClassScores(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    multiclass_scores = self.createTestMultiClassScores()
+    (_, _, _,
+     retained_multiclass_scores) = preprocessor.drop_label_probabilistically(
+         boxes,
+         labels,
+         weights,
+         multiclass_scores=multiclass_scores,
+         dropped_label=1,
+         drop_probability=1.0)
+    with self.test_session() as sess:
+      (retained_multiclass_scores_,
+       expected_retained_multiclass_scores_) = sess.run([
+           retained_multiclass_scores,
+           self.expectedMultiClassScoresAfterDropping()
+       ])
+      self.assertAllClose(retained_multiclass_scores_,
+                          expected_retained_multiclass_scores_)
+  def testDropLabelProbabilisticallyWithMasks(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    masks = self.createTestMasks()
+    (_, _, _, retained_masks) = preprocessor.drop_label_probabilistically(
+        boxes,
+        labels,
+        weights,
+        masks=masks,
+        dropped_label=1,
+        drop_probability=1.0)
+    with self.test_session() as sess:
+      (retained_masks_, expected_retained_masks_) = sess.run(
+          [retained_masks, self.expectedMasksAfterDropping()])
+      self.assertAllClose(retained_masks_, expected_retained_masks_)
+  def testDropLabelProbabilisticallyWithKeypoints(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    weights = self.createTestGroundtruthWeights()
+    keypoints = self.createTestKeypoints()
+    (_, _, _, retained_keypoints) = preprocessor.drop_label_probabilistically(
+        boxes,
+        labels,
+        weights,
+        keypoints=keypoints,
+        dropped_label=1,
+        drop_probability=1.0)
+    with self.test_session() as sess:
+      (retained_keypoints_, expected_retained_keypoints_) = sess.run(
+          [retained_keypoints,
+           self.expectedKeypointsAfterDropping()])
+      self.assertAllClose(retained_keypoints_, expected_retained_keypoints_)
+  def testRemapLabels(self):
+    labels = self.createTestLabelsLong()
+    remapped_labels = preprocessor.remap_labels(labels, [1, 2], 3)
+    with self.test_session() as sess:
+      (remapped_labels_, expected_remapped_labels_) = sess.run(
+          [remapped_labels, self.expectedLabelsAfterRemapping()])
+      self.assertAllClose(remapped_labels_, expected_remapped_labels_)
  def testFlipBoxesLeftRight(self):
    boxes = self.createTestBoxes()
    flipped_boxes = preprocessor._flip_boxes_left_right(boxes)
@@ -2340,6 +2478,33 @@ class PreprocessorTest(tf.test.TestCase):
          [images_shape, blacked_images_shape])
      self.assertAllEqual(images_shape_, blacked_images_shape_)
+  def testAutoAugmentImage(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.autoaugment_image, {
+        'policy_name': 'v1'
+    }))
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes}
+    autoaugment_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options)
+    augmented_images = autoaugment_tensor_dict[fields.InputDataFields.image]
+    augmented_boxes = autoaugment_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    images_shape = tf.shape(images)
+    boxes_shape = tf.shape(boxes)
+    augmented_images_shape = tf.shape(augmented_images)
+    augmented_boxes_shape = tf.shape(augmented_boxes)
+    with self.test_session() as sess:
+      (images_shape_, boxes_shape_,
+       augmented_images_shape_, augmented_boxes_shape_) = sess.run(
+           [images_shape, boxes_shape,
+            augmented_images_shape, augmented_boxes_shape])
+      self.assertAllEqual(images_shape_, augmented_images_shape_)
+      self.assertAllEqual(boxes_shape_, augmented_boxes_shape_)
  def testRandomResizeMethodWithCache(self):
    preprocess_options = []
    preprocess_options.append((preprocessor.normalize_image, {

--- a/research/object_detection/core/region_similarity_calculator.py
+++ b/research/object_detection/core/region_similarity_calculator.py
@@ -18,18 +18,22 @@
 Region Similarity Calculators compare a pairwise measure of similarity
 between the boxes in two BoxLists.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 from abc import ABCMeta
 from abc import abstractmethod
+import six
 import tensorflow as tf
 from object_detection.core import box_list_ops
 from object_detection.core import standard_fields as fields
-class RegionSimilarityCalculator(object):
+class RegionSimilarityCalculator(six.with_metaclass(ABCMeta, object)):
  """Abstract base class for region similarity calculator."""
-  __metaclass__ = ABCMeta
  def compare(self, boxlist1, boxlist2, scope=None):
    """Computes matrix of pairwise similarity between BoxLists.
@@ -131,6 +135,7 @@ class ThresholdedIouSimilarity(RegionSimilarityCalculator):
        then the comparison result will be the foreground probability of
        the first box, otherwise it will be zero.
    """
+    super(ThresholdedIouSimilarity, self).__init__()
    self._iou_threshold = iou_threshold
  def _compare(self, boxlist1, boxlist2):

--- a/research/object_detection/core/standard_fields.py
+++ b/research/object_detection/core/standard_fields.py
@@ -119,6 +119,9 @@ class DetectionResultFields(object):
    raw_detection_boxes: contains decoded detection boxes without Non-Max
      suppression.
    raw_detection_scores: contains class score logits for raw detection boxes.
+    detection_anchor_indices: The anchor indices of the detections after NMS.
+    detection_features: contains extracted features for each detected box
+      after NMS.
  """
  source_id = 'source_id'
@@ -126,6 +129,7 @@ class DetectionResultFields(object):
  detection_boxes = 'detection_boxes'
  detection_scores = 'detection_scores'
  detection_multiclass_scores = 'detection_multiclass_scores'
+  detection_features = 'detection_features'
  detection_classes = 'detection_classes'
  detection_masks = 'detection_masks'
  detection_boundaries = 'detection_boundaries'
@@ -133,6 +137,7 @@ class DetectionResultFields(object):
  num_detections = 'num_detections'
  raw_detection_boxes = 'raw_detection_boxes'
  raw_detection_scores = 'raw_detection_scores'
+  detection_anchor_indices = 'detection_anchor_indices'
 class BoxListFields(object):

--- a/research/object_detection/core/target_assigner.py
+++ b/research/object_detection/core/target_assigner.py
@@ -31,6 +31,12 @@ Note that TargetAssigners only operate on detections from a single
 image at a time, so any logic for applying a TargetAssigner to multiple
 images must be handled externally.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six.moves import range
+from six.moves import zip
 import tensorflow as tf
 from object_detection.box_coders import faster_rcnn_box_coder

--- a/research/object_detection/data_decoders/tf_example_decoder.py
+++ b/research/object_detection/data_decoders/tf_example_decoder.py
@@ -17,6 +17,11 @@
 A decoder to decode string tensors containing serialized tensorflow.Example
 protos for object detection.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six.moves import zip
 import tensorflow as tf
 from object_detection.core import data_decoder

--- a/research/object_detection/data_decoders/tf_example_decoder_test.py
+++ b/research/object_detection/data_decoders/tf_example_decoder_test.py
@@ -16,6 +16,7 @@
 import os
 import numpy as np
+import six
 import tensorflow as tf
 from object_detection.core import standard_fields as fields
@@ -66,9 +67,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
                    dataset_util.bytes_list_feature(
                        [encoded_additional_channel] * 2),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/source_id':
-                    dataset_util.bytes_feature('image_id'),
+                    dataset_util.bytes_feature(six.b('image_id')),
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -88,9 +89,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
-                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/encoded':
-                'image/format': dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(encoded_jpeg),
-                'image/source_id': dataset_util.bytes_feature('image_id'),
+                'image/format':
+                    dataset_util.bytes_feature(six.b('jpeg')),
+                'image/source_id':
+                    dataset_util.bytes_feature(six.b('image_id')),
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
@@ -107,7 +111,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
    self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
    self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
                                            original_image_spatial_shape])
-    self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
+    self.assertEqual(
+        six.b('image_id'), tensor_dict[fields.InputDataFields.source_id])
  def testDecodeImageKeyAndFilename(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -116,8 +121,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
-                'image/key/sha256': dataset_util.bytes_feature('abc'),
+                'image/key/sha256': dataset_util.bytes_feature(six.b('abc')),
-                'image/filename': dataset_util.bytes_feature('filename')
+                'image/filename': dataset_util.bytes_feature(six.b('filename'))
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
@@ -126,8 +131,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
-    self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
+    self.assertEqual(six.b('abc'), tensor_dict[fields.InputDataFields.key])
-    self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
+    self.assertEqual(
+        six.b('filename'), tensor_dict[fields.InputDataFields.filename])
  def testDecodePngImage(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -137,8 +143,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_png),
-                'image/format': dataset_util.bytes_feature('png'),
+                'image/format': dataset_util.bytes_feature(six.b('png')),
-                'image/source_id': dataset_util.bytes_feature('image_id')
+                'image/source_id': dataset_util.bytes_feature(
+                    six.b('image_id'))
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
@@ -155,7 +162,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
    self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
    self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
                                            original_image_spatial_shape])
-    self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
+    self.assertEqual(
+        six.b('image_id'), tensor_dict[fields.InputDataFields.source_id])
  def testDecodePngInstanceMasks(self):
    image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
@@ -174,7 +182,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/mask':
                    dataset_util.bytes_list_feature(encoded_masks)
            })).SerializeToString()
@@ -200,7 +208,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/mask':
                    dataset_util.bytes_list_feature(encoded_masks),
                'image/height':
@@ -232,7 +240,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
@@ -271,7 +279,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
@@ -321,7 +329,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/bbox/ymin':
                    dataset_util.float_list_feature(bbox_ymins),
                'image/object/bbox/xmin':
@@ -354,7 +362,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/label':
                    dataset_util.int64_list_feature(bbox_classes),
            })).SerializeToString()
@@ -385,7 +393,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/multiclass_scores':
                    dataset_util.float_list_feature(flattened_multiclass_scores
                                                   ),
@@ -404,10 +412,41 @@ class TfExampleDecoderTest(tf.test.TestCase):
    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
    with self.test_session() as sess:
      tensor_dict = sess.run(tensor_dict)
    self.assertAllEqual(flattened_multiclass_scores,
                        tensor_dict[fields.InputDataFields.multiclass_scores])
+  def testDecodeEmptyMultiClassScores(self):
+    # Checks that a decoder built with load_multiclass_scores=True returns a
+    # zero-sized multiclass_scores array when the serialized example has no
+    # 'image/object/class/multiclass_scores' feature.
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature(six.b('jpeg')),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+                # No multiclass scores feature is written to this example.
+            })).SerializeToString()
+    example_decoder = tf_example_decoder.TfExampleDecoder(
+        load_multiclass_scores=True)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+    # The decoded entry must exist but contain no elements.
+    self.assertEqual(0,
+                     tensor_dict[fields.InputDataFields.multiclass_scores].size)
  def testDecodeObjectLabelNoText(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
@@ -418,7 +457,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/label':
                    dataset_util.int64_list_feature(bbox_classes),
            })).SerializeToString()
@@ -454,7 +493,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
  def testDecodeObjectLabelWithText(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
-    bbox_classes_text = ['cat', 'dog']
+    bbox_classes_text = [six.b('cat'), six.b('dog')]
    # Annotation label gets overridden by labelmap id.
    annotated_bbox_classes = [3, 4]
    expected_bbox_classes = [1, 2]
@@ -464,7 +503,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
                'image/object/class/label':
@@ -499,14 +538,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
  def testDecodeObjectLabelUnrecognizedName(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
-    bbox_classes_text = ['cat', 'cheetah']
+    bbox_classes_text = [six.b('cat'), six.b('cheetah')]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
            })).SerializeToString()
@@ -541,14 +580,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
  def testDecodeObjectLabelWithMappingWithDisplayName(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
-    bbox_classes_text = ['cat', 'dog']
+    bbox_classes_text = [six.b('cat'), six.b('dog')]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
            })).SerializeToString()
@@ -583,7 +622,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
  def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
-    bbox_classes_text = ['cat', 'cheetah']
+    bbox_classes_text = [six.b('cat'), six.b('cheetah')]
    bbox_classes_id = [5, 6]
    example = tf.train.Example(
        features=tf.train.Features(
@@ -591,7 +630,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
                'image/object/class/label':
@@ -627,14 +666,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
  def testDecodeObjectLabelWithMappingWithName(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
-    bbox_classes_text = ['cat', 'dog']
+    bbox_classes_text = [six.b('cat'), six.b('dog')]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/class/text':
                    dataset_util.bytes_list_feature(bbox_classes_text),
            })).SerializeToString()
@@ -676,7 +715,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/area':
                    dataset_util.float_list_feature(object_area),
            })).SerializeToString()
@@ -702,7 +741,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/is_crowd':
                    dataset_util.int64_list_feature(object_is_crowd),
            })).SerializeToString()
@@ -730,7 +769,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/difficult':
                    dataset_util.int64_list_feature(object_difficult),
            })).SerializeToString()
@@ -758,7 +797,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/group_of':
                    dataset_util.int64_list_feature(object_group_of),
            })).SerializeToString()
@@ -786,7 +825,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/object/weight':
                    dataset_util.float_list_feature(object_weights),
            })).SerializeToString()
@@ -828,7 +867,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
@@ -883,7 +922,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/height':
                    dataset_util.int64_feature(image_height),
                'image/width':
@@ -905,7 +944,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
        features=tf.train.Features(
            feature={
                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
-                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/format': dataset_util.bytes_feature(six.b('jpeg')),
                'image/class/label': dataset_util.int64_list_feature([1, 2]),
            })).SerializeToString()
    example_decoder = tf_example_decoder.TfExampleDecoder()
@@ -923,9 +962,10 @@ class TfExampleDecoderTest(tf.test.TestCase):
                'image/encoded':
                    dataset_util.bytes_feature(encoded_jpeg),
                'image/format':
-                    dataset_util.bytes_feature('jpeg'),
+                    dataset_util.bytes_feature(six.b('jpeg')),
                'image/class/text':
-                    dataset_util.bytes_list_feature(['dog', 'cat']),
+                    dataset_util.bytes_list_feature(
+                        [six.b('dog'), six.b('cat')]),
            })).SerializeToString()
    label_map_string = """
      item {

--- a/research/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py
+++ b/research/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py
@@ -12,7 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-r"""An executable to expand hierarchically image-level labels and boxes.
+r"""An executable to expand image-level labels, boxes and segments.
+The expansion is performed using class hierarchy, provided in JSON file.
+The expected file formats are the following:
+- for box and segment files: CSV file is expected to have LabelName field
+- for image-level labels: CSV file is expected to have LabelName and Confidence
+fields
+Note that LabelName is the only field used for expansion.
 Example usage:
 python models/research/object_detection/dataset_tools/\
@@ -20,13 +29,30 @@ oid_hierarchical_labels_expansion.py \
 --json_hierarchy_file=<path to JSON hierarchy> \
 --input_annotations=<input csv file> \
 --output_annotations=<output csv file> \
--annotation_type=<1 (for boxes) or 2 (for image-level labels)>
+--annotation_type=<1 (for boxes and segments) or 2 (for image-level labels)>
 """
 from __future__ import print_function
-import argparse
+import copy
 import json
+from absl import app
+from absl import flags
+# Command-line flags. All four are marked as required in the __main__ guard.
+flags.DEFINE_string(
+    'json_hierarchy_file', None,
+    'Path to the file containing label hierarchy in JSON format.')
+# Adjacent string literals are concatenated verbatim, so each continued
+# fragment must end with an explicit space.
+flags.DEFINE_string(
+    'input_annotations', None, 'Path to Open Images annotations file '
+    '(either bounding boxes, segments or image-level labels).')
+flags.DEFINE_string('output_annotations', None, 'Path to the output file.')
+flags.DEFINE_integer(
+    'annotation_type', None,
+    'Type of the input annotations: 1 - boxes or segments, '
+    '2 - image-level labels.'
+)
+FLAGS = flags.FLAGS
 def _update_dict(initial_dict, update):
@@ -37,11 +63,11 @@ def _update_dict(initial_dict, update):
   update: updated dictionary.
  """
-  for key, value_list in update.iteritems():
+  for key, value_list in update.items():
    if key in initial_dict:
-      initial_dict[key].extend(value_list)
+      initial_dict[key].update(value_list)
    else:
-      initial_dict[key] = value_list
+      initial_dict[key] = set(value_list)
 def _build_plain_hierarchy(hierarchy, skip_root=False):
@@ -57,7 +83,7 @@ def _build_plain_hierarchy(hierarchy, skip_root=False):
    keyed_child  - dictionary of children - all its parent nodes
    children - all children of the current node.
  """
-  all_children = []
+  all_children = set([])
  all_keyed_parent = {}
  all_keyed_child = {}
  if 'Subcategory' in hierarchy:
@@ -67,14 +93,14 @@ def _build_plain_hierarchy(hierarchy, skip_root=False):
      # ple parents in the hiearchy.
      _update_dict(all_keyed_parent, keyed_parent)
      _update_dict(all_keyed_child, keyed_child)
-      all_children.extend(children)
+      all_children.update(children)
  if not skip_root:
-    all_keyed_parent[hierarchy['LabelName']] = all_children
+    all_keyed_parent[hierarchy['LabelName']] = copy.deepcopy(all_children)
-    all_children = [hierarchy['LabelName']] + all_children
+    all_children.add(hierarchy['LabelName'])
-    for child, _ in all_keyed_child.iteritems():
+    for child, _ in all_keyed_child.items():
-      all_keyed_child[child].append(hierarchy['LabelName'])
+      all_keyed_child[child].add(hierarchy['LabelName'])
-    all_keyed_child[hierarchy['LabelName']] = []
+    all_keyed_child[hierarchy['LabelName']] = set([])
  return all_keyed_parent, all_keyed_child, all_children
@@ -92,110 +118,112 @@ class OIDHierarchicalLabelsExpansion(object):
    self._hierarchy_keyed_parent, self._hierarchy_keyed_child, _ = (
        _build_plain_hierarchy(hierarchy, skip_root=True))
-  def expand_boxes_from_csv(self, csv_row):
+  def expand_boxes_or_segments_from_csv(self, csv_row,
-    """Expands a row containing bounding boxes from CSV file.
+                                        labelname_column_index=1):
+    """Expands a row containing bounding boxes/segments from CSV file.
    Args:
      csv_row: a single row of Open Images released groundtruth file.
+      labelname_column_index: 0-based index of LabelName column in CSV file.
    Returns:
      a list of strings (including the initial row) corresponding to the ground
      truth expanded to multiple annotation for evaluation with Open Images
-      Challenge 2018 metric.
+      Challenge 2018/2019 metrics.
    """
-    # Row header is expected to be exactly:
+    # Row header is expected to be the following for boxes:
-    # ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,
+    # ImageID,LabelName,Confidence,XMin,XMax,YMin,YMax,IsGroupOf
-    # IsTruncated,IsGroupOf,IsDepiction,IsInside
+    # Row header is expected to be the following for segments:
-    cvs_row_splitted = csv_row.split(',')
+    # ImageID,LabelName,ImageWidth,ImageHeight,XMin,XMax,YMin,YMax,
-    assert len(cvs_row_splitted) == 13
+    # IsGroupOf,Mask
+    split_csv_row = csv_row.split(',')
    result = [csv_row]
-    assert cvs_row_splitted[2] in self._hierarchy_keyed_child
+    assert split_csv_row[
-    parent_nodes = self._hierarchy_keyed_child[cvs_row_splitted[2]]
+        labelname_column_index] in self._hierarchy_keyed_child
+    parent_nodes = self._hierarchy_keyed_child[
+        split_csv_row[labelname_column_index]]
    for parent_node in parent_nodes:
-      cvs_row_splitted[2] = parent_node
+      split_csv_row[labelname_column_index] = parent_node
-      result.append(','.join(cvs_row_splitted))
+      result.append(','.join(split_csv_row))
    return result
-  def expand_labels_from_csv(self, csv_row):
+  def expand_labels_from_csv(self,
-    """Expands a row containing bounding boxes from CSV file.
+                             csv_row,
+                             labelname_column_index=1,
+                             confidence_column_index=2):
+    """Expands a row containing labels from CSV file.
    Args:
      csv_row: a single row of Open Images released groundtruth file.
+      labelname_column_index: 0-based index of LabelName column in CSV file.
+      confidence_column_index: 0-based index of Confidence column in CSV file.
    Returns:
      a list of strings (including the initial row) corresponding to the ground
      truth expanded to multiple annotation for evaluation with Open Images
-      Challenge 2018 metric.
+      Challenge 2018/2019 metrics.
    """
    # Row header is expected to be exactly:
    # ImageID,Source,LabelName,Confidence
-    cvs_row_splited = csv_row.split(',')
+    split_csv_row = csv_row.split(',')
-    assert len(cvs_row_splited) == 4
    result = [csv_row]
-    if int(cvs_row_splited[3]) == 1:
+    if int(split_csv_row[confidence_column_index]) == 1:
-      assert cvs_row_splited[2] in self._hierarchy_keyed_child
+      assert split_csv_row[
-      parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]]
+          labelname_column_index] in self._hierarchy_keyed_child
+      parent_nodes = self._hierarchy_keyed_child[
+          split_csv_row[labelname_column_index]]
      for parent_node in parent_nodes:
-        cvs_row_splited[2] = parent_node
+        split_csv_row[labelname_column_index] = parent_node
-        result.append(','.join(cvs_row_splited))
+        result.append(','.join(split_csv_row))
    else:
-      assert cvs_row_splited[2] in self._hierarchy_keyed_parent
+      assert split_csv_row[
-      child_nodes = self._hierarchy_keyed_parent[cvs_row_splited[2]]
+          labelname_column_index] in self._hierarchy_keyed_parent
+      child_nodes = self._hierarchy_keyed_parent[
+          split_csv_row[labelname_column_index]]
      for child_node in child_nodes:
-        cvs_row_splited[2] = child_node
+        split_csv_row[labelname_column_index] = child_node
-        result.append(','.join(cvs_row_splited))
+        result.append(','.join(split_csv_row))
    return result
-def main(parsed_args):
+def main(unused_args):
+  del unused_args
-  with open(parsed_args.json_hierarchy_file) as f:
+  with open(FLAGS.json_hierarchy_file) as f:
    hierarchy = json.load(f)
  expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy)
  labels_file = False
-  if parsed_args.annotation_type == 2:
+  if FLAGS.annotation_type == 2:
    labels_file = True
-  elif parsed_args.annotation_type != 1:
+  elif FLAGS.annotation_type != 1:
    print('--annotation_type expected value is 1 or 2.')
    return -1
-  with open(parsed_args.input_annotations, 'r') as source:
+  confidence_column_index = -1
-    with open(parsed_args.output_annotations, 'w') as target:
+  labelname_column_index = -1
-      header = None
+  with open(FLAGS.input_annotations, 'r') as source:
+    with open(FLAGS.output_annotations, 'w') as target:
+      header = source.readline()
+      target.writelines([header])
+      column_names = header.strip().split(',')
+      labelname_column_index = column_names.index('LabelName')
+      if labels_file:
+        confidence_column_index = column_names.index('Confidence')
      for line in source:
-        if not header:
-          header = line
-          target.writelines(header)
-          continue
        if labels_file:
-          expanded_lines = expansion_generator.expand_labels_from_csv(line)
+          expanded_lines = expansion_generator.expand_labels_from_csv(
+              line, labelname_column_index, confidence_column_index)
        else:
-          expanded_lines = expansion_generator.expand_boxes_from_csv(line)
+          expanded_lines = (
+              expansion_generator.expand_boxes_or_segments_from_csv(
+                  line, labelname_column_index))
        target.writelines(expanded_lines)
 if __name__ == '__main__':
+  flags.mark_flag_as_required('json_hierarchy_file')
+  flags.mark_flag_as_required('input_annotations')
+  flags.mark_flag_as_required('output_annotations')
+  flags.mark_flag_as_required('annotation_type')
-  parser = argparse.ArgumentParser(
+  app.run(main)
-      description='Hierarchically expand annotations (excluding root node).')
-  parser.add_argument(
-      '--json_hierarchy_file',
-      required=True,
-      help='Path to the file containing label hierarchy in JSON format.')
-  parser.add_argument(
-      '--input_annotations',
-      required=True,
-      help="""Path to Open Images annotations file (either bounding boxes or
-      image-level labels).""")
-  parser.add_argument(
-      '--output_annotations',
-      required=True,
-      help="""Path to the output file.""")
-  parser.add_argument(
-      '--annotation_type',
-      type=int,
-      required=True,
-      help="""Type of the input annotations: 1 - boxes, 2 - image-level
-      labels"""
-  )
-  args = parser.parse_args()
-  main(args)
--- a/research/object_detection/dataset_tools/oid_hierarchical_labels_expansion_test.py
+++ b/research/object_detection/dataset_tools/oid_hierarchical_labels_expansion_test.py
@@ -30,11 +30,17 @@ def create_test_data():
      'Subcategory': [{
          'LabelName': 'b'
      }, {
-          'LabelName': 'c',
+          'LabelName':
+              'c',
          'Subcategory': [{
              'LabelName': 'd'
          }, {
              'LabelName': 'e'
+          }, {
+              'LabelName': 'f',
+              'Subcategory': [{
+                  'LabelName': 'd'
+              },]
          }]
      }, {
          'LabelName': 'f',
@@ -50,19 +56,24 @@ def create_test_data():
  label_rows = [
      '123,verification,b,0', '123,verification,c,0', '124,verification,d,1'
  ]
-  return hierarchy, bbox_rows, label_rows
+  segm_rows = [
+      '123,cc,b,100,100,0.1,0.2,0.1,0.2,0,MASK',
+      '123,cc,d,100,100,0.2,0.3,0.1,0.2,0,MASK',
+  ]
+  return hierarchy, bbox_rows, segm_rows, label_rows
 class HierarchicalLabelsExpansionTest(tf.test.TestCase):
  def test_bbox_expansion(self):
-    hierarchy, bbox_rows, _ = create_test_data()
+    hierarchy, bbox_rows, _, _ = create_test_data()
    expansion_generator = (
        oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
            hierarchy))
    all_result_rows = []
    for row in bbox_rows:
-      all_result_rows.extend(expansion_generator.expand_boxes_from_csv(row))
+      all_result_rows.extend(
+          expansion_generator.expand_boxes_or_segments_from_csv(row, 2))
    self.assertItemsEqual([
        '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
        '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
@@ -70,18 +81,35 @@ class HierarchicalLabelsExpansionTest(tf.test.TestCase):
        '123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0'
    ], all_result_rows)
+  def test_segm_expansion(self):
+    # Segment rows go through the same expansion entry point as box rows.
+    # LabelName sits at column index 2 in these test rows, so the index is
+    # passed explicitly instead of relying on the default of 1.
+    hierarchy, _, segm_rows, _ = create_test_data()
+    expansion_generator = (
+        oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
+            hierarchy))
+    all_result_rows = []
+    for row in segm_rows:
+      all_result_rows.extend(
+          expansion_generator.expand_boxes_or_segments_from_csv(row, 2))
+    # The 'b' row comes back unchanged; the 'd' row is additionally emitted
+    # with its label replaced by 'f' and by 'c'. Order is not checked.
+    self.assertItemsEqual([
+        '123,cc,b,100,100,0.1,0.2,0.1,0.2,0,MASK',
+        '123,cc,d,100,100,0.2,0.3,0.1,0.2,0,MASK',
+        '123,cc,f,100,100,0.2,0.3,0.1,0.2,0,MASK',
+        '123,cc,c,100,100,0.2,0.3,0.1,0.2,0,MASK'
+    ], all_result_rows)
  def test_labels_expansion(self):
-    hierarchy, _, label_rows = create_test_data()
+    hierarchy, _, _, label_rows = create_test_data()
    expansion_generator = (
        oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
            hierarchy))
    all_result_rows = []
    for row in label_rows:
-      all_result_rows.extend(expansion_generator.expand_labels_from_csv(row))
+      all_result_rows.extend(
+          expansion_generator.expand_labels_from_csv(row, 2, 3))
    self.assertItemsEqual([
        '123,verification,b,0', '123,verification,c,0', '123,verification,d,0',
-        '123,verification,e,0', '124,verification,d,1', '124,verification,f,1',
+        '123,verification,f,0', '123,verification,e,0', '124,verification,d,1',
-        '124,verification,c,1'
+        '124,verification,f,1', '124,verification,c,1'
    ], all_result_rows)
 if __name__ == '__main__':

--- a/research/object_detection/eval_util.py
+++ b/research/object_detection/eval_util.py
@@ -13,12 +13,17 @@
 # limitations under the License.
 # ==============================================================================
 """Common utility functions for evaluation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 import collections
 import os
 import re
 import time
 import numpy as np
+from six.moves import range
 import tensorflow as tf
 from object_detection.core import box_list

--- a/research/object_detection/eval_util_test.py
+++ b/research/object_detection/eval_util_test.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 from absl.testing import parameterized
+import six
+from six.moves import range
 import tensorflow as tf
 from object_detection import eval_util
@@ -113,7 +115,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    with self.test_session() as sess:
      metrics = {}
-      for key, (value_op, _) in metric_ops.iteritems():
+      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op)
      metrics = sess.run(metrics)
@@ -142,7 +144,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    with self.test_session() as sess:
      metrics = {}
-      for key, (value_op, _) in metric_ops.iteritems():
+      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op_boxes)
      sess.run(update_op_masks)
@@ -173,7 +175,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    with self.test_session() as sess:
      metrics = {}
-      for key, (value_op, _) in metric_ops.iteritems():
+      for key, (value_op, _) in six.iteritems(metric_ops):
        metrics[key] = value_op
      sess.run(update_op_boxes)
      sess.run(update_op_masks)

--- a/research/object_detection/export_inference_graph.py
+++ b/research/object_detection/export_inference_graph.py
@@ -45,10 +45,16 @@ and the following output nodes returned by the model.postprocess(..):
  * `raw_detection_scores`: Outputs float32 tensors of the form
      [batch, raw_num_boxes, num_classes_with_background] containing class score
      logits for raw detection boxes.
-  * `detection_masks`: Outputs float32 tensors of the form
+  * `detection_masks`: (Optional) Outputs float32 tensors of the form
      [batch, num_boxes, mask_height, mask_width] containing predicted instance
      masks for each box if its present in the dictionary of postprocessed
      tensors returned by the model.
+  * `detection_multiclass_scores`: (Optional) Outputs float32 tensors of the
+      form [batch, num_boxes, num_classes_with_background] containing the class
+      score distribution for detected boxes, including background if any.
+  * `detection_features`: (Optional) Outputs float32 tensors of the form
+      [batch, num_boxes, roi_height, roi_width, depth] containing classifier
+      features for each detected box.
 Notes:
 * This tool uses `use_moving_averages` from eval_config to decide which

--- a/research/object_detection/exporter.py
+++ b/research/object_detection/exporter.py
@@ -58,8 +58,11 @@ def rewrite_nn_resize_op(is_quantized=False):
      inputs=[graph_matcher.OneofPattern([fake_quant_pattern, mul_pattern]),
              'Const'],
      ordered_inputs=False)
+  add_type_name = 'Add'
+  if tf.compat.forward_compatible(2019, 6, 26):
+    add_type_name = 'AddV2'
  add_pattern = graph_matcher.OpTypePattern(
-      'Add', inputs=[reshape_2_pattern, '*'], ordered_inputs=False)
+      add_type_name, inputs=[reshape_2_pattern, '*'], ordered_inputs=False)
  matcher = graph_matcher.GraphMatcher(add_pattern)
  for match in matcher.match_graph(tf.get_default_graph()):
@@ -179,6 +182,10 @@ def add_output_tensor_nodes(postprocessed_tensors,
    * detection_multiclass_scores: (Optional) float32 tensor of shape
      [batch_size, num_boxes, num_classes_with_background] for containing class
      score distribution for detected boxes including background if any.
+    * detection_features: (Optional) float32 tensor of shape
+      [batch, num_boxes, roi_height, roi_width, depth]
+      containing classifier features
+      for each detected box
    * detection_classes: float32 tensor of shape [batch_size, num_boxes]
      containing class predictions for the detected boxes.
    * detection_keypoints: (Optional) float32 tensor of shape
@@ -194,6 +201,7 @@ def add_output_tensor_nodes(postprocessed_tensors,
      'detection_scores': [batch, max_detections]
      'detection_multiclass_scores': [batch, max_detections,
        num_classes_with_background]
+      'detection_features': [batch, num_boxes, roi_height, roi_width, depth]
      'detection_classes': [batch, max_detections]
      'detection_masks': [batch, max_detections, mask_height, mask_width]
        (optional).
@@ -211,6 +219,8 @@ def add_output_tensor_nodes(postprocessed_tensors,
  scores = postprocessed_tensors.get(detection_fields.detection_scores)
  multiclass_scores = postprocessed_tensors.get(
      detection_fields.detection_multiclass_scores)
+  box_classifier_features = postprocessed_tensors.get(
+      detection_fields.detection_features)
  raw_boxes = postprocessed_tensors.get(detection_fields.raw_detection_boxes)
  raw_scores = postprocessed_tensors.get(detection_fields.raw_detection_scores)
  classes = postprocessed_tensors.get(
@@ -226,6 +236,10 @@ def add_output_tensor_nodes(postprocessed_tensors,
  if multiclass_scores is not None:
    outputs[detection_fields.detection_multiclass_scores] = tf.identity(
        multiclass_scores, name=detection_fields.detection_multiclass_scores)
+  if box_classifier_features is not None:
+    outputs[detection_fields.detection_features] = tf.identity(
+        box_classifier_features,
+        name=detection_fields.detection_features)
  outputs[detection_fields.detection_classes] = tf.identity(
      classes, name=detection_fields.detection_classes)
  outputs[detection_fields.num_detections] = tf.identity(

--- a/research/object_detection/exporter_test.py
+++ b/research/object_detection/exporter_test.py
@@ -28,6 +28,7 @@ from object_detection.core import model
 from object_detection.protos import graph_rewriter_pb2
 from object_detection.protos import pipeline_pb2
 from object_detection.utils import ops
+from object_detection.utils import variables_helper
 if six.PY2:
  import mock  # pylint: disable=g-import-not-at-top
@@ -39,9 +40,11 @@ slim = tf.contrib.slim
 class FakeModel(model.DetectionModel):
-  def __init__(self, add_detection_keypoints=False, add_detection_masks=False):
+  def __init__(self, add_detection_keypoints=False, add_detection_masks=False,
+               add_detection_features=False):
    self._add_detection_keypoints = add_detection_keypoints
    self._add_detection_masks = add_detection_masks
+    self._add_detection_features = add_detection_features
  def preprocess(self, inputs):
    true_image_shapes = []  # Doesn't matter for the fake model.
@@ -79,6 +82,11 @@ class FakeModel(model.DetectionModel):
      if self._add_detection_masks:
        postprocessed_tensors['detection_masks'] = tf.constant(
            np.arange(64).reshape([2, 2, 4, 4]), tf.float32)
+      if self._add_detection_features:
+        # Fake detection features: a [4, 4, 10] feature map for each box.
+        postprocessed_tensors['detection_features'] = tf.constant(
+            np.ones((2, 2, 4, 4, 10)), tf.float32)
    return postprocessed_tensors
  def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
@@ -345,7 +353,7 @@ class ExportInferenceGraphTest(tf.test.TestCase):
          write_inference_graph=True)
    self._load_inference_graph(inference_graph_path, is_binary=False)
    has_quant_nodes = False
-    for v in tf.global_variables():
+    for v in variables_helper.get_global_variables_safely():
      if v.op.name.endswith('act_quant/min'):
        has_quant_nodes = True
        break
@@ -362,7 +370,8 @@ class ExportInferenceGraphTest(tf.test.TestCase):
    with mock.patch.object(
        model_builder, 'build', autospec=True) as mock_builder:
      mock_builder.return_value = FakeModel(
-          add_detection_keypoints=True, add_detection_masks=True)
+          add_detection_keypoints=True, add_detection_masks=True,
+          add_detection_features=True)
      pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
      exporter.export_inference_graph(
          input_type='image_tensor',
@@ -379,6 +388,7 @@ class ExportInferenceGraphTest(tf.test.TestCase):
      inference_graph.get_tensor_by_name('detection_keypoints:0')
      inference_graph.get_tensor_by_name('detection_masks:0')
      inference_graph.get_tensor_by_name('num_detections:0')
+      inference_graph.get_tensor_by_name('detection_features:0')
  def test_export_model_with_detection_only_nodes(self):
    tmp_dir = self.get_temp_dir()
@@ -409,6 +419,36 @@ class ExportInferenceGraphTest(tf.test.TestCase):
        inference_graph.get_tensor_by_name('detection_keypoints:0')
        inference_graph.get_tensor_by_name('detection_masks:0')
+  def test_export_model_with_detection_only_nodes_and_detection_features(self):
+    """Exports a model emitting detection features but no keypoints or masks.
+
+    The exported frozen graph must expose the standard detection tensors plus
+    `detection_features`, and must not contain keypoint or mask tensors.
+    """
+    tmp_dir = self.get_temp_dir()
+    trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
+    self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
+                                          use_moving_averages=True)
+    output_directory = os.path.join(tmp_dir, 'output')
+    inference_graph_path = os.path.join(output_directory,
+                                        'frozen_inference_graph.pb')
+    with mock.patch.object(
+        model_builder, 'build', autospec=True) as mock_builder:
+      mock_builder.return_value = FakeModel(add_detection_features=True)
+      pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+      exporter.export_inference_graph(
+          input_type='image_tensor',
+          pipeline_config=pipeline_config,
+          trained_checkpoint_prefix=trained_checkpoint_prefix,
+          output_directory=output_directory)
+    inference_graph = self._load_inference_graph(inference_graph_path)
+    with self.test_session(graph=inference_graph):
+      # Tensors that must be present; get_tensor_by_name raises if one is
+      # missing, which fails the test.
+      inference_graph.get_tensor_by_name('image_tensor:0')
+      inference_graph.get_tensor_by_name('detection_boxes:0')
+      inference_graph.get_tensor_by_name('detection_scores:0')
+      inference_graph.get_tensor_by_name('detection_multiclass_scores:0')
+      inference_graph.get_tensor_by_name('detection_classes:0')
+      inference_graph.get_tensor_by_name('num_detections:0')
+      inference_graph.get_tensor_by_name('detection_features:0')
+      # Tensors that must be absent. Each lookup gets its own assertRaises
+      # context: in a shared context the first KeyError ends the block, so the
+      # second lookup would never run and its absence would go unchecked.
+      with self.assertRaises(KeyError):
+        inference_graph.get_tensor_by_name('detection_keypoints:0')
+      with self.assertRaises(KeyError):
+        inference_graph.get_tensor_by_name('detection_masks:0')
  def test_export_and_run_inference_with_image_tensor(self):
    tmp_dir = self.get_temp_dir()
    trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
@@ -738,6 +778,8 @@ class ExportInferenceGraphTest(tf.test.TestCase):
            signature.outputs['detection_boxes'].name)
        scores = od_graph.get_tensor_by_name(
            signature.outputs['detection_scores'].name)
+        multiclass_scores = od_graph.get_tensor_by_name(
+            signature.outputs['detection_multiclass_scores'].name)
        classes = od_graph.get_tensor_by_name(
            signature.outputs['detection_classes'].name)
        keypoints = od_graph.get_tensor_by_name(
@@ -747,9 +789,10 @@ class ExportInferenceGraphTest(tf.test.TestCase):
        num_detections = od_graph.get_tensor_by_name(
            signature.outputs['num_detections'].name)
-        (boxes_np, scores_np, classes_np, keypoints_np, masks_np,
+        (boxes_np, scores_np, multiclass_scores_np, classes_np, keypoints_np,
-         num_detections_np) = sess.run(
+         masks_np, num_detections_np) = sess.run(
-             [boxes, scores, classes, keypoints, masks, num_detections],
+             [boxes, scores, multiclass_scores, classes, keypoints, masks,
+              num_detections],
             feed_dict={tf_example: tf_example_np})
        self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
                                        [0.5, 0.5, 0.8, 0.8]],
@@ -757,6 +800,8 @@ class ExportInferenceGraphTest(tf.test.TestCase):
                                        [0.0, 0.0, 0.0, 0.0]]])
        self.assertAllClose(scores_np, [[0.7, 0.6],
                                        [0.9, 0.0]])
+        self.assertAllClose(multiclass_scores_np, [[[0.3, 0.7], [0.4, 0.6]],
+                                                   [[0.1, 0.9], [0.0, 0.0]]])
        self.assertAllClose(classes_np, [[1, 2],
                                         [2, 1]])
        self.assertAllClose(keypoints_np, np.arange(48).reshape([2, 2, 6, 2]))