Unverified Commit 97760186 authored by Jonathan Huang, committed by GitHub

Merge pull request #4460 from pkulzc/master

Release evaluation code for OI Challenge 2018 and minor fixes. 
parents ed901b73 a703fc0c
......@@ -56,15 +56,26 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
else:
height, width = spatial_image_shape # pylint: disable=unpacking-non-sequence
num_additional_channels = 0
if fields.InputDataFields.image_additional_channels in dataset.output_shapes:
num_additional_channels = dataset.output_shapes[
fields.InputDataFields.image_additional_channels].dims[2].value
padding_shapes = {
fields.InputDataFields.image: [height, width, 3],
# Additional channels are merged before batching.
fields.InputDataFields.image: [
height, width, 3 + num_additional_channels
],
fields.InputDataFields.image_additional_channels: [
height, width, num_additional_channels
],
fields.InputDataFields.source_id: [],
fields.InputDataFields.filename: [],
fields.InputDataFields.key: [],
fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
fields.InputDataFields.groundtruth_instance_masks: [max_num_boxes, height,
width],
fields.InputDataFields.groundtruth_instance_masks: [
max_num_boxes, height, width
],
fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes],
fields.InputDataFields.groundtruth_group_of: [max_num_boxes],
fields.InputDataFields.groundtruth_area: [max_num_boxes],
......@@ -74,7 +85,8 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
fields.InputDataFields.true_image_shape: [3],
fields.InputDataFields.multiclass_scores: [
max_num_boxes, num_classes + 1 if num_classes is not None else None],
max_num_boxes, num_classes + 1 if num_classes is not None else None
],
}
# Determine whether groundtruth_classes are integers or one-hot encodings, and
# apply batching appropriately.
......@@ -90,7 +102,9 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
'rank 2 tensor (one-hot encodings)')
if fields.InputDataFields.original_image in dataset.output_shapes:
padding_shapes[fields.InputDataFields.original_image] = [None, None, 3]
padding_shapes[fields.InputDataFields.original_image] = [
None, None, 3 + num_additional_channels
]
if fields.InputDataFields.groundtruth_keypoints in dataset.output_shapes:
tensor_shape = dataset.output_shapes[fields.InputDataFields.
groundtruth_keypoints]
......@@ -108,9 +122,13 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
for tensor_key, _ in dataset.output_shapes.items()}
def build(input_reader_config, transform_input_data_fn=None,
batch_size=None, max_num_boxes=None, num_classes=None,
spatial_image_shape=None):
def build(input_reader_config,
transform_input_data_fn=None,
batch_size=None,
max_num_boxes=None,
num_classes=None,
spatial_image_shape=None,
num_additional_channels=0):
"""Builds a tf.data.Dataset.
Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
......@@ -128,6 +146,7 @@ def build(input_reader_config, transform_input_data_fn=None,
spatial_image_shape: A list of two integers of the form [height, width]
containing expected spatial shape of the image after applying
transform_input_data_fn. If None, will use dynamic shapes.
num_additional_channels: Number of additional channels to use in the input.
Returns:
A tf.data.Dataset based on the input_reader_config.
......@@ -152,7 +171,9 @@ def build(input_reader_config, transform_input_data_fn=None,
decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=input_reader_config.load_instance_masks,
instance_mask_type=input_reader_config.mask_type,
label_map_proto_file=label_map_proto_file)
label_map_proto_file=label_map_proto_file,
use_display_name=input_reader_config.use_display_name,
num_additional_channels=num_additional_channels)
def process_fn(value):
processed = decoder.decode(value)
......
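A hedged usage sketch of the extended builder (the record path is illustrative); it mirrors the new test added below, where two extra channels are merged into the image, giving a batched tensor of shape [2, height, width, 5]:

from google.protobuf import text_format
from object_detection.builders import dataset_builder
from object_detection.protos import input_reader_pb2

input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge("""
  shuffle: false
  num_readers: 1
  tf_record_input_reader { input_path: '/path/to/records' }
""", input_reader_proto)
# Additional channels are decoded separately, then merged before batching.
dataset = dataset_builder.build(
    input_reader_proto, batch_size=2, num_additional_channels=2)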
......@@ -30,49 +30,50 @@ from object_detection.utils import dataset_util
class DatasetBuilderTest(tf.test.TestCase):
def create_tf_record(self):
def create_tf_record(self, has_additional_channels=False):
path = os.path.join(self.get_temp_dir(), 'tfrecord')
writer = tf.python_io.TFRecordWriter(path)
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
additional_channels_tensor = np.random.randint(
255, size=(4, 5, 1)).astype(np.uint8)
flat_mask = (4 * 5) * [1.0]
with self.test_session():
encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
example = example_pb2.Example(
features=feature_pb2.Features(
feature={
encoded_additional_channels_jpeg = tf.image.encode_jpeg(
tf.constant(additional_channels_tensor)).eval()
features = {
'image/encoded':
feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
'image/format':
feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(
value=['jpeg'.encode('utf-8')])),
bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])
),
'image/height':
feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=[4])),
feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[4])),
'image/width':
feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=[5])),
feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[5])),
'image/object/bbox/xmin':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[0.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
'image/object/bbox/xmax':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[1.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
'image/object/bbox/ymin':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[0.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[0.0])),
'image/object/bbox/ymax':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=[1.0])),
feature_pb2.Feature(float_list=feature_pb2.FloatList(value=[1.0])),
'image/object/class/label':
feature_pb2.Feature(
int64_list=feature_pb2.Int64List(value=[2])),
feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=[2])),
'image/object/mask':
feature_pb2.Feature(
float_list=feature_pb2.FloatList(value=flat_mask)),
}))
}
if has_additional_channels:
features['image/additional_channels/encoded'] = feature_pb2.Feature(
bytes_list=feature_pb2.BytesList(
value=[encoded_additional_channels_jpeg] * 2))
example = example_pb2.Example(
features=feature_pb2.Features(feature=features))
writer.write(example.SerializeToString())
writer.close()
......@@ -218,6 +219,31 @@ class DatasetBuilderTest(tf.test.TestCase):
[2, 2, 4, 5],
output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
def test_build_tf_record_input_reader_with_additional_channels(self):
tf_record_path = self.create_tf_record(has_additional_channels=True)
input_reader_text_proto = """
shuffle: false
num_readers: 1
tf_record_input_reader {{
input_path: '{0}'
}}
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_util.make_initializable_iterator(
dataset_builder.build(
input_reader_proto, batch_size=2,
num_additional_channels=2)).get_next()
sv = tf.train.Supervisor(logdir=self.get_temp_dir())
with sv.prepare_or_wait_for_session() as sess:
sv.start_queue_runners(sess)
output_dict = sess.run(tensor_dict)
self.assertEquals((2, 4, 5, 5),
output_dict[fields.InputDataFields.image].shape)
def test_raises_error_with_no_input_paths(self):
input_reader_text_proto = """
shuffle: false
......
......@@ -79,12 +79,17 @@ def build(image_resizer_config):
keep_aspect_ratio_config.max_dimension):
raise ValueError('min_dimension > max_dimension')
method = _tf_resize_method(keep_aspect_ratio_config.resize_method)
per_channel_pad_value = (0, 0, 0)
if keep_aspect_ratio_config.per_channel_pad_value:
per_channel_pad_value = tuple(keep_aspect_ratio_config.
per_channel_pad_value)
image_resizer_fn = functools.partial(
preprocessor.resize_to_range,
min_dimension=keep_aspect_ratio_config.min_dimension,
max_dimension=keep_aspect_ratio_config.max_dimension,
method=method,
pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension)
pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension,
per_channel_pad_value=per_channel_pad_value)
if not keep_aspect_ratio_config.convert_to_grayscale:
return image_resizer_fn
elif image_resizer_oneof == 'fixed_shape_resizer':
......
......@@ -52,6 +52,9 @@ class ImageResizerBuilderTest(tf.test.TestCase):
min_dimension: 10
max_dimension: 20
pad_to_max_dimension: true
per_channel_pad_value: 3
per_channel_pad_value: 4
per_channel_pad_value: 5
}
"""
input_shape = (50, 25, 3)
......
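For reference, a hedged sketch of building a resizer with the new field (the pad values here are illustrative ImageNet-style channel means):

from google.protobuf import text_format
from object_detection.builders import image_resizer_builder
from object_detection.protos import image_resizer_pb2

config = image_resizer_pb2.ImageResizer()
text_format.Merge("""
  keep_aspect_ratio_resizer {
    min_dimension: 10
    max_dimension: 20
    pad_to_max_dimension: true
    per_channel_pad_value: 123
    per_channel_pad_value: 116
    per_channel_pad_value: 103
  }
""", config)
# The repeated proto field becomes the per_channel_pad_value tuple above.
image_resizer_fn = image_resizer_builder.build(config)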
......@@ -778,7 +778,7 @@ def to_absolute_coordinates(boxlist,
height,
width,
check_range=True,
maximum_normalized_coordinate=1.01,
maximum_normalized_coordinate=1.1,
scope=None):
"""Converts normalized box coordinates to absolute pixel coordinates.
......@@ -792,7 +792,7 @@ def to_absolute_coordinates(boxlist,
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered
as normalized, default to 1.01.
as normalized, default to 1.1.
scope: name scope.
Returns:
......
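A hedged sketch of what the relaxed default permits: coordinates up to 1.1 are still treated as normalized, so mild overshoot (e.g. from augmentation jitter) converts cleanly, while the new test below checks that values beyond 1.1 trip the assertion:

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops

boxes = box_list.BoxList(tf.constant([[0., 0., 1.05, 1.05]], tf.float32))
# 1.05 <= 1.1, so this converts to [0, 0, 105, 105] without raising.
absolute = box_list_ops.to_absolute_coordinates(boxes, height=100, width=100)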
......@@ -931,6 +931,21 @@ class CoordinatesConversionTest(tf.test.TestCase):
out = sess.run(boxlist.get())
self.assertAllClose(out, coordinates)
def test_to_absolute_coordinates_maximum_coordinate_check(self):
coordinates = tf.constant([[0, 0, 1.2, 1.2],
[0.25, 0.25, 0.75, 0.75]], tf.float32)
img = tf.ones((128, 100, 100, 3))
boxlist = box_list.BoxList(coordinates)
absolute_boxlist = box_list_ops.to_absolute_coordinates(
boxlist,
tf.shape(img)[1],
tf.shape(img)[2],
maximum_normalized_coordinate=1.1)
with self.test_session() as sess:
with self.assertRaisesOpError('assertion failed'):
sess.run(absolute_boxlist.get())
class BoxRefinementTest(tf.test.TestCase):
......
......@@ -79,10 +79,12 @@ class BoxPredictor(object):
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
......@@ -120,10 +122,12 @@ class BoxPredictor(object):
Returns:
A dictionary containing at least the following tensors.
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
box_encodings: A list of float tensors. Each entry in the list
corresponds to a feature map in the input `image_features` list. All
tensors in the list have one of the two following shapes:
a. [batch_size, num_anchors_i, q, code_size] representing the location
of the objects, where q is 1 or the number of classes.
b. [batch_size, num_anchors_i, code_size].
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
......@@ -765,6 +769,13 @@ class ConvolutionalBoxPredictor(BoxPredictor):
}
# TODO(rathodv): Replace with slim.arg_scope_func_key once its available
# externally.
def _arg_scope_func_key(op):
"""Returns a key that can be used to index arg_scope dictionary."""
return getattr(op, '_key_op', str(op))
# TODO(rathodv): Merge the implementation with ConvolutionalBoxPredictor above
# since they are very similar.
class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
......@@ -773,8 +784,12 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
Defines the box predictor as defined in
https://arxiv.org/abs/1708.02002. This class differs from
ConvolutionalBoxPredictor in that it shares weights and biases while
predicting from different feature maps. Separate multi-layer towers are
constructed for the box encoding and class predictors respectively.
predicting from different feature maps. However, batch_norm parameters are not
shared because the statistics of the activations vary among the different
feature maps.
Also note that separate multi-layer towers are constructed for the box
encoding and class predictors respectively.
"""
def __init__(self,
......@@ -833,14 +848,15 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
[batch_size, num_anchors_i, code_size] representing the location of
the objects. Each entry in the list corresponds to a feature map in the
input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
Raises:
ValueError: If the image feature maps do not have the same number of
channels or if the num predictions per locations is differs between the
......@@ -858,15 +874,18 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
'channels, found: {}'.format(feature_channels))
box_encodings_list = []
class_predictions_list = []
for (image_feature, num_predictions_per_location) in zip(
image_features, num_predictions_per_location_list):
for feature_index, (image_feature,
num_predictions_per_location) in enumerate(
zip(image_features,
num_predictions_per_location_list)):
# Add a slot for the background class.
with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
reuse=tf.AUTO_REUSE):
num_class_slots = self.num_classes + 1
box_encodings_net = image_feature
class_predictions_net = image_feature
with slim.arg_scope(self._conv_hyperparams_fn()):
with slim.arg_scope(self._conv_hyperparams_fn()) as sc:
apply_batch_norm = _arg_scope_func_key(slim.batch_norm) in sc
for i in range(self._num_layers_before_predictor):
box_encodings_net = slim.conv2d(
box_encodings_net,
......@@ -874,14 +893,22 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
[self._kernel_size, self._kernel_size],
stride=1,
padding='SAME',
scope='BoxEncodingPredictionTower/conv2d_{}'.format(i))
activation_fn=None,
normalizer_fn=(tf.identity if apply_batch_norm else None),
scope='BoxPredictionTower/conv2d_{}'.format(i))
if apply_batch_norm:
box_encodings_net = slim.batch_norm(
box_encodings_net,
scope='BoxPredictionTower/conv2d_{}/BatchNorm/feature_{}'.
format(i, feature_index))
box_encodings_net = tf.nn.relu6(box_encodings_net)
box_encodings = slim.conv2d(
box_encodings_net,
num_predictions_per_location * self._box_code_size,
[self._kernel_size, self._kernel_size],
activation_fn=None, stride=1, padding='SAME',
normalizer_fn=None,
scope='BoxEncodingPredictor')
scope='BoxPredictor')
for i in range(self._num_layers_before_predictor):
class_predictions_net = slim.conv2d(
......@@ -890,7 +917,15 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
[self._kernel_size, self._kernel_size],
stride=1,
padding='SAME',
activation_fn=None,
normalizer_fn=(tf.identity if apply_batch_norm else None),
scope='ClassPredictionTower/conv2d_{}'.format(i))
if apply_batch_norm:
class_predictions_net = slim.batch_norm(
class_predictions_net,
scope='ClassPredictionTower/conv2d_{}/BatchNorm/feature_{}'
.format(i, feature_index))
class_predictions_net = tf.nn.relu6(class_predictions_net)
if self._use_dropout:
class_predictions_net = slim.dropout(
class_predictions_net, keep_prob=self._dropout_keep_prob)
......@@ -912,7 +947,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
combined_feature_map_shape[1] *
combined_feature_map_shape[2] *
num_predictions_per_location,
1, self._box_code_size]))
self._box_code_size]))
box_encodings_list.append(box_encodings)
class_predictions_with_background = tf.reshape(
class_predictions_with_background,
......
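To make the variable-sharing scheme concrete, here is a minimal sketch (not the library's code) of how tf.AUTO_REUSE shares the conv weights across feature maps while the feature-indexed scope keeps batch norm private to each map:

import tensorflow as tf
slim = tf.contrib.slim

def shared_conv_private_bn(feature, feature_index):
  with tf.variable_scope('WeightSharedConvolutionalBoxPredictor',
                         reuse=tf.AUTO_REUSE):
    # Conv weights are reused for every feature map...
    net = slim.conv2d(feature, 32, [3, 3], stride=1, padding='SAME',
                      activation_fn=None, normalizer_fn=None,
                      scope='BoxPredictionTower/conv2d_0')
    # ...but each feature map gets its own beta and moving statistics.
    net = slim.batch_norm(
        net,
        scope='BoxPredictionTower/conv2d_0/BatchNorm/feature_{}'.format(
            feature_index))
    return tf.nn.relu6(net)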
......@@ -442,6 +442,24 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def _build_conv_arg_scope_no_batch_norm(self):
conv_hyperparams = hyperparams_pb2.Hyperparams()
conv_hyperparams_text_proto = """
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.build(conv_hyperparams, is_training=True)
def test_get_boxes_for_five_aspect_ratios_per_location(self):
def graph_fn(image_features):
......@@ -463,7 +481,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, objectness_predictions) = self.execute(
graph_fn, [image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
self.assertAllEqual(box_encodings.shape, [4, 320, 4])
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1])
def test_bias_predictions_to_background_with_sigmoid_score_conversion(self):
......@@ -512,7 +530,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, class_predictions_with_background) = self.execute(
graph_fn, [image_features])
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4])
self.assertAllEqual(box_encodings.shape, [4, 320, 4])
self.assertAllEqual(class_predictions_with_background.shape,
[4, 320, num_classes_without_background+1])
......@@ -543,11 +561,12 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32)
(box_encodings, class_predictions_with_background) = self.execute(
graph_fn, [image_features1, image_features2])
self.assertAllEqual(box_encodings.shape, [4, 640, 1, 4])
self.assertAllEqual(box_encodings.shape, [4, 640, 4])
self.assertAllEqual(class_predictions_with_background.shape,
[4, 640, num_classes_without_background+1])
def test_predictions_from_multiple_feature_maps_share_weights(self):
def test_predictions_from_multiple_feature_maps_share_weights_not_batchnorm(
self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
......@@ -574,26 +593,95 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
expected_variable_set = set([
# Box prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_0/weights'),
'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_0/BatchNorm/beta'),
'BoxPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_1/weights'),
'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictionTower/conv2d_1/BatchNorm/beta'),
'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
# Box prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'),
# Class prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/beta'),
'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/beta'),
'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'),
# Class prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/biases')])
self.assertEqual(expected_variable_set, actual_variable_set)
def test_no_batchnorm_params_when_batchnorm_is_not_configured(self):
num_classes_without_background = 6
def graph_fn(image_features1, image_features2):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(),
depth=32,
num_layers_before_predictor=2,
box_code_size=4)
box_predictions = conv_box_predictor.predict(
[image_features1, image_features2],
num_predictions_per_location=[5, 5],
scope='BoxPredictor')
box_encodings = tf.concat(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.concat(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1)
return (box_encodings, class_predictions_with_background)
with self.test_session(graph=tf.Graph()):
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32),
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32))
actual_variable_set = set(
[var.op.name for var in tf.trainable_variables()])
expected_variable_set = set([
# Box prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/biases'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictor/weights'),
'BoxPredictionTower/conv2d_1/biases'),
# Box prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxEncodingPredictor/biases'),
'BoxPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'),
# Class prediction tower
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/biases'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/biases'),
# Class prediction head
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/weights'),
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
......@@ -628,7 +716,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
[tf.shape(box_encodings), tf.shape(objectness_predictions)],
feed_dict={image_features:
np.random.rand(4, resolution, resolution, 64)})
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
self.assertAllEqual(objectness_predictions_shape,
[4, expected_num_anchors, 1])
......
......@@ -2128,7 +2128,8 @@ def resize_to_range(image,
max_dimension=None,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False,
pad_to_max_dimension=False):
pad_to_max_dimension=False,
per_channel_pad_value=(0, 0, 0)):
"""Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases:
......@@ -2153,6 +2154,8 @@ def resize_to_range(image,
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
per_channel_pad_value: A tuple of per-channel scalar values to use for
padding. Defaults to zeros.
Returns:
Note that the position of the resized_image_shape changes based on whether
......@@ -2181,8 +2184,20 @@ def resize_to_range(image,
image, new_size[:-1], method=method, align_corners=align_corners)
if pad_to_max_dimension:
new_image = tf.image.pad_to_bounding_box(
new_image, 0, 0, max_dimension, max_dimension)
channels = tf.unstack(new_image, axis=2)
if len(channels) != len(per_channel_pad_value):
raise ValueError('Number of channels must be equal to the length of '
'per-channel pad value.')
new_image = tf.stack(
[
tf.pad(
channels[i], [[0, max_dimension - new_size[0]],
[0, max_dimension - new_size[1]]],
constant_values=per_channel_pad_value[i])
for i in range(len(channels))
],
axis=2)
new_image.set_shape([max_dimension, max_dimension, 3])
result = [new_image]
if masks is not None:
......
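A standalone sketch (TF 1.x; shapes and pad values are illustrative) of the unstack/pad/stack scheme used above:

import tensorflow as tf

image = tf.ones([2, 3, 3])  # [height=2, width=3, channels=3]
max_dimension = 4
per_channel_pad_value = (123.68, 116.779, 103.939)

channels = tf.unstack(image, axis=2)
padded = tf.stack(
    [tf.pad(channels[i],
            [[0, max_dimension - 2], [0, max_dimension - 3]],
            constant_values=per_channel_pad_value[i])
     for i in range(len(channels))],
    axis=2)  # shape [4, 4, 3]; each channel is padded with its own constant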
......@@ -2316,6 +2316,46 @@ class PreprocessorTest(tf.test.TestCase):
np.random.randn(*in_shape)})
self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithPadToMaxDimensionReturnsCorrectShapes(self):
in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
min_dim = 50
max_dim = 100
expected_shape_list = [[100, 100, 3], [100, 100, 3], [100, 100, 3]]
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image, _ = preprocessor.resize_to_range(
in_image,
min_dimension=min_dim,
max_dimension=max_dim,
pad_to_max_dimension=True)
self.assertAllEqual(out_image.shape.as_list(), expected_shape)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
out_image_shape = sess.run(
out_image_shape, feed_dict={in_image: np.random.randn(*in_shape)})
self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithPadToMaxDimensionReturnsCorrectTensor(self):
in_image_np = np.array([[[0, 1, 2]]], np.float32)
ex_image_np = np.array(
[[[0, 1, 2], [123.68, 116.779, 103.939]],
[[123.68, 116.779, 103.939], [123.68, 116.779, 103.939]]], np.float32)
min_dim = 1
max_dim = 2
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image, _ = preprocessor.resize_to_range(
in_image,
min_dimension=min_dim,
max_dimension=max_dim,
pad_to_max_dimension=True,
per_channel_pad_value=(123.68, 116.779, 103.939))
with self.test_session() as sess:
out_image_np = sess.run(out_image, feed_dict={in_image: in_image_np})
self.assertAllClose(ex_image_np, out_image_np)
def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
......
......@@ -34,6 +34,7 @@ class InputDataFields(object):
Attributes:
image: image.
image_additional_channels: additional channels.
original_image: image in the original input size.
key: unique key corresponding to image.
source_id: source of the original image.
......@@ -66,6 +67,7 @@ class InputDataFields(object):
multiclass_scores: the label score per class for each box.
"""
image = 'image'
image_additional_channels = 'image_additional_channels'
original_image = 'original_image'
key = 'key'
source_id = 'source_id'
......@@ -161,6 +163,8 @@ class TfExampleFields(object):
height: height of image in pixels, e.g. 462
width: width of image in pixels, e.g. 581
source_id: original source of the image
image_class_text: image-level label in text format
image_class_label: image-level label in numerical format
object_class_text: labels in text format, e.g. ["person", "cat"]
object_class_label: labels in numbers, e.g. [16, 8]
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
......@@ -195,6 +199,8 @@ class TfExampleFields(object):
height = 'image/height'
width = 'image/width'
source_id = 'image/source_id'
image_class_text = 'image/class/text'
image_class_label = 'image/class/label'
object_class_text = 'image/object/class/text'
object_class_label = 'image/object/class/label'
object_bbox_ymin = 'image/object/bbox/ymin'
......
......@@ -112,7 +112,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
label_map_proto_file=None,
use_display_name=False,
dct_method='',
num_keypoints=0):
num_keypoints=0,
num_additional_channels=0):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
......@@ -133,6 +134,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
example, the jpeg library does not have that specific option.
num_keypoints: the number of keypoints per object.
num_additional_channels: how many additional channels to use.
Raises:
ValueError: If `instance_mask_type` option is not one of
......@@ -178,15 +180,28 @@ class TfExampleDecoder(data_decoder.DataDecoder):
'image/object/weight':
tf.VarLenFeature(tf.float32),
}
# We are checking `dct_method` instead of passing it directly in order to
# ensure TF version 1.6 compatibility.
if dct_method:
image = slim_example_decoder.Image(
image_key='image/encoded',
format_key='image/format',
channels=3,
dct_method=dct_method)
additional_channel_image = slim_example_decoder.Image(
image_key='image/additional_channels/encoded',
format_key='image/format',
channels=1,
repeated=True,
dct_method=dct_method)
else:
image = slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3)
additional_channel_image = slim_example_decoder.Image(
image_key='image/additional_channels/encoded',
format_key='image/format',
channels=1,
repeated=True)
self.items_to_handlers = {
fields.InputDataFields.image:
image,
......@@ -211,6 +226,13 @@ class TfExampleDecoder(data_decoder.DataDecoder):
fields.InputDataFields.groundtruth_weights: (
slim_example_decoder.Tensor('image/object/weight')),
}
if num_additional_channels > 0:
self.keys_to_features[
'image/additional_channels/encoded'] = tf.FixedLenFeature(
(num_additional_channels,), tf.string)
self.items_to_handlers[
fields.InputDataFields.
image_additional_channels] = additional_channel_image
self._num_keypoints = num_keypoints
if num_keypoints > 0:
self.keys_to_features['image/object/keypoint/x'] = (
......@@ -294,6 +316,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None] indicating if the boxes enclose a crowd.
Optional:
fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
is width; 3rd dim is the number of additional channels.
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
......@@ -316,6 +341,12 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
if fields.InputDataFields.image_additional_channels in tensor_dict:
channels = tensor_dict[fields.InputDataFields.image_additional_channels]
channels = tf.squeeze(channels, axis=3)
channels = tf.transpose(channels, perm=[1, 2, 0])
tensor_dict[fields.InputDataFields.image_additional_channels] = channels
def default_groundtruth_weights():
return tf.ones(
[tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
......
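A hedged decoding sketch using the new argument; serialized_example stands in for a serialized tf.Example string:

import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder

serialized_example = tf.placeholder(tf.string, shape=[])
decoder = tf_example_decoder.TfExampleDecoder(num_additional_channels=2)
tensor_dict = decoder.decode(serialized_example)
# After the squeeze/transpose above, the extra channels surface as a
# [height, width, 2] uint8 tensor.
channels = tensor_dict[fields.InputDataFields.image_additional_channels]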
......@@ -23,6 +23,7 @@ from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import parsing_ops
......@@ -72,10 +73,41 @@ class TfExampleDecoderTest(tf.test.TestCase):
def _BytesFeatureFromList(self, ndarray):
values = ndarray.flatten().tolist()
for i in range(len(values)):
values[i] = values[i].encode('utf-8')
return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=values))
def testDecodeAdditionalChannels(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
additional_channel_tensor = np.random.randint(
256, size=(4, 5, 1)).astype(np.uint8)
encoded_additional_channel = self._EncodeImage(additional_channel_tensor)
decoded_additional_channel = self._DecodeImage(encoded_additional_channel)
example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded':
self._BytesFeature(encoded_jpeg),
'image/additional_channels/encoded':
self._BytesFeatureFromList(
np.array([encoded_additional_channel] * 2)),
'image/format':
self._BytesFeature('jpeg'),
'image/source_id':
self._BytesFeature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder(
num_additional_channels=2)
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
np.concatenate([decoded_additional_channel] * 2, axis=2),
tensor_dict[fields.InputDataFields.image_additional_channels])
def testDecodeExampleWithBranchedBackupHandler(self):
example1 = example_pb2.Example(
features=feature_pb2.Features(
......@@ -304,6 +336,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual(
2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
@test_util.enable_c_shapes
def testDecodeKeypoint(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -331,7 +364,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
get_shape().as_list()), [None, 4])
self.assertAllEqual((tensor_dict[fields.InputDataFields.
groundtruth_keypoints].
get_shape().as_list()), [None, 3, 2])
get_shape().as_list()), [2, 3, 2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -376,6 +409,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
np.ones(2, dtype=np.float32))
@test_util.enable_c_shapes
def testDecodeObjectLabel(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -391,7 +425,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -522,6 +556,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual([3, 1],
tensor_dict[fields.InputDataFields.groundtruth_classes])
@test_util.enable_c_shapes
def testDecodeObjectArea(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -536,13 +571,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
get_shape().as_list()), [None])
get_shape().as_list()), [2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(object_area,
tensor_dict[fields.InputDataFields.groundtruth_area])
@test_util.enable_c_shapes
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -558,7 +594,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -566,6 +602,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict[
fields.InputDataFields.groundtruth_is_crowd])
@test_util.enable_c_shapes
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -581,7 +618,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -589,6 +626,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict[
fields.InputDataFields.groundtruth_difficult])
@test_util.enable_c_shapes
def testDecodeObjectGroupOf(self):
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
......@@ -605,7 +643,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_group_of].get_shape().as_list()),
[None])
[2])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......@@ -637,6 +675,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
object_weights,
tensor_dict[fields.InputDataFields.groundtruth_weights])
@test_util.enable_c_shapes
def testDecodeInstanceSegmentation(self):
num_instances = 4
image_height = 5
......@@ -673,11 +712,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
get_shape().as_list()), [None, None, None])
get_shape().as_list()), [4, 5, 3])
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_classes].
get_shape().as_list()), [None])
get_shape().as_list()), [4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
......
......@@ -16,7 +16,8 @@ r"""Creates TFRecords of Open Images dataset for object detection.
Example usage:
python object_detection/dataset_tools/create_oid_tf_record.py \
--input_annotations_csv=/path/to/input/annotations-human-bbox.csv \
--input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \
--input_image_label_annotations_csv=/path/to/input/annotations-label.csv \
--input_images_directory=/path/to/input/image_pixels_directory \
--input_label_map=/path/to/input/labels_bbox_545.labelmap \
--output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
......@@ -27,7 +28,9 @@ https://github.com/openimages/dataset
This script will include every image found in the input_images_directory in the
output TFRecord, even if the image has no corresponding bounding box annotations
in the input_annotations_csv.
in the input_box_annotations_csv. If input_image_label_annotations_csv is
specified, it will add image-level labels as well. Note that the information of
whether a label is positively or negatively verified is NOT added to the
tfrecord.
"""
from __future__ import absolute_import
from __future__ import division
......@@ -40,13 +43,16 @@ import pandas as pd
import tensorflow as tf
from object_detection.dataset_tools import oid_tfrecord_creation
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import label_map_util
tf.flags.DEFINE_string('input_annotations_csv', None,
tf.flags.DEFINE_string('input_box_annotations_csv', None,
'Path to CSV containing image bounding box annotations')
tf.flags.DEFINE_string('input_images_directory', None,
'Directory containing the image pixels '
'downloaded from the OpenImages GitHub repository.')
tf.flags.DEFINE_string('input_image_label_annotations_csv', None,
'Path to CSV containing image-level labels annotations')
tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto')
tf.flags.DEFINE_string(
'output_tf_record_path_prefix', None,
......@@ -61,7 +67,7 @@ def main(_):
tf.logging.set_verbosity(tf.logging.INFO)
required_flags = [
'input_annotations_csv', 'input_images_directory', 'input_label_map',
'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
'output_tf_record_path_prefix'
]
for flag_name in required_flags:
......@@ -69,17 +75,24 @@ def main(_):
raise ValueError('Flag --{} is required'.format(flag_name))
label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
all_annotations = pd.read_csv(FLAGS.input_annotations_csv)
all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
if FLAGS.input_image_label_annotations_csv:
all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
all_label_annotations.rename(
columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
else:
all_label_annotations = None
all_images = tf.gfile.Glob(
os.path.join(FLAGS.input_images_directory, '*.jpg'))
all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
all_annotations = pd.concat([all_annotations, all_image_ids])
all_annotations = pd.concat(
[all_box_annotations, all_image_ids, all_label_annotations])
tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
FLAGS.num_shards)
......
......@@ -33,11 +33,13 @@ import os
import random
import re
import contextlib2
from lxml import etree
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
......@@ -52,6 +54,8 @@ flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
'in the latter case, the resulting files are much larger.')
flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
'segmentation masks. Options are "png" or "numerical".')
flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')
FLAGS = flags.FLAGS
......@@ -208,6 +212,7 @@ def dict_to_tf_example(data,
def create_tf_record(output_filename,
num_shards,
label_map_dict,
annotations_dir,
image_dir,
......@@ -218,6 +223,7 @@ def create_tf_record(output_filename,
Args:
output_filename: Path to where output file is saved.
num_shards: Number of shards for output file.
label_map_dict: The label map dictionary.
annotations_dir: Directory where annotation files are stored.
image_dir: Directory where image files are stored.
......@@ -227,7 +233,9 @@ def create_tf_record(output_filename,
mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
smaller file sizes.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
tf_record_close_stack, output_filename, num_shards)
for idx, example in enumerate(examples):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples))
......@@ -250,12 +258,12 @@ def create_tf_record(output_filename,
image_dir,
faces_only=faces_only,
mask_type=mask_type)
writer.write(tf_example.SerializeToString())
if tf_example:
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
except ValueError:
logging.warning('Invalid example: %s, ignoring.', xml_path)
writer.close()
# TODO(derekjchow): Add test for pet/PASCAL main files.
def main(_):
......@@ -279,15 +287,16 @@ def main(_):
logging.info('%d training and %d validation examples.',
len(train_examples), len(val_examples))
train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
if FLAGS.faces_only:
train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
if not FLAGS.faces_only:
train_output_path = os.path.join(FLAGS.output_dir,
'pet_train_with_masks.record')
'pets_fullbody_with_masks_train.record')
val_output_path = os.path.join(FLAGS.output_dir,
'pet_val_with_masks.record')
'pets_fullbody_with_masks_val.record')
create_tf_record(
train_output_path,
FLAGS.num_shards,
label_map_dict,
annotations_dir,
image_dir,
......@@ -296,6 +305,7 @@ def main(_):
mask_type=FLAGS.mask_type)
create_tf_record(
val_output_path,
FLAGS.num_shards,
label_map_dict,
annotations_dir,
image_dir,
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A class and executable to expand hierarchically image-level labels and boxes.
Example usage:
./hierarchical_labels_expansion <path to JSON hierarchy> <input csv file>
<output csv file> [optional]labels_file
"""
import json
import sys
def _update_dict(initial_dict, update):
"""Updates dictionary with update content.
Args:
initial_dict: initial dictionary.
update: updated dictionary.
"""
for key, value_list in update.iteritems():
if key in initial_dict:
initial_dict[key].extend(value_list)
else:
initial_dict[key] = value_list
def _build_plain_hierarchy(hierarchy, skip_root=False):
"""Expands tree hierarchy representation to parent-child dictionary.
Args:
hierarchy: labels hierarchy parsed from a JSON file.
skip_root: if True, skips the root node (used when all classes in the
hierarchy are collected under a single virtual node).
Returns:
keyed_parent - dictionary mapping each parent node to all of its children.
keyed_child - dictionary mapping each child node to all of its parents.
children - all children of the current node.
"""
all_children = []
all_keyed_parent = {}
all_keyed_child = {}
if 'Subcategory' in hierarchy:
for node in hierarchy['Subcategory']:
keyed_parent, keyed_child, children = _build_plain_hierarchy(node)
# Update is not done through dict.update() since some children have
# multiple parents in the hierarchy.
_update_dict(all_keyed_parent, keyed_parent)
_update_dict(all_keyed_child, keyed_child)
all_children.extend(children)
if not skip_root:
all_keyed_parent[hierarchy['LabelName']] = all_children
all_children = [hierarchy['LabelName']] + all_children
for child, _ in all_keyed_child.iteritems():
all_keyed_child[child].append(hierarchy['LabelName'])
all_keyed_child[hierarchy['LabelName']] = []
return all_keyed_parent, all_keyed_child, all_children
class OIDHierarchicalLabelsExpansion(object):
""" Main class to perform labels hierachical expansion."""
def __init__(self, hierarchy):
"""Constructor.
Args:
hierarchy: labels hierarchy parsed from a JSON file.
"""
self._hierarchy_keyed_parent, self._hierarchy_keyed_child, _ = (
_build_plain_hierarchy(hierarchy, skip_root=True))
def expand_boxes_from_csv(self, csv_row):
"""Expands a row containing bounding boxes from CSV file.
Args:
csv_row: a single row of Open Images released groundtruth file.
Returns:
a list of strings (including the initial row) corresponding to the ground
truth expanded to multiple annotations for evaluation with the Open Images
Challenge 2018 metric.
"""
# Row header is expected to be exactly:
# ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,
# IsTruncated,IsGroupOf,IsDepiction,IsInside
cvs_row_splited = csv_row.split(',')
assert len(cvs_row_splited) == 13
result = [csv_row]
assert cvs_row_splited[2] in self._hierarchy_keyed_child
parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]]
for parent_node in parent_nodes:
cvs_row_splited[2] = parent_node
result.append(','.join(cvs_row_splited))
return result
def expand_labels_from_csv(self, csv_row):
"""Expands a row containing bounding boxes from CSV file.
Args:
csv_row: a single row of Open Images released groundtruth file.
Returns:
a list of strings (including the initial row) corresponding to the ground
truth expanded to multiple annotations for evaluation with the Open Images
Challenge 2018 metric.
"""
# Row header is expected to be exactly:
# ImageID,Source,LabelName,Confidence
cvs_row_splited = csv_row.split(',')
assert len(cvs_row_splited) == 4
result = [csv_row]
if int(cvs_row_splited[3]) == 1:
assert cvs_row_splited[2] in self._hierarchy_keyed_child
parent_nodes = self._hierarchy_keyed_child[cvs_row_splited[2]]
for parent_node in parent_nodes:
cvs_row_splited[2] = parent_node
result.append(','.join(cvs_row_splited))
else:
assert cvs_row_splited[2] in self._hierarchy_keyed_parent
child_nodes = self._hierarchy_keyed_parent[cvs_row_splited[2]]
for child_node in child_nodes:
cvs_row_splited[2] = child_node
result.append(','.join(cvs_row_splited))
return result
def main(argv):
if len(argv) < 4:
print """Missing arguments. \n
Usage: ./hierarchical_labels_expansion <path to JSON hierarchy>
<input csv file> <output csv file> [optional]labels_file"""
return
with open(argv[1]) as f:
hierarchy = json.load(f)
expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy)
labels_file = False
if len(argv) > 4 and argv[4] == 'labels_file':
labels_file = True
with open(argv[2], 'r') as source:
with open(argv[3], 'w') as target:
header_skipped = False
for line in source:
if not header_skipped:
header_skipped = True
continue
if labels_file:
expanded_lines = expansion_generator.expand_labels_from_csv(line)
else:
expanded_lines = expansion_generator.expand_boxes_from_csv(line)
target.writelines(expanded_lines)
if __name__ == '__main__':
main(sys.argv)
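A hedged usage sketch of the expansion class, using the toy hierarchy from the test file that follows (where both 'c' and 'f' are parents of 'd'):

from object_detection.dataset_tools import oid_hierarchical_labels_expansion

hierarchy = {
    'LabelName': 'a',
    'Subcategory': [
        {'LabelName': 'b'},
        {'LabelName': 'c',
         'Subcategory': [{'LabelName': 'd'}, {'LabelName': 'e'}]},
        {'LabelName': 'f', 'Subcategory': [{'LabelName': 'd'}]},
    ]
}
expander = oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
    hierarchy)
# A positively verified label propagates to all of its ancestors:
print(expander.expand_labels_from_csv('124,verification,d,1'))
# -> ['124,verification,d,1', '124,verification,c,1', '124,verification,f,1']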
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the OpenImages label expansion (OIDHierarchicalLabelsExpansion)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.dataset_tools import oid_hierarchical_labels_expansion
def create_test_data():
hierarchy = {
'LabelName':
'a',
'Subcategory': [{
'LabelName': 'b'
}, {
'LabelName': 'c',
'Subcategory': [{
'LabelName': 'd'
}, {
'LabelName': 'e'
}]
}, {
'LabelName': 'f',
'Subcategory': [{
'LabelName': 'd'
},]
}]
}
bbox_rows = [
'123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
'123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0'
]
label_rows = [
'123,verification,b,0', '123,verification,c,0', '124,verification,d,1'
]
return hierarchy, bbox_rows, label_rows
class HierarchicalLabelsExpansionTest(tf.test.TestCase):
def test_bbox_expansion(self):
hierarchy, bbox_rows, _ = create_test_data()
expansion_generator = (
oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
hierarchy))
all_result_rows = []
for row in bbox_rows:
all_result_rows.extend(expansion_generator.expand_boxes_from_csv(row))
self.assertItemsEqual([
'123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
'123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
'123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
'123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0'
], all_result_rows)
def test_labels_expansion(self):
hierarchy, _, label_rows = create_test_data()
expansion_generator = (
oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
hierarchy))
all_result_rows = []
for row in label_rows:
all_result_rows.extend(expansion_generator.expand_labels_from_csv(row))
self.assertItemsEqual([
'123,verification,b,0', '123,verification,c,0', '123,verification,d,0',
'123,verification,e,0', '124,verification,d,1', '124,verification,f,1',
'124,verification,c,1'
], all_result_rows)
if __name__ == '__main__':
tf.test.main()
......@@ -41,24 +41,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
filtered_data_frame = annotations_data_frame[
annotations_data_frame.LabelName.isin(label_map)]
filtered_data_frame_boxes = filtered_data_frame[
~filtered_data_frame.YMin.isnull()]
filtered_data_frame_labels = filtered_data_frame[
filtered_data_frame.YMin.isnull()]
image_id = annotations_data_frame.ImageID.iloc[0]
feature_map = {
standard_fields.TfExampleFields.object_bbox_ymin:
dataset_util.float_list_feature(filtered_data_frame.YMin.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMin.as_matrix()),
standard_fields.TfExampleFields.object_bbox_xmin:
dataset_util.float_list_feature(filtered_data_frame.XMin.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMin.as_matrix()),
standard_fields.TfExampleFields.object_bbox_ymax:
dataset_util.float_list_feature(filtered_data_frame.YMax.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMax.as_matrix()),
standard_fields.TfExampleFields.object_bbox_xmax:
dataset_util.float_list_feature(filtered_data_frame.XMax.as_matrix()),
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMax.as_matrix()),
standard_fields.TfExampleFields.object_class_text:
dataset_util.bytes_list_feature(
filtered_data_frame.LabelName.as_matrix()),
filtered_data_frame_boxes.LabelName.as_matrix()),
standard_fields.TfExampleFields.object_class_label:
dataset_util.int64_list_feature(
filtered_data_frame.LabelName.map(lambda x: label_map[x])
filtered_data_frame_boxes.LabelName.map(lambda x: label_map[x])
.as_matrix()),
standard_fields.TfExampleFields.filename:
dataset_util.bytes_feature('{}.jpg'.format(image_id)),
......@@ -71,43 +78,29 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
if 'IsGroupOf' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_group_of] = dataset_util.int64_list_feature(
filtered_data_frame.IsGroupOf.as_matrix().astype(int))
filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
if 'IsOccluded' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_occluded] = dataset_util.int64_list_feature(
filtered_data_frame.IsOccluded.as_matrix().astype(int))
filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
int))
if 'IsTruncated' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_truncated] = dataset_util.int64_list_feature(
filtered_data_frame.IsTruncated.as_matrix().astype(int))
filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
int))
if 'IsDepiction' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_depiction] = dataset_util.int64_list_feature(
filtered_data_frame.IsDepiction.as_matrix().astype(int))
filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
int))
if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
feature_map[standard_fields.TfExampleFields.
image_class_label] = dataset_util.int64_list_feature(
filtered_data_frame_labels.LabelName.map(
lambda x: label_map[x]).as_matrix())
feature_map[standard_fields.TfExampleFields.
image_class_text] = dataset_util.bytes_list_feature(
filtered_data_frame_labels.LabelName.as_matrix())
return tf.train.Example(features=tf.train.Features(feature=feature_map))
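A hedged sketch of the split performed above: rows with box coordinates keep YMin populated, while image-level label rows leave it null and carry the renamed ConfidenceImageLabel column (values are illustrative, patterned on the test data below):

import pandas as pd

df = pd.DataFrame({
    'ImageID': ['i1', 'i1'],
    'LabelName': ['a', 'c'],
    'YMin': [0.3, None],
    'ConfidenceImageLabel': [None, 0],
})
boxes = df[~df.YMin.isnull()]   # the 'a' row: a box annotation
labels = df[df.YMin.isnull()]   # the 'c' row: an image-level label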
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""Opens all TFRecord shards for writing and adds them to an exit stack.
Args:
exit_stack: A contextlib2.ExitStack used to automatically close the TFRecords
opened in this function.
base_path: The base path for all shards.
num_shards: The number of shards.
Returns:
The list of opened TFRecords. Position k in the list corresponds to shard k.
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in range(num_shards)
]
tfrecords = [
exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
for file_name in tf_record_output_filenames
]
return tfrecords
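A hedged usage sketch of the sharded-writer helper (which this PR moves to tf_record_creation_util); the base path and payloads are illustrative, patterned on the test being removed below:

import contextlib2
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util

# Opens /tmp/test.tfrec-00000-of-00010 ... -00009-of-00010; the ExitStack
# closes every shard when the block exits.
with contextlib2.ExitStack() as tf_record_close_stack:
  output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
      tf_record_close_stack, '/tmp/test.tfrec', 10)
  for idx in range(10):
    output_tfrecords[idx].write('record_{}'.format(idx))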
......@@ -14,8 +14,6 @@
# ==============================================================================
"""Tests for oid_tfrecord_creation.py."""
import os
import contextlib2
import pandas as pd
import tensorflow as tf
......@@ -24,16 +22,17 @@ from object_detection.dataset_tools import oid_tfrecord_creation
def create_test_data():
data = {
'ImageID': ['i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
'LabelName': ['a', 'a', 'b', 'b', 'b', 'c'],
'YMin': [0.3, 0.6, 0.8, 0.1, 0.0, 0.0],
'XMin': [0.1, 0.3, 0.7, 0.0, 0.1, 0.1],
'XMax': [0.2, 0.3, 0.8, 0.5, 0.9, 0.9],
'YMax': [0.3, 0.6, 1, 0.8, 0.8, 0.8],
'IsOccluded': [0, 1, 1, 0, 0, 0],
'IsTruncated': [0, 0, 0, 1, 0, 0],
'IsGroupOf': [0, 0, 0, 0, 0, 1],
'IsDepiction': [1, 0, 0, 0, 0, 0],
'ImageID': ['i1', 'i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
'LabelName': ['a', 'a', 'b', 'b', 'c', 'b', 'c'],
'YMin': [0.3, 0.6, 0.8, 0.1, None, 0.0, 0.0],
'XMin': [0.1, 0.3, 0.7, 0.0, None, 0.1, 0.1],
'XMax': [0.2, 0.3, 0.8, 0.5, None, 0.9, 0.9],
'YMax': [0.3, 0.6, 1, 0.8, None, 0.8, 0.8],
'IsOccluded': [0, 1, 1, 0, None, 0, 0],
'IsTruncated': [0, 0, 0, 1, None, 0, 0],
'IsGroupOf': [0, 0, 0, 0, None, 0, 1],
'IsDepiction': [1, 0, 0, 0, None, 0, 0],
'ConfidenceImageLabel': [None, None, None, None, 0, None, None],
}
df = pd.DataFrame(data=data)
label_map = {'a': 0, 'b': 1, 'c': 2}
......@@ -47,7 +46,8 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
self.assertProtoEquals("""
self.assertProtoEquals(
"""
features {
feature {
key: "image/encoded"
......@@ -87,7 +87,13 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
value { int64_list { value: [0, 1, 1, 0] } } }
feature {
key: "image/object/truncated"
value { int64_list { value: [0, 0, 0, 1] } } } }
value { int64_list { value: [0, 0, 0, 1] } } }
feature {
key: "image/class/label"
value { int64_list { value: [2] } } }
feature {
key: "image/class/text"
value { bytes_list { value: ["c"] } } } }
""", tf_example)
def test_no_attributes(self):
......@@ -97,6 +103,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
del df['IsGroupOf']
del df['IsOccluded']
del df['IsTruncated']
del df['ConfidenceImageLabel']
tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
df[df.ImageID == 'i2'], label_map, 'encoded_image_test')
......@@ -138,7 +145,8 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
self.assertProtoEquals("""
self.assertProtoEquals(
"""
features {
feature {
key: "image/encoded"
......@@ -178,26 +186,15 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
value { int64_list { value: [0, 1] } } }
feature {
key: "image/object/truncated"
value { int64_list { value: [0, 0] } } } }
value { int64_list { value: [0, 0] } } }
feature {
key: "image/class/label"
value { int64_list { } } }
feature {
key: "image/class/text"
value { bytes_list { } } } }
""", tf_example)
class OpenOutputTfrecordsTests(tf.test.TestCase):
def test_sharded_tfrecord_writes(self):
with contextlib2.ExitStack() as tf_record_close_stack:
output_tfrecords = oid_tfrecord_creation.open_sharded_output_tfrecords(
tf_record_close_stack,
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
for idx in range(10):
output_tfrecords[idx].write('test_{}'.format(idx))
for idx in range(10):
tf_record_path = '{}-{:05d}-of-00010'.format(
os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
records = list(tf.python_io.tf_record_iterator(tf_record_path))
self.assertAllEqual(records, ['test_{}'.format(idx)])
if __name__ == '__main__':
tf.test.main()