Merge pull request #2 from tensorflow/master

Sync to tensorflow-master

Merge pull request #2 from tensorflow/master
Sync to tensorflow-master
30aeec75 · Toby Boyd · GitHub · 68a18b70 · 78007443 · 30aeec75
Commit 30aeec75 authored Jul 26, 2017 by Toby Boyd Committed by GitHub Jul 26, 2017
20 changed files
--- a/object_detection/core/preprocessor.py
+++ b/object_detection/core/preprocessor.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Preprocess images and bounding boxes for detection.
+
+We perform two sets of operations in preprocessing stage:
+(a) operations that are applied to both training and testing data,
+(b) operations that are applied only to training data for the purpose of
+    data augmentation.
+
+A preprocessing function receives a set of inputs,
+e.g. an image and bounding boxes,
+performs an operation on them, and returns them.
+Some examples are: randomly cropping the image, randomly mirroring the image,
+                   randomly changing the brightness, contrast, hue and
+                   randomly jittering the bounding boxes.
+
+The preprocess function receives a tensor_dict which is a dictionary that maps
+different field names to their tensors. For example,
+tensor_dict[fields.InputDataFields.image] holds the image tensor.
+The image is a rank 4 tensor: [1, height, width, channels] with
+dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
+in each row there is a box with [ymin xmin ymax xmax].
+Boxes are in normalized coordinates, meaning
+their coordinate values range in [0, 1].
+
+Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
+functions receive a rank 3 tensor for processing the image. Thus, inside the
+preprocess function we squeeze the image to become a rank 3 tensor and then
+we pass it to the functions. At the end of the preprocess we expand the image
+back to rank 4.
+"""
+
+import sys
+import tensorflow as tf
+
+from tensorflow.python.ops import control_flow_ops
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import keypoint_ops
+from object_detection.core import standard_fields as fields
+
+
+def _apply_with_random_selector(x, func, num_cases):
+  """Computes func(x, sel), with sel sampled from [0...num_cases-1].
+
+  One call to func is made per case while the graph is being built; a random
+  selector decides at run time which of those calls receives the real input.
+
+  Args:
+    x: input Tensor.
+    func: Python function to apply. It is called as func(tensor, case), where
+      case is a Python int taken from range(num_cases).
+    num_cases: Python int32, number of cases to sample sel from.
+
+  Returns:
+    The result of func(x, sel), where func receives the value of the
+    selector as a python integer, but sel is sampled dynamically.
+  """
+  # Scalar int32 selector; the minimum defaults to 0 and maxval is num_cases.
+  rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
+  # Pass the real x only to one of the func calls.
+  # Index [1] of switch is the output used when the predicate is true, i.e.
+  # when rand_sel equals this case. Index [0] of merge is the merged value
+  # coming from whichever branch produced an output.
+  return control_flow_ops.merge([func(
+      control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
+                                 for case in range(num_cases)])[0]
+
+
+def _apply_with_random_selector_tuples(x, func, num_cases):
+  """Computes func(x, sel), with sel sampled from [0...num_cases-1].
+
+  Args:
+    x: A tuple of input tensors.
+    func: Python function to apply.
+    num_cases: Python int32, number of cases to sample sel from.
+
+  Returns:
+    The result of func(x, sel), where func receives the value of the
+    selector as a python integer, but sel is sampled dynamically.
+  """
+  num_inputs = len(x)
+  rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
+  # Pass the real x only to one of the func calls.
+
+  tuples = [list() for t in x]
+  for case in range(num_cases):
+    new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
+    output = func(tuple(new_x), case)
+    for j in range(num_inputs):
+      tuples[j].append(output[j])
+
+  for i in range(num_inputs):
+    tuples[i] = control_flow_ops.merge(tuples[i])[0]
+  return tuple(tuples)
+
+
+def _random_integer(minval, maxval, seed):
+  """Returns a random 0-D tensor between minval and maxval.
+
+  Args:
+    minval: minimum value of the random tensor.
+    maxval: maximum value of the random tensor.
+    seed: random seed.
+
+  Returns:
+    A random 0-D tensor between minval and maxval.
+  """
+  return tf.random_uniform(
+      [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
+
+
+def normalize_image(image, original_minval, original_maxval, target_minval,
+                    target_maxval):
+  """Normalizes pixel values in the image.
+
+  Moves the pixel values from the current [original_minval, original_maxval]
+  range to a the [target_minval, target_maxval] range.
+
+  Args:
+    image: rank 3 float32 tensor containing 1
+           image -> [height, width, channels].
+    original_minval: current image minimum value.
+    original_maxval: current image maximum value.
+    target_minval: target image minimum value.
+    target_maxval: target image maximum value.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('NormalizeImage', values=[image]):
+    original_minval = float(original_minval)
+    original_maxval = float(original_maxval)
+    target_minval = float(target_minval)
+    target_maxval = float(target_maxval)
+    image = tf.to_float(image)
+    image = tf.subtract(image, original_minval)
+    image = tf.multiply(image, (target_maxval - target_minval) /
+                        (original_maxval - original_minval))
+    image = tf.add(image, target_minval)
+    return image
+
+
+def flip_boxes(boxes):
+  """Left-right flip the boxes.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+
+  Returns:
+    Flipped boxes.
+  """
+  # Flip boxes.
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  flipped_xmin = tf.subtract(1.0, xmax)
+  flipped_xmax = tf.subtract(1.0, xmin)
+  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
+  return flipped_boxes
+
+
+def retain_boxes_above_threshold(
+    boxes, labels, label_scores, masks=None, keypoints=None, threshold=0.0):
+  """Retains boxes whose label score is above a given threshold.
+
+  If the label score for a box is missing (represented by NaN), the box is
+  retained. The boxes that don't pass the threshold will not appear in the
+  returned tensor.
+
+  Args:
+    boxes: float32 tensor of shape [num_instance, 4] representing boxes
+      location in normalized coordinates.
+    labels: rank 1 int32 tensor of shape [num_instance] containing the object
+      classes.
+    label_scores: float32 tensor of shape [num_instance] representing the
+      score for each box.
+    masks: (optional) rank 3 float32 tensor with shape
+      [num_instances, height, width] containing instance masks. The masks are of
+      the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+    threshold: scalar python float.
+
+  Returns:
+    retained_boxes: [num_retained_instance, 4]
+    retianed_labels: [num_retained_instance]
+    retained_label_scores: [num_retained_instance]
+
+    If masks, or keypoints are not None, the function also returns:
+
+    retained_masks: [num_retained_instance, height, width]
+    retained_keypoints: [num_retained_instance, num_keypoints, 2]
+  """
+  with tf.name_scope('RetainBoxesAboveThreshold',
+                     values=[boxes, labels, label_scores]):
+    indices = tf.where(
+        tf.logical_or(label_scores > threshold, tf.is_nan(label_scores)))
+    indices = tf.squeeze(indices, axis=1)
+    retained_boxes = tf.gather(boxes, indices)
+    retained_labels = tf.gather(labels, indices)
+    retained_label_scores = tf.gather(label_scores, indices)
+    result = [retained_boxes, retained_labels, retained_label_scores]
+
+    if masks is not None:
+      retained_masks = tf.gather(masks, indices)
+      result.append(retained_masks)
+
+    if keypoints is not None:
+      retained_keypoints = tf.gather(keypoints, indices)
+      result.append(retained_keypoints)
+
+    return result
+
+
+def _flip_masks(masks):
+  """Left-right flips masks.
+
+  Args:
+    masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+
+  Returns:
+    flipped masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  """
+  return masks[:, :, ::-1]
+
+
+def random_horizontal_flip(
+    image,
+    boxes=None,
+    masks=None,
+    keypoints=None,
+    keypoint_flip_permutation=None,
+    seed=None):
+  """Randomly decides whether to mirror the image and detections or not.
+
+  The probability of flipping the image is 50%.
+
+  Args:
+    image: rank 3 float32 tensor with shape [height, width, channels].
+    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+           containing the bounding boxes.
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    keypoint_flip_permutation: rank 1 int32 tensor containing keypoint flip
+                               permutation.
+    seed: random seed
+
+  Returns:
+    image: image which is the same shape as input image.
+
+    If boxes, masks, keypoints, and keypoint_flip_permutation is not None,
+    the function also returns the following tensors.
+
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  Raises:
+    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+  """
+  def _flip_image(image):
+    # flip image
+    image_flipped = tf.image.flip_left_right(image)
+    return image_flipped
+
+  if keypoints is not None and keypoint_flip_permutation is None:
+    raise ValueError(
+        'keypoints are provided but keypoints_flip_permutation is not provided')
+
+  with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
+    result = []
+    # random variable defining whether to do flip or not
+    do_a_flip_random = tf.random_uniform([], seed=seed)
+    # flip only if there are bounding boxes in image!
+    do_a_flip_random = tf.logical_and(
+        tf.greater(tf.size(boxes), 0), tf.greater(do_a_flip_random, 0.5))
+
+    # flip image
+    image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
+    result.append(image)
+
+    # flip boxes
+    if boxes is not None:
+      boxes = tf.cond(
+          do_a_flip_random, lambda: flip_boxes(boxes), lambda: boxes)
+      result.append(boxes)
+
+    # flip masks
+    if masks is not None:
+      masks = tf.cond(
+          do_a_flip_random, lambda: _flip_masks(masks), lambda: masks)
+      result.append(masks)
+
+    # flip keypoints
+    if keypoints is not None and keypoint_flip_permutation is not None:
+      permutation = keypoint_flip_permutation
+      keypoints = tf.cond(
+          do_a_flip_random,
+          lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
+          lambda: keypoints)
+      result.append(keypoints)
+
+    return tuple(result)
+
+
+def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
+  """Scales each value in the pixels of the image.
+
+     This function scales each pixel independent of the other ones.
+     For each value in image tensor, draws a random number between
+     minval and maxval and multiples the values with them.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    minval: lower ratio of scaling pixel values.
+    maxval: upper ratio of scaling pixel values.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomPixelValueScale', values=[image]):
+    color_coef = tf.random_uniform(
+        tf.shape(image),
+        minval=minval,
+        maxval=maxval,
+        dtype=tf.float32,
+        seed=seed)
+    image = tf.multiply(image, color_coef)
+    image = tf.clip_by_value(image, 0.0, 1.0)
+
+  return image
+
+
+def random_image_scale(image,
+                       masks=None,
+                       min_scale_ratio=0.5,
+                       max_scale_ratio=2.0,
+                       seed=None):
+  """Scales the image size.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
+    masks: (optional) rank 3 float32 tensor containing masks with
+      size [height, width, num_masks]. The value is set to None if there are no
+      masks.
+    min_scale_ratio: minimum scaling ratio.
+    max_scale_ratio: maximum scaling ratio.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    masks: If masks is not none, resized masks which are the same rank as input
+      masks will be returned.
+  """
+  with tf.name_scope('RandomImageScale', values=[image]):
+    result = []
+    image_shape = tf.shape(image)
+    image_height = image_shape[0]
+    image_width = image_shape[1]
+    size_coef = tf.random_uniform([],
+                                  minval=min_scale_ratio,
+                                  maxval=max_scale_ratio,
+                                  dtype=tf.float32, seed=seed)
+    image_newysize = tf.to_int32(
+        tf.multiply(tf.to_float(image_height), size_coef))
+    image_newxsize = tf.to_int32(
+        tf.multiply(tf.to_float(image_width), size_coef))
+    image = tf.image.resize_images(
+        image, [image_newysize, image_newxsize], align_corners=True)
+    result.append(image)
+    if masks:
+      masks = tf.image.resize_nearest_neighbor(
+          masks, [image_newysize, image_newxsize], align_corners=True)
+      result.append(masks)
+    return tuple(result)
+
+
+def random_rgb_to_gray(image, probability=0.1, seed=None):
+  """Changes the image from RGB to Grayscale with the given probability.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    probability: the probability of returning a grayscale image.
+            The probability should be a number between [0, 1].
+    seed: random seed.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  def _image_to_gray(image):
+    image_gray1 = tf.image.rgb_to_grayscale(image)
+    image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
+    return image_gray3
+
+  with tf.name_scope('RandomRGBtoGray', values=[image]):
+    # random variable defining whether to do flip or not
+    do_gray_random = tf.random_uniform([], seed=seed)
+
+    image = tf.cond(
+        tf.greater(do_gray_random, probability), lambda: image,
+        lambda: _image_to_gray(image))
+
+  return image
+
+
+def random_adjust_brightness(image, max_delta=0.2):
+  """Randomly adjusts brightness.
+
+  Makes sure the output image is still between 0 and 1.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    max_delta: how much to change the brightness. A value between [0, 1).
+
+  Returns:
+    image: image which is the same shape as input image.
+    boxes: boxes which is the same shape as input boxes.
+  """
+  with tf.name_scope('RandomAdjustBrightness', values=[image]):
+    image = tf.image.random_brightness(image, max_delta)
+    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
+    return image
+
+
+def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
+  """Randomly adjusts contrast.
+
+  Makes sure the output image is still between 0 and 1.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    min_delta: see max_delta.
+    max_delta: how much to change the contrast. Contrast will change with a
+               value between min_delta and max_delta. This value will be
+               multiplied to the current contrast of the image.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomAdjustContrast', values=[image]):
+    image = tf.image.random_contrast(image, min_delta, max_delta)
+    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
+    return image
+
+
+def random_adjust_hue(image, max_delta=0.02):
+  """Randomly adjusts hue.
+
+  Makes sure the output image is still between 0 and 1.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    max_delta: change hue randomly with a value between 0 and max_delta.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomAdjustHue', values=[image]):
+    image = tf.image.random_hue(image, max_delta)
+    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
+    return image
+
+
+def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
+  """Randomly adjusts saturation.
+
+  Makes sure the output image is still between 0 and 1.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    min_delta: see max_delta.
+    max_delta: how much to change the saturation. Saturation will change with a
+               value between min_delta and max_delta. This value will be
+               multiplied to the current saturation of the image.
+
+  Returns:
+    image: image which is the same shape as input image.
+  """
+  with tf.name_scope('RandomAdjustSaturation', values=[image]):
+    image = tf.image.random_saturation(image, min_delta, max_delta)
+    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
+    return image
+
+
+def random_distort_color(image, color_ordering=0):
+  """Randomly distorts color.
+
+  Randomly distorts color using a combination of brightness, hue, contrast
+  and saturation changes. Makes sure the output image is still between 0 and 1.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    color_ordering: Python int, a type of distortion (valid values: 0, 1).
+
+  Returns:
+    image: image which is the same shape as input image.
+
+  Raises:
+    ValueError: if color_ordering is not in {0, 1}.
+  """
+  with tf.name_scope('RandomDistortColor', values=[image]):
+    if color_ordering == 0:
+      image = tf.image.random_brightness(image, max_delta=32. / 255.)
+      image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
+      image = tf.image.random_hue(image, max_delta=0.2)
+      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
+    elif color_ordering == 1:
+      image = tf.image.random_brightness(image, max_delta=32. / 255.)
+      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
+      image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
+      image = tf.image.random_hue(image, max_delta=0.2)
+    else:
+      raise ValueError('color_ordering must be in {0, 1}')
+
+    # The random_* ops do not necessarily clamp.
+    image = tf.clip_by_value(image, 0.0, 1.0)
+    return image
+
+
+def random_jitter_boxes(boxes, ratio=0.05, seed=None):
+  """Randomly jitter boxes in image.
+
+  Every coordinate of every box is shifted by a random offset that is
+  proportional to the box height (for y coordinates) or box width (for x
+  coordinates), and the result is clipped to [0, 1].
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    ratio: The ratio of the box width and height that the corners can jitter.
+           For example if the width is 100 pixels and ratio is 0.05,
+           the corners can jitter up to 5 pixels in the x direction.
+    seed: random seed.
+
+  Returns:
+    boxes: boxes which is the same shape as input boxes.
+  """
+  def random_jitter_box(box, ratio, seed):
+    """Randomly jitter box.
+
+    Args:
+      box: bounding box [1, 1, 4].
+      ratio: max ratio between jittered box and original box,
+        a number between [0, 0.5].
+      seed: random seed.
+
+    Returns:
+      jittered_box: jittered box.
+    """
+    # One random factor in [-ratio, ratio) per box coordinate.
+    rand_numbers = tf.random_uniform(
+        [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
+    # Box layout is [ymin, xmin, ymax, xmax]: width = xmax - xmin and
+    # height = ymax - ymin.
+    box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
+    box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
+    # Pair each coordinate with the box dimension along its own axis.
+    hw_coefs = tf.stack([box_height, box_width, box_height, box_width])
+    hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers)
+    jittered_box = tf.add(box, hw_rand_coefs)
+    # Keep the jittered coordinates inside the normalized range.
+    jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0)
+    return jittered_box
+
+  with tf.name_scope('RandomJitterBoxes', values=[boxes]):
+    # boxes are [N, 4]. Lets first make them [N, 1, 1, 4]
+    boxes_shape = tf.shape(boxes)
+    boxes = tf.expand_dims(boxes, 1)
+    boxes = tf.expand_dims(boxes, 2)
+
+    # map_fn iterates over the first axis, so the helper sees one [1, 1, 4]
+    # box at a time.
+    distorted_boxes = tf.map_fn(
+        lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32)
+
+    # Restore the original [N, 4] shape recorded before expanding.
+    distorted_boxes = tf.reshape(distorted_boxes, boxes_shape)
+
+    return distorted_boxes
+
+
+def _strict_random_crop_image(image,
+                              boxes,
+                              labels,
+                              masks=None,
+                              keypoints=None,
+                              min_object_covered=1.0,
+                              aspect_ratio_range=(0.75, 1.33),
+                              area_range=(0.1, 1.0),
+                              overlap_thresh=0.3):
+  """Performs random crop.
+
+  Note: boxes will be clipped to the crop. Keypoint coordinates that are
+  outside the crop will be set to NaN, which is consistent with the original
+  keypoint encoding for non-existing keypoints. This function always crops
+  the image and is supposed to be used by `random_crop_image` function which
+  sometimes returns image unchanged.
+
+  The crop window is sampled with tf.image.sample_distorted_bounding_box
+  using max_attempts=100. No seed is passed to that op here.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes with shape
+           [num_instances, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+
+  Returns:
+    A tuple containing:
+
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope('RandomCropImage', values=[image, boxes]):
+    image_shape = tf.shape(image)
+
+    # boxes are [N, 4]. Lets first make them [N, 1, 4].
+    # The boxes are clipped to [0, 1] only for sampling the crop window; the
+    # unclipped `boxes` are the ones pruned and re-framed further below.
+    boxes_expanded = tf.expand_dims(
+        tf.clip_by_value(
+            boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
+
+    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
+        image_shape,
+        bounding_boxes=boxes_expanded,
+        min_object_covered=min_object_covered,
+        aspect_ratio_range=aspect_ratio_range,
+        area_range=area_range,
+        max_attempts=100,
+        use_image_if_no_bounding_boxes=True)
+
+    # begin/size are the arguments for tf.slice on the image; im_box is the
+    # sampled crop window expressed as a box.
+    im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
+
+    new_image = tf.slice(image, im_box_begin, im_box_size)
+    # Height and width become dynamic after slicing; the channel dimension is
+    # carried over from the input's static shape.
+    new_image.set_shape([None, None, image.get_shape()[2]])
+
+    # [1, 4]
+    im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0])
+    # [4]
+    im_box_rank1 = tf.squeeze(im_box)
+
+    boxlist = box_list.BoxList(boxes)
+    boxlist.add_field('labels', labels)
+
+    im_boxlist = box_list.BoxList(im_box_rank2)
+
+    # remove boxes that are outside cropped image
+    boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
+        boxlist, im_box_rank1)
+
+    # remove boxes whose overlap with the crop window is below overlap_thresh
+    # (as computed by box_list_ops.prune_non_overlapping_boxes)
+    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
+        boxlist, im_boxlist, overlap_thresh)
+
+    # change the coordinate of the remaining boxes
+    new_labels = overlapping_boxlist.get_field('labels')
+    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
+                                                       im_box_rank1)
+    new_boxes = new_boxlist.get()
+    new_boxes = tf.clip_by_value(
+        new_boxes, clip_value_min=0.0, clip_value_max=1.0)
+
+    result = [new_image, new_boxes, new_labels]
+
+    if masks is not None:
+      # Apply the two pruning steps in the same order as for the boxes:
+      # keep_ids index into the result of the first pruning step.
+      masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
+      masks_of_boxes_completely_inside_window = tf.gather(
+          masks_of_boxes_inside_window, keep_ids)
+      # Keep every remaining instance (begin 0, size -1 on the first axis)
+      # and cut the spatial axes to the crop window.
+      masks_box_begin = [0, im_box_begin[0], im_box_begin[1]]
+      masks_box_size = [-1, im_box_size[0], im_box_size[1]]
+      new_masks = tf.slice(
+          masks_of_boxes_completely_inside_window,
+          masks_box_begin, masks_box_size)
+      result.append(new_masks)
+
+    if keypoints is not None:
+      # Same two-step selection as for boxes and masks.
+      keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids)
+      keypoints_of_boxes_completely_inside_window = tf.gather(
+          keypoints_of_boxes_inside_window, keep_ids)
+      new_keypoints = keypoint_ops.change_coordinate_frame(
+          keypoints_of_boxes_completely_inside_window, im_box_rank1)
+      # After re-framing, the crop itself is the unit window.
+      new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
+                                                        [0.0, 0.0, 1.0, 1.0])
+      result.append(new_keypoints)
+
+    return tuple(result)
+
+
+def random_crop_image(image,
+                      boxes,
+                      labels,
+                      masks=None,
+                      keypoints=None,
+                      min_object_covered=1.0,
+                      aspect_ratio_range=(0.75, 1.33),
+                      area_range=(0.1, 1.0),
+                      overlap_thresh=0.3,
+                      random_coef=0.0,
+                      seed=None):
+  """Randomly crops the image.
+
+  Given the input image and its bounding boxes, this op randomly
+  crops a subimage.  Given a user-provided set of input constraints,
+  the crop window is resampled until it satisfies these constraints.
+  If within 100 trials it is unable to find a valid crop, the original
+  image is returned. See the Args section for a description of the input
+  constraints. Both input boxes and returned Boxes are in normalized
+  form (e.g., lie in the unit square [0, 1]).
+  This function will return the original image with probability random_coef.
+
+  Note: boxes will be clipped to the crop. Keypoint coordinates that are
+  outside the crop will be set to NaN, which is consistent with the original
+  keypoint encoding for non-existing keypoints.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes with shape
+           [num_instances, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image. Must be a Python number, since it is compared
+                 with a Python float while the graph is being built.
+    seed: random seed. It is used only for the decision whether to crop; it
+          is not forwarded to the crop sampling itself.
+
+  Returns:
+    image: Image shape will be [new_height, new_width, channels].
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+           form.
+    labels: new labels.
+
+    If masks, or keypoints are not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+
+  def strict_random_crop_image_fn():
+    # Closure over the arguments so it can serve as a tf.cond branch.
+    return _strict_random_crop_image(
+        image,
+        boxes,
+        labels,
+        masks=masks,
+        keypoints=keypoints,
+        min_object_covered=min_object_covered,
+        aspect_ratio_range=aspect_ratio_range,
+        area_range=area_range,
+        overlap_thresh=overlap_thresh)
+
+  # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
+  # A random_coef below the smallest positive normalized float is treated as
+  # zero, i.e. "always crop", so the crop is built unconditionally.
+  if random_coef < sys.float_info.min:
+    result = strict_random_crop_image_fn()
+  else:
+    # Crop only when the uniform draw exceeds random_coef.
+    do_a_crop_random = tf.random_uniform([], seed=seed)
+    do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
+
+    # The pass-through branch must return the same structure as the crop
+    # branch: image, boxes, labels, then the optional tensors in order.
+    outputs = [image, boxes, labels]
+    if masks is not None:
+      outputs.append(masks)
+    if keypoints is not None:
+      outputs.append(keypoints)
+
+    result = tf.cond(do_a_crop_random,
+                     strict_random_crop_image_fn,
+                     lambda: tuple(outputs))
+  return result
+
+
+def random_pad_image(image,
+                     boxes,
+                     min_image_size=None,
+                     max_image_size=None,
+                     pad_color=None,
+                     seed=None):
+  """Randomly pads the image.
+
+  This function randomly pads the image with zeros. The final size of the
+  padded image will be between min_image_size and max_image_size.
+  if min_image_size is smaller than the input image size, min_image_size will
+  be set to the input image size. The same for max_image_size. The input image
+  will be located at a uniformly random location inside the padded image.
+  The relative location of the boxes to the original image will remain the same.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    min_image_size: a tensor of size [min_height, min_width], type tf.int32.
+                    If passed as None, will be set to image size
+                    [height, width].
+    max_image_size: a tensor of size [max_height, max_width], type tf.int32.
+                    If passed as None, will be set to twice the
+                    image [height * 2, width * 2].
+    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+               if set as None, it will be set to average color of the input
+               image.
+
+    seed: random seed.
+
+  Returns:
+    image: Image shape will be [new_height, new_width, channels].
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+           form.
+  """
+  if pad_color is None:
+    pad_color = tf.reduce_mean(image, reduction_indices=[0, 1])
+
+  image_shape = tf.shape(image)
+  image_height = image_shape[0]
+  image_width = image_shape[1]
+
+  if max_image_size is None:
+    max_image_size = tf.stack([image_height * 2, image_width * 2])
+  max_image_size = tf.maximum(max_image_size,
+                              tf.stack([image_height, image_width]))
+
+  if min_image_size is None:
+    min_image_size = tf.stack([image_height, image_width])
+  min_image_size = tf.maximum(min_image_size,
+                              tf.stack([image_height, image_width]))
+
+  target_height = tf.cond(
+      max_image_size[0] > min_image_size[0],
+      lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
+      lambda: max_image_size[0])
+
+  target_width = tf.cond(
+      max_image_size[1] > min_image_size[1],
+      lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
+      lambda: max_image_size[1])
+
+  offset_height = tf.cond(
+      target_height > image_height,
+      lambda: _random_integer(0, target_height - image_height, seed),
+      lambda: tf.constant(0, dtype=tf.int32))
+
+  offset_width = tf.cond(
+      target_width > image_width,
+      lambda: _random_integer(0, target_width - image_width, seed),
+      lambda: tf.constant(0, dtype=tf.int32))
+
+  new_image = tf.image.pad_to_bounding_box(
+      image, offset_height=offset_height, offset_width=offset_width,
+      target_height=target_height, target_width=target_width)
+
+  # Setting color of the padded pixels
+  image_ones = tf.ones_like(image)
+  image_ones_padded = tf.image.pad_to_bounding_box(
+      image_ones, offset_height=offset_height, offset_width=offset_width,
+      target_height=target_height, target_width=target_width)
+  image_color_paded = (1.0 - image_ones_padded) * pad_color
+  new_image += image_color_paded
+
+  # setting boxes
+  new_window = tf.to_float(
+      tf.stack([
+          -offset_height, -offset_width, target_height - offset_height,
+          target_width - offset_width
+      ]))
+  new_window /= tf.to_float(
+      tf.stack([image_height, image_width, image_height, image_width]))
+  boxlist = box_list.BoxList(boxes)
+  new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
+  new_boxes = new_boxlist.get()
+
+  return new_image, new_boxes
+
+
+def random_crop_pad_image(image,
+                          boxes,
+                          labels,
+                          min_object_covered=1.0,
+                          aspect_ratio_range=(0.75, 1.33),
+                          area_range=(0.1, 1.0),
+                          overlap_thresh=0.3,
+                          random_coef=0.0,
+                          min_padded_size_ratio=None,
+                          max_padded_size_ratio=None,
+                          pad_color=None,
+                          seed=None):
+  """Randomly crops and pads the image.
+
+  Given an input image and its bounding boxes, this op first randomly crops
+  the image and then randomly pads the image with background values. Parameters
+  min_padded_size_ratio and max_padded_size_ratio, determine the range of the
+  final output image size.  Specifically, the final image size will have a size
+  in the range of min_padded_size_ratio * tf.shape(image) and
+  max_padded_size_ratio * tf.shape(image). Note that these ratios are with
+  respect to the size of the original image, so we can't capture the same
+  effect easily by independently applying RandomCropImage
+  followed by RandomPadImage.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    min_padded_size_ratio: min ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [0.0, 0.0].
+    max_padded_size_ratio: max ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [2.0, 2.0].
+    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+               if set as None, it will be set to average color of the randomly
+               cropped image.
+    seed: random seed.
+
+  Returns:
+    padded_image: padded image.
+    padded_boxes: boxes which is the same rank as input boxes. Boxes are in
+                  normalized form.
+    cropped_labels: cropped labels.
+  """
+  image_size = tf.shape(image)
+  image_height = image_size[0]
+  image_width = image_size[1]
+  if min_padded_size_ratio is None:
+    min_padded_size_ratio = tf.constant([0.0, 0.0], tf.float32)
+  if max_padded_size_ratio is None:
+    max_padded_size_ratio = tf.constant([2.0, 2.0], tf.float32)
+  cropped_image, cropped_boxes, cropped_labels = random_crop_image(
+      image=image,
+      boxes=boxes,
+      labels=labels,
+      min_object_covered=min_object_covered,
+      aspect_ratio_range=aspect_ratio_range,
+      area_range=area_range,
+      overlap_thresh=overlap_thresh,
+      random_coef=random_coef,
+      seed=seed)
+
+  min_image_size = tf.to_int32(
+      tf.to_float(tf.stack([image_height, image_width])) *
+      min_padded_size_ratio)
+  max_image_size = tf.to_int32(
+      tf.to_float(tf.stack([image_height, image_width])) *
+      max_padded_size_ratio)
+
+  padded_image, padded_boxes = random_pad_image(
+      cropped_image,
+      cropped_boxes,
+      min_image_size=min_image_size,
+      max_image_size=max_image_size,
+      pad_color=pad_color,
+      seed=seed)
+
+  return padded_image, padded_boxes, cropped_labels
+
+
+def random_crop_to_aspect_ratio(image,
+                                boxes,
+                                labels,
+                                masks=None,
+                                keypoints=None,
+                                aspect_ratio=1.0,
+                                overlap_thresh=0.3,
+                                seed=None):
+  """Randomly crops an image to the specified aspect ratio.
+
+  Randomly crops the a portion of the image such that the crop is of the
+  specified aspect ratio, and the crop is as large as possible. If the specified
+  aspect ratio is larger than the aspect ratio of the image, this op will
+  randomly remove rows from the top and bottom of the image. If the specified
+  aspect ratio is less than the aspect ratio of the image, this op will randomly
+  remove cols from the left and right of the image. If the specified aspect
+  ratio is the same as the aspect ratio of the image, this op will return the
+  image.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    aspect_ratio: the aspect ratio of cropped image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  Raises:
+    ValueError: If image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  with tf.name_scope('RandomCropToAspectRatio', values=[image]):
+    image_shape = tf.shape(image)
+    orig_height = image_shape[0]
+    orig_width = image_shape[1]
+    orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
+    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
+    def target_height_fn():
+      return tf.to_int32(
+          tf.round(
+              tf.to_float(orig_height) * orig_aspect_ratio / new_aspect_ratio))
+    target_height = tf.cond(
+        orig_aspect_ratio >= new_aspect_ratio,
+        lambda: orig_height,
+        target_height_fn)
+    def target_width_fn():
+      return tf.to_int32(
+          tf.round(
+              tf.to_float(orig_width) * new_aspect_ratio / orig_aspect_ratio))
+    target_width = tf.cond(
+        orig_aspect_ratio <= new_aspect_ratio,
+        lambda: orig_width,
+        target_width_fn)
+
+    # either offset_height = 0 and offset_width is randomly chosen from
+    # [0, offset_width - target_width), or else offset_width = 0 and
+    # offset_height is randomly chosen from [0, offset_height - target_height)
+    offset_height = _random_integer(0, orig_height - target_height + 1, seed)
+    offset_width = _random_integer(0, orig_width - target_width + 1, seed)
+    new_image = tf.image.crop_to_bounding_box(
+        image, offset_height, offset_width, target_height, target_width)
+
+    im_box = tf.stack([
+        tf.to_float(offset_height) / tf.to_float(orig_height),
+        tf.to_float(offset_width) / tf.to_float(orig_width),
+        tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
+        tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
+    ])
+
+    boxlist = box_list.BoxList(boxes)
+    boxlist.add_field('labels', labels)
+
+    im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
+
+    # remove boxes whose overlap with the image is less than overlap_thresh
+    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
+        boxlist, im_boxlist, overlap_thresh)
+
+    # change the coordinate of the remaining boxes
+    new_labels = overlapping_boxlist.get_field('labels')
+    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
+                                                       im_box)
+    new_boxlist = box_list_ops.clip_to_window(new_boxlist,
+                                              tf.constant(
+                                                  [0.0, 0.0, 1.0, 1.0],
+                                                  tf.float32))
+    new_boxes = new_boxlist.get()
+
+    result = [new_image, new_boxes, new_labels]
+
+    if masks is not None:
+      masks_inside_window = tf.gather(masks, keep_ids)
+      masks_box_begin = tf.stack([0, offset_height, offset_width])
+      masks_box_size = tf.stack([-1, target_height, target_width])
+      new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
+      result.append(new_masks)
+
+    if keypoints is not None:
+      keypoints_inside_window = tf.gather(keypoints, keep_ids)
+      new_keypoints = keypoint_ops.change_coordinate_frame(
+          keypoints_inside_window, im_box)
+      new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
+                                                        [0.0, 0.0, 1.0, 1.0])
+      result.append(new_keypoints)
+
+    return tuple(result)
+
+
+def random_black_patches(image,
+                         max_black_patches=10,
+                         probability=0.5,
+                         size_to_image_ratio=0.1,
+                         random_seed=None):
+  """Randomly adds some black patches to the image.
+
+  This op adds up to max_black_patches square black patches of a fixed size
+  to the image where size is specified via the size_to_image_ratio parameter.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    max_black_patches: number of times that the function tries to add a
+                       black box to the image.
+    probability: at each try, what is the chance of adding a box.
+    size_to_image_ratio: Determines the ratio of the size of the black patches
+                         to the size of the image.
+                         box_size = size_to_image_ratio *
+                                    min(image_width, image_height)
+    random_seed: random seed.
+
+  Returns:
+    image
+  """
+  def add_black_patch_to_image(image):
+    """Function for adding one patch to the image.
+
+    Args:
+      image: image
+
+    Returns:
+      image with a randomly added black box
+    """
+    image_shape = tf.shape(image)
+    image_height = image_shape[0]
+    image_width = image_shape[1]
+    box_size = tf.to_int32(
+        tf.multiply(
+            tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
+            size_to_image_ratio))
+    normalized_y_min = tf.random_uniform(
+        [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
+    normalized_x_min = tf.random_uniform(
+        [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
+    y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
+    x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
+    black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
+    mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
+                                              image_height, image_width)
+    image = tf.multiply(image, mask)
+    return image
+
+  with tf.name_scope('RandomBlackPatchInImage', values=[image]):
+    for _ in range(max_black_patches):
+      random_prob = tf.random_uniform([], minval=0.0, maxval=1.0,
+                                      dtype=tf.float32, seed=random_seed)
+      image = tf.cond(
+          tf.greater(random_prob, probability), lambda: image,
+          lambda: add_black_patch_to_image(image))
+
+    return image
+
+
+def image_to_float(image):
+  """Used in Faster R-CNN. Casts image pixel values to float.
+
+  Args:
+    image: input image which might be in tf.uint8 or sth else format
+
+  Returns:
+    image: image in tf.float32 format.
+  """
+  with tf.name_scope('ImageToFloat', values=[image]):
+    image = tf.to_float(image)
+    return image
+
+
+def random_resize_method(image, target_size):
+  """Uses a random resize method to resize the image to target size.
+
+  Args:
+    image: a rank 3 tensor.
+    target_size: a list of [target_height, target_width]
+
+  Returns:
+    resized image.
+  """
+
+  resized_image = _apply_with_random_selector(
+      image,
+      lambda x, method: tf.image.resize_images(x, target_size, method),
+      num_cases=4)
+
+  return resized_image
+
+
+def resize_to_range(image,
+                    masks=None,
+                    min_dimension=None,
+                    max_dimension=None,
+                    align_corners=False):
+  """Resizes an image so its dimensions are within the provided value.
+
+  The output size can be described by two cases:
+  1. If the image can be rescaled so its minimum dimension is equal to the
+     provided value without the other dimension exceeding max_dimension,
+     then do so.
+  2. Otherwise, resize so the largest dimension is equal to max_dimension.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks.
+    min_dimension: (optional) (scalar) desired size of the smaller image
+                   dimension.
+    max_dimension: (optional) (scalar) maximum allowed size
+                   of the larger image dimension.
+    align_corners: bool. If true, exactly align all 4 corners of the input
+                   and output. Defaults to False.
+
+  Returns:
+    A 3D tensor of shape [new_height, new_width, channels],
+    where the image has been resized (with bilinear interpolation) so that
+    min(new_height, new_width) == min_dimension or
+    max(new_height, new_width) == max_dimension.
+
+    If masks is not None, also outputs masks:
+    A 3D tensor of shape [num_instances, new_height, new_width]
+
+  Raises:
+    ValueError: if the image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
+    image_shape = tf.shape(image)
+    orig_height = tf.to_float(image_shape[0])
+    orig_width = tf.to_float(image_shape[1])
+    orig_min_dim = tf.minimum(orig_height, orig_width)
+
+    # Calculates the larger of the possible sizes
+    min_dimension = tf.constant(min_dimension, dtype=tf.float32)
+    large_scale_factor = min_dimension / orig_min_dim
+    # Scaling orig_(height|width) by large_scale_factor will make the smaller
+    # dimension equal to min_dimension, save for floating point rounding errors.
+    # For reasonably-sized images, taking the nearest integer will reliably
+    # eliminate this error.
+    large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
+    large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
+    large_size = tf.stack([large_height, large_width])
+
+    if max_dimension:
+      # Calculates the smaller of the possible sizes, use that if the larger
+      # is too big.
+      orig_max_dim = tf.maximum(orig_height, orig_width)
+      max_dimension = tf.constant(max_dimension, dtype=tf.float32)
+      small_scale_factor = max_dimension / orig_max_dim
+      # Scaling orig_(height|width) by small_scale_factor will make the larger
+      # dimension equal to max_dimension, save for floating point rounding
+      # errors. For reasonably-sized images, taking the nearest integer will
+      # reliably eliminate this error.
+      small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
+      small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
+      small_size = tf.stack([small_height, small_width])
+
+      new_size = tf.cond(
+          tf.to_float(tf.reduce_max(large_size)) > max_dimension,
+          lambda: small_size, lambda: large_size)
+    else:
+      new_size = large_size
+
+    new_image = tf.image.resize_images(image, new_size,
+                                       align_corners=align_corners)
+
+    result = new_image
+    if masks is not None:
+      num_instances = tf.shape(masks)[0]
+
+      def resize_masks_branch():
+        new_masks = tf.expand_dims(masks, 3)
+        new_masks = tf.image.resize_nearest_neighbor(
+            new_masks, new_size, align_corners=align_corners)
+        new_masks = tf.squeeze(new_masks, axis=3)
+        return new_masks
+
+      def reshape_masks_branch():
+        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
+        return new_masks
+
+      masks = tf.cond(num_instances > 0,
+                      resize_masks_branch,
+                      reshape_masks_branch)
+      result = [new_image, masks]
+
+    return result
+
+
+def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
+  """Scales boxes from normalized to pixel coordinates.
+
+  Args:
+    image: A 3D float32 tensor of shape [height, width, channels].
+    boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
+      boxes in normalized coordinates. Each row is of the form
+      [ymin, xmin, ymax, xmax].
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+
+  Returns:
+    image: unchanged input image.
+    scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
+      bounding boxes in pixel coordinates.
+    scaled_keypoints: a 3D float32 tensor with shape
+      [num_instances, num_keypoints, 2] containing the keypoints in pixel
+      coordinates.
+  """
+  boxlist = box_list.BoxList(boxes)
+  image_height = tf.shape(image)[0]
+  image_width = tf.shape(image)[1]
+  scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get()
+  result = [image, scaled_boxes]
+  if keypoints is not None:
+    scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width)
+    result.append(scaled_keypoints)
+  return tuple(result)
+
+
+# pylint: disable=g-doc-return-or-yield
+def resize_image(image,
+                 masks=None,
+                 new_height=600,
+                 new_width=1024,
+                 method=tf.image.ResizeMethod.BILINEAR,
+                 align_corners=False):
+  """See `tf.image.resize_images` for detailed doc."""
+  with tf.name_scope(
+      'ResizeImage',
+      values=[image, new_height, new_width, method, align_corners]):
+    new_image = tf.image.resize_images(image, [new_height, new_width],
+                                       method=method,
+                                       align_corners=align_corners)
+    result = new_image
+    if masks is not None:
+      num_instances = tf.shape(masks)[0]
+      new_size = tf.constant([new_height, new_width], dtype=tf.int32)
+      def resize_masks_branch():
+        new_masks = tf.expand_dims(masks, 3)
+        new_masks = tf.image.resize_nearest_neighbor(
+            new_masks, new_size, align_corners=align_corners)
+        new_masks = tf.squeeze(new_masks, axis=3)
+        return new_masks
+
+      def reshape_masks_branch():
+        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
+        return new_masks
+
+      masks = tf.cond(num_instances > 0,
+                      resize_masks_branch,
+                      reshape_masks_branch)
+      result = [new_image, masks]
+
+    return result
+
+
+def subtract_channel_mean(image, means=None):
+  """Normalizes an image by subtracting a mean from each channel.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    means: float list containing a mean for each channel
+  Returns:
+    normalized_images: a tensor of shape [height, width, channels]
+  Raises:
+    ValueError: if images is not a 4D tensor or if the number of means is not
+      equal to the number of channels.
+  """
+  with tf.name_scope('SubtractChannelMean', values=[image, means]):
+    if len(image.get_shape()) != 3:
+      raise ValueError('Input must be of size [height, width, channels]')
+    if len(means) != image.get_shape()[-1]:
+      raise ValueError('len(means) must match the number of channels')
+    return image - [[means]]
+
+
+def one_hot_encoding(labels, num_classes=None):
+  """One-hot encodes the multiclass labels.
+
+  Example usage:
+    labels = tf.constant([1, 4], dtype=tf.int32)
+    one_hot = OneHotEncoding(labels, num_classes=5)
+    one_hot.eval()    # evaluates to [0, 1, 0, 0, 1]
+
+  Args:
+    labels: A tensor of shape [None] corresponding to the labels.
+    num_classes: Number of classes in the dataset.
+  Returns:
+    onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
+      encoding of the labels.
+  Raises:
+    ValueError: if num_classes is not specified.
+  """
+  with tf.name_scope('OneHotEncoding', values=[labels]):
+    if num_classes is None:
+      raise ValueError('num_classes must be specified')
+
+    labels = tf.one_hot(labels, num_classes, 1, 0)
+    return tf.reduce_max(labels, 0)
+
+
+def rgb_to_gray(image):
+  """Converts a 3 channel RGB image to a 1 channel grayscale image.
+
+  Args:
+    image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
+           with pixel values varying between [0, 1].
+
+  Returns:
+    image: A single channel grayscale image -> [image, height, 1].
+  """
+  return tf.image.rgb_to_grayscale(image)
+
+
+def ssd_random_crop(image,
+                    boxes,
+                    labels,
+                    masks=None,
+                    keypoints=None,
+                    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                    aspect_ratio_range=((0.5, 2.0),) * 7,
+                    area_range=((0.1, 1.0),) * 7,
+                    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                    random_coef=(0.15,) * 7,
+                    seed=None):
+  """Random crop preprocessing with default parameters as in SSD paper.
+
+  Liu et al., SSD: Single shot multibox detector.
+  For further information on random crop preprocessing refer to RandomCrop
+  function above.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+  """
+  def random_crop_selector(selected_result, index):
+    """Applies random_crop_image to selected result.
+
+    Args:
+      selected_result: A tuple containing image, boxes, labels, keypoints (if
+                       not None), and masks (if not None).
+      index: The index that was randomly selected.
+
+    Returns: A tuple containing image, boxes, labels, keypoints (if not None),
+             and masks (if not None).
+    """
+    i = 3
+    image, boxes, labels = selected_result[:i]
+    selected_masks = None
+    selected_keypoints = None
+    if masks is not None:
+      selected_masks = selected_result[i]
+      i += 1
+    if keypoints is not None:
+      selected_keypoints = selected_result[i]
+
+    return random_crop_image(
+        image=image,
+        boxes=boxes,
+        labels=labels,
+        masks=selected_masks,
+        keypoints=selected_keypoints,
+        min_object_covered=min_object_covered[index],
+        aspect_ratio_range=aspect_ratio_range[index],
+        area_range=area_range[index],
+        overlap_thresh=overlap_thresh[index],
+        random_coef=random_coef[index],
+        seed=seed)
+
+  result = _apply_with_random_selector_tuples(
+      tuple(
+          t for t in (image, boxes, labels, masks, keypoints) if t is not None),
+      random_crop_selector,
+      num_cases=len(min_object_covered))
+  return result
+
+
+def ssd_random_crop_pad(image,
+                        boxes,
+                        labels,
+                        min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                        aspect_ratio_range=((0.5, 2.0),) * 6,
+                        area_range=((0.1, 1.0),) * 6,
+                        overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+                        random_coef=(0.15,) * 6,
+                        min_padded_size_ratio=(None,) * 6,
+                        max_padded_size_ratio=(None,) * 6,
+                        pad_color=(None,) * 6,
+                        seed=None):
+  """Random crop preprocessing with default parameters as in SSD paper.
+
+  Liu et al., SSD: Single shot multibox detector.
+  For further information on random crop preprocessing refer to RandomCrop
+  function above.
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio_range: allowed range for aspect ratio of cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    min_padded_size_ratio: min ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [0.0, 0.0].
+    max_padded_size_ratio: max ratio of padded image height and width to the
+                           input image's height and width. If None, it will
+                           be set to [2.0, 2.0].
+    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+               if set as None, it will be set to average color of the randomly
+               cropped image.
+    seed: random seed.
+
+  Returns:
+    image: Image shape will be [new_height, new_width, channels].
+    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+           form.
+    new_labels: new labels.
+  """
+  def random_crop_pad_selector(image_boxes_labels, index):
+    image, boxes, labels = image_boxes_labels
+
+    return random_crop_pad_image(
+        image,
+        boxes,
+        labels,
+        min_object_covered=min_object_covered[index],
+        aspect_ratio_range=aspect_ratio_range[index],
+        area_range=area_range[index],
+        overlap_thresh=overlap_thresh[index],
+        random_coef=random_coef[index],
+        min_padded_size_ratio=min_padded_size_ratio[index],
+        max_padded_size_ratio=max_padded_size_ratio[index],
+        pad_color=pad_color[index],
+        seed=seed)
+
+  new_image, new_boxes, new_labels = _apply_with_random_selector_tuples(
+      (image, boxes, labels),
+      random_crop_pad_selector,
+      num_cases=len(min_object_covered))
+  return new_image, new_boxes, new_labels
+
+
+def ssd_random_crop_fixed_aspect_ratio(
+    image,
+    boxes,
+    labels,
+    masks=None,
+    keypoints=None,
+    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+    aspect_ratio=1.0,
+    area_range=((0.1, 1.0),) * 7,
+    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+    random_coef=(0.15,) * 7,
+    seed=None):
+  """Random crop preprocessing with default parameters as in SSD paper.
+
+  Liu et al., SSD: Single shot multibox detector.
+  For further information on random crop preprocessing refer to RandomCrop
+  function above.
+
+  The only difference is that the aspect ratio of the crops are fixed.
+
+  Args:
+    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+           with pixel values varying between [0, 1].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    labels: rank 1 int32 tensor containing the object classes.
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    min_object_covered: the cropped image must cover at least this fraction of
+                        at least one of the input bounding boxes.
+    aspect_ratio: aspect ratio of the cropped image.
+    area_range: allowed range for area ratio between cropped image and the
+                original image.
+    overlap_thresh: minimum overlap thresh with new cropped
+                    image to keep the box.
+    random_coef: a random coefficient that defines the chance of getting the
+                 original image. If random_coef is 0, we will always get the
+                 cropped image, and if it is 1.0, we will always get the
+                 original image.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+
+    If masks, or keypoints is not None, the function also returns:
+
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  """
+  aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
+
+  crop_result = ssd_random_crop(image, boxes, labels, masks, keypoints,
+                                min_object_covered, aspect_ratio_range,
+                                area_range, overlap_thresh, random_coef, seed)
+  i = 3
+  new_image, new_boxes, new_labels = crop_result[:i]
+  new_masks = None
+  new_keypoints = None
+  if masks is not None:
+    new_masks = crop_result[i]
+    i += 1
+  if keypoints is not None:
+    new_keypoints = crop_result[i]
+  result = random_crop_to_aspect_ratio(
+      new_image,
+      new_boxes,
+      new_labels,
+      new_masks,
+      new_keypoints,
+      aspect_ratio=aspect_ratio,
+      seed=seed)
+
+  return result
+
+
+def get_default_func_arg_map(include_instance_masks=False,
+                             include_keypoints=False):
+  """Returns the default mapping from a preprocessor function to its args.
+
+  Args:
+    include_instance_masks: If True, preprocessing functions will modify the
+      instance masks, too.
+    include_keypoints: If True, preprocessing functions will modify the
+      keypoints, too.
+
+  Returns:
+    A map from preprocessing functions to the arguments they receive.
+  """
+  groundtruth_instance_masks = None
+  if include_instance_masks:
+    groundtruth_instance_masks = (
+        fields.InputDataFields.groundtruth_instance_masks)
+
+  groundtruth_keypoints = None
+  if include_keypoints:
+    groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
+
+  prep_func_arg_map = {
+      normalize_image: (fields.InputDataFields.image,),
+      random_horizontal_flip: (fields.InputDataFields.image,
+                               fields.InputDataFields.groundtruth_boxes,
+                               groundtruth_instance_masks,
+                               groundtruth_keypoints,),
+      random_pixel_value_scale: (fields.InputDataFields.image,),
+      random_image_scale: (fields.InputDataFields.image,
+                           groundtruth_instance_masks,),
+      random_rgb_to_gray: (fields.InputDataFields.image,),
+      random_adjust_brightness: (fields.InputDataFields.image,),
+      random_adjust_contrast: (fields.InputDataFields.image,),
+      random_adjust_hue: (fields.InputDataFields.image,),
+      random_adjust_saturation: (fields.InputDataFields.image,),
+      random_distort_color: (fields.InputDataFields.image,),
+      random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
+      random_crop_image: (fields.InputDataFields.image,
+                          fields.InputDataFields.groundtruth_boxes,
+                          fields.InputDataFields.groundtruth_classes,
+                          groundtruth_instance_masks,
+                          groundtruth_keypoints,),
+      random_pad_image: (fields.InputDataFields.image,
+                         fields.InputDataFields.groundtruth_boxes),
+      random_crop_pad_image: (fields.InputDataFields.image,
+                              fields.InputDataFields.groundtruth_boxes,
+                              fields.InputDataFields.groundtruth_classes),
+      random_crop_to_aspect_ratio: (fields.InputDataFields.image,
+                                    fields.InputDataFields.groundtruth_boxes,
+                                    fields.InputDataFields.groundtruth_classes,
+                                    groundtruth_instance_masks,
+                                    groundtruth_keypoints,),
+      random_black_patches: (fields.InputDataFields.image,),
+      retain_boxes_above_threshold: (
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          fields.InputDataFields.groundtruth_label_scores,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+      image_to_float: (fields.InputDataFields.image,),
+      random_resize_method: (fields.InputDataFields.image,),
+      resize_to_range: (fields.InputDataFields.image,
+                        groundtruth_instance_masks,),
+      scale_boxes_to_pixel_coordinates: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          groundtruth_keypoints,),
+      flip_boxes: (fields.InputDataFields.groundtruth_boxes,),
+      resize_image: (fields.InputDataFields.image,
+                     groundtruth_instance_masks,),
+      subtract_channel_mean: (fields.InputDataFields.image,),
+      one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
+      rgb_to_gray: (fields.InputDataFields.image,),
+      ssd_random_crop: (fields.InputDataFields.image,
+                        fields.InputDataFields.groundtruth_boxes,
+                        fields.InputDataFields.groundtruth_classes,
+                        groundtruth_instance_masks,
+                        groundtruth_keypoints,),
+      ssd_random_crop_pad: (fields.InputDataFields.image,
+                            fields.InputDataFields.groundtruth_boxes,
+                            fields.InputDataFields.groundtruth_classes),
+      ssd_random_crop_fixed_aspect_ratio: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          groundtruth_instance_masks,
+          groundtruth_keypoints,),
+  }
+
+  return prep_func_arg_map
+
+
+def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
+  """Preprocess images and bounding boxes.
+
+  Various types of preprocessing (to be implemented) based on the
+  preprocess_options dictionary e.g. "crop image" (affects image and possibly
+  boxes), "white balance image" (affects only image), etc. If self._options
+  is None, no preprocessing is done.
+
+  Args:
+    tensor_dict: dictionary that contains images, boxes, and can contain other
+                 things as well.
+                 images-> rank 4 float32 tensor contains
+                          1 image -> [1, height, width, 3].
+                          with pixel values varying between [0, 1]
+                 boxes-> rank 2 float32 tensor containing
+                         the bounding boxes -> [N, 4].
+                         Boxes are in normalized form meaning
+                         their coordinates vary between [0, 1].
+                         Each row is in the form
+                         of [ymin, xmin, ymax, xmax].
+    preprocess_options: It is a list of tuples, where each tuple contains a
+                        function and a dictionary that contains arguments and
+                        their values.
+    func_arg_map: mapping from preprocessing functions to arguments that they
+                  expect to receive and return.
+
+  Returns:
+    tensor_dict: which contains the preprocessed images, bounding boxes, etc.
+
+  Raises:
+    ValueError: (a) If the functions passed to Preprocess
+                    are not in func_arg_map.
+                (b) If the arguments that a function needs
+                    do not exist in tensor_dict.
+                (c) If image in tensor_dict is not rank 4
+  """
+  if func_arg_map is None:
+    func_arg_map = get_default_func_arg_map()
+
+  # changes the images to image (rank 4 to rank 3) since the functions
+  # receive rank 3 tensor for image
+  if fields.InputDataFields.image in tensor_dict:
+    images = tensor_dict[fields.InputDataFields.image]
+    if len(images.get_shape()) != 4:
+      raise ValueError('images in tensor_dict should be rank 4')
+    image = tf.squeeze(images, squeeze_dims=[0])
+    tensor_dict[fields.InputDataFields.image] = image
+
+  # Preprocess inputs based on preprocess_options
+  for option in preprocess_options:
+    func, params = option
+    if func not in func_arg_map:
+      raise ValueError('The function %s does not exist in func_arg_map' %
+                       (func.__name__))
+    arg_names = func_arg_map[func]
+    for a in arg_names:
+      if a is not None and a not in tensor_dict:
+        raise ValueError('The function %s requires argument %s' %
+                         (func.__name__, a))
+
+    def get_arg(key):
+      return tensor_dict[key] if key is not None else None
+    args = [get_arg(a) for a in arg_names]
+    results = func(*args, **params)
+    if not isinstance(results, (list, tuple)):
+      results = (results,)
+    # Removes None args since the return values will not contain those.
+    arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
+    for res, arg_name in zip(results, arg_names):
+      tensor_dict[arg_name] = res
+
+  # changes the image to images (rank 3 to rank 4) to be compatible to what
+  # we received in the first place
+  if fields.InputDataFields.image in tensor_dict:
+    image = tensor_dict[fields.InputDataFields.image]
+    images = tf.expand_dims(image, 0)
+    tensor_dict[fields.InputDataFields.image] = images
+
+  return tensor_dict
--- a/object_detection/core/preprocessor_test.py
+++ b/object_detection/core/preprocessor_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.preprocessor."""
+
+import numpy as np
+import six
+
+import tensorflow as tf
+
+from object_detection.core import preprocessor
+from object_detection.core import standard_fields as fields
+
+if six.PY2:
+  import mock # pylint: disable=g-import-not-at-top
+else:
+  from unittest import mock # pylint: disable=g-import-not-at-top
+
+
+class PreprocessorTest(tf.test.TestCase):
+
+  def createColorfulTestImage(self):
+    ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
+    ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
+    ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
+    imr = tf.concat([ch255, ch0, ch0], 3)
+    img = tf.concat([ch255, ch255, ch0], 3)
+    imb = tf.concat([ch255, ch0, ch255], 3)
+    imw = tf.concat([ch128, ch128, ch128], 3)
+    imu = tf.concat([imr, img], 2)
+    imd = tf.concat([imb, imw], 2)
+    im = tf.concat([imu, imd], 1)
+    return im
+
+  def createTestImages(self):
+    images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
+                             [0, 128, 128, 128], [192, 192, 128, 128]]],
+                           dtype=tf.uint8)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
+                             [0, 128, 192, 192], [192, 192, 128, 192]]],
+                           dtype=tf.uint8)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
+                             [0, 128, 128, 0], [192, 192, 192, 128]]],
+                           dtype=tf.uint8)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def createTestBoxes(self):
+    boxes = tf.constant(
+        [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
+    return boxes
+
+  def createTestLabelScores(self):
+    return tf.constant([1.0, 0.5], dtype=tf.float32)
+
+  def createTestLabelScoresWithMissingScore(self):
+    return tf.constant([0.5, np.nan], dtype=tf.float32)
+
+  def createTestMasks(self):
+    mask = np.array([
+        [[255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0]],
+        [[255.0, 255.0, 0.0],
+         [255.0, 255.0, 0.0],
+         [255.0, 255.0, 0.0]]])
+    return tf.constant(mask, dtype=tf.float32)
+
+  def createTestKeypoints(self):
+    keypoints = np.array([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def createTestKeypointsInsideCrop(self):
+    keypoints = np.array([
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def createTestKeypointsOutsideCrop(self):
+    keypoints = np.array([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def createKeypointFlipPermutation(self):
+    return np.array([0, 2, 1], dtype=np.int32)
+
+  def createTestLabels(self):
+    labels = tf.constant([1, 2], dtype=tf.int32)
+    return labels
+
+  def createTestBoxesOutOfImage(self):
+    boxes = tf.constant(
+        [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
+    return boxes
+
+  def expectedImagesAfterNormalization(self):
+    images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
+                             [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
+                             [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
+                             [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedMaxImageAfterColorScale(self):
+    images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
+                             [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
+                             [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
+                             [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedMinImageAfterColorScale(self):
+    images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
+                             [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
+                             [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
+                             [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedImagesAfterMirroring(self):
+    images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
+                             [0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
+                           dtype=tf.float32)
+    images_r = tf.expand_dims(images_r, 3)
+    images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
+                             [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
+                           dtype=tf.float32)
+    images_g = tf.expand_dims(images_g, 3)
+    images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
+                             [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
+                           dtype=tf.float32)
+    images_b = tf.expand_dims(images_b, 3)
+    images = tf.concat([images_r, images_g, images_b], 3)
+    return images
+
+  def expectedBoxesAfterMirroring(self):
+    boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
+                        dtype=tf.float32)
+    return boxes
+
+  def expectedBoxesAfterXY(self):
+    boxes = tf.constant([[0.25, 0.0, 1.0, 0.75], [0.5, 0.25, 1, 0.75]],
+                        dtype=tf.float32)
+    return boxes
+
+  def expectedMasksAfterMirroring(self):
+    mask = np.array([
+        [[0.0, 0.0, 255.0],
+         [0.0, 0.0, 255.0],
+         [0.0, 0.0, 255.0]],
+        [[0.0, 255.0, 255.0],
+         [0.0, 255.0, 255.0],
+         [0.0, 255.0, 255.0]]])
+    return tf.constant(mask, dtype=tf.float32)
+
+  def expectedLabelScoresAfterThresholding(self):
+    return tf.constant([1.0], dtype=tf.float32)
+
+  def expectedBoxesAfterThresholding(self):
+    return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32)
+
+  def expectedLabelsAfterThresholding(self):
+    return tf.constant([1], dtype=tf.float32)
+
+  def expectedMasksAfterThresholding(self):
+    mask = np.array([
+        [[255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0],
+         [255.0, 0.0, 0.0]]])
+    return tf.constant(mask, dtype=tf.float32)
+
+  def expectedKeypointsAfterThresholding(self):
+    keypoints = np.array([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
+    ])
+    return tf.constant(keypoints, dtype=tf.float32)
+
+  def expectedLabelScoresAfterThresholdingWithMissingScore(self):
+    return tf.constant([np.nan], dtype=tf.float32)
+
+  def expectedBoxesAfterThresholdingWithMissingScore(self):
+    return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32)
+
+  def expectedLabelsAfterThresholdingWithMissingScore(self):
+    return tf.constant([2], dtype=tf.float32)
+
+  def testNormalizeImage(self):
+    preprocess_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 256,
+        'target_minval': -1,
+        'target_maxval': 1
+    })]
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images = tensor_dict[fields.InputDataFields.image]
+    images_expected = self.expectedImagesAfterNormalization()
+
+    with self.test_session() as sess:
+      (images_, images_expected_) = sess.run(
+          [images, images_expected])
+      images_shape_ = images_.shape
+      images_expected_shape_ = images_expected_.shape
+      expected_shape = [1, 4, 4, 3]
+      self.assertAllEqual(images_expected_shape_, images_shape_)
+      self.assertAllEqual(images_shape_, expected_shape)
+      self.assertAllClose(images_, images_expected_)
+
+  def testRetainBoxesAboveThreshold(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    (retained_boxes, retained_labels,
+     retained_label_scores) = preprocessor.retain_boxes_above_threshold(
+         boxes, labels, label_scores, threshold=0.6)
+    with self.test_session() as sess:
+      (retained_boxes_, retained_labels_, retained_label_scores_,
+       expected_retained_boxes_, expected_retained_labels_,
+       expected_retained_label_scores_) = sess.run([
+           retained_boxes, retained_labels, retained_label_scores,
+           self.expectedBoxesAfterThresholding(),
+           self.expectedLabelsAfterThresholding(),
+           self.expectedLabelScoresAfterThresholding()])
+      self.assertAllClose(
+          retained_boxes_, expected_retained_boxes_)
+      self.assertAllClose(
+          retained_labels_, expected_retained_labels_)
+      self.assertAllClose(
+          retained_label_scores_, expected_retained_label_scores_)
+
+  def testRetainBoxesAboveThresholdWithMasks(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    masks = self.createTestMasks()
+    _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
+        boxes, labels, label_scores, masks, threshold=0.6)
+    with self.test_session() as sess:
+      retained_masks_, expected_retained_masks_ = sess.run([
+          retained_masks, self.expectedMasksAfterThresholding()])
+
+      self.assertAllClose(
+          retained_masks_, expected_retained_masks_)
+
+  def testRetainBoxesAboveThresholdWithKeypoints(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    keypoints = self.createTestKeypoints()
+    (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
+        boxes, labels, label_scores, keypoints=keypoints, threshold=0.6)
+    with self.test_session() as sess:
+      (retained_keypoints_,
+       expected_retained_keypoints_) = sess.run([
+           retained_keypoints,
+           self.expectedKeypointsAfterThresholding()])
+
+      self.assertAllClose(
+          retained_keypoints_, expected_retained_keypoints_)
+
+  def testRetainBoxesAboveThresholdWithMissingScore(self):
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScoresWithMissingScore()
+    (retained_boxes, retained_labels,
+     retained_label_scores) = preprocessor.retain_boxes_above_threshold(
+         boxes, labels, label_scores, threshold=0.6)
+    with self.test_session() as sess:
+      (retained_boxes_, retained_labels_, retained_label_scores_,
+       expected_retained_boxes_, expected_retained_labels_,
+       expected_retained_label_scores_) = sess.run([
+           retained_boxes, retained_labels, retained_label_scores,
+           self.expectedBoxesAfterThresholdingWithMissingScore(),
+           self.expectedLabelsAfterThresholdingWithMissingScore(),
+           self.expectedLabelScoresAfterThresholdingWithMissingScore()])
+      self.assertAllClose(
+          retained_boxes_, expected_retained_boxes_)
+      self.assertAllClose(
+          retained_labels_, expected_retained_labels_)
+      self.assertAllClose(
+          retained_label_scores_, expected_retained_label_scores_)
+
+  def testRandomFlipBoxes(self):
+    boxes = self.createTestBoxes()
+
+    # Case where the boxes are flipped.
+    boxes_expected1 = self.expectedBoxesAfterMirroring()
+
+    # Case where the boxes are not flipped.
+    boxes_expected2 = boxes
+
+    # After elementwise multiplication, the result should be all-zero since one
+    # of them is all-zero.
+    boxes_diff = tf.multiply(
+        tf.squared_difference(boxes, boxes_expected1),
+        tf.squared_difference(boxes, boxes_expected2))
+    expected_result = tf.zeros_like(boxes_diff)
+
+    with self.test_session() as sess:
+      (boxes_diff, expected_result) = sess.run([boxes_diff, expected_result])
+      self.assertAllEqual(boxes_diff, expected_result)
+
+  def testFlipMasks(self):
+    test_mask = self.createTestMasks()
+    flipped_mask = preprocessor._flip_masks(test_mask)
+    expected_mask = self.expectedMasksAfterMirroring()
+    with self.test_session() as sess:
+      flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
+      self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
+
+  def testRandomHorizontalFlip(self):
+    preprocess_options = [(preprocessor.random_horizontal_flip, {})]
+    images = self.expectedImagesAfterNormalization()
+    boxes = self.createTestBoxes()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes}
+    images_expected1 = self.expectedImagesAfterMirroring()
+    boxes_expected1 = self.expectedBoxesAfterMirroring()
+    images_expected2 = images
+    boxes_expected2 = boxes
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images = tensor_dict[fields.InputDataFields.image]
+    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+    boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
+    boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
+    boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
+    boxes_diff_expected = tf.zeros_like(boxes_diff)
+
+    images_diff1 = tf.squared_difference(images, images_expected1)
+    images_diff2 = tf.squared_difference(images, images_expected2)
+    images_diff = tf.multiply(images_diff1, images_diff2)
+    images_diff_expected = tf.zeros_like(images_diff)
+
+    with self.test_session() as sess:
+      (images_diff_, images_diff_expected_, boxes_diff_,
+       boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
+                                         boxes_diff, boxes_diff_expected])
+      self.assertAllClose(boxes_diff_, boxes_diff_expected_)
+      self.assertAllClose(images_diff_, images_diff_expected_)
+
+  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
+    """Smoke-tests random_horizontal_flip with masks and keypoints enabled.
+
+    Only verifies that boxes, masks and keypoints can be fetched after the
+    flip has been routed through preprocess(); the values are not inspected.
+    """
+    image_height = 3
+    image_width = 3
+    images = tf.random_uniform([1, image_height, image_width, 3])
+    boxes = self.createTestBoxes()
+    masks = self.createTestMasks()
+    keypoints = self.createTestKeypoints()
+    keypoint_flip_permutation = self.createKeypointFlipPermutation()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+    # The keypoint permutation is the only explicit option given to the flip.
+    preprocess_options = [
+        (preprocessor.random_horizontal_flip,
+         {'keypoint_flip_permutation': keypoint_flip_permutation})]
+    # Arg map requested with both instance masks and keypoints included.
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True, include_keypoints=True)
+    tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
+    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+    keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+    with self.test_session() as sess:
+      boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
+      self.assertIsNotNone(boxes)
+      self.assertIsNotNone(masks)
+      self.assertIsNotNone(keypoints)
+
+  def testRandomPixelValueScale(self):
+    """Checks random_pixel_value_scale keeps pixels within 0.9x..1.1x."""
+    normalize_kwargs = {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }
+    steps = [
+        (preprocessor.normalize_image, normalize_kwargs),
+        (preprocessor.random_pixel_value_scale, {}),
+    ]
+    raw_images = self.createTestImages()
+    processed = preprocessor.preprocess(
+        {fields.InputDataFields.image: raw_images}, steps)
+    # Bounds: the raw image divided by 255 and scaled by 0.9 / 1.1.
+    lower_bound = tf.to_float(raw_images) * 0.9 / 255.0
+    upper_bound = tf.to_float(raw_images) * 1.1 / 255.0
+    scaled_images = processed[fields.InputDataFields.image]
+    above_lower = tf.greater_equal(scaled_images, lower_bound)
+    below_upper = tf.less_equal(scaled_images, upper_bound)
+    all_true = tf.fill([1, 4, 4, 3], True)
+    with self.test_session() as sess:
+      above_lower_, below_upper_, all_true_ = sess.run(
+          [above_lower, below_upper, all_true])
+      self.assertAllClose(above_lower_, all_true_)
+      self.assertAllClose(below_upper_, all_true_)
+
+  def testRandomImageScale(self):
+    """Checks random_image_scale keeps axes 1 and 2 within [0.5x, 2x].
+
+    The bounds are asserted on the dynamic shapes of the original and the
+    scaled image tensors.
+    """
+    preprocess_options = [(preprocessor.random_image_scale, {})]
+    images_original = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images_scaled = tensor_dict[fields.InputDataFields.image]
+    images_original_shape = tf.shape(images_original)
+    images_scaled_shape = tf.shape(images_scaled)
+    with self.test_session() as sess:
+      (images_original_shape_, images_scaled_shape_) = sess.run(
+          [images_original_shape, images_scaled_shape])
+      # Dedicated comparison asserts report both operands on failure, which
+      # assertTrue on an already-evaluated boolean cannot do.
+      for axis in (1, 2):
+        original_dim = images_original_shape_[axis]
+        scaled_dim = images_scaled_shape_[axis]
+        self.assertLessEqual(original_dim * 0.5, scaled_dim)
+        self.assertGreaterEqual(original_dim * 2.0, scaled_dim)
+
+  def testRandomRGBtoGray(self):
+    """Checks the per-channel invariant of random_rgb_to_gray's output.
+
+    For every channel the test multiplies two squared differences: output
+    channel vs. original channel, and output channel vs. another output
+    channel. The product is asserted to be zero everywhere, so at each
+    position at least one of the two differences must vanish.
+    """
+    preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
+    images_original = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images_original}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+    images_gray = tensor_dict[fields.InputDataFields.image]
+    # Split both tensors into three equal slices along axis 3.
+    images_gray_r, images_gray_g, images_gray_b = tf.split(
+        value=images_gray, num_or_size_splits=3, axis=3)
+    images_r, images_g, images_b = tf.split(
+        value=images_original, num_or_size_splits=3, axis=3)
+    # *_diff1: original slice vs. output slice.
+    # *_diff2: output slice vs. the next output slice (r->g, g->b, b->r).
+    images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
+                                           tf.to_float(images_gray_r))
+    images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
+                                           tf.to_float(images_gray_g))
+    images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
+    images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
+                                           tf.to_float(images_gray_g))
+    images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
+                                           tf.to_float(images_gray_b))
+    images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
+    images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
+                                           tf.to_float(images_gray_b))
+    images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
+                                           tf.to_float(images_gray_r))
+    images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
+    # Zero reference with a hard-coded shape; assumes createTestImages()
+    # yields a [1, 4, 4, 3] tensor -- TODO confirm against the fixture.
+    image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
+    with self.test_session() as sess:
+      (images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run(
+          [images_r_diff, images_g_diff, images_b_diff, image_zero1])
+      self.assertAllClose(images_r_diff_, image_zero1_)
+      self.assertAllClose(images_g_diff_, image_zero1_)
+      self.assertAllClose(images_b_diff_, image_zero1_)
+
+  def testRandomAdjustBrightness(self):
+    """Checks random_adjust_brightness preserves the image shape."""
+    pipeline = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.random_adjust_brightness, {}),
+    ]
+    source_images = self.createTestImages()
+    result = preprocessor.preprocess(
+        {fields.InputDataFields.image: source_images}, pipeline)
+    adjusted_images = result[fields.InputDataFields.image]
+    shape_fetches = [tf.shape(source_images), tf.shape(adjusted_images)]
+    with self.test_session() as sess:
+      source_shape_, adjusted_shape_ = sess.run(shape_fetches)
+      self.assertAllEqual(source_shape_, adjusted_shape_)
+
+  def testRandomAdjustContrast(self):
+    """Checks random_adjust_contrast preserves the image shape."""
+    normalize_step = (preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })
+    contrast_step = (preprocessor.random_adjust_contrast, {})
+    input_images = self.createTestImages()
+    output_dict = preprocessor.preprocess(
+        {fields.InputDataFields.image: input_images},
+        [normalize_step, contrast_step])
+    output_images = output_dict[fields.InputDataFields.image]
+    input_shape = tf.shape(input_images)
+    output_shape = tf.shape(output_images)
+    with self.test_session() as sess:
+      input_shape_, output_shape_ = sess.run([input_shape, output_shape])
+      self.assertAllEqual(input_shape_, output_shape_)
+
+  def testRandomAdjustHue(self):
+    """Checks random_adjust_hue preserves the image shape."""
+    normalization = {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }
+    ops_to_apply = [(preprocessor.normalize_image, normalization)]
+    ops_to_apply += [(preprocessor.random_adjust_hue, {})]
+    before = self.createTestImages()
+    after_dict = preprocessor.preprocess(
+        {fields.InputDataFields.image: before}, ops_to_apply)
+    after = after_dict[fields.InputDataFields.image]
+    before_shape = tf.shape(before)
+    after_shape = tf.shape(after)
+    with self.test_session() as sess:
+      before_shape_, after_shape_ = sess.run([before_shape, after_shape])
+      self.assertAllEqual(before_shape_, after_shape_)
+
+  def testRandomDistortColor(self):
+    """Checks random_distort_color preserves the image shape."""
+    distort_pipeline = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.random_distort_color, {}),
+    ]
+    plain_images = self.createTestImages()
+    # The input shape op is created before the pipeline is built.
+    plain_shape = tf.shape(plain_images)
+    distorted = preprocessor.preprocess(
+        {fields.InputDataFields.image: plain_images}, distort_pipeline)
+    distorted_shape = tf.shape(distorted[fields.InputDataFields.image])
+    with self.test_session() as sess:
+      plain_shape_, distorted_shape_ = sess.run(
+          [plain_shape, distorted_shape])
+      self.assertAllEqual(plain_shape_, distorted_shape_)
+
+  def testRandomJitterBoxes(self):
+    """Checks random_jitter_boxes preserves the shape of the box tensor."""
+    jitter_steps = [(preprocessor.random_jitter_boxes, {})]
+    input_boxes = self.createTestBoxes()
+    input_boxes_shape = tf.shape(input_boxes)
+    jittered_dict = preprocessor.preprocess(
+        {fields.InputDataFields.groundtruth_boxes: input_boxes}, jitter_steps)
+    jittered_boxes = jittered_dict[fields.InputDataFields.groundtruth_boxes]
+    jittered_boxes_shape = tf.shape(jittered_boxes)
+
+    with self.test_session() as sess:
+      input_shape_, jittered_shape_ = sess.run(
+          [input_boxes_shape, jittered_boxes_shape])
+      self.assertAllEqual(input_shape_, jittered_shape_)
+
+  def testRandomCropImage(self):
+    """Checks random_crop_image preserves ranks and the channel count of 3."""
+    normalize_args = {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }
+    crop_pipeline = [
+        (preprocessor.normalize_image, normalize_args),
+        (preprocessor.random_crop_image, {}),
+    ]
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    cropped = preprocessor.preprocess({
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }, crop_pipeline)
+    cropped_images = cropped[fields.InputDataFields.image]
+    cropped_boxes = cropped[fields.InputDataFields.groundtruth_boxes]
+    rank_fetches = [
+        tf.rank(boxes), tf.rank(cropped_boxes),
+        tf.rank(images), tf.rank(cropped_images),
+    ]
+    # The static size of axis 3 must still be 3 after the crop.
+    self.assertEqual(3, cropped_images.get_shape()[3])
+
+    with self.test_session() as sess:
+      (boxes_rank_, cropped_boxes_rank_, images_rank_,
+       cropped_images_rank_) = sess.run(rank_fetches)
+      self.assertAllEqual(boxes_rank_, cropped_boxes_rank_)
+      self.assertAllEqual(images_rank_, cropped_images_rank_)
+
+  def testRandomCropImageGrayscale(self):
+    """Checks random_crop_image after rgb_to_gray keeps ranks and 1 channel."""
+    gray_crop_steps = []
+    gray_crop_steps.append((preprocessor.rgb_to_gray, {}))
+    gray_crop_steps.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1,
+    }))
+    gray_crop_steps.append((preprocessor.random_crop_image, {}))
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    inputs = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }
+    outputs = preprocessor.preprocess(inputs, gray_crop_steps)
+    gray_images = outputs[fields.InputDataFields.image]
+    gray_boxes = outputs[fields.InputDataFields.groundtruth_boxes]
+    rank_ops = [
+        tf.rank(boxes),
+        tf.rank(gray_boxes),
+        tf.rank(images),
+        tf.rank(gray_images),
+    ]
+    # After the grayscale conversion the static size of axis 3 must be 1.
+    self.assertEqual(1, gray_images.get_shape()[3])
+
+    with self.test_session() as sess:
+      (boxes_rank_, gray_boxes_rank_, images_rank_,
+       gray_images_rank_) = sess.run(rank_ops)
+      self.assertAllEqual(boxes_rank_, gray_boxes_rank_)
+      self.assertAllEqual(images_rank_, gray_images_rank_)
+
+  def testRandomCropImageWithBoxOutOfImage(self):
+    """Checks random_crop_image keeps ranks with out-of-image test boxes."""
+    steps = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.random_crop_image, {}),
+    ]
+    images = self.createTestImages()
+    outside_boxes = self.createTestBoxesOutOfImage()
+    labels = self.createTestLabels()
+    crop_result = preprocessor.preprocess({
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: outside_boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }, steps)
+    result_images = crop_result[fields.InputDataFields.image]
+    result_boxes = crop_result[fields.InputDataFields.groundtruth_boxes]
+    boxes_rank = tf.rank(outside_boxes)
+    result_boxes_rank = tf.rank(result_boxes)
+    images_rank = tf.rank(images)
+    result_images_rank = tf.rank(result_images)
+
+    with self.test_session() as sess:
+      ranks = sess.run([boxes_rank, result_boxes_rank, images_rank,
+                        result_images_rank])
+      (boxes_rank_, result_boxes_rank_, images_rank_,
+       result_images_rank_) = ranks
+      self.assertAllEqual(boxes_rank_, result_boxes_rank_)
+      self.assertAllEqual(images_rank_, result_images_rank_)
+
+  def testRandomCropImageWithRandomCoefOne(self):
+    """Checks random_crop_image with random_coef=1.0 returns its inputs.
+
+    The image is normalized first; the crop is then applied to the normalized
+    tensors and each output is compared with the corresponding input.
+    """
+    normalize_step = (preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })
+
+    raw_images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    normalized_dict = preprocessor.preprocess({
+        fields.InputDataFields.image: raw_images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }, [normalize_step])
+    # Reference image for the comparison is the normalized one.
+    images = normalized_dict[fields.InputDataFields.image]
+
+    crop_step = (preprocessor.random_crop_image, {'random_coef': 1.0})
+    cropped_dict = preprocessor.preprocess(normalized_dict, [crop_step])
+
+    cropped_images = cropped_dict[fields.InputDataFields.image]
+    cropped_boxes = cropped_dict[fields.InputDataFields.groundtruth_boxes]
+    cropped_labels = cropped_dict[fields.InputDataFields.groundtruth_classes]
+    fetches = [
+        tf.shape(boxes), tf.shape(cropped_boxes),
+        tf.shape(images), tf.shape(cropped_images),
+        images, cropped_images,
+        boxes, cropped_boxes,
+        labels, cropped_labels,
+    ]
+
+    with self.test_session() as sess:
+      (boxes_shape_, cropped_boxes_shape_, images_shape_,
+       cropped_images_shape_, images_, cropped_images_,
+       boxes_, cropped_boxes_, labels_, cropped_labels_) = sess.run(fetches)
+      self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
+      self.assertAllEqual(images_shape_, cropped_images_shape_)
+      self.assertAllClose(images_, cropped_images_)
+      self.assertAllClose(boxes_, cropped_boxes_)
+      self.assertAllEqual(labels_, cropped_labels_)
+
+  def testRandomCropWithMockSampleDistortedBoundingBox(self):
+    """Checks random_crop_image boxes/labels for a fixed, mocked crop window.
+
+    tf.image.sample_distorted_bounding_box is patched so the crop is
+    deterministic; the resulting boxes and labels are compared with
+    hard-coded expectations.
+    """
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createColorfulTestImage()
+    boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
+                         [0.2, 0.4, 0.75, 0.75],
+                         [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
+    labels = tf.constant([1, 7, 11], dtype=tf.int32)
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    # First pass: normalization only.
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    # Second pass: the crop, built while the sampler is patched.
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
+      # Fixed stand-in for the sampler's three-tensor return value.
+      mock_sample_distorted_bounding_box.return_value = (tf.constant(
+          [6, 143, 0], dtype=tf.int32), tf.constant(
+              [190, 237, -1], dtype=tf.int32), tf.constant(
+                  [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+
+      distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                      preprocessing_options)
+
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      # Only two of the three input boxes (labels 7 and 11) are expected;
+      # presumably the first box does not survive the mocked crop window --
+      # confirm against random_crop_image's pruning rules.
+      expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733],
+                                    [0.28421, 0.0, 0.38947365, 0.57805908]],
+                                   dtype=tf.float32)
+      expected_labels = tf.constant([7, 11], dtype=tf.int32)
+
+      with self.test_session() as sess:
+        (distorted_boxes_, distorted_labels_,
+         expected_boxes_, expected_labels_) = sess.run(
+             [distorted_boxes, distorted_labels,
+              expected_boxes, expected_labels])
+        self.assertAllClose(distorted_boxes_, expected_boxes_)
+        self.assertAllEqual(distorted_labels_, expected_labels_)
+
+  def testStrictRandomCropImageWithMasks(self):
+    """Checks _strict_random_crop_image crops masks along with the image.
+
+    tf.image.sample_distorted_bounding_box is patched to return fixed tensors
+    so the cropped shapes and boxes are deterministic.
+    """
+    image = self.createColorfulTestImage()[0]
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+    sampler_patch = mock.patch.object(
+        tf.image, 'sample_distorted_bounding_box')
+    with sampler_patch as fake_sampler:
+      fake_sampler.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      crop_image, crop_boxes, crop_labels, crop_masks = (
+          preprocessor._strict_random_crop_image(
+              image, boxes, labels, masks=masks))
+      with self.test_session() as sess:
+        crop_image, crop_boxes, crop_labels, crop_masks = sess.run(
+            [crop_image, crop_boxes, crop_labels, crop_masks])
+
+        wanted_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        self.assertAllEqual(crop_image.shape, [190, 237, 3])
+        self.assertAllEqual(crop_masks.shape, [2, 190, 237])
+        self.assertAllClose(crop_boxes.flatten(), wanted_boxes.flatten())
+
+  def testStrictRandomCropImageWithKeypoints(self):
+    """Checks _strict_random_crop_image output when keypoints are supplied.
+
+    tf.image.sample_distorted_bounding_box is patched to return fixed tensors;
+    the cropped image shape, boxes and keypoints are then compared with
+    hard-coded expectations.
+    """
+    image = self.createColorfulTestImage()[0]
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypoints()
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      # Fixed stand-in for the sampler's three-tensor return value.
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      (new_image, new_boxes, new_labels,
+       new_keypoints) = preprocessor._strict_random_crop_image(
+           image, boxes, labels, keypoints=keypoints)
+      with self.test_session() as sess:
+        new_image, new_boxes, new_labels, new_keypoints = sess.run([
+            new_image, new_boxes, new_labels, new_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        # The first instance's keypoints are expected to be NaN; presumably
+        # they fall outside the mocked crop window -- confirm against the
+        # createTestKeypoints fixture.
+        expected_keypoints = np.array([
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]]
+        ], dtype=np.float32)
+        self.assertAllEqual(new_image.shape, [190, 237, 3])
+        self.assertAllClose(
+            new_boxes.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            new_keypoints.flatten(), expected_keypoints.flatten())
+
+  def testRunRandomCropImageWithMasks(self):
+    """Checks random_crop_image via preprocess() when masks are included.
+
+    tf.image.sample_distorted_bounding_box is patched to return fixed tensors
+    so shapes, labels and boxes can be compared with hard-coded values.
+    """
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+    inputs = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+    }
+    arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+    crop_steps = [(preprocessor.random_crop_image, {})]
+
+    sampler_patch = mock.patch.object(
+        tf.image, 'sample_distorted_bounding_box')
+    with sampler_patch as fake_sampler:
+      fake_sampler.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      outputs = preprocessor.preprocess(
+          inputs, crop_steps, func_arg_map=arg_map)
+      fetches = [
+          outputs[fields.InputDataFields.image],
+          outputs[fields.InputDataFields.groundtruth_boxes],
+          outputs[fields.InputDataFields.groundtruth_classes],
+          outputs[fields.InputDataFields.groundtruth_instance_masks],
+      ]
+      with self.test_session() as sess:
+        image_, boxes_, labels_, masks_ = sess.run(fetches)
+
+        wanted_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        self.assertAllEqual(image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(masks_.shape, [2, 190, 237])
+        self.assertAllEqual(labels_, [1, 2])
+        self.assertAllClose(boxes_.flatten(), wanted_boxes.flatten())
+
+  def testRunRandomCropImageWithKeypointsInsideCrop(self):
+    """Checks random_crop_image via preprocess() with in-crop keypoints.
+
+    tf.image.sample_distorted_bounding_box is patched to return fixed tensors;
+    image shape, labels, boxes and keypoints are compared with hard-coded
+    expectations. Both instances are expected to carry the same keypoints.
+    """
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypointsInsideCrop()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    # Arg map requested with keypoints included.
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      # Fixed stand-in for the sampler's three-tensor return value.
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        # No NaN entries: every keypoint is expected to have finite
+        # coordinates after the crop.
+        expected_keypoints = np.array([
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]],
+            [[0.38947368, 0.07173],
+             [0.49473682, 0.24050637],
+             [0.60000002, 0.40928277]]
+        ])
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            distorted_keypoints_.flatten(), expected_keypoints.flatten())
+
+  def testRunRandomCropImageWithKeypointsOutsideCrop(self):
+    """Checks random_crop_image via preprocess() with out-of-crop keypoints.
+
+    tf.image.sample_distorted_bounding_box is patched to return fixed tensors;
+    image shape, labels and boxes are compared with hard-coded expectations
+    and every keypoint coordinate is expected to be NaN.
+    """
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypointsOutsideCrop()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    # Arg map requested with keypoints included.
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+    with mock.patch.object(
+        tf.image,
+        'sample_distorted_bounding_box'
+    ) as mock_sample_distorted_bounding_box:
+      # Fixed stand-in for the sampler's three-tensor return value.
+      mock_sample_distorted_bounding_box.return_value = (
+          tf.constant([6, 143, 0], dtype=tf.int32),
+          tf.constant([190, 237, -1], dtype=tf.int32),
+          tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run(
+             [distorted_image, distorted_boxes, distorted_labels,
+              distorted_keypoints])
+
+        expected_boxes = np.array([
+            [0.0, 0.0, 0.75789469, 1.0],
+            [0.23157893, 0.24050637, 0.75789469, 1.0],
+        ], dtype=np.float32)
+        # All-NaN expectation for both instances; presumably NaN marks
+        # keypoints that lie outside the crop -- confirm in preprocessor.py.
+        expected_keypoints = np.array([
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+            [[np.nan, np.nan],
+             [np.nan, np.nan],
+             [np.nan, np.nan]],
+        ])
+        self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+        self.assertAllEqual(distorted_labels_, [1, 2])
+        self.assertAllClose(
+            distorted_boxes_.flatten(), expected_boxes.flatten())
+        self.assertAllClose(
+            distorted_keypoints_.flatten(), expected_keypoints.flatten())
+
+  def testRunRetainBoxesAboveThreshold(self):
+    """Checks retain_boxes_above_threshold (threshold 0.6) via preprocess().
+
+    Retained boxes, labels and label scores are compared with the
+    expected*AfterThresholding fixtures.
+    """
+    inputs = {
+        fields.InputDataFields.groundtruth_boxes: self.createTestBoxes(),
+        fields.InputDataFields.groundtruth_classes: self.createTestLabels(),
+        fields.InputDataFields.groundtruth_label_scores:
+            self.createTestLabelScores()
+    }
+    threshold_steps = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    outputs = preprocessor.preprocess(inputs, threshold_steps)
+    kept_boxes = outputs[fields.InputDataFields.groundtruth_boxes]
+    kept_labels = outputs[fields.InputDataFields.groundtruth_classes]
+    kept_scores = outputs[fields.InputDataFields.groundtruth_label_scores]
+
+    with self.test_session() as sess:
+      results = sess.run([
+          kept_boxes, kept_labels, kept_scores,
+          self.expectedBoxesAfterThresholding(),
+          self.expectedLabelsAfterThresholding(),
+          self.expectedLabelScoresAfterThresholding()])
+      (kept_boxes_, kept_labels_, kept_scores_,
+       want_boxes_, want_labels_, want_scores_) = results
+
+      self.assertAllClose(kept_boxes_, want_boxes_)
+      self.assertAllClose(kept_labels_, want_labels_)
+      self.assertAllClose(kept_scores_, want_scores_)
+
+  def testRunRetainBoxesAboveThresholdWithMasks(self):
+    """Checks masks retained by retain_boxes_above_threshold (threshold 0.6).
+
+    The op runs through preprocess() with an arg map that includes instance
+    masks; the result is compared with expectedMasksAfterThresholding().
+    """
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    masks = self.createTestMasks()
+
+    inputs = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_label_scores: label_scores,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+    arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+    threshold_steps = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    outputs = preprocessor.preprocess(
+        inputs, threshold_steps, func_arg_map=arg_map)
+    kept_masks = outputs[fields.InputDataFields.groundtruth_instance_masks]
+
+    with self.test_session() as sess:
+      kept_masks_, want_masks_ = sess.run(
+          [kept_masks, self.expectedMasksAfterThresholding()])
+      self.assertAllClose(kept_masks_, want_masks_)
+
+  def testRunRetainBoxesAboveThresholdWithKeypoints(self):
+    """Checks keypoints retained by retain_boxes_above_threshold (0.6).
+
+    The op runs through preprocess() with an arg map that includes keypoints;
+    the result is compared with expectedKeypointsAfterThresholding().
+    """
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    label_scores = self.createTestLabelScores()
+    keypoints = self.createTestKeypoints()
+
+    inputs = {
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_label_scores: label_scores,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+    arg_map = preprocessor.get_default_func_arg_map(include_keypoints=True)
+    threshold_steps = [
+        (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+    ]
+
+    outputs = preprocessor.preprocess(
+        inputs, threshold_steps, func_arg_map=arg_map)
+    kept_keypoints = outputs[fields.InputDataFields.groundtruth_keypoints]
+
+    with self.test_session() as sess:
+      kept_keypoints_, want_keypoints_ = sess.run(
+          [kept_keypoints, self.expectedKeypointsAfterThresholding()])
+      self.assertAllClose(kept_keypoints_, want_keypoints_)
+
+  def testRunRandomCropToAspectRatioWithMasks(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True)
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
+    with mock.patch.object(preprocessor,
+                           '_random_integer') as mock_random_integer:
+      mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_masks = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_instance_masks]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_masks_) = sess.run([
+             distorted_image, distorted_boxes, distorted_labels, distorted_masks
+         ])
+
+        expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+        self.assertAllEqual(distorted_labels_, [1])
+        self.assertAllClose(distorted_boxes_.flatten(),
+                            expected_boxes.flatten())
+        self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
+
+  def testRunRandomCropToAspectRatioWithKeypoints(self):
+    image = self.createColorfulTestImage()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    keypoints = self.createTestKeypoints()
+
+    tensor_dict = {
+        fields.InputDataFields.image: image,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_keypoints: keypoints
+    }
+
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_keypoints=True)
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
+    with mock.patch.object(preprocessor,
+                           '_random_integer') as mock_random_integer:
+      mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+      distorted_tensor_dict = preprocessor.preprocess(
+          tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+      distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+      distorted_boxes = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_boxes]
+      distorted_labels = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_classes]
+      distorted_keypoints = distorted_tensor_dict[
+          fields.InputDataFields.groundtruth_keypoints]
+      with self.test_session() as sess:
+        (distorted_image_, distorted_boxes_, distorted_labels_,
+         distorted_keypoints_) = sess.run([
+             distorted_image, distorted_boxes, distorted_labels,
+             distorted_keypoints
+         ])
+
+        expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+        expected_keypoints = np.array(
+            [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
+        self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+        self.assertAllEqual(distorted_labels_, [1])
+        self.assertAllClose(distorted_boxes_.flatten(),
+                            expected_boxes.flatten())
+        self.assertAllClose(distorted_keypoints_.flatten(),
+                            expected_keypoints.flatten())
+
+  def testRandomPadImage(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_pad_image, {})]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+           [boxes_shape, padded_boxes_shape, images_shape,
+            padded_images_shape, boxes, padded_boxes])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
+      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
+      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
+      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
+      self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+          padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+      self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+
+  def testRandomCropPadImageWithRandomCoefOne(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_pad_image, {
+        'random_coef': 1.0
+    })]
+    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                 preprocessing_options)
+
+    padded_images = padded_tensor_dict[fields.InputDataFields.image]
+    padded_boxes = padded_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    padded_boxes_shape = tf.shape(padded_boxes)
+    images_shape = tf.shape(images)
+    padded_images_shape = tf.shape(padded_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, padded_boxes_shape_, images_shape_,
+       padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+           [boxes_shape, padded_boxes_shape, images_shape,
+            padded_images_shape, boxes, padded_boxes])
+      self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
+      self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
+      self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
+      self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
+      self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+          padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+      self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+          padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+
+  def testRandomCropToAspectRatio(self):
+    preprocessing_options = [(preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    })]
+
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }
+    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+    images = tensor_dict[fields.InputDataFields.image]
+
+    preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
+        'aspect_ratio': 2.0
+    })]
+    cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+
+    cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
+    cropped_boxes = cropped_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+    boxes_shape = tf.shape(boxes)
+    cropped_boxes_shape = tf.shape(cropped_boxes)
+    images_shape = tf.shape(images)
+    cropped_images_shape = tf.shape(cropped_images)
+
+    with self.test_session() as sess:
+      (boxes_shape_, cropped_boxes_shape_, images_shape_,
+       cropped_images_shape_) = sess.run([
+           boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
+       ])
+      self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
+      self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
+      self.assertEqual(images_shape_[2], cropped_images_shape_[2])
+
+  def testRandomBlackPatches(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_black_patches, {
+        'size_to_image_ratio': 0.5
+    }))
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+    blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
+    images_shape = tf.shape(images)
+    blacked_images_shape = tf.shape(blacked_images)
+
+    with self.test_session() as sess:
+      (images_shape_, blacked_images_shape_) = sess.run(
+          [images_shape, blacked_images_shape])
+      self.assertAllEqual(images_shape_, blacked_images_shape_)
+
+  def testRandomResizeMethod(self):
+    preprocessing_options = []
+    preprocessing_options.append((preprocessor.normalize_image, {
+        'original_minval': 0,
+        'original_maxval': 255,
+        'target_minval': 0,
+        'target_maxval': 1
+    }))
+    preprocessing_options.append((preprocessor.random_resize_method, {
+        'target_size': (75, 150)
+    }))
+    images = self.createTestImages()
+    tensor_dict = {fields.InputDataFields.image: images}
+    resized_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                  preprocessing_options)
+    resized_images = resized_tensor_dict[fields.InputDataFields.image]
+    resized_images_shape = tf.shape(resized_images)
+    expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
+
+    with self.test_session() as sess:
+      (expected_images_shape_, resized_images_shape_) = sess.run(
+          [expected_images_shape, resized_images_shape])
+      self.assertAllEqual(expected_images_shape_,
+                          resized_images_shape_)
+
+  def testResizeToRange(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+    min_dim = 50
+    max_dim = 100
+    expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.random_uniform(in_shape)
+      out_image = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+
+      with self.test_session() as sess:
+        out_image_shape = sess.run(out_image_shape)
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testResizeToRangeWithMasks(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_to_range(
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRangeWithNoInstanceMask(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    min_dim = 50
+    max_dim = 100
+    expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_to_range(
+          in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeImageWithMasks(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeImageWithNoInstanceMask(self):
+    """Tests image resizing, checking output sizes."""
+    in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+    in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+    height = 50
+    width = 100
+    expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+    expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
+
+    for (in_image_shape, expected_image_shape, in_masks_shape,
+         expected_mask_shape) in zip(in_image_shape_list,
+                                     expected_image_shape_list,
+                                     in_masks_shape_list,
+                                     expected_masks_shape_list):
+      in_image = tf.random_uniform(in_image_shape)
+      in_masks = tf.random_uniform(in_masks_shape)
+      out_image, out_masks = preprocessor.resize_image(
+          in_image, in_masks, new_height=height, new_width=width)
+      out_image_shape = tf.shape(out_image)
+      out_masks_shape = tf.shape(out_masks)
+
+      with self.test_session() as sess:
+        out_image_shape, out_masks_shape = sess.run(
+            [out_image_shape, out_masks_shape])
+        self.assertAllEqual(out_image_shape, expected_image_shape)
+        self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+  def testResizeToRange4DImageTensor(self):
+    image = tf.random_uniform([1, 200, 300, 3])
+    with self.assertRaises(ValueError):
+      preprocessor.resize_to_range(image, 500, 600)
+
+  def testResizeToRangeSameMinMax(self):
+    """Tests image resizing, checking output sizes."""
+    in_shape_list = [[312, 312, 3], [299, 299, 3]]
+    min_dim = 320
+    max_dim = 320
+    expected_shape_list = [[320, 320, 3], [320, 320, 3]]
+
+    for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+      in_image = tf.random_uniform(in_shape)
+      out_image = preprocessor.resize_to_range(
+          in_image, min_dimension=min_dim, max_dimension=max_dim)
+      out_image_shape = tf.shape(out_image)
+
+      with self.test_session() as sess:
+        out_image_shape = sess.run(out_image_shape)
+        self.assertAllEqual(out_image_shape, expected_shape)
+
+  def testScaleBoxesToPixelCoordinates(self):
+    """Tests box scaling, checking scaled values."""
+    in_shape = [60, 40, 3]
+    in_boxes = [[0.1, 0.2, 0.4, 0.6],
+                [0.5, 0.3, 0.9, 0.7]]
+
+    expected_boxes = [[6., 8., 24., 24.],
+                      [30., 12., 54., 28.]]
+
+    in_image = tf.random_uniform(in_shape)
+    in_boxes = tf.constant(in_boxes)
+    _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
+        in_image, boxes=in_boxes)
+    with self.test_session() as sess:
+      out_boxes = sess.run(out_boxes)
+      self.assertAllClose(out_boxes, expected_boxes)
+
+  def testScaleBoxesToPixelCoordinatesWithKeypoints(self):
+    """Tests box and keypoint scaling, checking scaled values."""
+    in_shape = [60, 40, 3]
+    in_boxes = self.createTestBoxes()
+    in_keypoints = self.createTestKeypoints()
+
+    expected_boxes = [[0., 10., 45., 40.],
+                      [15., 20., 45., 40.]]
+    expected_keypoints = [
+        [[6., 4.], [12., 8.], [18., 12.]],
+        [[24., 16.], [30., 20.], [36., 24.]],
+    ]
+
+    in_image = tf.random_uniform(in_shape)
+    _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates(
+        in_image, boxes=in_boxes, keypoints=in_keypoints)
+    with self.test_session() as sess:
+      out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints])
+      self.assertAllClose(out_boxes_, expected_boxes)
+      self.assertAllClose(out_keypoints_, expected_keypoints)
+
+  def testSubtractChannelMean(self):
+    """Tests whether channel means have been subtracted."""
+    with self.test_session():
+      image = tf.zeros((240, 320, 3))
+      means = [1, 2, 3]
+      actual = preprocessor.subtract_channel_mean(image, means=means)
+      actual = actual.eval()
+
+      self.assertTrue((actual[:, :, 0] == -1).all())
+      self.assertTrue((actual[:, :, 1] == -2).all())
+      self.assertTrue((actual[:, :, 2] == -3).all())
+
+  def testOneHotEncoding(self):
+    """Tests one hot encoding of multiclass labels."""
+    with self.test_session():
+      labels = tf.constant([1, 4, 2], dtype=tf.int32)
+      one_hot = preprocessor.one_hot_encoding(labels, num_classes=5)
+      one_hot = one_hot.eval()
+
+      self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
+
+  def testSSDRandomCrop(self):
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop, {})]
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run(
+           [boxes_rank, distorted_boxes_rank, images_rank,
+            distorted_images_rank])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testSSDRandomCropPad(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop_pad, {})]
+    tensor_dict = {fields.InputDataFields.image: images,
+                   fields.InputDataFields.groundtruth_boxes: boxes,
+                   fields.InputDataFields.groundtruth_classes: labels}
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run([
+           boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
+       ])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testSSDRandomCropFixedAspectRatio(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels
+    }
+    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+                                                    preprocessing_options)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run(
+           [boxes_rank, distorted_boxes_rank, images_rank,
+            distorted_images_rank])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+  def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
+    images = self.createTestImages()
+    boxes = self.createTestBoxes()
+    labels = self.createTestLabels()
+    masks = self.createTestMasks()
+    keypoints = self.createTestKeypoints()
+    preprocessing_options = [
+        (preprocessor.normalize_image, {
+            'original_minval': 0,
+            'original_maxval': 255,
+            'target_minval': 0,
+            'target_maxval': 1
+        }),
+        (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
+    tensor_dict = {
+        fields.InputDataFields.image: images,
+        fields.InputDataFields.groundtruth_boxes: boxes,
+        fields.InputDataFields.groundtruth_classes: labels,
+        fields.InputDataFields.groundtruth_instance_masks: masks,
+        fields.InputDataFields.groundtruth_keypoints: keypoints,
+    }
+    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+        include_instance_masks=True, include_keypoints=True)
+    distorted_tensor_dict = preprocessor.preprocess(
+        tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+    distorted_boxes = distorted_tensor_dict[
+        fields.InputDataFields.groundtruth_boxes]
+
+    images_rank = tf.rank(images)
+    distorted_images_rank = tf.rank(distorted_images)
+    boxes_rank = tf.rank(boxes)
+    distorted_boxes_rank = tf.rank(distorted_boxes)
+
+    with self.test_session() as sess:
+      (boxes_rank_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_) = sess.run(
+           [boxes_rank, distorted_boxes_rank, images_rank,
+            distorted_images_rank])
+      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+      self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+# Script entry point: hand control to the TensorFlow test runner.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/region_similarity_calculator.py
+++ b/object_detection/core/region_similarity_calculator.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Region Similarity Calculators for BoxLists.
+
+Region Similarity Calculators compute a pairwise measure of similarity
+between the boxes in two BoxLists.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.core import box_list_ops
+
+
+class RegionSimilarityCalculator(object):
+  """Abstract base class for region similarity calculator."""
+  __metaclass__ = ABCMeta
+
+  def compare(self, boxlist1, boxlist2, scope=None):
+    """Computes matrix of pairwise similarity between BoxLists.
+
+    This op (to be overriden) computes a measure of pairwise similarity between
+    the boxes in the given BoxLists. Higher values indicate more similarity.
+
+    Note that this method simply measures similarity and does not explicitly
+    perform a matching.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+      scope: Op scope name. Defaults to 'Compare' if None.
+
+    Returns:
+      a (float32) tensor of shape [N, M] with pairwise similarity score.
+    """
+    with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
+      return self._compare(boxlist1, boxlist2)
+
+  @abstractmethod
+  def _compare(self, boxlist1, boxlist2):
+    pass
+
+
+class IouSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity based on Intersection over Union (IOU) metric.
+
+  This class computes pairwise similarity between two BoxLists based on IOU.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute pairwise IOU similarity between the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise iou scores.
+    """
+    return box_list_ops.iou(boxlist1, boxlist2)
+
+
+class NegSqDistSimilarity(RegionSimilarityCalculator):
+  """Region similarity calculator backed by negated squared distance.
+
+  The squared distance from box_list_ops.sq_dist is negated so that, as with
+  the other calculators, higher values indicate more similarity.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Returns the matrix of negated pairwise squared distances.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing negated pairwise squared distance.
+    """
+    squared_distances = box_list_ops.sq_dist(boxlist1, boxlist2)
+    return -1 * squared_distances
+
+
+class IoaSimilarity(RegionSimilarityCalculator):
+  """Region similarity calculator backed by Intersection over Area (IOA).
+
+  Scores every pair of boxes drawn from two BoxLists by their intersection
+  divided by the area of the box taken from the second BoxList. The measure is
+  therefore not symmetric in its two arguments.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Returns the pairwise IOA matrix for the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise IOA scores.
+    """
+    pairwise_ioa = box_list_ops.ioa(boxlist1, boxlist2)
+    return pairwise_ioa
--- a/object_detection/core/region_similarity_calculator_test.py
+++ b/object_detection/core/region_similarity_calculator_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for region_similarity_calculator."""
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import region_similarity_calculator
+
+
+class RegionSimilarityCalculatorTest(tf.test.TestCase):
+  """Compares each similarity calculator with hand-computed score matrices."""
+
+  def test_get_correct_pairwise_similarity_based_on_iou(self):
+    boxes1 = box_list.BoxList(
+        tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]))
+    boxes2 = box_list.BoxList(
+        tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+                     [0.0, 0.0, 20.0, 20.0]]))
+    # One row per box in boxes1, one column per box in boxes2.
+    expected_iou = [[2.0 / 16.0, 0, 6.0 / 400.0],
+                    [1.0 / 16.0, 0.0, 5.0 / 400.0]]
+    calculator = region_similarity_calculator.IouSimilarity()
+    iou_tensor = calculator.compare(boxes1, boxes2)
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(iou_tensor), expected_iou)
+
+  def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
+    boxes1 = box_list.BoxList(
+        tf.constant([[0.0, 0.0, 0.0, 0.0],
+                     [1.0, 1.0, 0.0, 2.0]]))
+    boxes2 = box_list.BoxList(
+        tf.constant([[3.0, 4.0, 1.0, 0.0],
+                     [-4.0, 0.0, 0.0, 3.0],
+                     [0.0, 0.0, 0.0, 0.0]]))
+    # Squared distances are negated by the calculator, hence the signs.
+    expected_neg_sq_dist = [[-26, -25, 0], [-18, -27, -6]]
+    calculator = region_similarity_calculator.NegSqDistSimilarity()
+    neg_sq_dist_tensor = calculator.compare(boxes1, boxes2)
+    with self.test_session() as sess:
+      self.assertAllClose(sess.run(neg_sq_dist_tensor), expected_neg_sq_dist)
+
+  def test_get_correct_pairwise_similarity_based_on_ioa(self):
+    boxes1 = box_list.BoxList(
+        tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]))
+    boxes2 = box_list.BoxList(
+        tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+                     [0.0, 0.0, 20.0, 20.0]]))
+    # IOA is evaluated in both argument orders because the expected matrices
+    # differ (and are not simply transposes of each other).
+    expected_ioa_1_vs_2 = [[2.0 / 12.0, 0, 6.0 / 400.0],
+                           [1.0 / 12.0, 0.0, 5.0 / 400.0]]
+    expected_ioa_2_vs_1 = [[2.0 / 6.0, 1.0 / 5.0],
+                           [0, 0],
+                           [6.0 / 6.0, 5.0 / 5.0]]
+    calculator = region_similarity_calculator.IoaSimilarity()
+    ioa_1_vs_2 = calculator.compare(boxes1, boxes2)
+    ioa_2_vs_1 = calculator.compare(boxes2, boxes1)
+    with self.test_session() as sess:
+      ioa_output_1, ioa_output_2 = sess.run([ioa_1_vs_2, ioa_2_vs_1])
+      self.assertAllClose(ioa_output_1, expected_ioa_1_vs_2)
+      self.assertAllClose(ioa_output_2, expected_ioa_2_vs_1)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/core/standard_fields.py
+++ b/object_detection/core/standard_fields.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Contains classes specifying naming conventions used for object detection.
+
+
+Specifies:
+  InputDataFields: standard fields used by reader/preprocessor/batcher.
+  BoxListFields: standard fields used by BoxList.
+  TfExampleFields: standard fields for tf-example data format (go/tf-example).
+"""
+
+
+class InputDataFields(object):
+  """Canonical key names for input tensors.
+
+  Each attribute is the string key under which the corresponding tensor is
+  stored in a tensor_dict. The decoder should use these names for the keys of
+  the tensor_dict it returns, and the model should use the same names to look
+  up the tensors it needs.
+
+  Attributes:
+    image: image.
+    original_image: image in the original input size.
+    key: unique key corresponding to image.
+    source_id: source of the original image.
+    filename: original filename of the dataset (without common path).
+    groundtruth_image_classes: image-level class labels.
+    groundtruth_boxes: coordinates of the ground truth boxes in the image.
+    groundtruth_classes: box-level class labels.
+    groundtruth_label_types: box-level label types (e.g. explicit negative).
+    groundtruth_is_crowd: is the groundtruth a single object or a crowd.
+    groundtruth_area: area of a groundtruth segment.
+    groundtruth_difficult: is a `difficult` object.
+    proposal_boxes: coordinates of object proposal boxes.
+    proposal_objectness: objectness score of each proposal.
+    groundtruth_instance_masks: ground truth instance masks.
+    groundtruth_instance_classes: instance mask-level class labels.
+    groundtruth_keypoints: ground truth keypoints.
+    groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
+    groundtruth_label_scores: groundtruth label scores.
+  """
+  # Image and its identifying metadata.
+  image = 'image'
+  original_image = 'original_image'
+  key = 'key'
+  source_id = 'source_id'
+  filename = 'filename'
+
+  # Image-level and box-level groundtruth.
+  groundtruth_image_classes = 'groundtruth_image_classes'
+  groundtruth_boxes = 'groundtruth_boxes'
+  groundtruth_classes = 'groundtruth_classes'
+  groundtruth_label_types = 'groundtruth_label_types'
+  groundtruth_is_crowd = 'groundtruth_is_crowd'
+  groundtruth_area = 'groundtruth_area'
+  groundtruth_difficult = 'groundtruth_difficult'
+
+  # Object proposals.
+  proposal_boxes = 'proposal_boxes'
+  proposal_objectness = 'proposal_objectness'
+
+  # Instance masks, keypoints and label scores.
+  groundtruth_instance_masks = 'groundtruth_instance_masks'
+  groundtruth_instance_classes = 'groundtruth_instance_classes'
+  groundtruth_keypoints = 'groundtruth_keypoints'
+  groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
+  groundtruth_label_scores = 'groundtruth_label_scores'
+
+
+class BoxListFields(object):
+  """Canonical field names used with BoxLists.
+
+  Each attribute is the string name of one per-box field.
+
+  Attributes:
+    boxes: bounding box coordinates.
+    classes: classes per bounding box.
+    scores: scores per bounding box.
+    weights: sample weights per bounding box.
+    objectness: objectness score per bounding box.
+    masks: masks per bounding box.
+    keypoints: keypoints per bounding box.
+    keypoint_heatmaps: keypoint heatmaps per bounding box.
+  """
+  # Box geometry.
+  boxes = 'boxes'
+
+  # Per-box labels, scores and weights.
+  classes = 'classes'
+  scores = 'scores'
+  weights = 'weights'
+  objectness = 'objectness'
+
+  # Per-box masks and keypoint data.
+  masks = 'masks'
+  keypoints = 'keypoints'
+  keypoint_heatmaps = 'keypoint_heatmaps'
+
+
+class TfExampleFields(object):
+  """TF-example proto feature names for object detection.
+
+  Holds the standard feature names to load from an Example proto for object
+  detection. Each attribute is the string feature key.
+
+  Attributes:
+    image_encoded: JPEG encoded string
+    image_format: image format, e.g. "JPEG"
+    filename: filename
+    channels: number of channels of image
+    colorspace: colorspace, e.g. "RGB"
+    height: height of image in pixels, e.g. 462
+    width: width of image in pixels, e.g. 581
+    source_id: original source of the image
+    object_class_text: labels in text format, e.g. ["person", "cat"]
+    object_class_label: labels in numbers, e.g. [16, 8]
+    object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
+    object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
+    object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
+    object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
+    object_view: viewpoint of object, e.g. ["frontal", "left"]
+    object_truncated: is object truncated, e.g. [true, false]
+    object_occluded: is object occluded, e.g. [true, false]
+    object_difficult: is object difficult, e.g. [true, false]
+    object_is_crowd: is the object a single object or a crowd
+    object_segment_area: the area of the segment.
+    instance_masks: instance segmentation masks.
+    instance_classes: Classes for each instance segmentation mask.
+  """
+  image_encoded = 'image/encoded'
+  # Named image_format rather than format so the attribute does not reuse the
+  # name of the Python builtin `format`.
+  image_format = 'image/format'
+  filename = 'image/filename'
+  channels = 'image/channels'
+  colorspace = 'image/colorspace'
+  height = 'image/height'
+  width = 'image/width'
+  source_id = 'image/source_id'
+  object_class_text = 'image/object/class/text'
+  object_class_label = 'image/object/class/label'
+  object_bbox_ymin = 'image/object/bbox/ymin'
+  object_bbox_xmin = 'image/object/bbox/xmin'
+  object_bbox_ymax = 'image/object/bbox/ymax'
+  object_bbox_xmax = 'image/object/bbox/xmax'
+  object_view = 'image/object/view'
+  object_truncated = 'image/object/truncated'
+  object_occluded = 'image/object/occluded'
+  object_difficult = 'image/object/difficult'
+  object_is_crowd = 'image/object/is_crowd'
+  object_segment_area = 'image/object/segment/area'
+  instance_masks = 'image/segmentation/object'
+  instance_classes = 'image/segmentation/object/class'
--- a/object_detection/core/target_assigner.py
+++ b/object_detection/core/target_assigner.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base target assigner module.
+
+The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
+groundtruth detections (bounding boxes), to assign classification and regression
+targets to each anchor as well as weights to each anchor (specifying, e.g.,
+which anchors should not contribute to training loss).
+
+It assigns classification/regression targets by performing the following steps:
+1) Computing pairwise similarity between anchors and groundtruth boxes using a
+  provided RegionSimilarityCalculator
+2) Computing a matching based on the similarity matrix using a provided Matcher
+3) Assigning regression targets based on the matching and a provided BoxCoder
+4) Assigning classification targets based on the matching and groundtruth labels
+
+Note that TargetAssigners only operate on detections from a single
+image at a time, so any logic for applying a TargetAssigner to multiple
+images must be handled externally.
+"""
+import tensorflow as tf
+
+from object_detection.box_coders import faster_rcnn_box_coder
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_coder as bcoder
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import matcher as mat
+from object_detection.core import region_similarity_calculator as sim_calc
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+
+
+class TargetAssigner(object):
+  """Computes per-anchor classification and regression targets."""
+
+  def __init__(self, similarity_calc, matcher, box_coder,
+               positive_class_weight=1.0, negative_class_weight=1.0,
+               unmatched_cls_target=None):
+    """Stores the components used to match anchors and encode targets.
+
+    Args:
+      similarity_calc: a RegionSimilarityCalculator
+      matcher: an object_detection.core.Matcher used to match groundtruth to
+        anchors.
+      box_coder: an object_detection.core.BoxCoder used to encode matching
+        groundtruth boxes with respect to anchors.
+      positive_class_weight: classification weight to be associated to positive
+        anchors (default: 1.0)
+      negative_class_weight: classification weight to be associated to negative
+        anchors (default: 1.0)
+      unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
+        which is consistent with the classification target for each
+        anchor (and can be empty for scalar targets).  This shape must thus be
+        compatible with the groundtruth labels that are passed to the "assign"
+        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+        If set to None, unmatched_cls_target is set to be [0] for each anchor.
+
+    Raises:
+      ValueError: if similarity_calc is not a RegionSimilarityCalculator or
+        if matcher is not a Matcher or if box_coder is not a BoxCoder
+    """
+    # Checked in argument order so the first offending component is reported.
+    component_checks = (
+        (similarity_calc, sim_calc.RegionSimilarityCalculator,
+         'similarity_calc must be a RegionSimilarityCalculator'),
+        (matcher, mat.Matcher, 'matcher must be a Matcher'),
+        (box_coder, bcoder.BoxCoder, 'box_coder must be a BoxCoder'),
+    )
+    for component, required_type, message in component_checks:
+      if not isinstance(component, required_type):
+        raise ValueError(message)
+
+    if unmatched_cls_target is None:
+      unmatched_cls_target = tf.constant([0], tf.float32)
+
+    self._similarity_calc = similarity_calc
+    self._matcher = matcher
+    self._box_coder = box_coder
+    self._positive_class_weight = positive_class_weight
+    self._negative_class_weight = negative_class_weight
+    self._unmatched_cls_target = unmatched_cls_target
+
+  @property
+  def box_coder(self):
+    """The BoxCoder passed to the constructor."""
+    return self._box_coder
+
+  def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
+             **params):
+    """Assign classification and regression targets to each anchor.
+
+    Anchors are matched to groundtruth_boxes, and the resulting match drives
+    both the targets and the weights given to every anchor (the weights
+    specify, e.g., which anchors should not contribute to training loss).
+
+    Anchors that are not matched to anything are given a classification target
+    of self._unmatched_cls_target which can be specified via the constructor.
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth boxes
+      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
+        to None, groundtruth_labels assumes a binary problem where all
+        ground_truth boxes get a positive label (of 1).
+      **params: Additional keyword arguments for specific implementations of
+              the Matcher.
+
+    Returns:
+      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+      cls_weights: a float32 tensor with shape [num_anchors]
+      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
+      reg_weights: a float32 tensor with shape [num_anchors]
+      match: a matcher.Match object encoding the match between anchors and
+        groundtruth boxes, with rows corresponding to groundtruth boxes
+        and columns corresponding to anchors.
+
+    Raises:
+      ValueError: if anchors or groundtruth_boxes are not of type
+        box_list.BoxList
+    """
+    for boxes, error_message in (
+        (anchors, 'anchors must be an BoxList'),
+        (groundtruth_boxes, 'groundtruth_boxes must be an BoxList')):
+      if not isinstance(boxes, box_list.BoxList):
+        raise ValueError(error_message)
+
+    if groundtruth_labels is None:
+      # Binary problem: a label of 1 for every groundtruth box, with a trailing
+      # dimension appended to the vector of ones.
+      ones_shape = tf.expand_dims(groundtruth_boxes.num_boxes(), 0)
+      groundtruth_labels = tf.expand_dims(tf.ones(ones_shape), -1)
+
+    # The per-box label shape has to equal the shape of the unmatched target.
+    labels_subshape = tf.shape(groundtruth_labels)[1:]
+    unmatched_shape = tf.shape(self._unmatched_cls_target)
+    shape_assert = tf.assert_equal(labels_subshape, unmatched_shape)
+
+    with tf.control_dependencies([shape_assert]):
+      # Rows are groundtruth boxes, columns are anchors.
+      match_quality_matrix = self._similarity_calc.compare(
+          groundtruth_boxes, anchors)
+      match = self._matcher.match(match_quality_matrix, **params)
+      reg_targets = self._create_regression_targets(
+          anchors, groundtruth_boxes, match)
+      cls_targets = self._create_classification_targets(
+          groundtruth_labels, match)
+      reg_weights = self._create_regression_weights(match)
+      cls_weights = self._create_classification_weights(
+          match, self._positive_class_weight, self._negative_class_weight)
+
+      num_anchors = anchors.num_boxes_static()
+      if num_anchors is not None:
+        # Pin the statically known anchor count on every output.
+        targets = (reg_targets, cls_targets, reg_weights, cls_weights)
+        reg_targets, cls_targets, reg_weights, cls_weights = [
+            self._reset_target_shape(t, num_anchors) for t in targets]
+
+    return cls_targets, cls_weights, reg_targets, reg_weights, match
+
+  def _reset_target_shape(self, target, num_anchors):
+    """Overrides the first static dimension of the target with num_anchors.
+
+    Args:
+      target: the target tensor. Its first dimension will be overwritten.
+      num_anchors: the number of anchors, which is used to override the target's
+        first dimension.
+
+    Returns:
+      The same tensor, with the shape info filled in.
+    """
+    static_shape = target.get_shape().as_list()
+    static_shape[0] = num_anchors
+    target.set_shape(static_shape)
+    return target
+
+  def _create_regression_targets(self, anchors, groundtruth_boxes, match):
+    """Returns a regression target for each anchor.
+
+    Matched anchors get their matched groundtruth box encoded by the box
+    coder; unmatched or ignored anchors get the default regression target.
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth_boxes
+      match: a matcher.Match object
+
+    Returns:
+      reg_targets: a float32 tensor with shape [N, box_code_dimension]
+    """
+    matched_cols = match.matched_column_indices()
+    unmatched_or_ignored_cols = match.unmatched_or_ignored_column_indices()
+    matched_rows = match.matched_row_indices()
+
+    matched_anchors = box_list_ops.gather(anchors, matched_cols)
+    matched_gt_boxes = box_list_ops.gather(groundtruth_boxes, matched_rows)
+    matched_reg_targets = self._box_coder.encode(
+        matched_gt_boxes, matched_anchors)
+
+    # One copy of the default target row per unmatched or ignored anchor.
+    default_row = self._default_regression_target()
+    num_defaults = tf.size(unmatched_or_ignored_cols)
+    default_reg_targets = tf.tile(default_row, tf.stack([num_defaults, 1]))
+
+    # Interleave both groups back into anchor order.
+    reg_targets = tf.dynamic_stitch(
+        [matched_cols, unmatched_or_ignored_cols],
+        [matched_reg_targets, default_reg_targets])
+    # TODO: summarize the number of matches on average.
+    return reg_targets
+
+  def _default_regression_target(self):
+    """Returns the default target for anchors to regress to.
+
+    The default is all zeros. Its actual value should not matter, because any
+    box set to regress to the default target has a regression weight of zero.
+
+    Returns:
+      default_target: a float32 tensor with shape [1, box_code_dimension]
+    """
+    zero_row = [0] * self._box_coder.code_size
+    return tf.constant([zero_row], tf.float32)
+
+  def _create_classification_targets(self, groundtruth_labels, match):
+    """Create classification targets for each anchor.
+
+    Each matched anchor receives the groundtruth label of the box it was
+    matched to. Anchors that are not matched to anything are given the target
+    self._unmatched_cls_target.
+
+    Args:
+      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar labels).
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+
+    Returns:
+      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+    """
+    matched_cols = match.matched_column_indices()
+    unmatched_or_ignored_cols = match.unmatched_or_ignored_column_indices()
+    matched_rows = match.matched_row_indices()
+    matched_cls_targets = tf.gather(groundtruth_labels, matched_rows)
+
+    # Repeat the unmatched target along a new leading axis only; every other
+    # axis keeps a tile multiple of 1.
+    target_rank = self._unmatched_cls_target.shape.ndims
+    unmatched_template = tf.expand_dims(self._unmatched_cls_target, 0)
+    tile_multiples = tf.stack(
+        [tf.size(unmatched_or_ignored_cols)] + [1] * target_rank)
+    default_cls_targets = tf.tile(unmatched_template, tile_multiples)
+
+    # Interleave both groups back into anchor order.
+    return tf.dynamic_stitch(
+        [matched_cols, unmatched_or_ignored_cols],
+        [matched_cls_targets, default_cls_targets])
+
+  def _create_regression_weights(self, match):
+    """Set regression weight for each anchor.
+
+    Only positive anchors are set to contribute to the regression loss, so this
+    method returns a weight of 1 for every positive anchor and 0 for every
+    negative anchor.
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+
+    Returns:
+      reg_weights: a float32 tensor with shape [num_anchors] representing
+        regression weights
+    """
+    return tf.cast(match.matched_column_indicator(), tf.float32)
+
+  def _create_classification_weights(self,
+                                     match,
+                                     positive_class_weight=1.0,
+                                     negative_class_weight=1.0):
+    """Create classification weights for each anchor.
+
+    Positive (matched) anchors are weighted by positive_class_weight and
+    negative (unmatched) anchors by negative_class_weight. Ignored anchors get
+    a weight of zero. Both weights default to 1.0, but they can be adjusted to
+    handle class imbalance (which is almost always the case in object
+    detection).
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+      positive_class_weight: weight to be associated to positive anchors
+      negative_class_weight: weight to be associated to negative anchors
+
+    Returns:
+      cls_weights: a float32 tensor with shape [num_anchors] representing
+        classification weights.
+    """
+    is_matched = tf.cast(match.matched_column_indicator(), tf.float32)
+    is_ignored = tf.cast(match.ignored_column_indicator(), tf.float32)
+    # Whatever is neither matched nor ignored counts as unmatched.
+    is_unmatched = 1.0 - is_matched - is_ignored
+    positive_part = positive_class_weight * is_matched
+    negative_part = negative_class_weight * is_unmatched
+    return positive_part + negative_part
+
+  def get_box_coder(self):
+    """Get BoxCoder of this TargetAssigner.
+
+    Method-style accessor that returns the same object as the `box_coder`
+    property.
+
+    Returns:
+      BoxCoder: BoxCoder object.
+    """
+    return self._box_coder
+
+
+# TODO: This method pulls in all the implementation dependencies into core.
+# Therefore it's best to have this factory method outside of core.
+def create_target_assigner(reference, stage=None,
+                           positive_class_weight=1.0,
+                           negative_class_weight=1.0,
+                           unmatched_cls_target=None):
+  """Factory function for creating standard target assigners.
+
+  Supported combinations are ('Multibox', 'proposal'),
+  ('FasterRCNN', 'proposal'), ('FasterRCNN', 'detection') and 'FastRCNN'
+  (for which stage is not consulted).
+
+  Args:
+    reference: string referencing the type of TargetAssigner.
+    stage: string denoting stage: {proposal, detection}.
+    positive_class_weight: classification weight to be associated to positive
+      anchors (default: 1.0)
+    negative_class_weight: classification weight to be associated to negative
+      anchors (default: 1.0)
+    unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
+      which is consistent with the classification target for each
+      anchor (and can be empty for scalar targets).  This shape must thus be
+      compatible with the groundtruth labels that are passed to the Assign
+      function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+      If set to None, the TargetAssigner falls back to its own default.
+
+  Returns:
+    TargetAssigner: desired target assigner.
+
+  Raises:
+    ValueError: if combination reference+stage is invalid.
+  """
+  if reference == 'Multibox' and stage == 'proposal':
+    similarity = sim_calc.NegSqDistSimilarity()
+    box_matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+
+  elif reference == 'FasterRCNN' and stage in ('proposal', 'detection'):
+    # Both stages share the IOU similarity and the scaled box coder; only the
+    # matcher thresholds differ.
+    similarity = sim_calc.IouSimilarity()
+    if stage == 'proposal':
+      box_matcher = argmax_matcher.ArgMaxMatcher(
+          matched_threshold=0.7,
+          unmatched_threshold=0.3,
+          force_match_for_each_row=True)
+    else:
+      # Uses all proposals with IOU < 0.5 as candidate negatives.
+      box_matcher = argmax_matcher.ArgMaxMatcher(
+          matched_threshold=0.5,
+          negatives_lower_than_unmatched=True)
+    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+        scale_factors=[10.0, 10.0, 5.0, 5.0])
+
+  elif reference == 'FastRCNN':
+    similarity = sim_calc.IouSimilarity()
+    box_matcher = argmax_matcher.ArgMaxMatcher(
+        matched_threshold=0.5,
+        unmatched_threshold=0.1,
+        force_match_for_each_row=False,
+        negatives_lower_than_unmatched=False)
+    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+
+  else:
+    raise ValueError('No valid combination of reference and stage.')
+
+  return TargetAssigner(similarity, box_matcher, coder,
+                        positive_class_weight=positive_class_weight,
+                        negative_class_weight=negative_class_weight,
+                        unmatched_cls_target=unmatched_cls_target)
+
+
+def batch_assign_targets(target_assigner,
+                         anchors_batch,
+                         gt_box_batch,
+                         gt_class_targets_batch):
+  """Batched assignment of classification and regression targets.
+
+  Runs target_assigner.assign once per image and stacks the per-image results
+  along a new leading batch dimension.
+
+  Args:
+    target_assigner: a target assigner.
+    anchors_batch: BoxList representing N box anchors or list of BoxList objects
+      with length batch_size representing anchor sets.
+    gt_box_batch: a list of BoxList objects with length batch_size
+      representing groundtruth boxes for each image in the batch
+    gt_class_targets_batch: a list of tensors with length batch_size, where
+      each tensor has shape [num_gt_boxes_i, classification_target_size] and
+      num_gt_boxes_i is the number of boxes in the ith boxlist of
+      gt_box_batch.
+
+  Returns:
+    batch_cls_targets: a tensor with shape [batch_size, num_anchors,
+      num_classes],
+    batch_cls_weights: a tensor with shape [batch_size, num_anchors],
+    batch_reg_targets: a tensor with shape [batch_size, num_anchors,
+      box_code_dimension]
+    batch_reg_weights: a tensor with shape [batch_size, num_anchors],
+    match_list: a list of matcher.Match objects encoding the match between
+      anchors and groundtruth boxes for each image of the batch,
+      with rows of the Match objects corresponding to groundtruth boxes
+      and columns corresponding to anchors.
+
+  Raises:
+    ValueError: if anchors_batch is neither a BoxList nor a list of BoxLists,
+      or if anchors_batch (after a single BoxList has been replicated once per
+      entry of gt_box_batch), gt_box_batch and gt_class_targets_batch do not
+      all have the same length.
+  """
+  if not isinstance(anchors_batch, list):
+    # A single anchor set is shared by every image in the batch.
+    anchors_batch = [anchors_batch] * len(gt_box_batch)
+  for anchors in anchors_batch:
+    if not isinstance(anchors, box_list.BoxList):
+      raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
+  lengths_agree = (len(anchors_batch)
+                   == len(gt_box_batch)
+                   == len(gt_class_targets_batch))
+  if not lengths_agree:
+    raise ValueError('batch size incompatible with lengths of anchors_batch, '
+                     'gt_box_batch and gt_class_targets_batch.')
+
+  # One (cls_targets, cls_weights, reg_targets, reg_weights, match) tuple per
+  # image, in batch order.
+  per_image = []
+  for anchors, gt_boxes, gt_class_targets in zip(
+      anchors_batch, gt_box_batch, gt_class_targets_batch):
+    cls_targets, cls_weights, reg_targets, reg_weights, match = (
+        target_assigner.assign(anchors, gt_boxes, gt_class_targets))
+    per_image.append(
+        (cls_targets, cls_weights, reg_targets, reg_weights, match))
+
+  batch_cls_targets = tf.stack([entry[0] for entry in per_image])
+  batch_cls_weights = tf.stack([entry[1] for entry in per_image])
+  batch_reg_targets = tf.stack([entry[2] for entry in per_image])
+  batch_reg_weights = tf.stack([entry[3] for entry in per_image])
+  match_list = [entry[4] for entry in per_image]
+  return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
+          batch_reg_weights, match_list)
--- a/object_detection/core/target_assigner_test.py
+++ b/object_detection/core/target_assigner_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.target_assigner."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_list
+from object_detection.core import region_similarity_calculator
+from object_detection.core import target_assigner as targetassigner
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+
+
+class TargetAssignerTest(tf.test.TestCase):
+  """Tests TargetAssigner.assign on a single set of anchors and boxes."""
+
+  def _make_priors(self, prior_means, stddev=.1):
+    """Builds a prior BoxList carrying a uniform 'stddev' field.
+
+    Args:
+      prior_means: nested list with one row of four coordinates per prior.
+      stddev: value used for every entry of the 'stddev' field.
+
+    Returns:
+      a box_list.BoxList wrapping `prior_means`, with a 'stddev' field built
+      from a [len(prior_means), 4] nested list filled with `stddev`.
+    """
+    priors = box_list.BoxList(tf.constant(prior_means))
+    priors.add_field('stddev',
+                     tf.constant(len(prior_means) * [4 * [stddev]]))
+    return priors
+
+  def _assert_assignment(self, result, exp_cls_targets, exp_cls_weights,
+                         exp_reg_targets, exp_reg_weights,
+                         exp_matching_anchors):
+    """Evaluates an `assign` result and compares it with expected values.
+
+    Args:
+      result: the 5-tuple (cls_targets, cls_weights, reg_targets, reg_weights,
+        match) returned by TargetAssigner.assign.
+      exp_cls_targets: expected classification targets.
+      exp_cls_weights: expected classification weights.
+      exp_reg_targets: expected regression targets.
+      exp_reg_weights: expected regression weights.
+      exp_matching_anchors: expected value of match.matched_column_indices().
+    """
+    (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
+    with self.test_session() as sess:
+      (cls_targets_out, cls_weights_out,
+       reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
+           [cls_targets, cls_weights, reg_targets, reg_weights,
+            match.matched_column_indices()])
+
+      self.assertAllClose(cls_targets_out, exp_cls_targets)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+      self.assertAllClose(reg_targets_out, exp_reg_targets)
+      self.assertAllClose(reg_weights_out, exp_reg_weights)
+      self.assertAllClose(matching_anchors_out, exp_matching_anchors)
+      # assertEqual rather than the deprecated assertEquals alias, which no
+      # longer exists in recent Python releases.
+      self.assertEqual(cls_targets_out.dtype, np.float32)
+      self.assertEqual(cls_weights_out.dtype, np.float32)
+      self.assertEqual(reg_targets_out.dtype, np.float32)
+      self.assertEqual(reg_weights_out.dtype, np.float32)
+      self.assertEqual(matching_anchors_out.dtype, np.int32)
+
+  def test_assign_agnostic(self):
+    """Assigns targets without class labels (unmatched_cls_target=None)."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder, unmatched_cls_target=None)
+
+    priors = self._make_priors([[0.0, 0.0, 0.5, 0.5],
+                                [0.5, 0.5, 1.0, 0.8],
+                                [0, 0.5, .5, 1.0]])
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    result = target_assigner.assign(priors, boxes, num_valid_rows=2)
+    self._assert_assignment(
+        result,
+        exp_cls_targets=[[1], [1], [0]],
+        exp_cls_weights=[1, 1, 1],
+        exp_reg_targets=[[0, 0, 0, 0],
+                         [0, 0, -1, 1],
+                         [0, 0, 0, 0]],
+        exp_reg_weights=[1, 1, 0],
+        exp_matching_anchors=[0, 1])
+
+  def test_assign_with_ignored_matches(self):
+    """Assigns targets when one anchor falls between the two thresholds."""
+    # Note: this test is very similar to test_assign_agnostic. The third box
+    # matched with an IOU of 0.35, which is between the matched and unmatched
+    # threshold. This means that, like in test_assign_agnostic, the expected
+    # classification targets are [1, 1, 0]. Unlike there, the third target is
+    # ignored and therefore the expected classification weights are
+    # [1, 1, 0].
+    similarity_calc = region_similarity_calculator.IouSimilarity()
+    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+                                           unmatched_threshold=0.3)
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder)
+
+    priors = self._make_priors([[0.0, 0.0, 0.5, 0.5],
+                                [0.5, 0.5, 1.0, 0.8],
+                                [0.0, 0.5, .9, 1.0]])
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    result = target_assigner.assign(priors, boxes)
+    self._assert_assignment(
+        result,
+        exp_cls_targets=[[1], [1], [0]],
+        exp_cls_weights=[1, 1, 0],
+        exp_reg_targets=[[0, 0, 0, 0],
+                         [0, 0, -1, 1],
+                         [0, 0, 0, 0]],
+        exp_reg_weights=[1, 1, 0],
+        exp_matching_anchors=[0, 1])
+
+  def test_assign_multiclass(self):
+    """Assigns one-hot class targets; one anchor gets the unmatched target."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    priors = self._make_priors([[0.0, 0.0, 0.5, 0.5],
+                                [0.5, 0.5, 1.0, 0.8],
+                                [0, 0.5, .5, 1.0],
+                                [.75, 0, 1.0, .25]])
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
+                                      [0, 0, 0, 0, 0, 1, 0],
+                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
+
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    self._assert_assignment(
+        result,
+        exp_cls_targets=[[0, 1, 0, 0, 0, 0, 0],
+                         [0, 0, 0, 0, 0, 1, 0],
+                         [1, 0, 0, 0, 0, 0, 0],
+                         [0, 0, 0, 1, 0, 0, 0]],
+        exp_cls_weights=[1, 1, 1, 1],
+        exp_reg_targets=[[0, 0, 0, 0],
+                         [0, 0, -1, 1],
+                         [0, 0, 0, 0],
+                         [0, 0, -.5, .2]],
+        exp_reg_weights=[1, 1, 0, 1],
+        exp_matching_anchors=[0, 1, 3])
+
+  def test_assign_multiclass_unequal_class_weights(self):
+    """Checks classification weights when negative_class_weight is 0.5."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        positive_class_weight=1.0, negative_class_weight=0.5,
+        unmatched_cls_target=unmatched_cls_target)
+
+    priors = self._make_priors([[0.0, 0.0, 0.5, 0.5],
+                                [0.5, 0.5, 1.0, 0.8],
+                                [0, 0.5, .5, 1.0],
+                                [.75, 0, 1.0, .25]])
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
+                                      [0, 0, 0, 0, 0, 1, 0],
+                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
+
+    exp_cls_weights = [1, 1, .5, 1]
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    # Only the classification weights are of interest in this test.
+    (_, cls_weights, _, _, _) = result
+    with self.test_session() as sess:
+      cls_weights_out = sess.run(cls_weights)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+
+  def test_assign_multidimensional_class_targets(self):
+    """Assigns class targets that are [2, 2] tensors rather than vectors."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    priors = self._make_priors([[0.0, 0.0, 0.5, 0.5],
+                                [0.5, 0.5, 1.0, 0.8],
+                                [0, 0.5, .5, 1.0],
+                                [.75, 0, 1.0, .25]])
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
+                                      [[1, 0], [0, 1]],
+                                      [[0, 1], [1, .5]]], tf.float32)
+
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    self._assert_assignment(
+        result,
+        exp_cls_targets=[[[0, 1], [1, 0]],
+                         [[1, 0], [0, 1]],
+                         [[0, 0], [0, 0]],
+                         [[0, 1], [1, .5]]],
+        exp_cls_weights=[1, 1, 1, 1],
+        exp_reg_targets=[[0, 0, 0, 0],
+                         [0, 0, -1, 1],
+                         [0, 0, 0, 0],
+                         [0, 0, -.5, .2]],
+        exp_reg_weights=[1, 1, 0, 1],
+        exp_matching_anchors=[0, 1, 3])
+
+  def test_assign_empty_groundtruth(self):
+    """Assigns targets when there are zero groundtruth boxes."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    priors = self._make_priors([[0.0, 0.0, 0.5, 0.5],
+                                [0.5, 0.5, 1.0, 0.8],
+                                [0, 0.5, .5, 1.0],
+                                [.75, 0, 1.0, .25]])
+
+    # Slice zero rows out of one-row tensors to obtain empty [0, 4] boxes and
+    # empty [0, 3] labels.
+    box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
+    box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
+    boxes = box_list.BoxList(box_corners)
+
+    groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
+    groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3])
+
+    result = target_assigner.assign(priors, boxes, groundtruth_labels)
+    self._assert_assignment(
+        result,
+        exp_cls_targets=[[0, 0, 0],
+                         [0, 0, 0],
+                         [0, 0, 0],
+                         [0, 0, 0]],
+        exp_cls_weights=[1, 1, 1, 1],
+        exp_reg_targets=[[0, 0, 0, 0],
+                         [0, 0, 0, 0],
+                         [0, 0, 0, 0],
+                         [0, 0, 0, 0]],
+        exp_reg_weights=[0, 0, 0, 0],
+        exp_matching_anchors=[])
+
+  def test_raises_error_on_invalid_groundtruth_labels(self):
+    """Expects ValueError when label shape disagrees with the unmatched one."""
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    priors = self._make_priors([[0.0, 0.0, 0.5, 0.5]], stddev=1.0)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    # Per-box label shape is [2, 2] while unmatched_cls_target is [3, 2].
+    groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
+
+    with self.assertRaises(ValueError):
+      target_assigner.assign(priors, boxes, groundtruth_labels,
+                             num_valid_rows=3)
+
+
+class BatchTargetAssignerTest(tf.test.TestCase):
+  """Tests targetassigner.batch_assign_targets."""
+
+  def _make_target_assigner(self, unmatched_cls_target):
+    """Returns a TargetAssigner with unit positive/negative class weights.
+
+    Args:
+      unmatched_cls_target: forwarded unchanged to the TargetAssigner
+        constructor; may be None.
+
+    Returns:
+      a TargetAssigner built from NegSqDistSimilarity, GreedyBipartiteMatcher
+      and MeanStddevBoxCoder.
+    """
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    return targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        positive_class_weight=1.0,
+        negative_class_weight=1.0,
+        unmatched_cls_target=unmatched_cls_target)
+
+  def _get_agnostic_target_assigner(self):
+    """Returns a target assigner with no unmatched class target."""
+    return self._make_target_assigner(None)
+
+  def _get_multi_class_target_assigner(self, num_classes):
+    """Returns an assigner whose unmatched target is [1] + num_classes * [0]."""
+    unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
+    return self._make_target_assigner(unmatched_cls_target)
+
+  def _get_multi_dimensional_target_assigner(self, target_dimensions):
+    """Returns an assigner whose unmatched target is all zeros.
+
+    Args:
+      target_dimensions: shape of the zero-filled unmatched class target.
+    """
+    unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
+                                       tf.float32)
+    return self._make_target_assigner(unmatched_cls_target)
+
+  def _make_priors(self, prior_means):
+    """Builds a prior BoxList with a 'stddev' field of 0.1 in every entry.
+
+    Args:
+      prior_means: nested list with one row of four coordinates per prior.
+    """
+    priors = box_list.BoxList(tf.constant(prior_means))
+    priors.add_field('stddev',
+                     tf.constant(len(prior_means) * [4 * [.1]]))
+    return priors
+
+  def _get_four_priors(self):
+    """Returns the four-prior BoxList shared by the two-image tests."""
+    return self._make_priors([[0, 0, .25, .25],
+                              [0, .25, 1, 1],
+                              [0, .1, .5, .5],
+                              [.75, .75, 1, 1]])
+
+  def _get_two_image_groundtruth(self):
+    """Returns groundtruth BoxLists for two images (one box, two boxes)."""
+    box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
+    box_list2 = box_list.BoxList(tf.constant(
+        [[0, 0.25123152, 1, 1],
+         [0.015789, 0.0985, 0.55789, 0.3842]]
+    ))
+    return [box_list1, box_list2]
+
+  def _assert_batch_assignment(self, target_assigner, priors, gt_box_batch,
+                               gt_class_targets_batch, exp_cls_targets,
+                               exp_cls_weights, exp_reg_targets,
+                               exp_reg_weights, exp_matches):
+    """Runs batch_assign_targets and compares every output with expectations.
+
+    Args:
+      target_assigner: the TargetAssigner under test.
+      priors: anchors passed as `anchors_batch` (a single BoxList here).
+      gt_box_batch: list of groundtruth BoxLists, one per image.
+      gt_class_targets_batch: list of class targets (or None), one per image.
+      exp_cls_targets: expected batched classification targets.
+      exp_cls_weights: expected batched classification weights.
+      exp_reg_targets: expected batched regression targets.
+      exp_reg_weights: expected batched regression weights.
+      exp_matches: list with the expected matched_column_indices() of each
+        image; its length is also the expected length of the match list.
+    """
+    (cls_targets, cls_weights, reg_targets, reg_weights,
+     match_list) = targetassigner.batch_assign_targets(
+         target_assigner, priors, gt_box_batch, gt_class_targets_batch)
+    # Separate assertions so a failure says whether type or length is wrong.
+    self.assertIsInstance(match_list, list)
+    self.assertEqual(len(match_list), len(exp_matches))
+    with self.test_session() as sess:
+      outputs = sess.run([
+          cls_targets, cls_weights, reg_targets, reg_weights] + [
+              match.matched_column_indices() for match in match_list])
+      (cls_targets_out, cls_weights_out, reg_targets_out,
+       reg_weights_out) = outputs[:4]
+      matches_out = outputs[4:]
+      self.assertAllClose(cls_targets_out, exp_cls_targets)
+      self.assertAllClose(cls_weights_out, exp_cls_weights)
+      self.assertAllClose(reg_targets_out, exp_reg_targets)
+      self.assertAllClose(reg_weights_out, exp_reg_weights)
+      for match_out, exp_match in zip(matches_out, exp_matches):
+        self.assertAllClose(match_out, exp_match)
+
+  def test_batch_assign_targets(self):
+    """Batches class-agnostic assignment over two images."""
+    exp_reg_targets = [[[0, 0, -0.5, -0.5],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 0, 0, 0],
+                        [0, 0.01231521, 0, 0],
+                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1, 1, 1],
+                       [1, 1, 1, 1]]
+    exp_cls_targets = [[[1], [0], [0], [0]],
+                       [[0], [1], [1], [0]]]
+    exp_reg_weights = [[1, 0, 0, 0],
+                       [0, 1, 1, 0]]
+
+    self._assert_batch_assignment(
+        self._get_agnostic_target_assigner(),
+        self._get_four_priors(),
+        self._get_two_image_groundtruth(),
+        [None, None],
+        exp_cls_targets, exp_cls_weights, exp_reg_targets, exp_reg_weights,
+        exp_matches=[[0], [1, 2]])
+
+  def test_batch_assign_multiclass_targets(self):
+    """Batches assignment with one-hot class targets over two images."""
+    class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
+    class_targets2 = tf.constant([[0, 0, 0, 1],
+                                  [0, 0, 1, 0]], tf.float32)
+
+    exp_reg_targets = [[[0, 0, -0.5, -0.5],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 0, 0, 0],
+                        [0, 0.01231521, 0, 0],
+                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1, 1, 1],
+                       [1, 1, 1, 1]]
+    exp_cls_targets = [[[0, 1, 0, 0],
+                        [1, 0, 0, 0],
+                        [1, 0, 0, 0],
+                        [1, 0, 0, 0]],
+                       [[1, 0, 0, 0],
+                        [0, 0, 0, 1],
+                        [0, 0, 1, 0],
+                        [1, 0, 0, 0]]]
+    exp_reg_weights = [[1, 0, 0, 0],
+                       [0, 1, 1, 0]]
+
+    self._assert_batch_assignment(
+        self._get_multi_class_target_assigner(num_classes=3),
+        self._get_four_priors(),
+        self._get_two_image_groundtruth(),
+        [class_targets1, class_targets2],
+        exp_cls_targets, exp_cls_weights, exp_reg_targets, exp_reg_weights,
+        exp_matches=[[0], [1, 2]])
+
+  def test_batch_assign_multidimensional_targets(self):
+    """Batches assignment with [2, 3] class targets over two images."""
+    class_targets1 = tf.constant([[[0, 1, 1],
+                                   [1, 1, 0]]], tf.float32)
+    class_targets2 = tf.constant([[[0, 1, 1],
+                                   [1, 1, 0]],
+                                  [[0, 0, 1],
+                                   [0, 0, 1]]], tf.float32)
+
+    exp_reg_targets = [[[0, 0, -0.5, -0.5],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 0, 0, 0],
+                        [0, 0.01231521, 0, 0],
+                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1, 1, 1],
+                       [1, 1, 1, 1]]
+
+    exp_cls_targets = [[[[0., 1., 1.],
+                         [1., 1., 0.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]]],
+                       [[[0., 0., 0.],
+                         [0., 0., 0.]],
+                        [[0., 1., 1.],
+                         [1., 1., 0.]],
+                        [[0., 0., 1.],
+                         [0., 0., 1.]],
+                        [[0., 0., 0.],
+                         [0., 0., 0.]]]]
+    exp_reg_weights = [[1, 0, 0, 0],
+                       [0, 1, 1, 0]]
+
+    self._assert_batch_assignment(
+        self._get_multi_dimensional_target_assigner(target_dimensions=(2, 3)),
+        self._get_four_priors(),
+        self._get_two_image_groundtruth(),
+        [class_targets1, class_targets2],
+        exp_cls_targets, exp_cls_weights, exp_reg_targets, exp_reg_weights,
+        exp_matches=[[0], [1, 2]])
+
+  def test_batch_assign_empty_groundtruth(self):
+    """Batches assignment for a single image with zero groundtruth boxes."""
+    # Slice zero rows out of a one-row tensor to obtain an empty [0, 4] box
+    # tensor.
+    box_coords_expanded = tf.zeros((1, 4), tf.float32)
+    box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
+    gt_box_batch = [box_list.BoxList(box_coords)]
+
+    priors = self._make_priors([[0, 0, .25, .25],
+                                [0, .25, 1, 1]])
+
+    exp_reg_targets = [[[0, 0, 0, 0],
+                        [0, 0, 0, 0]]]
+    exp_cls_weights = [[1, 1]]
+    exp_cls_targets = [[[1, 0, 0, 0],
+                        [1, 0, 0, 0]]]
+    exp_reg_weights = [[0, 0]]
+
+    num_classes = 3
+    pad = 1
+    gt_class_targets_batch = [tf.zeros((0, num_classes + pad))]
+
+    # Use the same num_classes as the class targets so the two cannot drift
+    # apart.
+    self._assert_batch_assignment(
+        self._get_multi_class_target_assigner(num_classes=num_classes),
+        priors,
+        gt_box_batch,
+        gt_class_targets_batch,
+        exp_cls_targets, exp_cls_weights, exp_reg_targets, exp_reg_weights,
+        exp_matches=[[]])
+
+
+class CreateTargetAssignerTest(tf.test.TestCase):
+
+  def test_create_target_assigner(self):
+    """Checks that create_target_assigner yields assigners that can assign.
+
+    TODO: Make this test more general.
+    """
+    unit_box = [[0.0, 0.0, 1.0, 1.0]]
+    groundtruth = box_list.BoxList(tf.constant(unit_box))
+
+    # The Multibox assigner is exercised with priors carrying a 'stddev'
+    # field.
+    priors = box_list.BoxList(tf.constant(unit_box))
+    priors.add_field('stddev', tf.constant([[1.0, 1.0, 1.0, 1.0]]))
+    multibox_ta = targetassigner.create_target_assigner(
+        'Multibox', stage='proposal')
+    multibox_ta.assign(priors, groundtruth)
+    # Outputs are intentionally not checked: they may vary arbitrarily as new
+    # target assigners are added. Constructing the assigner and calling
+    # assign without errors is enough here; correctness of the assignments
+    # is covered by the tests on the individual assigners.
+
+    anchors = box_list.BoxList(tf.constant(unit_box))
+    anchor_based_configs = [
+        ('FasterRCNN', {'stage': 'proposal'}),
+        ('FastRCNN', {}),
+        ('FasterRCNN', {'stage': 'detection'}),
+    ]
+    for detector_name, stage_kwargs in anchor_based_configs:
+      assigner = targetassigner.create_target_assigner(
+          detector_name, **stage_kwargs)
+      assigner.assign(anchors, groundtruth)
+
+    with self.assertRaises(ValueError):
+      targetassigner.create_target_assigner('InvalidDetector',
+                                            stage='invalid_stage')
+
+
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/create_pascal_tf_record.py
+++ b/object_detection/create_pascal_tf_record.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert raw PASCAL dataset to TFRecord for object_detection.
+
+Example usage:
+    ./create_pascal_tf_record --data_dir=/home/user/VOCdevkit \
+        --year=VOC2012 \
+        --output_path=/home/user/pascal.record
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import io
+import logging
+import os
+
+from lxml import etree
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.utils import dataset_util
+from object_detection.utils import label_map_util
+
+
+# Command-line flags selecting which PASCAL VOC data is converted and where
+# the resulting TFRecord is written.
+flags = tf.app.flags
+flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
+flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
+                    'merged set.')
+flags.DEFINE_string('annotations_dir', 'Annotations',
+                    '(Relative) path to annotations directory.')
+flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
+flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
+flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
+                    'Path to label map proto')
+flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
+                     'difficult instances')
+FLAGS = flags.FLAGS
+
+# Accepted values for --set and --year; main() raises ValueError for anything
+# else. A year of 'merged' makes main() read both VOC2007 and VOC2012.
+SETS = ['train', 'val', 'trainval', 'test']
+YEARS = ['VOC2007', 'VOC2012', 'merged']
+
+
+def dict_to_tf_example(data,
+                       dataset_directory,
+                       label_map_dict,
+                       ignore_difficult_instances=False,
+                       image_subdirectory='JPEGImages'):
+  """Convert XML derived dict to tf.Example proto.
+
+  Notice that this function normalizes the bounding box coordinates provided
+  by the raw data.
+
+  Args:
+    data: dict holding PASCAL XML fields for a single image (obtained by
+      running dataset_util.recursive_parse_xml_to_dict)
+    dataset_directory: Path to root directory holding PASCAL dataset
+    label_map_dict: A map from string label names to integers ids.
+    ignore_difficult_instances: Whether to skip difficult instances in the
+      dataset  (default: False).
+    image_subdirectory: String specifying subdirectory within the
+      PASCAL dataset directory holding the actual image data.
+
+  Returns:
+    example: The converted tf.Example.
+
+  Raises:
+    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+  """
+  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
+  full_path = os.path.join(dataset_directory, img_path)
+  with tf.gfile.GFile(full_path, 'rb') as fid:
+    encoded_jpg = fid.read()
+  encoded_jpg_io = io.BytesIO(encoded_jpg)
+  image = PIL.Image.open(encoded_jpg_io)
+  if image.format != 'JPEG':
+    raise ValueError('Image format not JPEG')
+  key = hashlib.sha256(encoded_jpg).hexdigest()
+
+  width = int(data['size']['width'])
+  height = int(data['size']['height'])
+
+  xmin = []
+  ymin = []
+  xmax = []
+  ymax = []
+  classes = []
+  classes_text = []
+  truncated = []
+  poses = []
+  difficult_obj = []
+  for obj in data['object']:
+    difficult = bool(int(obj['difficult']))
+    if ignore_difficult_instances and difficult:
+      continue
+
+    difficult_obj.append(int(difficult))
+
+    xmin.append(float(obj['bndbox']['xmin']) / width)
+    ymin.append(float(obj['bndbox']['ymin']) / height)
+    xmax.append(float(obj['bndbox']['xmax']) / width)
+    ymax.append(float(obj['bndbox']['ymax']) / height)
+    classes_text.append(obj['name'].encode('utf8'))
+    classes.append(label_map_dict[obj['name']])
+    truncated.append(int(obj['truncated']))
+    poses.append(obj['pose'].encode('utf8'))
+
+  example = tf.train.Example(features=tf.train.Features(feature={
+      'image/height': dataset_util.int64_feature(height),
+      'image/width': dataset_util.int64_feature(width),
+      'image/filename': dataset_util.bytes_feature(
+          data['filename'].encode('utf8')),
+      'image/source_id': dataset_util.bytes_feature(
+          data['filename'].encode('utf8')),
+      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
+      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
+      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
+      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
+      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
+      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
+      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
+      'image/object/class/label': dataset_util.int64_list_feature(classes),
+      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
+      'image/object/truncated': dataset_util.int64_list_feature(truncated),
+      'image/object/view': dataset_util.bytes_list_feature(poses),
+  }))
+  return example
+
+
+def main(_):
+  if FLAGS.set not in SETS:
+    raise ValueError('set must be in : {}'.format(SETS))
+  if FLAGS.year not in YEARS:
+    raise ValueError('year must be in : {}'.format(YEARS))
+
+  data_dir = FLAGS.data_dir
+  years = ['VOC2007', 'VOC2012']
+  if FLAGS.year != 'merged':
+    years = [FLAGS.year]
+
+  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
+
+  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
+
+  for year in years:
+    logging.info('Reading from PASCAL %s dataset.', year)
+    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
+                                 'aeroplane_' + FLAGS.set + '.txt')
+    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
+    examples_list = dataset_util.read_examples_list(examples_path)
+    for idx, example in enumerate(examples_list):
+      if idx % 100 == 0:
+        logging.info('On image %d of %d', idx, len(examples_list))
+      path = os.path.join(annotations_dir, example + '.xml')
+      with tf.gfile.GFile(path, 'r') as fid:
+        xml_str = fid.read()
+      xml = etree.fromstring(xml_str)
+      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
+
+      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
+                                      FLAGS.ignore_difficult_instances)
+      writer.write(tf_example.SerializeToString())
+
+  writer.close()
+
+
+# Script entry point: tf.app.run() invokes main() defined above.
+if __name__ == '__main__':
+  tf.app.run()
--- a/object_detection/create_pascal_tf_record_test.py
+++ b/object_detection/create_pascal_tf_record_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Test for create_pascal_tf_record.py."""
+
+import os
+
+import numpy as np
+import PIL.Image
+import tensorflow as tf
+
+from object_detection import create_pascal_tf_record
+
+
+class DictToTFExampleTest(tf.test.TestCase):
+
+  def _assertProtoEqual(self, proto_field, expectation):
+    """Helper function to assert if a proto field equals some value.
+
+    Args:
+      proto_field: The protobuf field to compare.
+      expectation: The expected value of the protobuf field.
+    """
+    proto_list = [p for p in proto_field]
+    self.assertListEqual(proto_list, expectation)
+
+  def test_dict_to_tf_example(self):
+    image_file_name = 'tmp_image.jpg'
+    image_data = np.random.rand(256, 256, 3)
+    save_path = os.path.join(self.get_temp_dir(), image_file_name)
+    image = PIL.Image.fromarray(image_data, 'RGB')
+    image.save(save_path)
+
+    data = {
+        'folder': '',
+        'filename': image_file_name,
+        'size': {
+            'height': 256,
+            'width': 256,
+        },
+        'object': [
+            {
+                'difficult': 1,
+                'bndbox': {
+                    'xmin': 64,
+                    'ymin': 64,
+                    'xmax': 192,
+                    'ymax': 192,
+                },
+                'name': 'person',
+                'truncated': 0,
+                'pose': '',
+            },
+        ],
+    }
+
+    label_map_dict = {
+        'background': 0,
+        'person': 1,
+        'notperson': 2,
+    }
+
+    example = create_pascal_tf_record.dict_to_tf_example(
+        data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
+    self._assertProtoEqual(
+        example.features.feature['image/height'].int64_list.value, [256])
+    self._assertProtoEqual(
+        example.features.feature['image/width'].int64_list.value, [256])
+    self._assertProtoEqual(
+        example.features.feature['image/filename'].bytes_list.value,
+        [image_file_name])
+    self._assertProtoEqual(
+        example.features.feature['image/source_id'].bytes_list.value,
+        [image_file_name])
+    self._assertProtoEqual(
+        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/xmin'].float_list.value,
+        [0.25])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/ymin'].float_list.value,
+        [0.25])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/xmax'].float_list.value,
+        [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/ymax'].float_list.value,
+        [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/text'].bytes_list.value,
+        ['person'])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/label'].int64_list.value,
+        [1])
+    self._assertProtoEqual(
+        example.features.feature['image/object/difficult'].int64_list.value,
+        [1])
+    self._assertProtoEqual(
+        example.features.feature['image/object/truncated'].int64_list.value,
+        [0])
+    self._assertProtoEqual(
+        example.features.feature['image/object/view'].bytes_list.value, [''])
+
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/create_pet_tf_record.py
+++ b/object_detection/create_pet_tf_record.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert the Oxford pet dataset to TFRecord for object_detection.
+
+See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
+     Cats and Dogs
+     IEEE Conference on Computer Vision and Pattern Recognition, 2012
+     http://www.robots.ox.ac.uk/~vgg/data/pets/
+
+Example usage:
+    ./create_pet_tf_record --data_dir=/home/user/pet \
+        --output_dir=/home/user/pet/output
+"""
+
+import hashlib
+import io
+import logging
+import os
+import random
+import re
+
+from lxml import etree
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.utils import dataset_util
+from object_detection.utils import label_map_util
+
+# Command-line flags: where the raw Pet dataset lives, where the TFRecords are
+# written, and which label map translates class names to integer ids.
+flags = tf.app.flags
+flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
+flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
+flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
+                    'Path to label map proto')
+FLAGS = flags.FLAGS
+
+
+def get_class_name_from_filename(file_name):
+  """Gets the class name from a file.
+
+  Args:
+    file_name: The file name to get the class name from.
+               ie. "american_pit_bull_terrier_105.jpg"
+
+  Returns:
+    example: The converted tf.Example.
+  """
+  match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
+  return match.groups()[0]
+
+
+def dict_to_tf_example(data,
+                       label_map_dict,
+                       image_subdirectory,
+                       ignore_difficult_instances=False):
+  """Convert XML derived dict to tf.Example proto.
+
+  Notice that this function normalizes the bounding box coordinates provided
+  by the raw data.
+
+  Args:
+    data: dict holding PASCAL XML fields for a single image (obtained by
+      running dataset_util.recursive_parse_xml_to_dict)
+    label_map_dict: A map from string label names to integers ids.
+    image_subdirectory: String specifying subdirectory within the
+      Pascal dataset directory holding the actual image data.
+    ignore_difficult_instances: Whether to skip difficult instances in the
+      dataset  (default: False).
+
+  Returns:
+    example: The converted tf.Example.
+
+  Raises:
+    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+  """
+  img_path = os.path.join(image_subdirectory, data['filename'])
+  with tf.gfile.GFile(img_path, 'rb') as fid:
+    encoded_jpg = fid.read()
+  encoded_jpg_io = io.BytesIO(encoded_jpg)
+  image = PIL.Image.open(encoded_jpg_io)
+  if image.format != 'JPEG':
+    raise ValueError('Image format not JPEG')
+  key = hashlib.sha256(encoded_jpg).hexdigest()
+
+  width = int(data['size']['width'])
+  height = int(data['size']['height'])
+
+  xmin = []
+  ymin = []
+  xmax = []
+  ymax = []
+  classes = []
+  classes_text = []
+  truncated = []
+  poses = []
+  difficult_obj = []
+  for obj in data['object']:
+    difficult = bool(int(obj['difficult']))
+    if ignore_difficult_instances and difficult:
+      continue
+
+    difficult_obj.append(int(difficult))
+
+    xmin.append(float(obj['bndbox']['xmin']) / width)
+    ymin.append(float(obj['bndbox']['ymin']) / height)
+    xmax.append(float(obj['bndbox']['xmax']) / width)
+    ymax.append(float(obj['bndbox']['ymax']) / height)
+    class_name = get_class_name_from_filename(data['filename'])
+    classes_text.append(class_name.encode('utf8'))
+    classes.append(label_map_dict[class_name])
+    truncated.append(int(obj['truncated']))
+    poses.append(obj['pose'].encode('utf8'))
+
+  example = tf.train.Example(features=tf.train.Features(feature={
+      'image/height': dataset_util.int64_feature(height),
+      'image/width': dataset_util.int64_feature(width),
+      'image/filename': dataset_util.bytes_feature(
+          data['filename'].encode('utf8')),
+      'image/source_id': dataset_util.bytes_feature(
+          data['filename'].encode('utf8')),
+      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
+      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
+      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
+      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
+      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
+      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
+      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
+      'image/object/class/label': dataset_util.int64_list_feature(classes),
+      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
+      'image/object/truncated': dataset_util.int64_list_feature(truncated),
+      'image/object/view': dataset_util.bytes_list_feature(poses),
+  }))
+  return example
+
+
+def create_tf_record(output_filename,
+                     label_map_dict,
+                     annotations_dir,
+                     image_dir,
+                     examples):
+  """Creates a TFRecord file from examples.
+
+  Args:
+    output_filename: Path to where output file is saved.
+    label_map_dict: The label map dictionary.
+    annotations_dir: Directory where annotation files are stored.
+    image_dir: Directory where image files are stored.
+    examples: Examples to parse and save to tf record.
+  """
+  writer = tf.python_io.TFRecordWriter(output_filename)
+  for idx, example in enumerate(examples):
+    if idx % 100 == 0:
+      logging.info('On image %d of %d', idx, len(examples))
+    path = os.path.join(annotations_dir, 'xmls', example + '.xml')
+
+    if not os.path.exists(path):
+      logging.warning('Could not find %s, ignoring example.', path)
+      continue
+    with tf.gfile.GFile(path, 'r') as fid:
+      xml_str = fid.read()
+    xml = etree.fromstring(xml_str)
+    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
+
+    tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
+    writer.write(tf_example.SerializeToString())
+
+  writer.close()
+
+
+# TODO: Add test for pet/PASCAL main files.
+def main(_):
+  data_dir = FLAGS.data_dir
+  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
+
+  logging.info('Reading from Pet dataset.')
+  image_dir = os.path.join(data_dir, 'images')
+  annotations_dir = os.path.join(data_dir, 'annotations')
+  examples_path = os.path.join(annotations_dir, 'trainval.txt')
+  examples_list = dataset_util.read_examples_list(examples_path)
+
+  # Test images are not included in the downloaded data set, so we shall perform
+  # our own split.
+  random.seed(42)
+  random.shuffle(examples_list)
+  num_examples = len(examples_list)
+  num_train = int(0.7 * num_examples)
+  train_examples = examples_list[:num_train]
+  val_examples = examples_list[num_train:]
+  logging.info('%d training and %d validation examples.',
+               len(train_examples), len(val_examples))
+
+  train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
+  val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
+  create_tf_record(train_output_path, label_map_dict, annotations_dir,
+                   image_dir, train_examples)
+  create_tf_record(val_output_path, label_map_dict, annotations_dir,
+                   image_dir, val_examples)
+
+# Script entry point: tf.app.run() invokes main() defined above.
+if __name__ == '__main__':
+  tf.app.run()
--- a/object_detection/data/mscoco_label_map.pbtxt
+++ b/object_detection/data/mscoco_label_map.pbtxt
+item {
+  name: "/m/01g317"
+  id: 1
+  display_name: "person"
+}
+item {
+  name: "/m/0199g"
+  id: 2
+  display_name: "bicycle"
+}
+item {
+  name: "/m/0k4j"
+  id: 3
+  display_name: "car"
+}
+item {
+  name: "/m/04_sv"
+  id: 4
+  display_name: "motorcycle"
+}
+item {
+  name: "/m/05czz6l"
+  id: 5
+  display_name: "airplane"
+}
+item {
+  name: "/m/01bjv"
+  id: 6
+  display_name: "bus"
+}
+item {
+  name: "/m/07jdr"
+  id: 7
+  display_name: "train"
+}
+item {
+  name: "/m/07r04"
+  id: 8
+  display_name: "truck"
+}
+item {
+  name: "/m/019jd"
+  id: 9
+  display_name: "boat"
+}
+item {
+  name: "/m/015qff"
+  id: 10
+  display_name: "traffic light"
+}
+item {
+  name: "/m/01pns0"
+  id: 11
+  display_name: "fire hydrant"
+}
+item {
+  name: "/m/02pv19"
+  id: 13
+  display_name: "stop sign"
+}
+item {
+  name: "/m/015qbp"
+  id: 14
+  display_name: "parking meter"
+}
+item {
+  name: "/m/0cvnqh"
+  id: 15
+  display_name: "bench"
+}
+item {
+  name: "/m/015p6"
+  id: 16
+  display_name: "bird"
+}
+item {
+  name: "/m/01yrx"
+  id: 17
+  display_name: "cat"
+}
+item {
+  name: "/m/0bt9lr"
+  id: 18
+  display_name: "dog"
+}
+item {
+  name: "/m/03k3r"
+  id: 19
+  display_name: "horse"
+}
+item {
+  name: "/m/07bgp"
+  id: 20
+  display_name: "sheep"
+}
+item {
+  name: "/m/01xq0k1"
+  id: 21
+  display_name: "cow"
+}
+item {
+  name: "/m/0bwd_0j"
+  id: 22
+  display_name: "elephant"
+}
+item {
+  name: "/m/01dws"
+  id: 23
+  display_name: "bear"
+}
+item {
+  name: "/m/0898b"
+  id: 24
+  display_name: "zebra"
+}
+item {
+  name: "/m/03bk1"
+  id: 25
+  display_name: "giraffe"
+}
+item {
+  name: "/m/01940j"
+  id: 27
+  display_name: "backpack"
+}
+item {
+  name: "/m/0hnnb"
+  id: 28
+  display_name: "umbrella"
+}
+item {
+  name: "/m/080hkjn"
+  id: 31
+  display_name: "handbag"
+}
+item {
+  name: "/m/01rkbr"
+  id: 32
+  display_name: "tie"
+}
+item {
+  name: "/m/01s55n"
+  id: 33
+  display_name: "suitcase"
+}
+item {
+  name: "/m/02wmf"
+  id: 34
+  display_name: "frisbee"
+}
+item {
+  name: "/m/071p9"
+  id: 35
+  display_name: "skis"
+}
+item {
+  name: "/m/06__v"
+  id: 36
+  display_name: "snowboard"
+}
+item {
+  name: "/m/018xm"
+  id: 37
+  display_name: "sports ball"
+}
+item {
+  name: "/m/02zt3"
+  id: 38
+  display_name: "kite"
+}
+item {
+  name: "/m/03g8mr"
+  id: 39
+  display_name: "baseball bat"
+}
+item {
+  name: "/m/03grzl"
+  id: 40
+  display_name: "baseball glove"
+}
+item {
+  name: "/m/06_fw"
+  id: 41
+  display_name: "skateboard"
+}
+item {
+  name: "/m/019w40"
+  id: 42
+  display_name: "surfboard"
+}
+item {
+  name: "/m/0dv9c"
+  id: 43
+  display_name: "tennis racket"
+}
+item {
+  name: "/m/04dr76w"
+  id: 44
+  display_name: "bottle"
+}
+item {
+  name: "/m/09tvcd"
+  id: 46
+  display_name: "wine glass"
+}
+item {
+  name: "/m/08gqpm"
+  id: 47
+  display_name: "cup"
+}
+item {
+  name: "/m/0dt3t"
+  id: 48
+  display_name: "fork"
+}
+item {
+  name: "/m/04ctx"
+  id: 49
+  display_name: "knife"
+}
+item {
+  name: "/m/0cmx8"
+  id: 50
+  display_name: "spoon"
+}
+item {
+  name: "/m/04kkgm"
+  id: 51
+  display_name: "bowl"
+}
+item {
+  name: "/m/09qck"
+  id: 52
+  display_name: "banana"
+}
+item {
+  name: "/m/014j1m"
+  id: 53
+  display_name: "apple"
+}
+item {
+  name: "/m/0l515"
+  id: 54
+  display_name: "sandwich"
+}
+item {
+  name: "/m/0cyhj_"
+  id: 55
+  display_name: "orange"
+}
+item {
+  name: "/m/0hkxq"
+  id: 56
+  display_name: "broccoli"
+}
+item {
+  name: "/m/0fj52s"
+  id: 57
+  display_name: "carrot"
+}
+item {
+  name: "/m/01b9xk"
+  id: 58
+  display_name: "hot dog"
+}
+item {
+  name: "/m/0663v"
+  id: 59
+  display_name: "pizza"
+}
+item {
+  name: "/m/0jy4k"
+  id: 60
+  display_name: "donut"
+}
+item {
+  name: "/m/0fszt"
+  id: 61
+  display_name: "cake"
+}
+item {
+  name: "/m/01mzpv"
+  id: 62
+  display_name: "chair"
+}
+item {
+  name: "/m/02crq1"
+  id: 63
+  display_name: "couch"
+}
+item {
+  name: "/m/03fp41"
+  id: 64
+  display_name: "potted plant"
+}
+item {
+  name: "/m/03ssj5"
+  id: 65
+  display_name: "bed"
+}
+item {
+  name: "/m/04bcr3"
+  id: 67
+  display_name: "dining table"
+}
+item {
+  name: "/m/09g1w"
+  id: 70
+  display_name: "toilet"
+}
+item {
+  name: "/m/07c52"
+  id: 72
+  display_name: "tv"
+}
+item {
+  name: "/m/01c648"
+  id: 73
+  display_name: "laptop"
+}
+item {
+  name: "/m/020lf"
+  id: 74
+  display_name: "mouse"
+}
+item {
+  name: "/m/0qjjc"
+  id: 75
+  display_name: "remote"
+}
+item {
+  name: "/m/01m2v"
+  id: 76
+  display_name: "keyboard"
+}
+item {
+  name: "/m/050k8"
+  id: 77
+  display_name: "cell phone"
+}
+item {
+  name: "/m/0fx9l"
+  id: 78
+  display_name: "microwave"
+}
+item {
+  name: "/m/029bxz"
+  id: 79
+  display_name: "oven"
+}
+item {
+  name: "/m/01k6s3"
+  id: 80
+  display_name: "toaster"
+}
+item {
+  name: "/m/0130jx"
+  id: 81
+  display_name: "sink"
+}
+item {
+  name: "/m/040b_t"
+  id: 82
+  display_name: "refrigerator"
+}
+item {
+  name: "/m/0bt_c3"
+  id: 84
+  display_name: "book"
+}
+item {
+  name: "/m/01x3z"
+  id: 85
+  display_name: "clock"
+}
+item {
+  name: "/m/02s195"
+  id: 86
+  display_name: "vase"
+}
+item {
+  name: "/m/01lsmm"
+  id: 87
+  display_name: "scissors"
+}
+item {
+  name: "/m/0kmg4"
+  id: 88
+  display_name: "teddy bear"
+}
+item {
+  name: "/m/03wvsk"
+  id: 89
+  display_name: "hair drier"
+}
+item {
+  name: "/m/012xff"
+  id: 90
+  display_name: "toothbrush"
+}
--- a/object_detection/data/pascal_label_map.pbtxt
+++ b/object_detection/data/pascal_label_map.pbtxt
+item {
+  id: 1
+  name: 'aeroplane'
+}
+
+item {
+  id: 2
+  name: 'bicycle'
+}
+
+item {
+  id: 3
+  name: 'bird'
+}
+
+item {
+  id: 4
+  name: 'boat'
+}
+
+item {
+  id: 5
+  name: 'bottle'
+}
+
+item {
+  id: 6
+  name: 'bus'
+}
+
+item {
+  id: 7
+  name: 'car'
+}
+
+item {
+  id: 8
+  name: 'cat'
+}
+
+item {
+  id: 9
+  name: 'chair'
+}
+
+item {
+  id: 10
+  name: 'cow'
+}
+
+item {
+  id: 11
+  name: 'diningtable'
+}
+
+item {
+  id: 12
+  name: 'dog'
+}
+
+item {
+  id: 13
+  name: 'horse'
+}
+
+item {
+  id: 14
+  name: 'motorbike'
+}
+
+item {
+  id: 15
+  name: 'person'
+}
+
+item {
+  id: 16
+  name: 'pottedplant'
+}
+
+item {
+  id: 17
+  name: 'sheep'
+}
+
+item {
+  id: 18
+  name: 'sofa'
+}
+
+item {
+  id: 19
+  name: 'train'
+}
+
+item {
+  id: 20
+  name: 'tvmonitor'
+}
--- a/object_detection/data/pet_label_map.pbtxt
+++ b/object_detection/data/pet_label_map.pbtxt
+item {
+  id: 1
+  name: 'Abyssinian'
+}
+
+item {
+  id: 2
+  name: 'american_bulldog'
+}
+
+item {
+  id: 3
+  name: 'american_pit_bull_terrier'
+}
+
+item {
+  id: 4
+  name: 'basset_hound'
+}
+
+item {
+  id: 5
+  name: 'beagle'
+}
+
+item {
+  id: 6
+  name: 'Bengal'
+}
+
+item {
+  id: 7
+  name: 'Birman'
+}
+
+item {
+  id: 8
+  name: 'Bombay'
+}
+
+item {
+  id: 9
+  name: 'boxer'
+}
+
+item {
+  id: 10
+  name: 'British_Shorthair'
+}
+
+item {
+  id: 11
+  name: 'chihuahua'
+}
+
+item {
+  id: 12
+  name: 'Egyptian_Mau'
+}
+
+item {
+  id: 13
+  name: 'english_cocker_spaniel'
+}
+
+item {
+  id: 14
+  name: 'english_setter'
+}
+
+item {
+  id: 15
+  name: 'german_shorthaired'
+}
+
+item {
+  id: 16
+  name: 'great_pyrenees'
+}
+
+item {
+  id: 17
+  name: 'havanese'
+}
+
+item {
+  id: 18
+  name: 'japanese_chin'
+}
+
+item {
+  id: 19
+  name: 'keeshond'
+}
+
+item {
+  id: 20
+  name: 'leonberger'
+}
+
+item {
+  id: 21
+  name: 'Maine_Coon'
+}
+
+item {
+  id: 22
+  name: 'miniature_pinscher'
+}
+
+item {
+  id: 23
+  name: 'newfoundland'
+}
+
+item {
+  id: 24
+  name: 'Persian'
+}
+
+item {
+  id: 25
+  name: 'pomeranian'
+}
+
+item {
+  id: 26
+  name: 'pug'
+}
+
+item {
+  id: 27
+  name: 'Ragdoll'
+}
+
+item {
+  id: 28
+  name: 'Russian_Blue'
+}
+
+item {
+  id: 29
+  name: 'saint_bernard'
+}
+
+item {
+  id: 30
+  name: 'samoyed'
+}
+
+item {
+  id: 31
+  name: 'scottish_terrier'
+}
+
+item {
+  id: 32
+  name: 'shiba_inu'
+}
+
+item {
+  id: 33
+  name: 'Siamese'
+}
+
+item {
+  id: 34
+  name: 'Sphynx'
+}
+
+item {
+  id: 35
+  name: 'staffordshire_bull_terrier'
+}
+
+item {
+  id: 36
+  name: 'wheaten_terrier'
+}
+
+item {
+  id: 37
+  name: 'yorkshire_terrier'
+}
--- a/object_detection/data_decoders/BUILD
+++ b/object_detection/data_decoders/BUILD
+# Tensorflow Object Detection API: data decoders.
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])
+# Apache 2.0
+
+# Library target wrapping tf_example_decoder.py.
+py_library(
+    name = "tf_example_decoder",
+    srcs = ["tf_example_decoder.py"],
+    deps = [
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:data_decoder",
+        "//tensorflow_models/object_detection/core:standard_fields",
+    ],
+)
+
+# Unit test target for the :tf_example_decoder library.
+py_test(
+    name = "tf_example_decoder_test",
+    srcs = ["tf_example_decoder_test.py"],
+    deps = [
+        ":tf_example_decoder",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:standard_fields",
+    ],
+)
--- a/object_detection/data_decoders/__init__.py
+++ b/object_detection/data_decoders/__init__.py
--- a/object_detection/data_decoders/tf_example_decoder.py
+++ b/object_detection/data_decoders/tf_example_decoder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tensorflow Example proto decoder for object detection.
+
+A decoder to decode string tensors containing serialized tensorflow.Example
+protos for object detection.
+"""
+import tensorflow as tf
+
+from object_detection.core import data_decoder
+from object_detection.core import standard_fields as fields
+
+slim_example_decoder = tf.contrib.slim.tfexample_decoder
+
+
+class TfExampleDecoder(data_decoder.DataDecoder):
+  """Tensorflow Example proto decoder."""
+
+  def __init__(self):
+    """Constructor sets keys_to_features and items_to_handlers."""
+    self.keys_to_features = {
+        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
+        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/height': tf.FixedLenFeature((), tf.int64, 1),
+        'image/width': tf.FixedLenFeature((), tf.int64, 1),
+        # Object boxes and classes.
+        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
+        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
+        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
+        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
+        'image/object/class/label': tf.VarLenFeature(tf.int64),
+        'image/object/area': tf.VarLenFeature(tf.float32),
+        'image/object/is_crowd': tf.VarLenFeature(tf.int64),
+        'image/object/difficult': tf.VarLenFeature(tf.int64),
+        # Instance masks and classes.
+        'image/segmentation/object': tf.VarLenFeature(tf.int64),
+        'image/segmentation/object/class': tf.VarLenFeature(tf.int64)
+    }
+    self.items_to_handlers = {
+        fields.InputDataFields.image: slim_example_decoder.Image(
+            image_key='image/encoded', format_key='image/format', channels=3),
+        fields.InputDataFields.source_id: (
+            slim_example_decoder.Tensor('image/source_id')),
+        fields.InputDataFields.key: (
+            slim_example_decoder.Tensor('image/key/sha256')),
+        fields.InputDataFields.filename: (
+            slim_example_decoder.Tensor('image/filename')),
+        # Object boxes and classes.
+        fields.InputDataFields.groundtruth_boxes: (
+            slim_example_decoder.BoundingBox(
+                ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
+        fields.InputDataFields.groundtruth_classes: (
+            slim_example_decoder.Tensor('image/object/class/label')),
+        fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor(
+            'image/object/area'),
+        fields.InputDataFields.groundtruth_is_crowd: (
+            slim_example_decoder.Tensor('image/object/is_crowd')),
+        fields.InputDataFields.groundtruth_difficult: (
+            slim_example_decoder.Tensor('image/object/difficult')),
+        # Instance masks and classes.
+        fields.InputDataFields.groundtruth_instance_masks: (
+            slim_example_decoder.ItemHandlerCallback(
+                ['image/segmentation/object', 'image/height', 'image/width'],
+                self._reshape_instance_masks)),
+        fields.InputDataFields.groundtruth_instance_classes: (
+            slim_example_decoder.Tensor('image/segmentation/object/class')),
+    }
+
+  def decode(self, tf_example_string_tensor):
+    """Decodes serialized tensorflow example and returns a tensor dictionary.
+
+    Args:
+      tf_example_string_tensor: a string tensor holding a serialized tensorflow
+        example proto.
+
+    Returns:
+      A dictionary of the following tensors.
+      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
+        containing image.
+      fields.InputDataFields.source_id - string tensor containing original
+        image id.
+      fields.InputDataFields.key - string tensor with unique sha256 hash key.
+      fields.InputDataFields.filename - string tensor with original dataset
+        filename.
+      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
+        [None, 4] containing box corners.
+      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
+        [None] containing classes for the boxes.
+      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
+        [None] containing containing object mask area in pixel squared.
+      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
+        [None] indicating if the boxes enclose a crowd.
+      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
+        [None] indicating if the boxes represent `difficult` instances.
+      fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of
+        shape [None, None, None] containing instance masks.
+      fields.InputDataFields.groundtruth_instance_classes - 1D int64 tensor
+        of shape [None] containing classes for the instance masks.
+    """
+
+    serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
+    decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
+                                                    self.items_to_handlers)
+    keys = decoder.list_items()
+    tensors = decoder.decode(serialized_example, items=keys)
+    tensor_dict = dict(zip(keys, tensors))
+    is_crowd = fields.InputDataFields.groundtruth_is_crowd
+    tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
+    tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+    return tensor_dict
+
+  def _reshape_instance_masks(self, keys_to_tensors):
+    """Reshape instance segmentation masks.
+
+    The instance segmentation masks are reshaped to [num_instances, height,
+    width] and cast to boolean type to save memory.
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+
+    Returns:
+      A 3-D boolean tensor of shape [num_instances, height, width].
+    """
+    masks = keys_to_tensors['image/segmentation/object']
+    if isinstance(masks, tf.SparseTensor):
+      masks = tf.sparse_tensor_to_dense(masks)
+    height = keys_to_tensors['image/height']
+    width = keys_to_tensors['image/width']
+    to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
+
+    return tf.cast(tf.reshape(masks, to_shape), tf.bool)
--- a/object_detection/data_decoders/tf_example_decoder_test.py
+++ b/object_detection/data_decoders/tf_example_decoder_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.data_decoders.tf_example_decoder."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import standard_fields as fields
+from object_detection.data_decoders import tf_example_decoder
+
+
+class TfExampleDecoderTest(tf.test.TestCase):
+
+  def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
+    with self.test_session():
+      if encoding_type == 'jpeg':
+        image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
+      elif encoding_type == 'png':
+        image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
+      else:
+        raise ValueError('Invalid encoding type.')
+    return image_encoded
+
+  def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
+    with self.test_session():
+      if encoding_type == 'jpeg':
+        image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
+      elif encoding_type == 'png':
+        image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
+      else:
+        raise ValueError('Invalid encoding type.')
+    return image_decoded
+
+  def _Int64Feature(self, value):
+    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
+
+  def _FloatFeature(self, value):
+    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
+
+  def _BytesFeature(self, value):
+    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+  def testDecodeJpegImage(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    decoded_jpeg = self._DecodeImage(encoded_jpeg)
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/source_id': self._BytesFeature('image_id'),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+                         get_shape().as_list()), [None, None, 3])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
+    self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
+
+  def testDecodeImageKeyAndFilename(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/key/sha256': self._BytesFeature('abc'),
+        'image/filename': self._BytesFeature('filename')
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
+    self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
+
+  def testDecodePngImage(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
+    decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_png),
+        'image/format': self._BytesFeature('png'),
+        'image/source_id': self._BytesFeature('image_id')
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+                         get_shape().as_list()), [None, None, 3])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
+    self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
+
+  def testDecodeBoundingBox(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
+        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
+        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
+        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
+                         get_shape().as_list()), [None, 4])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
+                                bbox_ymaxs, bbox_xmaxs]).transpose()
+    self.assertAllEqual(expected_boxes,
+                        tensor_dict[fields.InputDataFields.groundtruth_boxes])
+
+  def testDecodeObjectLabel(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_classes = [0, 1]
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/object/class/label': self._Int64Feature(bbox_classes),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[
+        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
+                        [None])
+
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual(bbox_classes,
+                        tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+  def testDecodeObjectArea(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    object_area = [100., 174.]
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/object/area': self._FloatFeature(object_area),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
+                         get_shape().as_list()), [None])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual(object_area,
+                        tensor_dict[fields.InputDataFields.groundtruth_area])
+
+  def testDecodeObjectIsCrowd(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    object_is_crowd = [0, 1]
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/object/is_crowd': self._Int64Feature(object_is_crowd),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[
+        fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
+                        [None])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual([bool(item) for item in object_is_crowd],
+                        tensor_dict[
+                            fields.InputDataFields.groundtruth_is_crowd])
+
+  def testDecodeObjectDifficult(self):
+    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    object_difficult = [0, 1]
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/object/difficult': self._Int64Feature(object_difficult),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[
+        fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
+                        [None])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual([bool(item) for item in object_difficult],
+                        tensor_dict[
+                            fields.InputDataFields.groundtruth_difficult])
+
+  def testDecodeInstanceSegmentation(self):
+    num_instances = 4
+    image_height = 5
+    image_width = 3
+
+    # Randomly generate image.
+    image_tensor = np.random.randint(255, size=(image_height,
+                                                image_width,
+                                                3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+
+    # Randomly generate instance segmentation masks.
+    instance_segmentation = (
+        np.random.randint(2, size=(num_instances,
+                                   image_height,
+                                   image_width)).astype(np.int64))
+
+    # Randomly generate class labels for each instance.
+    instance_segmentation_classes = np.random.randint(
+        100, size=(num_instances)).astype(np.int64)
+
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/height': self._Int64Feature([image_height]),
+        'image/width': self._Int64Feature([image_width]),
+        'image/segmentation/object': self._Int64Feature(
+            instance_segmentation.flatten()),
+        'image/segmentation/object/class': self._Int64Feature(
+            instance_segmentation_classes)})).SerializeToString()
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((
+        tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
+        get_shape().as_list()), [None, None, None])
+
+    self.assertAllEqual((
+        tensor_dict[fields.InputDataFields.groundtruth_instance_classes].
+        get_shape().as_list()), [None])
+
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual(
+        instance_segmentation.astype(np.bool),
+        tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
+    self.assertAllEqual(
+        instance_segmentation_classes,
+        tensor_dict[fields.InputDataFields.groundtruth_instance_classes])
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/eval.py
+++ b/object_detection/eval.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Evaluation executable for detection models.
+
+This executable is used to evaluate DetectionModels. There are two ways of
+configuring the eval job.
+
+1) A single pipeline_pb2.TrainEvalPipelineConfig file may be specified.
+In this mode, the --eval_training_data flag may be given to force the pipeline
+to evaluate on training data instead.
+
+Example usage:
+    ./eval \
+        --logtostderr \
+        --checkpoint_dir=path/to/checkpoint_dir \
+        --eval_dir=path/to/eval_dir \
+        --pipeline_config_path=pipeline_config.pbtxt
+
+2) Three configuration files may be provided: a model_pb2.DetectionModel
+configuration file to define what type of DetectionModel is being evaluated, an
+input_reader_pb2.InputReader file to specify what data the model is evaluating
+and an eval_pb2.EvalConfig file to configure evaluation parameters.
+
+Example usage:
+    ./eval \
+        --logtostderr \
+        --checkpoint_dir=path/to/checkpoint_dir \
+        --eval_dir=path/to/eval_dir \
+        --eval_config_path=eval_config.pbtxt \
+        --model_config_path=model_config.pbtxt \
+        --input_config_path=eval_input_config.pbtxt
+"""
+import functools
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection import evaluator
+from object_detection.builders import input_reader_builder
+from object_detection.builders import model_builder
+from object_detection.protos import eval_pb2
+from object_detection.protos import input_reader_pb2
+from object_detection.protos import model_pb2
+from object_detection.protos import pipeline_pb2
+from object_detection.utils import label_map_util
+
+# Log at INFO verbosity.
+tf.logging.set_verbosity(tf.logging.INFO)
+
+# Command-line flags; the functions below read them through FLAGS.
+flags = tf.app.flags
+# Consulted only when the configuration comes from --pipeline_config_path.
+flags.DEFINE_boolean('eval_training_data', False,
+                     'If training data should be evaluated for this job.')
+# checkpoint_dir and eval_dir are mandatory: main() asserts both are non-empty.
+flags.DEFINE_string('checkpoint_dir', '',
+                    'Directory containing checkpoints to evaluate, typically '
+                    'set to `train_dir` used in the training job.')
+flags.DEFINE_string('eval_dir', '',
+                    'Directory to write eval summaries to.')
+# Configuration source: main() uses pipeline_config_path when it is set and
+# otherwise reads the three separate config files named by the flags below.
+flags.DEFINE_string('pipeline_config_path', '',
+                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+                    'file. If provided, other configs are ignored')
+flags.DEFINE_string('eval_config_path', '',
+                    'Path to an eval_pb2.EvalConfig config file.')
+flags.DEFINE_string('input_config_path', '',
+                    'Path to an input_reader_pb2.InputReader config file.')
+flags.DEFINE_string('model_config_path', '',
+                    'Path to a model_pb2.DetectionModel config file.')
+
+FLAGS = flags.FLAGS
+
+
+def get_configs_from_pipeline_file():
+  """Reads evaluation configuration from a pipeline_pb2.TrainEvalPipelineConfig.
+
+  Reads evaluation config from file specified by pipeline_config_path flag.
+
+  Returns:
+    model_config: a model_pb2.DetectionModel
+    eval_config: a eval_pb2.EvalConfig
+    input_config: a input_reader_pb2.InputReader
+  """
+  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
+    text_format.Merge(f.read(), pipeline_config)
+
+  model_config = pipeline_config.model
+  if FLAGS.eval_training_data:
+    eval_config = pipeline_config.train_config
+  else:
+    eval_config = pipeline_config.eval_config
+  input_config = pipeline_config.eval_input_reader
+
+  return model_config, eval_config, input_config
+
+
+def get_configs_from_multiple_files():
+  """Reads evaluation configuration from multiple config files.
+
+  Reads the evaluation config from the following files:
+    model_config: Read from --model_config_path
+    eval_config: Read from --eval_config_path
+    input_config: Read from --input_config_path
+
+  Returns:
+    model_config: a model_pb2.DetectionModel
+    eval_config: a eval_pb2.EvalConfig
+    input_config: a input_reader_pb2.InputReader
+  """
+  eval_config = eval_pb2.EvalConfig()
+  with tf.gfile.GFile(FLAGS.eval_config_path, 'r') as f:
+    text_format.Merge(f.read(), eval_config)
+
+  model_config = model_pb2.DetectionModel()
+  with tf.gfile.GFile(FLAGS.model_config_path, 'r') as f:
+    text_format.Merge(f.read(), model_config)
+
+  input_config = input_reader_pb2.InputReader()
+  with tf.gfile.GFile(FLAGS.input_config_path, 'r') as f:
+    text_format.Merge(f.read(), input_config)
+
+  return model_config, eval_config, input_config
+
+
+def main(unused_argv):
+  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
+  assert FLAGS.eval_dir, '`eval_dir` is missing.'
+  if FLAGS.pipeline_config_path:
+    model_config, eval_config, input_config = get_configs_from_pipeline_file()
+  else:
+    model_config, eval_config, input_config = get_configs_from_multiple_files()
+
+  model_fn = functools.partial(
+      model_builder.build,
+      model_config=model_config,
+      is_training=False)
+
+  create_input_dict_fn = functools.partial(
+      input_reader_builder.build,
+      input_config)
+
+  label_map = label_map_util.load_labelmap(input_config.label_map_path)
+  max_num_classes = max([item.id for item in label_map.item])
+  categories = label_map_util.convert_label_map_to_categories(
+      label_map, max_num_classes)
+
+  evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
+                     FLAGS.checkpoint_dir, FLAGS.eval_dir)
+
+
+# Script entry point: hand control to tf.app.run().
+if __name__ == '__main__':
+  tf.app.run()
--- a/object_detection/eval_util.py
+++ b/object_detection/eval_util.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Common functions for repeatedly evaluating a checkpoint.
+"""
+import copy
+import logging
+import os
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.utils import label_map_util
+from object_detection.utils import object_detection_evaluation
+from object_detection.utils import visualization_utils as vis_utils
+
+# Short module-level alias for tf.contrib.slim.
+slim = tf.contrib.slim
+
+
+def write_metrics(metrics, global_step, summary_dir):
+  """Write metrics to a summary directory.
+
+  Args:
+    metrics: A dictionary containing metric names and values.
+    global_step: Global step at which the metrics are computed.
+    summary_dir: Directory to write tensorflow summaries to.
+  """
+  logging.info('Writing metrics to tf summary.')
+  summary_writer = tf.summary.FileWriter(summary_dir)
+  for key in sorted(metrics):
+    summary = tf.Summary(value=[
+        tf.Summary.Value(tag=key, simple_value=metrics[key]),
+    ])
+    summary_writer.add_summary(summary, global_step)
+    logging.info('%s: %f', key, metrics[key])
+  summary_writer.close()
+  logging.info('Metrics written to tf summary.')
+
+
+def evaluate_detection_results_pascal_voc(result_lists,
+                                          categories,
+                                          label_id_offset=0,
+                                          iou_thres=0.5,
+                                          corloc_summary=False):
+  """Computes Pascal VOC detection metrics given groundtruth and detections.
+
+  This function computes Pascal VOC metrics. This function by default
+  takes detections and groundtruth boxes encoded in result_lists and writes
+  evaluation results to tf summaries which can be viewed on tensorboard.
+
+  Args:
+    result_lists: a dictionary holding lists of groundtruth and detection
+      data corresponding to each image being evaluated.  The following keys
+      are required:
+        'image_id': a list of string ids
+        'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
+        'detection_scores': a list of float32 numpy arrays of shape [N]
+        'detection_classes': a list of int32 numpy arrays of shape [N]
+        'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
+        'groundtruth_classes': a list of int32 numpy arrays of shape [M]
+      and the remaining fields below are optional:
+        'difficult': a list of boolean arrays of shape [M] indicating the
+          difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide
+          this information and it is used to remove difficult examples from eval
+          in order to not penalize the models on them.
+      Note that it is okay to have additional fields in result_lists --- they
+      are simply ignored.
+    categories: a list of dictionaries representing all possible categories.
+      Each dict in this list has the following keys:
+          'id': (required) an integer id uniquely identifying this category
+          'name': (required) string representing category name
+            e.g., 'cat', 'dog', 'pizza'
+    label_id_offset: an integer offset for the label space.
+    iou_thres: float determining the IoU threshold at which a box is considered
+        correct. Defaults to the standard 0.5.
+    corloc_summary: boolean. If True, also outputs CorLoc metrics.
+
+  Returns:
+    A dictionary of metric names to scalar values.
+
+  Raises:
+    ValueError: if the set of keys in result_lists is not a superset of the
+      expected list of keys.  Unexpected keys are ignored.
+    ValueError: if the lists in result_lists have inconsistent sizes.
+  """
+  # check for expected keys in result_lists
+  expected_keys = [
+      'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
+  ]
+  expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
+  if not set(expected_keys).issubset(set(result_lists.keys())):
+    raise ValueError('result_lists does not have expected key set.')
+  num_results = len(result_lists[expected_keys[0]])
+  for key in expected_keys:
+    if len(result_lists[key]) != num_results:
+      raise ValueError('Inconsistent list sizes in result_lists')
+
+  # Pascal VOC evaluator assumes foreground index starts from zero.
+  categories = copy.deepcopy(categories)
+  for idx in range(len(categories)):
+    categories[idx]['id'] -= label_id_offset
+
+  # num_classes (maybe encoded as categories)
+  num_classes = max([cat['id'] for cat in categories]) + 1
+  logging.info('Computing Pascal VOC metrics on results.')
+  if all(image_id.isdigit() for image_id in result_lists['image_id']):
+    image_ids = [int(image_id) for image_id in result_lists['image_id']]
+  else:
+    image_ids = range(num_results)
+
+  evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
+      num_classes, matching_iou_threshold=iou_thres)
+
+  difficult_lists = None
+  if 'difficult' in result_lists and result_lists['difficult']:
+    difficult_lists = result_lists['difficult']
+  for idx, image_id in enumerate(image_ids):
+    difficult = None
+    if difficult_lists is not None and difficult_lists[idx].size:
+      difficult = difficult_lists[idx].astype(np.bool)
+    evaluator.add_single_ground_truth_image_info(
+        image_id, result_lists['groundtruth_boxes'][idx],
+        result_lists['groundtruth_classes'][idx] - label_id_offset,
+        difficult)
+    evaluator.add_single_detected_image_info(
+        image_id, result_lists['detection_boxes'][idx],
+        result_lists['detection_scores'][idx],
+        result_lists['detection_classes'][idx] - label_id_offset)
+  per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
+      evaluator.evaluate())
+
+  metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
+  category_index = label_map_util.create_category_index(categories)
+  for idx in range(per_class_ap.size):
+    if idx in category_index:
+      display_name = ('PerformanceByCategory/mAP@{}IOU/{}'
+                      .format(iou_thres, category_index[idx]['name']))
+      metrics[display_name] = per_class_ap[idx]
+
+  if corloc_summary:
+    metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
+    for idx in range(per_class_corloc.size):
+      if idx in category_index:
+        display_name = (
+            'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
+                iou_thres, category_index[idx]['name']))
+        metrics[display_name] = per_class_corloc[idx]
+  return metrics
+
+
+# TODO: Add tests.
+def visualize_detection_results(result_dict,
+                                tag,
+                                global_step,
+                                categories,
+                                summary_dir='',
+                                export_dir='',
+                                agnostic_mode=False,
+                                show_groundtruth=False,
+                                min_score_thresh=.5,
+                                max_num_predictions=20):
+  """Visualizes detection results and writes visualizations to image summaries.
+
+  This function visualizes an image with its detected bounding boxes and writes
+  to image summaries which can be viewed on tensorboard.  It optionally also
+  writes images to a directory. In the case of missing entry in the label map,
+  unknown class name in the visualization is shown as "N/A".
+
+  Args:
+    result_dict: a dictionary holding groundtruth and detection
+      data corresponding to each image being evaluated.  The following keys
+      are required:
+        'original_image': a numpy array representing the image with shape
+          [1, height, width, 3]
+        'detection_boxes': a numpy array of shape [N, 4]
+        'detection_scores': a numpy array of shape [N]
+        'detection_classes': a numpy array of shape [N]
+      The following keys are optional:
+        'groundtruth_boxes': a numpy array of shape [N, 4]
+        'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
+      Detections are assumed to be provided in decreasing order of score and for
+      display, and we assume that scores are probabilities between 0 and 1.
+    tag: tensorboard tag (string) to associate with image.
+    global_step: global step at which the visualization are generated.
+    categories: a list of dictionaries representing all possible categories.
+      Each dict in this list has the following keys:
+          'id': (required) an integer id uniquely identifying this category
+          'name': (required) string representing category name
+            e.g., 'cat', 'dog', 'pizza'
+          'supercategory': (optional) string representing the supercategory
+            e.g., 'animal', 'vehicle', 'food', etc
+    summary_dir: the output directory to which the image summaries are written.
+    export_dir: the output directory to which images are written.  If this is
+      empty (default), then images are not exported.
+    agnostic_mode: boolean (default: False) controlling whether to evaluate in
+      class-agnostic mode or not.
+    show_groundtruth: boolean (default: False) controlling whether to show
+      groundtruth boxes in addition to detected boxes
+    min_score_thresh: minimum score threshold for a box to be visualized
+    max_num_predictions: maximum number of detections to visualize
+  Raises:
+    ValueError: if result_dict does not contain the expected keys (i.e.,
+      'original_image', 'detection_boxes', 'detection_scores',
+      'detection_classes')
+  """
+  if not set([
+      'original_image', 'detection_boxes', 'detection_scores',
+      'detection_classes'
+  ]).issubset(set(result_dict.keys())):
+    raise ValueError('result_dict does not contain all expected keys.')
+  if show_groundtruth and 'groundtruth_boxes' not in result_dict:
+    raise ValueError('If show_groundtruth is enabled, result_dict must contain '
+                     'groundtruth_boxes.')
+  logging.info('Creating detection visualizations.')
+  category_index = label_map_util.create_category_index(categories)
+
+  image = np.squeeze(result_dict['original_image'], axis=0)
+  detection_boxes = result_dict['detection_boxes']
+  detection_scores = result_dict['detection_scores']
+  detection_classes = np.int32((result_dict['detection_classes']))
+  detection_keypoints = result_dict.get('detection_keypoints', None)
+  detection_masks = result_dict.get('detection_masks', None)
+
+  # Plot groundtruth underneath detections
+  if show_groundtruth:
+    groundtruth_boxes = result_dict['groundtruth_boxes']
+    groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
+    vis_utils.visualize_boxes_and_labels_on_image_array(
+        image,
+        groundtruth_boxes,
+        None,
+        None,
+        category_index,
+        keypoints=groundtruth_keypoints,
+        use_normalized_coordinates=False,
+        max_boxes_to_draw=None)
+  vis_utils.visualize_boxes_and_labels_on_image_array(
+      image,
+      detection_boxes,
+      detection_classes,
+      detection_scores,
+      category_index,
+      instance_masks=detection_masks,
+      keypoints=detection_keypoints,
+      use_normalized_coordinates=False,
+      max_boxes_to_draw=max_num_predictions,
+      min_score_thresh=min_score_thresh,
+      agnostic_mode=agnostic_mode)
+
+  if export_dir:
+    export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
+    vis_utils.save_image_array_as_png(image, export_path)
+
+  summary = tf.Summary(value=[
+      tf.Summary.Value(tag=tag, image=tf.Summary.Image(
+          encoded_image_string=vis_utils.encode_image_array_as_png_str(
+              image)))
+  ])
+  summary_writer = tf.summary.FileWriter(summary_dir)
+  summary_writer.add_summary(summary, global_step)
+  summary_writer.close()
+
+  logging.info('Detection visualizations written to summary with tag %s.', tag)
+
+
+# TODO: Add tests.
+# TODO: Have an argument called `aggregated_processor_tensor_keys` that contains
+# a whitelist of tensors used by the `aggregated_result_processor` instead of a
+# blacklist. This will prevent us from inadvertently adding any evaluated
+# tensors into the `results_list` data structure that are not needed by
+# `aggregated_result_preprocessor`.
+def run_checkpoint_once(tensor_dict,
+                        update_op,
+                        summary_dir,
+                        aggregated_result_processor=None,
+                        batch_processor=None,
+                        checkpoint_dirs=None,
+                        variables_to_restore=None,
+                        restore_fn=None,
+                        num_batches=1,
+                        master='',
+                        save_graph=False,
+                        save_graph_dir='',
+                        metric_names_to_values=None,
+                        keys_to_exclude_from_results=()):
+  """Evaluates both python metrics and tensorflow slim metrics.
+
+  Python metrics are processed in batch by the aggregated_result_processor,
+  while tensorflow slim metrics statistics are computed by running
+  metric_names_to_updates tensors and aggregated using metric_names_to_values
+  tensor.
+
+  Args:
+    tensor_dict: a dictionary holding tensors representing a batch of detections
+      and corresponding groundtruth annotations.
+    update_op: a tensorflow update op that will run for each batch along with
+      the tensors in tensor_dict..
+    summary_dir: a directory to write metrics summaries.
+    aggregated_result_processor: a function taking one arguments:
+      1. result_lists: a dictionary with keys matching those in tensor_dict
+        and corresponding values being the list of results for each tensor
+        in tensor_dict.  The length of each such list is num_batches.
+    batch_processor: a function taking four arguments:
+      1. tensor_dict: the same tensor_dict that is passed in as the first
+        argument to this function.
+      2. sess: a tensorflow session
+      3. batch_index: an integer representing the index of the batch amongst
+        all batches
+      4. update_op: a tensorflow update op that will run for each batch.
+      and returns result_dict, a dictionary of results for that batch.
+      By default, batch_processor is None, which defaults to running:
+        return sess.run(tensor_dict)
+      To skip an image, it suffices to return an empty dictionary in place of
+      result_dict.
+    checkpoint_dirs: list of directories to load into an EnsembleModel. If it
+      has only one directory, EnsembleModel will not be used -- a DetectionModel
+      will be instantiated directly. Not used if restore_fn is set.
+    variables_to_restore: None, or a dictionary mapping variable names found in
+      a checkpoint to model variables. The dictionary would normally be
+      generated by creating a tf.train.ExponentialMovingAverage object and
+      calling its variables_to_restore() method. Not used if restore_fn is set.
+    restore_fn: None, or a function that takes a tf.Session object and correctly
+      restores all necessary variables from the correct checkpoint file. If
+      None, attempts to restore from the first directory in checkpoint_dirs.
+    num_batches: the number of batches to use for evaluation.
+    master: the location of the Tensorflow session.
+    save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
+    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
+      is True this must be non-empty.
+    metric_names_to_values: A dictionary containing metric names to tensors
+      which will be evaluated after processing all batches
+      of [tensor_dict, update_op]. If any metrics depend on statistics computed
+      during each batch ensure that `update_op` tensor has a control dependency
+      on the update ops that compute the statistics.
+    keys_to_exclude_from_results: keys in tensor_dict that will be excluded
+      from results_list. Note that the tensors corresponding to these keys will
+      still be evaluated for each batch, but won't be added to results_list.
+
+  Raises:
+    ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
+      one element.
+    ValueError: if save_graph is True and save_graph_dir is not defined.
+  """
+  if save_graph and not save_graph_dir:
+    raise ValueError('`save_graph_dir` must be defined.')
+  sess = tf.Session(master, graph=tf.get_default_graph())
+  sess.run(tf.global_variables_initializer())
+  sess.run(tf.local_variables_initializer())
+  if restore_fn:
+    restore_fn(sess)
+  else:
+    if not checkpoint_dirs:
+      raise ValueError('`checkpoint_dirs` must have at least one entry.')
+    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
+    saver = tf.train.Saver(variables_to_restore)
+    saver.restore(sess, checkpoint_file)
+
+  if save_graph:
+    tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')
+
+  valid_keys = list(set(tensor_dict.keys()) - set(keys_to_exclude_from_results))
+  result_lists = {key: [] for key in valid_keys}
+  counters = {'skipped': 0, 'success': 0}
+  other_metrics = None
+  with tf.contrib.slim.queues.QueueRunners(sess):
+    try:
+      for batch in range(int(num_batches)):
+        if (batch + 1) % 100 == 0:
+          logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
+        if not batch_processor:
+          try:
+            (result_dict, _) = sess.run([tensor_dict, update_op])
+            counters['success'] += 1
+          except tf.errors.InvalidArgumentError:
+            logging.info('Skipping image')
+            counters['skipped'] += 1
+            result_dict = {}
+        else:
+          result_dict = batch_processor(
+              tensor_dict, sess, batch, counters, update_op)
+        for key in result_dict:
+          if key in valid_keys:
+            result_lists[key].append(result_dict[key])
+      if metric_names_to_values is not None:
+        other_metrics = sess.run(metric_names_to_values)
+      logging.info('Running eval batches done.')
+    except tf.errors.OutOfRangeError:
+      logging.info('Done evaluating -- epoch limit reached')
+    finally:
+      # When done, ask the threads to stop.
+      metrics = aggregated_result_processor(result_lists)
+      if other_metrics is not None:
+        metrics.update(other_metrics)
+      global_step = tf.train.global_step(sess, slim.get_global_step())
+      write_metrics(metrics, global_step, summary_dir)
+      logging.info('# success: %d', counters['success'])
+      logging.info('# skipped: %d', counters['skipped'])
+  sess.close()
+
+
+# TODO: Add tests.
+def repeated_checkpoint_run(tensor_dict,
+                            update_op,
+                            summary_dir,
+                            aggregated_result_processor=None,
+                            batch_processor=None,
+                            checkpoint_dirs=None,
+                            variables_to_restore=None,
+                            restore_fn=None,
+                            num_batches=1,
+                            eval_interval_secs=120,
+                            max_number_of_evaluations=None,
+                            master='',
+                            save_graph=False,
+                            save_graph_dir='',
+                            metric_names_to_values=None,
+                            keys_to_exclude_from_results=()):
+  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
+
+  This function repeatedly loads a checkpoint and evaluates a desired
+  set of tensors (provided by tensor_dict) and hands the resulting numpy
+  arrays to a function result_processor which can be used to further
+  process/save/visualize the results.
+
+  Args:
+    tensor_dict: a dictionary holding tensors representing a batch of detections
+      and corresponding groundtruth annotations.
+    update_op: a tensorflow update op that will run for each batch along with
+      the tensors in tensor_dict.
+    summary_dir: a directory to write metrics summaries.
+    aggregated_result_processor: a function taking one argument:
+      1. result_lists: a dictionary with keys matching those in tensor_dict
+        and corresponding values being the list of results for each tensor
+        in tensor_dict.  The length of each such list is num_batches.
+    batch_processor: a function taking three arguments:
+      1. tensor_dict: the same tensor_dict that is passed in as the first
+        argument to this function.
+      2. sess: a tensorflow session
+      3. batch_index: an integer representing the index of the batch amongst
+        all batches
+      4. update_op: a tensorflow update op that will run for each batch.
+      and returns result_dict, a dictionary of results for that batch.
+      By default, batch_processor is None, which defaults to running:
+        return sess.run(tensor_dict)
+    checkpoint_dirs: list of directories to load into a DetectionModel or an
+      EnsembleModel if restore_fn isn't set. Also used to determine when to run
+      next evaluation. Must have at least one element.
+    variables_to_restore: None, or a dictionary mapping variable names found in
+      a checkpoint to model variables. The dictionary would normally be
+      generated by creating a tf.train.ExponentialMovingAverage object and
+      calling its variables_to_restore() method. Not used if restore_fn is set.
+    restore_fn: a function that takes a tf.Session object and correctly restores
+      all necessary variables from the correct checkpoint file.
+    num_batches: the number of batches to use for evaluation.
+    eval_interval_secs: the number of seconds between each evaluation run.
+    max_number_of_evaluations: the max number of iterations of the evaluation.
+      If the value is left as None the evaluation continues indefinitely.
+    master: the location of the Tensorflow session.
+    save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
+    save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
+      is True this must be non-empty.
+    metric_names_to_values: A dictionary containing metric names to tensors
+      which will be evaluated after processing all batches
+      of [tensor_dict, update_op]. If any metrics depend on statistics computed
+      during each batch ensure that `update_op` tensor has a control dependency
+      on the update ops that compute the statistics.
+    keys_to_exclude_from_results: keys in tensor_dict that will be excluded
+      from results_list. Note that the tensors corresponding to these keys will
+      still be evaluated for each batch, but won't be added to results_list.
+
+  Raises:
+    ValueError: if max_num_of_evaluations is not None or a positive number.
+    ValueError: if checkpoint_dirs doesn't have at least one element.
+  """
+  if max_number_of_evaluations and max_number_of_evaluations <= 0:
+    raise ValueError(
+        '`number_of_steps` must be either None or a positive number.')
+
+  if not checkpoint_dirs:
+    raise ValueError('`checkpoint_dirs` must have at least one entry.')
+
+  last_evaluated_model_path = None
+  number_of_evaluations = 0
+  while True:
+    start = time.time()
+    logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
+                                                           time.gmtime()))
+    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
+    if not model_path:
+      logging.info('No model found in %s. Will try again in %d seconds',
+                   checkpoint_dirs[0], eval_interval_secs)
+    elif model_path == last_evaluated_model_path:
+      logging.info('Found already evaluated checkpoint. Will try again in %d '
+                   'seconds', eval_interval_secs)
+    else:
+      last_evaluated_model_path = model_path
+      run_checkpoint_once(tensor_dict, update_op, summary_dir,
+                          aggregated_result_processor,
+                          batch_processor, checkpoint_dirs,
+                          variables_to_restore, restore_fn, num_batches, master,
+                          save_graph, save_graph_dir, metric_names_to_values,
+                          keys_to_exclude_from_results)
+    number_of_evaluations += 1
+
+    if (max_number_of_evaluations and
+        number_of_evaluations >= max_number_of_evaluations):
+      logging.info('Finished evaluation!')
+      break
+    time_to_next_eval = start + eval_interval_secs - time.time()
+    if time_to_next_eval > 0:
+      time.sleep(time_to_next_eval)
--- a/object_detection/evaluator.py
+++ b/object_detection/evaluator.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Detection model evaluator.
+
+This file provides a generic evaluation method that can be used to evaluate a
+DetectionModel.
+"""
+import logging
+import tensorflow as tf
+
+from object_detection import eval_util
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import prefetcher
+from object_detection.core import standard_fields as fields
+from object_detection.utils import ops
+
+# Short alias for the slim library under tf.contrib.
+slim = tf.contrib.slim
+
+# Maps the `metrics_set` name from the eval config to the function that
+# computes that set of metrics from the aggregated per-batch results.
+EVAL_METRICS_FN_DICT = {
+    'pascal_voc_metrics': eval_util.evaluate_detection_results_pascal_voc
+}
+
+
+def _extract_prediction_tensors(model,
+                                create_input_dict_fn,
+                                ignore_groundtruth=False):
+  """Restores the model in a tensorflow session.
+
+  Args:
+    model: model to perform predictions with.
+    create_input_dict_fn: function to create input tensor dictionaries.
+    ignore_groundtruth: whether groundtruth should be ignored.
+
+  Returns:
+    tensor_dict: A tensor dictionary with evaluations.
+  """
+  input_dict = create_input_dict_fn()
+  prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
+  input_dict = prefetch_queue.dequeue()
+  original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
+  preprocessed_image = model.preprocess(tf.to_float(original_image))
+  prediction_dict = model.predict(preprocessed_image)
+  detections = model.postprocess(prediction_dict)
+
+  original_image_shape = tf.shape(original_image)
+  absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
+      box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
+      original_image_shape[1], original_image_shape[2])
+  label_id_offset = 1
+  tensor_dict = {
+      'original_image': original_image,
+      'image_id': input_dict[fields.InputDataFields.source_id],
+      'detection_boxes': absolute_detection_boxlist.get(),
+      'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
+      'detection_classes': (
+          tf.squeeze(detections['detection_classes'], axis=0) +
+          label_id_offset),
+  }
+  if 'detection_masks' in detections:
+    detection_masks = tf.squeeze(detections['detection_masks'],
+                                 axis=0)
+    detection_boxes = tf.squeeze(detections['detection_boxes'],
+                                 axis=0)
+    # TODO: This should be done in model's postprocess function ideally.
+    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
+        detection_masks,
+        detection_boxes,
+        original_image_shape[1], original_image_shape[2])
+    detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
+                                                      0.5))
+
+    tensor_dict['detection_masks'] = detection_masks_reframed
+  # load groundtruth fields into tensor_dict
+  if not ignore_groundtruth:
+    normalized_gt_boxlist = box_list.BoxList(
+        input_dict[fields.InputDataFields.groundtruth_boxes])
+    gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
+                                    tf.shape(original_image)[1],
+                                    tf.shape(original_image)[2])
+    groundtruth_boxes = gt_boxlist.get()
+    groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
+    tensor_dict['groundtruth_boxes'] = groundtruth_boxes
+    tensor_dict['groundtruth_classes'] = groundtruth_classes
+    tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
+    tensor_dict['is_crowd'] = input_dict[
+        fields.InputDataFields.groundtruth_is_crowd]
+    tensor_dict['difficult'] = input_dict[
+        fields.InputDataFields.groundtruth_difficult]
+    if 'detection_masks' in tensor_dict:
+      tensor_dict['groundtruth_instance_masks'] = input_dict[
+          fields.InputDataFields.groundtruth_instance_masks]
+  return tensor_dict
+
+
+def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
+             checkpoint_dir, eval_dir):
+  """Evaluation function for detection models.
+
+  Args:
+    create_input_dict_fn: a function to create a tensor input dictionary.
+    create_model_fn: a function that creates a DetectionModel.
+    eval_config: a eval_pb2.EvalConfig protobuf.
+    categories: a list of category dictionaries. Each dict in the list should
+                have an integer 'id' field and string 'name' field.
+    checkpoint_dir: directory to load the checkpoints to evaluate from.
+    eval_dir: directory to write evaluation metrics summary to.
+  """
+
+  model = create_model_fn()
+
+  if eval_config.ignore_groundtruth and not eval_config.export_path:
+    logging.fatal('If ignore_groundtruth=True then an export_path is '
+                  'required. Aborting!!!')
+
+  tensor_dict = _extract_prediction_tensors(
+      model=model,
+      create_input_dict_fn=create_input_dict_fn,
+      ignore_groundtruth=eval_config.ignore_groundtruth)
+
+  def _process_batch(tensor_dict, sess, batch_index, counters, update_op):
+    """Evaluates tensors in tensor_dict, visualizing the first K examples.
+
+    This function calls sess.run on tensor_dict, evaluating the original_image
+    tensor only on the first K examples and visualizing detections overlaid
+    on this original_image.
+
+    Args:
+      tensor_dict: a dictionary of tensors
+      sess: tensorflow session
+      batch_index: the index of the batch amongst all batches in the run.
+      counters: a dictionary holding 'success' and 'skipped' fields which can
+        be updated to keep track of number of successful and failed runs,
+        respectively.  If these fields are not updated, then the success/skipped
+        counter values shown at the end of evaluation will be incorrect.
+      update_op: An update op that has to be run along with output tensors. For
+        example this could be an op to compute statistics for slim metrics.
+
+    Returns:
+      result_dict: a dictionary of numpy arrays
+    """
+    if batch_index >= eval_config.num_visualizations:
+      if 'original_image' in tensor_dict:
+        tensor_dict = {k: v for (k, v) in tensor_dict.items()
+                       if k != 'original_image'}
+    try:
+      (result_dict, _) = sess.run([tensor_dict, update_op])
+      counters['success'] += 1
+    except tf.errors.InvalidArgumentError:
+      logging.info('Skipping image')
+      counters['skipped'] += 1
+      return {}
+    global_step = tf.train.global_step(sess, slim.get_global_step())
+    if batch_index < eval_config.num_visualizations:
+      tag = 'image-{}'.format(batch_index)
+      eval_util.visualize_detection_results(
+          result_dict, tag, global_step, categories=categories,
+          summary_dir=eval_dir,
+          export_dir=eval_config.visualization_export_dir,
+          show_groundtruth=eval_config.visualization_export_dir)
+    return result_dict
+
+  def _process_aggregated_results(result_lists):
+    eval_metric_fn_key = eval_config.metrics_set
+    if eval_metric_fn_key not in EVAL_METRICS_FN_DICT:
+      raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
+    return EVAL_METRICS_FN_DICT[eval_metric_fn_key](result_lists,
+                                                    categories=categories)
+
+  variables_to_restore = tf.global_variables()
+  global_step = slim.get_or_create_global_step()
+  variables_to_restore.append(global_step)
+  if eval_config.use_moving_averages:
+    variable_averages = tf.train.ExponentialMovingAverage(0.0)
+    variables_to_restore = variable_averages.variables_to_restore()
+  saver = tf.train.Saver(variables_to_restore)
+  def _restore_latest_checkpoint(sess):
+    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
+    saver.restore(sess, latest_checkpoint)
+
+  eval_util.repeated_checkpoint_run(
+      tensor_dict=tensor_dict,
+      update_op=tf.no_op(),
+      summary_dir=eval_dir,
+      aggregated_result_processor=_process_aggregated_results,
+      batch_processor=_process_batch,
+      checkpoint_dirs=[checkpoint_dir],
+      variables_to_restore=None,
+      restore_fn=_restore_latest_checkpoint,
+      num_batches=eval_config.num_examples,
+      eval_interval_secs=eval_config.eval_interval_secs,
+      max_number_of_evaluations=(
+          1 if eval_config.ignore_groundtruth else
+          eval_config.max_evals if eval_config.max_evals else
+          None),
+      master=eval_config.eval_master,
+      save_graph=eval_config.save_graph,
+      save_graph_dir=(eval_dir if eval_config.save_graph else ''))