Add Tensorflow Object Detection API. (#1561)

For details see our paper: "Speed/accuracy trade-offs for modern convolutional object detectors." Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017 https://arxiv.org/abs/1611.10012

Add Tensorflow Object Detection API. (#1561)
For details see our paper: "Speed/accuracy trade-offs for modern convolutional object detectors." Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017 https://arxiv.org/abs/1611.10012
a4944a57 · derekjchow · Sergio Guadarrama · 60c3ed2e · a4944a57 · a4944a57
Commit a4944a57 authored Jun 14, 2017 by derekjchow Committed by Sergio Guadarrama Jun 14, 2017
20 changed files
--- a/object_detection/samples/configs/ssd_inception_v2_pets.config
+++ b/object_detection/samples/configs/ssd_inception_v2_pets.config
+# SSD with Inception v2, configured for the Oxford-IIIT Pet Dataset.
+# Users should configure the fine_tune_checkpoint field in the train config as
+# well as the label_map_path and input_path fields in the train_input_reader and
+# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
+# should be configured.
+
+model {
+  ssd {
+    num_classes: 37
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    anchor_generator {
+      ssd_anchor_generator {
+        num_layers: 6
+        min_scale: 0.2
+        max_scale: 0.95
+        aspect_ratios: 1.0
+        aspect_ratios: 2.0
+        aspect_ratios: 0.5
+        aspect_ratios: 3.0
+        aspect_ratios: 0.3333
+        reduce_boxes_in_lowest_layer: true
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 300
+        width: 300
+      }
+    }
+    box_predictor {
+      convolutional_box_predictor {
+        min_depth: 0
+        max_depth: 0
+        num_layers_before_predictor: 0
+        use_dropout: false
+        dropout_keep_probability: 0.8
+        kernel_size: 3
+        box_code_size: 4
+        apply_sigmoid_to_scores: false
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            truncated_normal_initializer {
+              stddev: 0.03
+              mean: 0.0
+            }
+          }
+        }
+      }
+    }
+    feature_extractor {
+      type: 'ssd_inception_v2'
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          train: true,
+          scale: true,
+          center: true,
+          decay: 0.9997,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid {
+          anchorwise_output: true
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+          anchorwise_output: true
+        }
+      }
+      hard_example_miner {
+        num_hard_examples: 3000
+        iou_threshold: 0.99
+        loss_type: CLASSIFICATION
+        max_negatives_per_positive: 3
+        min_negatives_per_image: 0
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  batch_size: 32
+  optimizer {
+    rms_prop_optimizer: {
+      learning_rate: {
+        exponential_decay_learning_rate {
+          initial_learning_rate: 0.004
+          decay_steps: 800720
+          decay_factor: 0.95
+        }
+      }
+      momentum_optimizer_value: 0.9
+      decay: 0.9
+      epsilon: 1.0
+    }
+  }
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
+  from_detection_checkpoint: true
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    ssd_random_crop {
+    }
+  }
+}
+
+train_input_reader: {
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
+  }
+  label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
+}
+
+eval_config: {
+  num_examples: 2000
+}
+
+eval_input_reader: {
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
+  }
+  label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
+}
--- a/object_detection/samples/configs/ssd_mobilenet_v1_pets.config
+++ b/object_detection/samples/configs/ssd_mobilenet_v1_pets.config
+# SSD with Mobilenet v1, configured for the Oxford-IIIT Pet Dataset.
+# Users should configure the fine_tune_checkpoint field in the train config as
+# well as the label_map_path and input_path fields in the train_input_reader and
+# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
+# should be configured.
+
+model {
+  ssd {
+    num_classes: 37
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    anchor_generator {
+      ssd_anchor_generator {
+        num_layers: 6
+        min_scale: 0.2
+        max_scale: 0.95
+        aspect_ratios: 1.0
+        aspect_ratios: 2.0
+        aspect_ratios: 0.5
+        aspect_ratios: 3.0
+        aspect_ratios: 0.3333
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 300
+        width: 300
+      }
+    }
+    box_predictor {
+      convolutional_box_predictor {
+        min_depth: 0
+        max_depth: 0
+        num_layers_before_predictor: 0
+        use_dropout: false
+        dropout_keep_probability: 0.8
+        kernel_size: 1
+        box_code_size: 4
+        apply_sigmoid_to_scores: false
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.00004
+            }
+          }
+          initializer {
+            truncated_normal_initializer {
+              stddev: 0.03
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            train: true,
+            scale: true,
+            center: true,
+            decay: 0.9997,
+            epsilon: 0.001,
+          }
+        }
+      }
+    }
+    feature_extractor {
+      type: 'ssd_mobilenet_v1'
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.00004
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          train: true,
+          scale: true,
+          center: true,
+          decay: 0.9997,
+          epsilon: 0.001,
+        }
+      }
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid {
+          anchorwise_output: true
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+          anchorwise_output: true
+        }
+      }
+      hard_example_miner {
+        num_hard_examples: 3000
+        iou_threshold: 0.99
+        loss_type: CLASSIFICATION
+        max_negatives_per_positive: 3
+        min_negatives_per_image: 0
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 1e-8
+        iou_threshold: 0.6
+        max_detections_per_class: 100
+        max_total_detections: 100
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config: {
+  batch_size: 32
+  optimizer {
+    rms_prop_optimizer: {
+      learning_rate: {
+        exponential_decay_learning_rate {
+          initial_learning_rate: 0.004
+          decay_steps: 800720
+          decay_factor: 0.95
+        }
+      }
+      momentum_optimizer_value: 0.9
+      decay: 0.9
+      epsilon: 1.0
+    }
+  }
+  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
+  from_detection_checkpoint: true
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    ssd_random_crop {
+    }
+  }
+}
+
+train_input_reader: {
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
+  }
+  label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
+}
+
+eval_config: {
+  num_examples: 2000
+}
+
+eval_input_reader: {
+  tf_record_input_reader {
+    input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
+  }
+  label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
+}
--- a/object_detection/test_images/image1.jpg
+++ b/object_detection/test_images/image1.jpg
--- a/object_detection/test_images/image2.jpg
+++ b/object_detection/test_images/image2.jpg
--- a/object_detection/test_images/image_info.txt
+++ b/object_detection/test_images/image_info.txt
+
+Image provenance:
+image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg
+image2.jpg: Michael Miley,
+  https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4
+
--- a/object_detection/train.py
+++ b/object_detection/train.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Training executable for detection models.
+
+This executable is used to train DetectionModels. There are two ways of
+configuring the training job:
+
+1) A single pipeline_pb2.TrainEvalPipelineConfig configuration file
+can be specified by --pipeline_config_path.
+
+Example usage:
+    ./train \
+        --logtostderr \
+        --train_dir=path/to/train_dir \
+        --pipeline_config_path=pipeline_config.pbtxt
+
+2) Three configuration files can be provided: a model_pb2.DetectionModel
+configuration file to define what type of DetectionModel is being trained, an
+input_reader_pb2.InputReader file to specify what training data will be used and
+a train_pb2.TrainConfig file to configure training parameters.
+
+Example usage:
+    ./train \
+        --logtostderr \
+        --train_dir=path/to/train_dir \
+        --model_config_path=model_config.pbtxt \
+        --train_config_path=train_config.pbtxt \
+        --input_config_path=train_input_config.pbtxt
+"""
+
+import functools
+import json
+import os
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection import trainer
+from object_detection.builders import input_reader_builder
+from object_detection.builders import model_builder
+from object_detection.protos import input_reader_pb2
+from object_detection.protos import model_pb2
+from object_detection.protos import pipeline_pb2
+from object_detection.protos import train_pb2
+
+# Set the TensorFlow logging threshold to INFO.
+tf.logging.set_verbosity(tf.logging.INFO)
+
+# Command-line flags for the training binary.
+# NOTE(review): main() in this file passes locally computed values for master,
+# task, worker_replicas and ps_tasks (derived from the TF_CONFIG environment
+# variable) to trainer.train instead of reading the same-named flags below, so
+# those four flags look unused here -- confirm before relying on them.
+flags = tf.app.flags
+flags.DEFINE_string('master', '', 'BNS name of the TensorFlow master to use.')
+flags.DEFINE_integer('task', 0, 'task id')
+flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
+flags.DEFINE_boolean('clone_on_cpu', False,
+                     'Force clones to be deployed on CPU.  Note that even if '
+                     'set to False (allowing ops to run on gpu), some ops may '
+                     'still be run on the CPU if they have no GPU kernel.')
+flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
+                     'replicas.')
+flags.DEFINE_integer('ps_tasks', 0,
+                     'Number of parameter server tasks. If None, does not use '
+                     'a parameter server.')
+# Required: main() asserts that this flag is non-empty.
+flags.DEFINE_string('train_dir', '',
+                    'Directory to save the checkpoints and training summaries.')
+
+# Configuration option 1: a single pipeline config file. When this flag is set,
+# main() reads all three configs from it.
+flags.DEFINE_string('pipeline_config_path', '',
+                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+                    'file. If provided, other configs are ignored')
+
+# Configuration option 2: three separate config files, read by main() only when
+# pipeline_config_path is empty.
+flags.DEFINE_string('train_config_path', '',
+                    'Path to a train_pb2.TrainConfig config file.')
+flags.DEFINE_string('input_config_path', '',
+                    'Path to an input_reader_pb2.InputReader config file.')
+flags.DEFINE_string('model_config_path', '',
+                    'Path to a model_pb2.DetectionModel config file.')
+
+FLAGS = flags.FLAGS
+
+
+def get_configs_from_pipeline_file():
+  """Reads training configuration from a pipeline_pb2.TrainEvalPipelineConfig.
+
+  Reads training config from file specified by pipeline_config_path flag.
+
+  Returns:
+    model_config: model_pb2.DetectionModel
+    train_config: train_pb2.TrainConfig
+    input_config: input_reader_pb2.InputReader
+  """
+  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
+    text_format.Merge(f.read(), pipeline_config)
+
+  model_config = pipeline_config.model
+  train_config = pipeline_config.train_config
+  input_config = pipeline_config.train_input_reader
+
+  return model_config, train_config, input_config
+
+
+def get_configs_from_multiple_files():
+  """Reads training configuration from multiple config files.
+
+  Reads the training config from the following files:
+    model_config: Read from --model_config_path
+    train_config: Read from --train_config_path
+    input_config: Read from --input_config_path
+
+  Returns:
+    model_config: model_pb2.DetectionModel
+    train_config: train_pb2.TrainConfig
+    input_config: input_reader_pb2.InputReader
+  """
+  train_config = train_pb2.TrainConfig()
+  with tf.gfile.GFile(FLAGS.train_config_path, 'r') as f:
+    text_format.Merge(f.read(), train_config)
+
+  model_config = model_pb2.DetectionModel()
+  with tf.gfile.GFile(FLAGS.model_config_path, 'r') as f:
+    text_format.Merge(f.read(), model_config)
+
+  input_config = input_reader_pb2.InputReader()
+  with tf.gfile.GFile(FLAGS.input_config_path, 'r') as f:
+    text_format.Merge(f.read(), input_config)
+
+  return model_config, train_config, input_config
+
+
+def main(_):
+  assert FLAGS.train_dir, '`train_dir` is missing.'
+  if FLAGS.pipeline_config_path:
+    model_config, train_config, input_config = get_configs_from_pipeline_file()
+  else:
+    model_config, train_config, input_config = get_configs_from_multiple_files()
+
+  model_fn = functools.partial(
+      model_builder.build,
+      model_config=model_config,
+      is_training=True)
+
+  create_input_dict_fn = functools.partial(
+      input_reader_builder.build, input_config)
+
+  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
+  cluster_data = env.get('cluster', None)
+  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
+  task_data = env.get('task', None) or {'type': 'master', 'index': 0}
+  task_info = type('TaskSpec', (object,), task_data)
+
+  # Parameters for a single worker.
+  ps_tasks = 0
+  worker_replicas = 1
+  worker_job_name = 'lonely_worker'
+  task = 0
+  is_chief = True
+  master = ''
+
+  if cluster_data and 'worker' in cluster_data:
+    # Number of total worker replicas include "worker"s and the "master".
+    worker_replicas = len(cluster_data['worker']) + 1
+  if cluster_data and 'ps' in cluster_data:
+    ps_tasks = len(cluster_data['ps'])
+
+  if worker_replicas > 1 and ps_tasks < 1:
+    raise ValueError('At least 1 ps task is needed for distributed training.')
+
+  if worker_replicas >= 1 and ps_tasks > 0:
+    # Set up distributed training.
+    server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
+                             job_name=task_info.type,
+                             task_index=task_info.index)
+    if task_info.type == 'ps':
+      server.join()
+      return
+
+    worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
+    task = task_info.index
+    is_chief = (task_info.type == 'master')
+    master = server.target
+
+  trainer.train(create_input_dict_fn, model_fn, train_config, master, task,
+                FLAGS.num_clones, worker_replicas, FLAGS.clone_on_cpu, ps_tasks,
+                worker_job_name, is_chief, FLAGS.train_dir)
+
+
+# Script entry point: hand control to tf.app.run().
+# NOTE(review): presumably this parses the flags defined above and then calls
+# main() -- confirm against the tf.app.run documentation.
+if __name__ == '__main__':
+  tf.app.run()
--- a/object_detection/trainer.py
+++ b/object_detection/trainer.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Detection model trainer.
+
+This file provides a generic training method that can be used to train a
+DetectionModel.
+"""
+
+import functools
+
+import tensorflow as tf
+
+from object_detection.builders import optimizer_builder
+from object_detection.builders import preprocessor_builder
+from object_detection.core import batcher
+from object_detection.core import preprocessor
+from object_detection.core import standard_fields as fields
+from object_detection.utils import ops as util_ops
+from object_detection.utils import variables_helper
+from deployment import model_deploy
+
+slim = tf.contrib.slim
+
+
+def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
+                        batch_queue_capacity, num_batch_queue_threads,
+                        prefetch_queue_capacity, data_augmentation_options):
+  """Sets up reader, prefetcher and returns input queue.
+
+  Args:
+    batch_size_per_clone: batch size to use per clone.
+    create_tensor_dict_fn: function to create tensor dictionary.
+    batch_queue_capacity: maximum number of elements to store within a queue.
+    num_batch_queue_threads: number of threads to use for batching.
+    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
+                             assembled batches.
+    data_augmentation_options: a list of tuples, where each tuple contains a
+      data augmentation function and a dictionary containing arguments and their
+      values (see preprocessor.py).
+
+  Returns:
+    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
+      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
+      call input_queue.Dequeue().
+  """
+  tensor_dict = create_tensor_dict_fn()
+
+  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
+      tensor_dict[fields.InputDataFields.image], 0)
+
+  images = tensor_dict[fields.InputDataFields.image]
+  float_images = tf.to_float(images)
+  tensor_dict[fields.InputDataFields.image] = float_images
+
+  if data_augmentation_options:
+    tensor_dict = preprocessor.preprocess(tensor_dict,
+                                          data_augmentation_options)
+
+  input_queue = batcher.BatchQueue(
+      tensor_dict,
+      batch_size=batch_size_per_clone,
+      batch_queue_capacity=batch_queue_capacity,
+      num_batch_queue_threads=num_batch_queue_threads,
+      prefetch_queue_capacity=prefetch_queue_capacity)
+  return input_queue
+
+
+def _get_inputs(input_queue, num_classes):
+  """Dequeue batch and construct inputs to object detection model.
+
+  Args:
+    input_queue: BatchQueue object holding enqueued tensor_dicts.
+    num_classes: Number of classes.
+
+  Returns:
+    images: a list of 3-D float tensor of images.
+    locations_list: a list of tensors of shape [num_boxes, 4]
+      containing the corners of the groundtruth boxes.
+    classes_list: a list of padded one-hot tensors containing target classes.
+    masks_list: a list of 3-D float tensors of shape [num_boxes, image_height,
+      image_width] containing instance masks for objects if present in the
+      input_queue. Else returns None.
+  """
+  read_data_list = input_queue.dequeue()
+  label_id_offset = 1
+  def extract_images_and_targets(read_data):
+    image = read_data[fields.InputDataFields.image]
+    location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
+    classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes],
+                         tf.int32)
+    classes_gt -= label_id_offset
+    classes_gt = util_ops.padded_one_hot_encoding(indices=classes_gt,
+                                                  depth=num_classes, left_pad=0)
+    masks_gt = read_data.get(fields.InputDataFields.groundtruth_instance_masks)
+    return image, location_gt, classes_gt, masks_gt
+  return zip(*map(extract_images_and_targets, read_data_list))
+
+
+def _create_losses(input_queue, create_model_fn):
+  """Creates loss function for a DetectionModel.
+
+  Args:
+    input_queue: BatchQueue object holding enqueued tensor_dicts.
+    create_model_fn: A function to create the DetectionModel.
+  """
+  detection_model = create_model_fn()
+  (images, groundtruth_boxes_list, groundtruth_classes_list,
+   groundtruth_masks_list
+  ) = _get_inputs(input_queue, detection_model.num_classes)
+  images = [detection_model.preprocess(image) for image in images]
+  images = tf.concat(images, 0)
+  if any(mask is None for mask in groundtruth_masks_list):
+    groundtruth_masks_list = None
+
+  detection_model.provide_groundtruth(groundtruth_boxes_list,
+                                      groundtruth_classes_list,
+                                      groundtruth_masks_list)
+  prediction_dict = detection_model.predict(images)
+
+  losses_dict = detection_model.loss(prediction_dict)
+  for loss_tensor in losses_dict.values():
+    tf.losses.add_loss(loss_tensor)
+
+
+def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
+          num_clones, worker_replicas, clone_on_cpu, ps_tasks, worker_job_name,
+          is_chief, train_dir):
+  """Training function for detection models.
+
+  Args:
+    create_tensor_dict_fn: a function to create a tensor input dictionary.
+    create_model_fn: a function that creates a DetectionModel and generates
+                     losses.
+    train_config: a train_pb2.TrainConfig protobuf.
+    master: BNS name of the TensorFlow master to use.
+    task: The task id of this training instance.
+    num_clones: The number of clones to run per machine.
+    worker_replicas: The number of work replicas to train with.
+    clone_on_cpu: True if clones should be forced to run on CPU.
+    ps_tasks: Number of parameter server tasks.
+    worker_job_name: Name of the worker job.
+    is_chief: Whether this replica is the chief replica.
+    train_dir: Directory to write checkpoints and training summaries to.
+  """
+
+  detection_model = create_model_fn()
+  data_augmentation_options = [
+      preprocessor_builder.build(step)
+      for step in train_config.data_augmentation_options]
+
+  with tf.Graph().as_default():
+    # Build a configuration specifying multi-GPU and multi-replicas.
+    deploy_config = model_deploy.DeploymentConfig(
+        num_clones=num_clones,
+        clone_on_cpu=clone_on_cpu,
+        replica_id=task,
+        num_replicas=worker_replicas,
+        num_ps_tasks=ps_tasks,
+        worker_job_name=worker_job_name)
+
+    # Place the global step on the device storing the variables.
+    with tf.device(deploy_config.variables_device()):
+      global_step = slim.create_global_step()
+
+    with tf.device(deploy_config.inputs_device()):
+      input_queue = _create_input_queue(train_config.batch_size // num_clones,
+                                        create_tensor_dict_fn,
+                                        train_config.batch_queue_capacity,
+                                        train_config.num_batch_queue_threads,
+                                        train_config.prefetch_queue_capacity,
+                                        data_augmentation_options)
+
+    # Gather initial summaries.
+    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
+    global_summaries = set([])
+
+    model_fn = functools.partial(_create_losses,
+                                 create_model_fn=create_model_fn)
+    clones = model_deploy.create_clones(deploy_config, model_fn, [input_queue])
+    first_clone_scope = clones[0].scope
+
+    # Gather update_ops from the first clone. These contain, for example,
+    # the updates for the batch_norm variables created by model_fn.
+    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)
+
+    with tf.device(deploy_config.optimizer_device()):
+      training_optimizer = optimizer_builder.build(train_config.optimizer,
+                                                   global_summaries)
+
+    sync_optimizer = None
+    if train_config.sync_replicas:
+      training_optimizer = tf.SyncReplicasOptimizer(
+          training_optimizer,
+          replicas_to_aggregate=train_config.replicas_to_aggregate,
+          total_num_replicas=train_config.worker_replicas)
+      sync_optimizer = training_optimizer
+
+    # Create ops required to initialize the model from a given checkpoint.
+    init_fn = None
+    if train_config.fine_tune_checkpoint:
+      init_fn = detection_model.restore_fn(
+          train_config.fine_tune_checkpoint,
+          from_detection_checkpoint=train_config.from_detection_checkpoint)
+
+    with tf.device(deploy_config.optimizer_device()):
+      total_loss, grads_and_vars = model_deploy.optimize_clones(
+          clones, training_optimizer, regularization_losses=None)
+      total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.')
+
+      # Optionally multiply bias gradients by train_config.bias_grad_multiplier.
+      if train_config.bias_grad_multiplier:
+        biases_regex_list = ['.*/biases']
+        grads_and_vars = variables_helper.multiply_gradients_matching_regex(
+            grads_and_vars,
+            biases_regex_list,
+            multiplier=train_config.bias_grad_multiplier)
+
+      # Optionally freeze some layers by setting their gradients to be zero.
+      if train_config.freeze_variables:
+        grads_and_vars = variables_helper.freeze_gradients_matching_regex(
+            grads_and_vars, train_config.freeze_variables)
+
+      # Optionally clip gradients
+      if train_config.gradient_clipping_by_norm > 0:
+        with tf.name_scope('clip_grads'):
+          grads_and_vars = slim.learning.clip_gradient_norms(
+              grads_and_vars, train_config.gradient_clipping_by_norm)
+
+      # Create gradient updates.
+      grad_updates = training_optimizer.apply_gradients(grads_and_vars,
+                                                        global_step=global_step)
+      update_ops.append(grad_updates)
+
+      update_op = tf.group(*update_ops)
+      with tf.control_dependencies([update_op]):
+        train_tensor = tf.identity(total_loss, name='train_op')
+
+    # Add summaries.
+    for model_var in slim.get_model_variables():
+      global_summaries.add(tf.summary.histogram(model_var.op.name, model_var))
+    for loss_tensor in tf.losses.get_losses():
+      global_summaries.add(tf.summary.scalar(loss_tensor.op.name, loss_tensor))
+    global_summaries.add(
+        tf.summary.scalar('TotalLoss', tf.losses.get_total_loss()))
+
+    # Add the summaries from the first clone. These contain the summaries
+    # created by model_fn and either optimize_clones() or _gather_clone_loss().
+    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
+                                       first_clone_scope))
+    summaries |= global_summaries
+
+    # Merge all summaries together.
+    summary_op = tf.summary.merge(list(summaries), name='summary_op')
+
+    # Soft placement allows placing on CPU ops without GPU implementation.
+    session_config = tf.ConfigProto(allow_soft_placement=True,
+                                    log_device_placement=False)
+
+    # Save checkpoints regularly.
+    keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours
+    saver = tf.train.Saver(
+        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
+
+    slim.learning.train(
+        train_tensor,
+        logdir=train_dir,
+        master=master,
+        is_chief=is_chief,
+        session_config=session_config,
+        startup_delay_steps=train_config.startup_delay_steps,
+        init_fn=init_fn,
+        summary_op=summary_op,
+        number_of_steps=(
+            train_config.num_steps if train_config.num_steps else None),
+        save_summaries_secs=120,
+        sync_optimizer=sync_optimizer,
+        saver=saver)
--- a/object_detection/trainer_test.py
+++ b/object_detection/trainer_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.trainer."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection import trainer
+from object_detection.core import losses
+from object_detection.core import model
+from object_detection.core import standard_fields as fields
+from object_detection.protos import train_pb2
+
+
+NUMBER_OF_CLASSES = 2
+
+
+def get_input_function():
+  """Builds a random single-image input dictionary for the trainer test.
+
+  Returns:
+    A dict keyed by `fields.InputDataFields` entries holding a random
+    [32, 32, 3] float32 image, a [1] int32 class label sampled with
+    minval=0 and maxval=NUMBER_OF_CLASSES, and a [1, 4] float32 box whose
+    coordinates are sampled with minval=0.4 and maxval=0.6.
+  """
+  tensor_dict = {}
+  # The three random ops are created in the same order as the dict entries.
+  tensor_dict[fields.InputDataFields.image] = tf.random_uniform(
+      [32, 32, 3], dtype=tf.float32)
+  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.random_uniform(
+      [1], minval=0, maxval=NUMBER_OF_CLASSES, dtype=tf.int32)
+  tensor_dict[fields.InputDataFields.groundtruth_boxes] = tf.random_uniform(
+      [1, 4], minval=0.4, maxval=0.6, dtype=tf.float32)
+  return tensor_dict
+
+
+class FakeDetectionModel(model.DetectionModel):
+  """Tiny stand-in DetectionModel that lets the trainer be tested end to end.
+
+  The model resizes its input, flattens it and applies two fully connected
+  layers to produce one class prediction and one box per image.
+  """
+
+  def __init__(self):
+    super(FakeDetectionModel, self).__init__(num_classes=NUMBER_OF_CLASSES)
+    # Both losses are created with anchorwise_output=True; loss() sums them.
+    self._classification_loss = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    self._localization_loss = losses.WeightedSmoothL1LocalizationLoss(
+        anchorwise_output=True)
+
+  def preprocess(self, inputs):
+    """Resizes the incoming image batch to 28x28.
+
+    Args:
+      inputs: a [batch, height_in, width_in, channels] float32 tensor
+        representing a batch of images with values between 0 and 255.0.
+
+    Returns:
+      a [batch, 28, 28, channels] float32 tensor of resized images.
+    """
+    target_size = [28, 28]
+    resized_inputs = tf.image.resize_images(inputs, target_size)
+    return resized_inputs
+
+  def predict(self, preprocessed_inputs):
+    """Computes class and box predictions from preprocessed images.
+
+    Args:
+      preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
+
+    Returns:
+      a dictionary with keys 'class_predictions_with_background' (reshaped to
+      [-1, 1, num_classes]) and 'box_encodings' (reshaped to [-1, 1, 4]),
+      to be passed to the loss or postprocess functions.
+    """
+    flat = tf.contrib.layers.flatten(preprocessed_inputs)
+    # Layer creation order (classes first, then boxes) is kept so variable
+    # scopes are named as before.
+    class_outputs = tf.contrib.layers.fully_connected(flat, self._num_classes)
+    box_outputs = tf.contrib.layers.fully_connected(flat, 4)
+
+    prediction_dict = {}
+    prediction_dict['class_predictions_with_background'] = tf.reshape(
+        class_outputs, [-1, 1, self._num_classes])
+    prediction_dict['box_encodings'] = tf.reshape(box_outputs, [-1, 1, 4])
+    return prediction_dict
+
+  def postprocess(self, prediction_dict, **params):
+    """Returns placeholder detections; the trainer test never uses them.
+
+    Args:
+      prediction_dict: a dictionary holding prediction tensors (ignored).
+      **params: Additional keyword arguments for specific implementations of
+        DetectionModel (ignored).
+
+    Returns:
+      a dictionary whose four detection fields are all None.
+    """
+    detection_keys = ('detection_boxes', 'detection_scores',
+                      'detection_classes', 'num_detections')
+    return dict.fromkeys(detection_keys)
+
+  def loss(self, prediction_dict):
+    """Computes summed localization and classification losses.
+
+    Groundtruth must already have been supplied through the
+    provide_groundtruth function before this is called.
+
+    Args:
+      prediction_dict: a dictionary holding predicted tensors
+
+    Returns:
+      a dictionary with scalar tensors under 'localization_loss' and
+      'classification_loss'.
+    """
+    groundtruth_boxes = self.groundtruth_lists(fields.BoxListFields.boxes)
+    batch_reg_targets = tf.stack(groundtruth_boxes)
+    batch_cls_targets = tf.stack(
+        self.groundtruth_lists(fields.BoxListFields.classes))
+    # One unit weight per image: shape [number of groundtruth entries, 1].
+    unit_weights = tf.constant(
+        1.0, dtype=tf.float32, shape=[len(groundtruth_boxes), 1])
+
+    anchorwise_loc_loss = self._localization_loss(
+        prediction_dict['box_encodings'], batch_reg_targets,
+        weights=unit_weights)
+    anchorwise_cls_loss = self._classification_loss(
+        prediction_dict['class_predictions_with_background'], batch_cls_targets,
+        weights=unit_weights)
+
+    return {
+        'localization_loss': tf.reduce_sum(anchorwise_loc_loss),
+        'classification_loss': tf.reduce_sum(anchorwise_cls_loss),
+    }
+
+  def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
+    """Returns a no-op checkpoint restore callable.
+
+    Args:
+      checkpoint_path: path to checkpoint to restore (ignored).
+      from_detection_checkpoint: whether to restore from a full detection
+        checkpoint or from a classification checkpoint (ignored).
+
+    Returns:
+      a callable which takes a tf.Session and does nothing.
+    """
+    return lambda unused_sess: None
+
+
+class TrainerTest(tf.test.TestCase):
+  """Smoke test that wires the fake model and inputs into trainer.train."""
+
+  def test_configure_trainer_and_train_two_steps(self):
+    """Parses a small TrainConfig and runs trainer.train with num_steps 2."""
+    config_text = """
+    optimizer {
+      adam_optimizer {
+        learning_rate {
+          constant_learning_rate {
+            learning_rate: 0.01
+          }
+        }
+      }
+    }
+    data_augmentation_options {
+      random_adjust_brightness {
+        max_delta: 0.2
+      }
+    }
+    data_augmentation_options {
+      random_adjust_contrast {
+        min_delta: 0.7
+        max_delta: 1.1
+      }
+    }
+    num_steps: 2
+    """
+    parsed_config = train_pb2.TrainConfig()
+    text_format.Merge(config_text, parsed_config)
+
+    # Single local worker that is also the chief; no parameter servers.
+    trainer_kwargs = dict(
+        create_tensor_dict_fn=get_input_function,
+        create_model_fn=FakeDetectionModel,
+        train_config=parsed_config,
+        master='',
+        task=0,
+        num_clones=1,
+        worker_replicas=1,
+        clone_on_cpu=True,
+        ps_tasks=0,
+        worker_job_name='worker',
+        is_chief=True,
+        train_dir=self.get_temp_dir())
+    trainer.train(**trainer_kwargs)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/utils/BUILD
+++ b/object_detection/utils/BUILD
+# Tensorflow Object Detection API: Utility functions.
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])
+
+# Apache 2.0
+
+py_library(
+    name = "category_util",
+    srcs = ["category_util.py"],
+    deps = ["//tensorflow"],
+)
+
+py_library(
+    name = "dataset_util",
+    srcs = ["dataset_util.py"],
+    deps = [
+        "//tensorflow",
+    ],
+)
+
+py_library(
+    name = "label_map_util",
+    srcs = ["label_map_util.py"],
+    deps = [
+        "//third_party/py/google/protobuf",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/protos:string_int_label_map_py_pb2",
+    ],
+)
+
+py_library(
+    name = "learning_schedules",
+    srcs = ["learning_schedules.py"],
+    deps = ["//tensorflow"],
+)
+
+py_library(
+    name = "metrics",
+    srcs = ["metrics.py"],
+    deps = ["//third_party/py/numpy"],
+)
+
+py_library(
+    name = "np_box_list",
+    srcs = ["np_box_list.py"],
+    deps = ["//tensorflow"],
+)
+
+py_library(
+    name = "np_box_list_ops",
+    srcs = ["np_box_list_ops.py"],
+    deps = [
+        ":np_box_list",
+        ":np_box_ops",
+        "//tensorflow",
+    ],
+)
+
+py_library(
+    name = "np_box_ops",
+    srcs = ["np_box_ops.py"],
+    deps = ["//tensorflow"],
+)
+
+py_library(
+    name = "object_detection_evaluation",
+    srcs = ["object_detection_evaluation.py"],
+    deps = [
+        ":metrics",
+        ":per_image_evaluation",
+        "//tensorflow",
+    ],
+)
+
+py_library(
+    name = "ops",
+    srcs = ["ops.py"],
+    deps = [
+        ":static_shape",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:box_list",
+        "//tensorflow_models/object_detection/core:box_list_ops",
+        "//tensorflow_models/object_detection/core:standard_fields",
+    ],
+)
+
+py_library(
+    name = "per_image_evaluation",
+    srcs = ["per_image_evaluation.py"],
+    deps = [
+        ":np_box_list",
+        ":np_box_list_ops",
+        "//tensorflow",
+    ],
+)
+
+py_library(
+    name = "shape_utils",
+    srcs = ["shape_utils.py"],
+    deps = ["//tensorflow"],
+)
+
+py_library(
+    name = "static_shape",
+    srcs = ["static_shape.py"],
+    deps = [],
+)
+
+py_library(
+    name = "test_utils",
+    srcs = ["test_utils.py"],
+    deps = [
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:anchor_generator",
+        "//tensorflow_models/object_detection/core:box_coder",
+        "//tensorflow_models/object_detection/core:box_list",
+        "//tensorflow_models/object_detection/core:box_predictor",
+        "//tensorflow_models/object_detection/core:matcher",
+    ],
+)
+
+py_library(
+    name = "variables_helper",
+    srcs = ["variables_helper.py"],
+    deps = [
+        "//tensorflow",
+    ],
+)
+
+py_library(
+    name = "visualization_utils",
+    srcs = ["visualization_utils.py"],
+    deps = [
+        "//third_party/py/PIL:pil",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "category_util_test",
+    srcs = ["category_util_test.py"],
+    deps = [
+        ":category_util",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "dataset_util_test",
+    srcs = ["dataset_util_test.py"],
+    deps = [
+        ":dataset_util",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "label_map_util_test",
+    srcs = ["label_map_util_test.py"],
+    deps = [
+        ":label_map_util",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "learning_schedules_test",
+    srcs = ["learning_schedules_test.py"],
+    deps = [
+        ":learning_schedules",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "metrics_test",
+    srcs = ["metrics_test.py"],
+    deps = [
+        ":metrics",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "np_box_list_test",
+    srcs = ["np_box_list_test.py"],
+    deps = [
+        ":np_box_list",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "np_box_list_ops_test",
+    srcs = ["np_box_list_ops_test.py"],
+    deps = [
+        ":np_box_list",
+        ":np_box_list_ops",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "np_box_ops_test",
+    srcs = ["np_box_ops_test.py"],
+    deps = [
+        ":np_box_ops",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "object_detection_evaluation_test",
+    srcs = ["object_detection_evaluation_test.py"],
+    deps = [
+        ":object_detection_evaluation",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "ops_test",
+    srcs = ["ops_test.py"],
+    deps = [
+        ":ops",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:standard_fields",
+    ],
+)
+
+py_test(
+    name = "per_image_evaluation_test",
+    srcs = ["per_image_evaluation_test.py"],
+    deps = [
+        ":per_image_evaluation",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "shape_utils_test",
+    srcs = ["shape_utils_test.py"],
+    deps = [
+        ":shape_utils",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "static_shape_test",
+    srcs = ["static_shape_test.py"],
+    deps = [
+        ":static_shape",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "test_utils_test",
+    srcs = ["test_utils_test.py"],
+    deps = [
+        ":test_utils",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "variables_helper_test",
+    srcs = ["variables_helper_test.py"],
+    deps = [
+        ":variables_helper",
+        "//tensorflow",
+    ],
+)
+
+py_test(
+    name = "visualization_utils_test",
+    srcs = ["visualization_utils_test.py"],
+    deps = [
+        ":visualization_utils",
+        "//third_party/py/PIL:pil",
+    ],
+)
--- a/object_detection/utils/__init__.py
+++ b/object_detection/utils/__init__.py
--- a/object_detection/utils/category_util.py
+++ b/object_detection/utils/category_util.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions for importing/exporting Object Detection categories."""
+import csv
+
+import tensorflow as tf
+
+
+def load_categories_from_csv_file(csv_path):
+  """Reads a list of category dictionaries from a csv file.
+
+  Every non-empty row must hold exactly two comma separated fields: a numeric
+  category id followed by the category name, e.g.:
+
+  0,"cat"
+  1,"dog"
+  2,"bird"
+  ...
+
+  Args:
+    csv_path: Path to the csv file to be parsed into categories.
+  Returns:
+    categories: A list of dictionaries, one per row in file order, each with
+                an integer 'id' field and a string 'name' field.
+  Raises:
+    ValueError: If a non-empty row does not have exactly two fields, or if
+                the id field cannot be converted with int().
+  """
+  categories = []
+  with tf.gfile.Open(csv_path, 'r') as csvfile:
+    for columns in csv.reader(csvfile, delimiter=',', quotechar='"'):
+      # csv.reader yields an empty list for blank lines; those are skipped.
+      if columns:
+        if len(columns) != 2:
+          raise ValueError(
+              'Expected 2 fields per row in csv: %s' % ','.join(columns))
+        raw_id, label = columns
+        categories.append({'id': int(raw_id), 'name': label})
+  return categories
+
+
+def save_categories_to_csv_file(categories, csv_path):
+  """Writes categories to a csv file, ordered by ascending category id.
+
+  Each category becomes one `id,name` row. The input list is left untouched;
+  ordering is applied to a copy.
+
+  Args:
+    categories: A list of dictionaries representing categories to save to file.
+                Each category must contain an 'id' and 'name' field.
+    csv_path: Path of the csv file the categories are written to.
+  """
+  # sorted() returns a new list, so the caller's list is not reordered as a
+  # side effect of saving.
+  ordered_categories = sorted(categories, key=lambda category: category['id'])
+  with tf.gfile.Open(csv_path, 'w') as csvfile:
+    writer = csv.writer(csvfile, delimiter=',', quotechar='"')
+    for category in ordered_categories:
+      writer.writerow([category['id'], category['name']])
--- a/object_detection/utils/category_util_test.py
+++ b/object_detection/utils/category_util_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.utils.category_util."""
+import os
+
+import tensorflow as tf
+
+from object_detection.utils import category_util
+
+
+class EvalUtilTest(tf.test.TestCase):
+  """Tests for loading and saving categories through category_util."""
+
+  def test_load_categories_from_csv_file(self):
+    """Categories written as raw csv text are parsed into id/name dicts."""
+    csv_data = """
+        0,"cat"
+        1,"dog"
+        2,"bird"
+    """.strip(' ')
+    csv_path = os.path.join(self.get_temp_dir(), 'test.csv')
+    with tf.gfile.Open(csv_path, 'wb') as f:
+      f.write(csv_data)
+
+    loaded = category_util.load_categories_from_csv_file(csv_path)
+    for expected in ({'id': 0, 'name': 'cat'},
+                     {'id': 1, 'name': 'dog'},
+                     {'id': 2, 'name': 'bird'}):
+      self.assertTrue(expected in loaded)
+
+  def test_save_categories_to_csv_file(self):
+    """Categories saved to csv are read back unchanged."""
+    categories = [
+        {'id': 0, 'name': 'cat'},
+        {'id': 1, 'name': 'dog'},
+        {'id': 2, 'name': 'bird'},
+    ]
+    csv_path = os.path.join(self.get_temp_dir(), 'test.csv')
+    category_util.save_categories_to_csv_file(categories, csv_path)
+    round_tripped = category_util.load_categories_from_csv_file(csv_path)
+    self.assertEqual(round_tripped, categories)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/utils/dataset_util.py
+++ b/object_detection/utils/dataset_util.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utility functions for creating TFRecord data sets."""
+
+import tensorflow as tf
+
+
+def int64_feature(value):
+  """Wraps a single value in a tf.train.Feature holding an Int64List."""
+  int64_list = tf.train.Int64List(value=[value])
+  return tf.train.Feature(int64_list=int64_list)
+
+
+def int64_list_feature(value):
+  """Wraps a sequence of values in a tf.train.Feature holding an Int64List."""
+  int64_list = tf.train.Int64List(value=value)
+  return tf.train.Feature(int64_list=int64_list)
+
+
+def bytes_feature(value):
+  """Wraps a single value in a tf.train.Feature holding a BytesList."""
+  bytes_list = tf.train.BytesList(value=[value])
+  return tf.train.Feature(bytes_list=bytes_list)
+
+
+def bytes_list_feature(value):
+  """Wraps a sequence of values in a tf.train.Feature holding a BytesList."""
+  bytes_list = tf.train.BytesList(value=value)
+  return tf.train.Feature(bytes_list=bytes_list)
+
+
+def float_list_feature(value):
+  """Wraps a sequence of values in a tf.train.Feature holding a FloatList."""
+  float_list = tf.train.FloatList(value=value)
+  return tf.train.Feature(float_list=float_list)
+
+
+def read_examples_list(path):
+  """Read list of training or validation examples.
+
+  The file is assumed to contain a single example per line where the first
+  whitespace-separated token in the line is an identifier that allows us to
+  find the image and annotation xml for that example. Blank lines are
+  ignored.
+
+  For example, the line:
+  xyz 3
+  would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).
+
+  Args:
+    path: absolute path to examples list file.
+
+  Returns:
+    list of example identifiers (strings), in file order.
+  """
+  with tf.gfile.GFile(path) as fid:
+    lines = fid.readlines()
+  # str.split() with no argument splits on any run of whitespace (spaces or
+  # tabs) and yields an empty list for blank lines, which are then dropped
+  # instead of being returned as '' identifiers.
+  tokenized_lines = (line.split() for line in lines)
+  return [tokens[0] for tokens in tokenized_lines if tokens]
+
+
+def recursive_parse_xml_to_dict(xml):
+  """Recursively parses XML contents to python dict.
+
+  We assume that `object` tags are the only ones that can appear
+  multiple times at the same level of a tree. Children tagged `object` are
+  collected into a list; for any other tag a later sibling with the same tag
+  overwrites an earlier one.
+
+  Args:
+    xml: xml tree obtained by parsing XML file contents using lxml.etree
+
+  Returns:
+    Python dictionary holding XML contents: `{tag: text}` for an element
+    without children, otherwise `{tag: {child_tag: child_value, ...}}`.
+  """
+  # An explicit child count replaces truth-testing the element, which is
+  # deprecated (and warns) in both lxml and xml.etree.
+  if len(xml) == 0:
+    return {xml.tag: xml.text}
+  result = {}
+  for child in xml:
+    child_result = recursive_parse_xml_to_dict(child)
+    if child.tag != 'object':
+      result[child.tag] = child_result[child.tag]
+    else:
+      # setdefault creates the list on the first `object` child.
+      result.setdefault(child.tag, []).append(child_result[child.tag])
+  return {xml.tag: result}
--- a/object_detection/utils/dataset_util_test.py
+++ b/object_detection/utils/dataset_util_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.utils.dataset_util."""
+
+import os
+import tensorflow as tf
+
+from object_detection.utils import dataset_util
+
+
+class DatasetUtilTest(tf.test.TestCase):
+  """Tests for the helpers in dataset_util."""
+
+  def test_read_examples_list(self):
+    """Only the first token of every line is returned as identifier."""
+    example_list_data = """example1 1\nexample2 2"""
+    list_file = os.path.join(self.get_temp_dir(), 'examples.txt')
+    with tf.gfile.Open(list_file, 'wb') as f:
+      f.write(example_list_data)
+
+    expected_ids = ['example1', 'example2']
+    actual_ids = dataset_util.read_examples_list(list_file)
+    self.assertListEqual(expected_ids, actual_ids)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/utils/label_map_util.py
+++ b/object_detection/utils/label_map_util.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Label map utility functions."""
+
+import logging
+
+import tensorflow as tf
+from google.protobuf import text_format
+from object_detection.protos import string_int_label_map_pb2
+
+
+def create_category_index(categories):
+  """Indexes a list of COCO compatible categories by their category id.
+
+  Args:
+    categories: a list of dicts, each of which has the following keys:
+      'id': (required) an integer id uniquely identifying this category.
+      'name': (required) string representing category name
+        e.g., 'cat', 'dog', 'pizza'.
+
+  Returns:
+    category_index: a dict mapping each category's 'id' value to the category
+      dict itself. If an id occurs more than once, the last entry wins.
+  """
+  return {category['id']: category for category in categories}
+
+
+def convert_label_map_to_categories(label_map,
+                                    max_num_classes,
+                                    use_display_name=True):
+  """Loads label map proto and returns categories list compatible with eval.
+
+  This function loads a label map and returns a list of dicts, each of which
+  has the following keys:
+    'id': (required) an integer id uniquely identifying this category.
+    'name': (required) string representing category name
+      e.g., 'cat', 'dog', 'pizza'.
+  We only allow a class into the list if its id is between 1 and
+  max_num_classes (both inclusive); other items are logged and ignored.
+  If there are several items mapping to the same id in the label map,
+  we will only keep the first one in the categories list.
+
+  Args:
+    label_map: a StringIntLabelMapProto or None.  If None, a default categories
+      list is created with max_num_classes categories, with ids
+      1..max_num_classes and names 'category_<id>'.
+    max_num_classes: maximum number of (consecutive) label indices to include.
+    use_display_name: (boolean) choose whether to load 'display_name' field
+      as category name.  If False or if the display_name field does not exist,
+      uses 'name' field as category names instead.
+  Returns:
+    categories: a list of dictionaries representing all possible categories,
+      in label map order.
+  """
+  categories = []
+  if not label_map:
+    label_id_offset = 1
+    for class_id in range(max_num_classes):
+      categories.append({
+          'id': class_id + label_id_offset,
+          'name': 'category_{}'.format(class_id + label_id_offset)
+      })
+    return categories
+  # A set gives constant-time duplicate checks; output order still follows
+  # the label map because `categories` is appended to in iteration order.
+  ids_already_added = set()
+  for item in label_map.item:
+    if not 0 < item.id <= max_num_classes:
+      logging.info('Ignore item %d since it falls outside of requested '
+                   'label range.', item.id)
+      continue
+    if use_display_name and item.HasField('display_name'):
+      name = item.display_name
+    else:
+      name = item.name
+    if item.id not in ids_already_added:
+      ids_already_added.add(item.id)
+      categories.append({'id': item.id, 'name': name})
+  return categories
+
+
+# TODO: double check documentation.
+def load_labelmap(path):
+  """Reads a StringIntLabelMap proto from a file.
+
+  The file contents are first parsed as a text-format proto; if that raises
+  a text_format.ParseError the same contents are parsed as a serialized
+  (binary) proto instead.
+
+  Args:
+    path: path to StringIntLabelMap proto text file.
+  Returns:
+    a StringIntLabelMapProto
+  """
+  with tf.gfile.GFile(path, 'r') as fid:
+    file_contents = fid.read()
+
+  parsed_label_map = string_int_label_map_pb2.StringIntLabelMap()
+  try:
+    text_format.Merge(file_contents, parsed_label_map)
+  except text_format.ParseError:
+    parsed_label_map.ParseFromString(file_contents)
+  return parsed_label_map
+
+
+def get_label_map_dict(label_map_path):
+  """Loads a label map file and maps every item name to its id.
+
+  Args:
+    label_map_path: path to label_map.
+
+  Returns:
+    A dictionary mapping label names to id. If a name occurs more than once,
+    the id of the last occurrence is kept.
+  """
+  return {
+      entry.name: entry.id for entry in load_labelmap(label_map_path).item
+  }
--- a/object_detection/utils/label_map_util_test.py
+++ b/object_detection/utils/label_map_util_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.utils.label_map_util."""
+
+import os
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.protos import string_int_label_map_pb2
+from object_detection.utils import label_map_util
+
+
+class LabelMapUtilTest(tf.test.TestCase):
+  """Tests for the helpers in label_map_util."""
+
+  def _generate_label_map(self, num_classes):
+    """Builds a label map proto with ids 1..num_classes.
+
+    Item i is named 'label_<i>' and uses '<i>' as its display name.
+    """
+    generated_map = string_int_label_map_pb2.StringIntLabelMap()
+    for class_id in range(1, num_classes + 1):
+      entry = generated_map.item.add()
+      entry.id = class_id
+      entry.name = 'label_%d' % class_id
+      entry.display_name = '%d' % class_id
+    return generated_map
+
+  def test_get_label_map_dict(self):
+    """Names map to ids regardless of item order in the file."""
+    label_map_string = """
+      item {
+        id:2
+        name:'cat'
+      }
+      item {
+        id:1
+        name:'dog'
+      }
+    """
+    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+    with tf.gfile.Open(label_map_path, 'wb') as f:
+      f.write(label_map_string)
+
+    name_to_id = label_map_util.get_label_map_dict(label_map_path)
+    self.assertEqual(name_to_id['dog'], 1)
+    self.assertEqual(name_to_id['cat'], 2)
+
+  def test_keep_categories_with_unique_id(self):
+    """Only the first item is kept when several items share an id."""
+    label_map_string = """
+      item {
+        id:2
+        name:'cat'
+      }
+      item {
+        id:1
+        name:'child'
+      }
+      item {
+        id:1
+        name:'person'
+      }
+      item {
+        id:1
+        name:'n00007846'
+      }
+    """
+    label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
+    text_format.Merge(label_map_string, label_map_proto)
+    categories = label_map_util.convert_label_map_to_categories(
+        label_map_proto, max_num_classes=3)
+    expected_categories_list = [
+        {'id': 2, 'name': u'cat'},
+        {'id': 1, 'name': u'child'},
+    ]
+    self.assertListEqual(expected_categories_list, categories)
+
+  def test_convert_label_map_to_categories_no_label_map(self):
+    """Without a label map, default 'category_<id>' entries are created."""
+    categories = label_map_util.convert_label_map_to_categories(
+        None, max_num_classes=3)
+    expected_categories_list = [
+        {'name': u'category_1', 'id': 1},
+        {'name': u'category_2', 'id': 2},
+        {'name': u'category_3', 'id': 3},
+    ]
+    self.assertListEqual(expected_categories_list, categories)
+
+  def test_convert_label_map_to_coco_categories(self):
+    """Items with ids above max_num_classes are dropped."""
+    four_class_map = self._generate_label_map(num_classes=4)
+    categories = label_map_util.convert_label_map_to_categories(
+        four_class_map, max_num_classes=3)
+    expected_categories_list = [
+        {'name': u'1', 'id': 1},
+        {'name': u'2', 'id': 2},
+        {'name': u'3', 'id': 3},
+    ]
+    self.assertListEqual(expected_categories_list, categories)
+
+  def test_convert_label_map_to_coco_categories_with_few_classes(self):
+    """A smaller max_num_classes keeps only the first two ids."""
+    four_class_map = self._generate_label_map(num_classes=4)
+    cat_no_offset = label_map_util.convert_label_map_to_categories(
+        four_class_map, max_num_classes=2)
+    expected_categories_list = [
+        {'name': u'1', 'id': 1},
+        {'name': u'2', 'id': 2},
+    ]
+    self.assertListEqual(expected_categories_list, cat_no_offset)
+
+  def test_create_category_index(self):
+    """The index is keyed by each category's id."""
+    categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
+    expected_index = {
+        1: {'name': u'1', 'id': 1},
+        2: {'name': u'2', 'id': 2},
+    }
+    category_index = label_map_util.create_category_index(categories)
+    self.assertDictEqual(expected_index, category_index)
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/utils/learning_schedules.py
+++ b/object_detection/utils/learning_schedules.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Library of common learning rate schedules."""
+
+import tensorflow as tf
+
+
+def exponential_decay_with_burnin(global_step,
+                                  learning_rate_base,
+                                  learning_rate_decay_steps,
+                                  learning_rate_decay_factor,
+                                  burnin_learning_rate=0.0,
+                                  burnin_steps=0):
+  """Staircase exponential decay preceded by a constant burn-in phase.
+
+  While global_step is below burnin_steps the burn-in learning rate is
+  returned; afterwards the rate follows tf.train.exponential_decay evaluated
+  on the unshifted global_step.
+
+  Args:
+    global_step: int tensor representing global step.
+    learning_rate_base: base learning rate.
+    learning_rate_decay_steps: steps to take between decaying the learning rate.
+      Note that this includes the number of burn-in steps.
+    learning_rate_decay_factor: multiplicative factor by which to decay
+      learning rate.
+    burnin_learning_rate: initial learning rate during burn-in period.  If
+      0.0 (which is the default), then the burn-in learning rate is simply
+      set to learning_rate_base.
+    burnin_steps: number of steps to use burnin learning rate.
+
+  Returns:
+    a (scalar) float tensor representing learning rate
+  """
+  # A burn-in rate of zero is the sentinel for "use the base rate".
+  effective_burnin_rate = (
+      learning_rate_base if burnin_learning_rate == 0
+      else burnin_learning_rate)
+  decayed_rate = tf.train.exponential_decay(
+      learning_rate_base,
+      global_step,
+      learning_rate_decay_steps,
+      learning_rate_decay_factor,
+      staircase=True)
+  in_burnin = tf.less(global_step, burnin_steps)
+
+  def _burnin_rate():
+    return tf.convert_to_tensor(effective_burnin_rate)
+
+  def _post_burnin_rate():
+    return decayed_rate
+
+  return tf.cond(in_burnin, _burnin_rate, _post_burnin_rate)
+
+
+def manual_stepping(global_step, boundaries, rates):
+  """Manually stepped learning rate schedule.
+
+  This function provides fine grained control over learning rates.  One must
+  specify a sequence of learning rates as well as a set of integer steps
+  at which the current learning rate must transition to the next.  For example,
+  if boundaries = [5, 10] and rates = [.1, .01, .001], then the learning
+  rate returned by this function is .1 for global_step=0,...,4, .01 for
+  global_step=5...9, and .001 for global_step=10 and onward.
+
+  Args:
+    global_step: int64 (scalar) tensor representing global step.
+    boundaries: a list of global steps at which to switch learning
+      rates.  This list is assumed to consist of increasing positive integers.
+    rates: a list of (float) learning rates corresponding to intervals between
+      the boundaries.  The length of this list must be exactly
+      len(boundaries) + 1.
+
+  Returns:
+    a (scalar) float tensor representing learning rate
+  Raises:
+    ValueError: if one of the following checks fails:
+      1. boundaries is a strictly increasing list of integers, none of which
+         is negative
+      2. every entry of rates is a float
+      3. len(rates) == len(boundaries) + 1
+  """
+  # The type check runs before the sign check so that a non-integer boundary
+  # (e.g. a string) is reported as ValueError rather than failing inside the
+  # `<` comparison with a TypeError.
+  if any(not isinstance(b, int) for b in boundaries) or any(
+      b < 0 for b in boundaries):
+    raise ValueError('boundaries must be a list of positive integers')
+  if any(bnext <= b for bnext, b in zip(boundaries[1:], boundaries[:-1])):
+    raise ValueError('Entries in boundaries must be strictly increasing.')
+  if any(not isinstance(r, float) for r in rates):
+    raise ValueError('Learning rates must be floats')
+  if len(rates) != len(boundaries) + 1:
+    raise ValueError('Number of provided learning rates must exceed '
+                     'number of boundary points by exactly 1.')
+  step_boundaries = tf.constant(boundaries, tf.int64)
+  learning_rates = tf.constant(rates, tf.float32)
+  # Positions of the boundaries that global_step has not reached yet.
+  unreached_boundaries = tf.reshape(tf.where(
+      tf.greater(step_boundaries, global_step)), [-1])
+  # Appending len(boundaries) makes the minimum fall back to the last rate
+  # once every boundary has been passed.
+  unreached_boundaries = tf.concat([unreached_boundaries, [len(boundaries)]], 0)
+  # The smallest unreached position is the index of the current rate.
+  index = tf.reshape(tf.reduce_min(unreached_boundaries), [1])
+  return tf.reshape(tf.slice(learning_rates, index, [1]), [])
--- a/object_detection/utils/learning_schedules_test.py
+++ b/object_detection/utils/learning_schedules_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.utils.learning_schedules."""
+import tensorflow as tf
+
+from object_detection.utils import learning_schedules
+
+
+class LearningSchedulesTest(tf.test.TestCase):
+
+  def testExponentialDecayWithBurnin(self):
+    global_step = tf.placeholder(tf.int32, [])
+    learning_rate_base = 1.0
+    learning_rate_decay_steps = 3
+    learning_rate_decay_factor = .1
+    burnin_learning_rate = .5
+    burnin_steps = 2
+    exp_rates = [.5, .5, 1, .1, .1, .1, .01, .01]
+    learning_rate = learning_schedules.exponential_decay_with_burnin(
+        global_step, learning_rate_base, learning_rate_decay_steps,
+        learning_rate_decay_factor, burnin_learning_rate, burnin_steps)
+    with self.test_session() as sess:
+      output_rates = []
+      for input_global_step in range(8):
+        output_rate = sess.run(learning_rate,
+                               feed_dict={global_step: input_global_step})
+        output_rates.append(output_rate)
+      self.assertAllClose(output_rates, exp_rates)
+
+  def testManualStepping(self):
+    global_step = tf.placeholder(tf.int64, [])
+    boundaries = [2, 3, 7]
+    rates = [1.0, 2.0, 3.0, 4.0]
+    exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
+    learning_rate = learning_schedules.manual_stepping(global_step, boundaries,
+                                                       rates)
+    with self.test_session() as sess:
+      output_rates = []
+      for input_global_step in range(10):
+        output_rate = sess.run(learning_rate,
+                               feed_dict={global_step: input_global_step})
+        output_rates.append(output_rate)
+      self.assertAllClose(output_rates, exp_rates)
+
+# When executed as a script, hand control to the TensorFlow test runner.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/utils/metrics.py
+++ b/object_detection/utils/metrics.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions for computing metrics such as precision, recall and CorLoc."""
+from __future__ import division
+
+import numpy as np
+
+
+def compute_precision_recall(scores, labels, num_gt):
+  """Compute precision and recall.
+
+  Args:
+    scores: A float numpy array representing detection score
+    labels: A boolean numpy array representing true/false positive labels
+    num_gt: Number of ground truth instances
+
+  Raises:
+    ValueError: if the input is not of the correct format
+
+  Returns:
+    precision: Fraction of positive instances over detected ones. This value is
+      None if no ground truth labels are present.
+    recall: Fraction of detected positive instance over all positive instances.
+      This value is None if no ground truth labels are present.
+
+  """
+  if not isinstance(
+      labels, np.ndarray) or labels.dtype != np.bool or len(labels.shape) != 1:
+    raise ValueError("labels must be single dimension bool numpy array")
+
+  if not isinstance(
+      scores, np.ndarray) or len(scores.shape) != 1:
+    raise ValueError("scores must be single dimension numpy array")
+
+  if num_gt < np.sum(labels):
+    raise ValueError("Number of true positives must be smaller than num_gt.")
+
+  if len(scores) != len(labels):
+    raise ValueError("scores and labels must be of the same size.")
+
+  if num_gt == 0:
+    return None, None
+
+  sorted_indices = np.argsort(scores)
+  sorted_indices = sorted_indices[::-1]
+  labels = labels.astype(int)
+  true_positive_labels = labels[sorted_indices]
+  false_positive_labels = 1 - true_positive_labels
+  cum_true_positives = np.cumsum(true_positive_labels)
+  cum_false_positives = np.cumsum(false_positive_labels)
+  precision = cum_true_positives.astype(float) / (
+      cum_true_positives + cum_false_positives)
+  recall = cum_true_positives.astype(float) / num_gt
+  return precision, recall
+
+
+def compute_average_precision(precision, recall):
+  """Compute Average Precision according to the definition in VOCdevkit.
+
+  Precision is modified to ensure that it does not decrease as recall
+  decrease.
+
+  Args:
+    precision: A float [N, 1] numpy array of precisions
+    recall: A float [N, 1] numpy array of recalls
+
+  Raises:
+    ValueError: if the input is not of the correct format
+
+  Returns:
+    average_precison: The area under the precision recall curve. NaN if
+      precision and recall are None.
+
+  """
+  if precision is None:
+    if recall is not None:
+      raise ValueError("If precision is None, recall must also be None")
+    return np.NAN
+
+  if not isinstance(precision, np.ndarray) or not isinstance(recall,
+                                                             np.ndarray):
+    raise ValueError("precision and recall must be numpy array")
+  if precision.dtype != np.float or recall.dtype != np.float:
+    raise ValueError("input must be float numpy array.")
+  if len(precision) != len(recall):
+    raise ValueError("precision and recall must be of the same size.")
+  if not precision.size:
+    return 0.0
+  if np.amin(precision) < 0 or np.amax(precision) > 1:
+    raise ValueError("Precision must be in the range of [0, 1].")
+  if np.amin(recall) < 0 or np.amax(recall) > 1:
+    raise ValueError("recall must be in the range of [0, 1].")
+  if not all(recall[i] <= recall[i + 1] for i in xrange(len(recall) - 1)):
+    raise ValueError("recall must be a non-decreasing array")
+
+  recall = np.concatenate([[0], recall, [1]])
+  precision = np.concatenate([[0], precision, [0]])
+
+  # Preprocess precision to be a non-decreasing array
+  for i in range(len(precision) - 2, -1, -1):
+    precision[i] = np.maximum(precision[i], precision[i + 1])
+
+  indices = np.where(recall[1:] != recall[:-1])[0] + 1
+  average_precision = np.sum(
+      (recall[indices] - recall[indices - 1]) * precision[indices])
+  return average_precision
+
+
+def compute_cor_loc(num_gt_imgs_per_class,
+                    num_images_correctly_detected_per_class):
+  """Compute CorLoc according to the definition in the following paper.
+
+  https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf
+
+  Returns nans if there are no ground truth images for a class.
+
+  Args:
+    num_gt_imgs_per_class: 1D array, representing number of images containing
+        at least one object instance of a particular class
+    num_images_correctly_detected_per_class: 1D array, representing number of
+        images that are correctly detected at least one object instance of a
+        particular class
+
+  Returns:
+    corloc_per_class: A float numpy array represents the corloc score of each
+      class
+  """
+  return np.where(
+      num_gt_imgs_per_class == 0,
+      np.nan,
+      num_images_correctly_detected_per_class / num_gt_imgs_per_class)
--- a/object_detection/utils/metrics_test.py
+++ b/object_detection/utils/metrics_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.utils.metrics."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.utils import metrics
+
+
+class MetricsTest(tf.test.TestCase):
+
+  def test_compute_cor_loc(self):
+    num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
+    num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
+                                                       dtype=int)
+    corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
+                                     num_images_correctly_detected_per_class)
+    expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)
+    self.assertTrue(np.allclose(corloc, expected_corloc))
+
+  def test_compute_cor_loc_nans(self):
+    num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
+    num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
+                                                       dtype=int)
+    corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
+                                     num_images_correctly_detected_per_class)
+    expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)
+    self.assertAllClose(corloc, expected_corloc)
+
+  def test_compute_precision_recall(self):
+    num_gt = 10
+    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
+    labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
+    accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
+    expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
+    expected_recall = accumulated_tp_count / num_gt
+    precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
+    self.assertAllClose(precision, expected_precision)
+    self.assertAllClose(recall, expected_recall)
+
+  def test_compute_average_precision(self):
+    precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float)
+    recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float)
+    processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0],
+                                   dtype=float)
+    recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float)
+    expected_mean_ap = np.sum(recall_interval * processed_precision)
+    mean_ap = metrics.compute_average_precision(precision, recall)
+    self.assertAlmostEqual(expected_mean_ap, mean_ap)
+
+  def test_compute_precision_recall_and_ap_no_groundtruth(self):
+    num_gt = 0
+    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
+    labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
+    expected_precision = None
+    expected_recall = None
+    precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
+    self.assertEquals(precision, expected_precision)
+    self.assertEquals(recall, expected_recall)
+    ap = metrics.compute_average_precision(precision, recall)
+    self.assertTrue(np.isnan(ap))
+
+
+# When executed as a script, hand control to the TensorFlow test runner.
+if __name__ == '__main__':
+  tf.test.main()