Merge branch 'master' of github.com:tensorflow/models

f282f6ef · Alexander Gorban · 58a5da7b · a2970b03 · f282f6ef · f282f6ef
Commit f282f6ef authored Jul 05, 2017 by Alexander Gorban
20 changed files
--- a/lfads/utils.py
+++ b/lfads/utils.py
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+from __future__ import print_function
+
+import os
+import h5py
+import json
+
+import numpy as np
+import tensorflow as tf
+
+
+def log_sum_exp(x_k):
+  """Computes log \sum exp in a numerically stable way.
+    log ( sum_i exp(x_i) )
+    log ( sum_i exp(x_i - m + m) ),       with m = max(x_i)
+    log ( sum_i exp(x_i - m)*exp(m) )
+    log ( sum_i exp(x_i - m) + m
+
+  Args:
+    x_k - k -dimensional list of arguments to log_sum_exp.
+
+  Returns:
+    log_sum_exp of the arguments.
+  """
+  m = tf.reduce_max(x_k)
+  x1_k = x_k - m
+  u_k = tf.exp(x1_k)
+  z = tf.reduce_sum(u_k)
+  return tf.log(z) + m
+
+
+def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
+           normalized=False, name=None, collections=None):
+  """Linear (affine) transformation, y = x W + b, for a variety of
+  configurations.
+
+  Args:
+    x: input The tensor to tranformation.
+    out_size: The integer size of non-batch output dimension.
+    do_bias (optional): Add a learnable bias vector to the operation.
+    alpha (optional): A multiplicative scaling for the weight initialization
+      of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
+    identity_if_possible (optional): just return identity,
+      if x.shape[1] == out_size.
+    normalized (optional): Option to divide out by the norms of the rows of W.
+    name (optional): The name prefix to add to variables.
+    collections (optional): List of additional collections. (Placed in
+      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
+
+  Returns:
+    In the equation, y = x W + b, returns the tensorflow op that yields y.
+  """
+  in_size = int(x.get_shape()[1]) # from Dimension(10) -> 10
+  stddev = alpha/np.sqrt(float(in_size))
+  mat_init = tf.random_normal_initializer(0.0, stddev)
+  wname = (name + "/W") if name else "/W"
+
+  if identity_if_possible and in_size == out_size:
+    # Sometimes linear layers are nothing more than size adapters.
+    return tf.identity(x, name=(wname+'_ident'))
+
+  W,b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
+                    normalized=normalized, name=name, collections=collections)
+
+  if do_bias:
+    return tf.matmul(x, W) + b
+  else:
+    return tf.matmul(x, W)
+
+
+def init_linear(in_size, out_size, do_bias=True, mat_init_value=None, alpha=1.0,
+                identity_if_possible=False, normalized=False,
+                name=None, collections=None):
+  """Linear (affine) transformation, y = x W + b, for a variety of
+  configurations.
+
+  Args:
+    in_size: The integer size of the non-batc input dimension. [(x),y]
+    out_size: The integer size of non-batch output dimension. [x,(y)]
+    do_bias (optional): Add a learnable bias vector to the operation.
+    mat_init_value (optional): numpy constant for matrix initialization, if None
+      , do random, with additional parameters.
+    alpha (optional): A multiplicative scaling for the weight initialization
+      of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
+    identity_if_possible (optional): just return identity,
+      if x.shape[1] == out_size.
+    normalized (optional): Option to divide out by the norms of the rows of W.
+    name (optional): The name prefix to add to variables.
+    collections (optional): List of additional collections. (Placed in
+      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
+
+  Returns:
+    In the equation, y = x W + b, returns the pair (W, b).
+  """
+
+  if mat_init_value is not None and mat_init_value.shape != (in_size, out_size):
+    raise ValueError(
+        'Provided mat_init_value must have shape [%d, %d].'%(in_size, out_size))
+
+  if mat_init_value is None:
+    stddev = alpha/np.sqrt(float(in_size))
+    mat_init = tf.random_normal_initializer(0.0, stddev)
+
+  wname = (name + "/W") if name else "/W"
+
+  if identity_if_possible and in_size == out_size:
+    return (tf.constant(np.eye(in_size).astype(np.float32)),
+            tf.zeros(in_size))
+
+  # Note the use of get_variable vs. tf.Variable.  this is because get_variable
+  # does not allow the initialization of the variable with a value.
+  if normalized:
+    w_collections = [tf.GraphKeys.GLOBAL_VARIABLES, "norm-variables"]
+    if collections:
+      w_collections += collections
+    if mat_init_value is not None:
+      w = tf.Variable(mat_init_value, name=wname, collections=w_collections)
+    else:
+      w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
+                          collections=w_collections)
+    w = tf.nn.l2_normalize(w, dim=0) # x W, so xW_j = \sum_i x_bi W_ij
+  else:
+    w_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
+    if collections:
+      w_collections += collections
+    if mat_init_value is not None:
+      w = tf.Variable(mat_init_value, name=wname, collections=w_collections)
+    else:
+      w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
+                          collections=w_collections)
+
+  if do_bias:
+    b_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
+    if collections:
+      b_collections += collections
+    bname = (name + "/b") if name else "/b"
+    b = tf.get_variable(bname, [1, out_size],
+                        initializer=tf.zeros_initializer(),
+                        collections=b_collections)
+  else:
+    b = None
+
+  return (w, b)
+
+
+def write_data(data_fname, data_dict, use_json=False, compression=None):
+  """Write data in HD5F format.
+
+  Args:
+    data_fname: The filename of teh file in which to write the data.
+    data_dict:  The dictionary of data to write. The keys are strings
+      and the values are numpy arrays.
+    use_json (optional): human readable format for simple items
+    compression (optional): The compression to use for h5py (disabled by
+      default because the library borks on scalars, otherwise try 'gzip').
+  """
+
+  dir_name = os.path.dirname(data_fname)
+  if not os.path.exists(dir_name):
+    os.makedirs(dir_name)
+
+  if use_json:
+    the_file = open(data_fname,'w')
+    json.dump(data_dict, the_file)
+    the_file.close()
+  else:
+    try:
+      with h5py.File(data_fname, 'w') as hf:
+        for k, v in data_dict.items():
+          clean_k = k.replace('/', '_')
+          if clean_k is not k:
+            print('Warning: saving variable with name: ', k, ' as ', clean_k)
+          else:
+            print('Saving variable with name: ', clean_k)
+          hf.create_dataset(clean_k, data=v, compression=compression)
+    except IOError:
+      print("Cannot open %s for writing.", data_fname)
+      raise
+
+
+def read_data(data_fname):
+  """ Read saved data in HDF5 format.
+
+  Args:
+    data_fname: The filename of the file from which to read the data.
+  Returns:
+    A dictionary whose keys will vary depending on dataset (but should
+    always contain the keys 'train_data' and 'valid_data') and whose
+    values are numpy arrays.
+  """
+
+  try:
+    with h5py.File(data_fname, 'r') as hf:
+      data_dict = {k: np.array(v) for k, v in hf.items()}
+      return data_dict
+  except IOError:
+    print("Cannot open %s for reading." % data_fname)
+    raise
+
+
+def write_datasets(data_path, data_fname_stem, dataset_dict, compression=None):
+  """Write datasets in HD5F format.
+
+  This function assumes the dataset_dict is a mapping ( string ->
+  to data_dict ).  It calls write_data for each data dictionary,
+  post-fixing the data filename with the key of the dataset.
+
+  Args:
+    data_path: The path to the save directory.
+    data_fname_stem: The filename stem of the file in which to write the data.
+    dataset_dict:  The dictionary of datasets. The keys are strings
+      and the values data dictionaries (str -> numpy arrays) associations.
+    compression (optional): The compression to use for h5py (disabled by
+      default because the library borks on scalars, otherwise try 'gzip').
+  """
+
+  full_name_stem = os.path.join(data_path, data_fname_stem)
+  for s, data_dict in dataset_dict.items():
+    write_data(full_name_stem + "_" + s, data_dict, compression=compression)
+
+
+def read_datasets(data_path, data_fname_stem):
+  """Read dataset sin HD5F format.
+
+  This function assumes the dataset_dict is a mapping ( string ->
+  to data_dict ).  It calls write_data for each data dictionary,
+  post-fixing the data filename with the key of the dataset.
+
+  Args:
+    data_path: The path to the save directory.
+    data_fname_stem: The filename stem of the file in which to write the data.
+  """
+
+  dataset_dict = {}
+  fnames = os.listdir(data_path)
+
+  print ('loading data from ' + data_path + ' with stem ' + data_fname_stem)
+  for fname in fnames:
+    if fname.startswith(data_fname_stem):
+      data_dict = read_data(os.path.join(data_path,fname))
+      idx = len(data_fname_stem) + 1
+      key = fname[idx:]
+      data_dict['data_dim'] = data_dict['train_data'].shape[2]
+      data_dict['num_steps'] = data_dict['train_data'].shape[1]
+      dataset_dict[key] = data_dict
+
+  if len(dataset_dict) == 0:
+    raise ValueError("Failed to load any datasets, are you sure that the "
+                     "'--data_dir' and '--data_filename_stem' flag values "
+                     "are correct?")
+
+  print (str(len(dataset_dict)) + ' datasets loaded')
+  return dataset_dict
+
+
+# NUMPY utility functions
+def list_t_bxn_to_list_b_txn(values_t_bxn):
+  """Convert a length T list of BxN numpy tensors of length B list of TxN numpy
+  tensors.
+
+  Args:
+    values_t_bxn: The length T list of BxN numpy tensors.
+
+  Returns:
+    The length B list of TxN numpy tensors.
+  """
+  T = len(values_t_bxn)
+  B, N = values_t_bxn[0].shape
+  values_b_txn = []
+  for b in range(B):
+    values_pb_txn = np.zeros([T,N])
+    for t in range(T):
+      values_pb_txn[t,:] = values_t_bxn[t][b,:]
+    values_b_txn.append(values_pb_txn)
+
+  return values_b_txn
+
+
+def list_t_bxn_to_tensor_bxtxn(values_t_bxn):
+  """Convert a length T list of BxN numpy tensors to single numpy tensor with
+  shape BxTxN.
+
+  Args:
+    values_t_bxn: The length T list of BxN numpy tensors.
+
+  Returns:
+    values_bxtxn: The BxTxN numpy tensor.
+  """
+
+  T = len(values_t_bxn)
+  B, N = values_t_bxn[0].shape
+  values_bxtxn = np.zeros([B,T,N])
+  for t in range(T):
+    values_bxtxn[:,t,:] = values_t_bxn[t]
+
+  return values_bxtxn
+
+
+def tensor_bxtxn_to_list_t_bxn(tensor_bxtxn):
+  """Convert a numpy tensor with shape BxTxN to a length T list of numpy tensors
+  with shape BxT.
+
+  Args:
+    tensor_bxtxn: The BxTxN numpy tensor.
+
+  Returns:
+    A length T list of numpy tensors with shape BxT.
+  """
+
+  values_t_bxn = []
+  B, T, N = tensor_bxtxn.shape
+  for t in range(T):
+    values_t_bxn.append(np.squeeze(tensor_bxtxn[:,t,:]))
+
+  return values_t_bxn
+
+
+def flatten(list_of_lists):
+  """Takes a list of lists and returns a list of the elements.
+
+  Args:
+    list_of_lists: List of lists.
+
+  Returns:
+    flat_list: Flattened list.
+    flat_list_idxs: Flattened list indices.
+  """
+  flat_list = []
+  flat_list_idxs = []
+  start_idx = 0
+  for item in list_of_lists:
+    if isinstance(item, list):
+      flat_list += item
+      l = len(item)
+      idxs = range(start_idx, start_idx+l)
+      start_idx = start_idx+l
+    else:                   # a value
+      flat_list.append(item)
+      idxs = [start_idx]
+      start_idx += 1
+    flat_list_idxs.append(idxs)
+
+  return flat_list, flat_list_idxs
--- a/neural_gpu/README.md
+++ b/neural_gpu/README.md
 # NeuralGPU
-Code for the Neural GPU model described in [[http://arxiv.org/abs/1511.08228]].
-The extended version was described in [[https://arxiv.org/abs/1610.08613]].
+Code for the Neural GPU model described in http://arxiv.org/abs/1511.08228.
+The extended version was described in https://arxiv.org/abs/1610.08613.

 Requirements:
 * TensorFlow (see tensorflow.org for how to install)

--- a/neural_gpu/neural_gpu.py
+++ b/neural_gpu/neural_gpu.py
@@ -478,8 +478,10 @@ class NeuralGPU(object):
        # This is just for running a baseline RNN seq2seq model.
        if do_rnn:
          self.after_enc_step.append(step)  # Not meaningful here, but needed.
-          lstm_cell = tf.contrib.rnn.BasicLSTMCell(height * nmaps)
-          cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * nconvs)
+          def lstm_cell():
+            return tf.contrib.rnn.BasicLSTMCell(height * nmaps)
+          cell = tf.contrib.rnn.MultiRNNCell(
+              [lstm_cell() for _ in range(nconvs)])
          with tf.variable_scope("encoder"):
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                cell, tf.reshape(step, [batch_size, length, height * nmaps]),

--- a/object_detection/BUILD
+++ b/object_detection/BUILD
+# Tensorflow Object Detection API: main runnables.
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])
+
+# Apache 2.0
+
+py_binary(
+    name = "train",
+    srcs = [
+        "train.py",
+    ],
+    deps = [
+        ":trainer",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/builders:input_reader_builder",
+        "//tensorflow_models/object_detection/builders:model_builder",
+        "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
+        "//tensorflow_models/object_detection/protos:model_py_pb2",
+        "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
+        "//tensorflow_models/object_detection/protos:train_py_pb2",
+    ],
+)
+
+py_library(
+    name = "trainer",
+    srcs = ["trainer.py"],
+    deps = [
+        "//tensorflow",
+        "//tensorflow_models/object_detection/builders:optimizer_builder",
+        "//tensorflow_models/object_detection/builders:preprocessor_builder",
+        "//tensorflow_models/object_detection/core:batcher",
+        "//tensorflow_models/object_detection/core:standard_fields",
+        "//tensorflow_models/object_detection/utils:ops",
+        "//tensorflow_models/object_detection/utils:variables_helper",
+        "//tensorflow_models/slim:model_deploy",
+    ],
+)
+
+py_test(
+    name = "trainer_test",
+    srcs = ["trainer_test.py"],
+    deps = [
+        ":trainer",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:losses",
+        "//tensorflow_models/object_detection/core:model",
+        "//tensorflow_models/object_detection/core:standard_fields",
+        "//tensorflow_models/object_detection/protos:train_py_pb2",
+    ],
+)
+
+py_library(
+    name = "eval_util",
+    srcs = [
+        "eval_util.py",
+    ],
+    deps = [
+        "//tensorflow",
+        "//tensorflow_models/object_detection/utils:label_map_util",
+        "//tensorflow_models/object_detection/utils:object_detection_evaluation",
+        "//tensorflow_models/object_detection/utils:visualization_utils",
+    ],
+)
+
+py_library(
+    name = "evaluator",
+    srcs = ["evaluator.py"],
+    deps = [
+        "//tensorflow",
+        "//tensorflow_models/object_detection:eval_util",
+        "//tensorflow_models/object_detection/core:box_list",
+        "//tensorflow_models/object_detection/core:box_list_ops",
+        "//tensorflow_models/object_detection/core:prefetcher",
+        "//tensorflow_models/object_detection/core:standard_fields",
+        "//tensorflow_models/object_detection/protos:eval_py_pb2",
+    ],
+)
+
+py_binary(
+    name = "eval",
+    srcs = [
+        "eval.py",
+    ],
+    deps = [
+        ":evaluator",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/builders:input_reader_builder",
+        "//tensorflow_models/object_detection/builders:model_builder",
+        "//tensorflow_models/object_detection/protos:eval_py_pb2",
+        "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
+        "//tensorflow_models/object_detection/protos:model_py_pb2",
+        "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
+        "//tensorflow_models/object_detection/utils:label_map_util",
+    ],
+)
+
+py_library(
+    name = "exporter",
+    srcs = [
+        "exporter.py",
+    ],
+    deps = [
+        "//tensorflow",
+        "//tensorflow/python/tools:freeze_graph_lib",
+        "//tensorflow_models/object_detection/builders:model_builder",
+        "//tensorflow_models/object_detection/core:standard_fields",
+        "//tensorflow_models/object_detection/data_decoders:tf_example_decoder",
+    ],
+)
+
+py_test(
+    name = "exporter_test",
+    srcs = [
+        "exporter_test.py",
+    ],
+    deps = [
+        ":exporter",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/builders:model_builder",
+        "//tensorflow_models/object_detection/core:model",
+        "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
+    ],
+)
+
+py_binary(
+    name = "export_inference_graph",
+    srcs = [
+        "export_inference_graph.py",
+    ],
+    deps = [
+        ":exporter",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
+    ],
+)
+
+py_binary(
+    name = "create_pascal_tf_record",
+    srcs = [
+        "create_pascal_tf_record.py",
+    ],
+    deps = [
+        "//third_party/py/PIL:pil",
+        "//third_party/py/lxml",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/utils:dataset_util",
+        "//tensorflow_models/object_detection/utils:label_map_util",
+    ],
+)
+
+py_test(
+    name = "create_pascal_tf_record_test",
+    srcs = [
+        "create_pascal_tf_record_test.py",
+    ],
+    deps = [
+        ":create_pascal_tf_record",
+        "//tensorflow",
+    ],
+)
+
+py_binary(
+    name = "create_pet_tf_record",
+    srcs = [
+        "create_pet_tf_record.py",
+    ],
+    deps = [
+        "//third_party/py/PIL:pil",
+        "//third_party/py/lxml",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/utils:dataset_util",
+        "//tensorflow_models/object_detection/utils:label_map_util",
+    ],
+)
--- a/object_detection/CONTRIBUTING.md
+++ b/object_detection/CONTRIBUTING.md
+# Contributing to the Tensorflow Object Detection API
+
+Patches to the TensorFlow Object Detection API are welcome!
+
+We require contributors to fill out either the individual or corporate
+Contributor License Agreement (CLA).
+
+  * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
+  * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
+
+Please follow the
+[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
+when submitting pull requests.
--- a/object_detection/README.md
+++ b/object_detection/README.md
+# Tensorflow Object Detection API
+Creating accurate machine learning models capable of localizing and identifying
+multiple objects in a single image remains a core challenge in computer vision.
+The TensorFlow Object Detection API is an open source framework built on top of
+TensorFlow that makes it easy to construct, train and deploy object detection
+models.  At Google we’ve certainly found this codebase to be useful for our
+computer vision needs, and we hope that you will as well.
+<p align="center">
+  <img src="g3doc/img/kites_detections_output.jpg" width=676 height=450>
+</p>
+Contributions to the codebase are welcome and we would love to hear back from
+you if you find this API useful.  Finally, if you use the TensorFlow Object
+Detection API for a research publication, please consider citing:
+
+```
+"Speed/accuracy trade-offs for modern convolutional object detectors."
+Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
+Song Y, Guadarrama S, Murphy K, CVPR 2017
+```
+\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
+https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
+
+## Maintainers
+
+* Jonathan Huang, github: [jch1](https://github.com/jch1)
+* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
+* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
+* Chen Sun, github: [jesu9](https://github.com/jesu9)
+* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
+
+
+## Table of contents
+
+Before You Start:
+* <a href='g3doc/installation.md'>Installation</a><br>
+
+Quick Start:
+* <a href='object_detection_tutorial.ipynb'>
+      Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
+* <a href="g3doc/running_pets.md">Quick Start: Training a pet detector</a><br>
+
+Setup:
+* <a href='g3doc/configuring_jobs.md'>
+      Configuring an object detection pipeline</a><br>
+* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>
+
+Running:
+* <a href='g3doc/running_locally.md'>Running locally</a><br>
+* <a href='g3doc/running_on_cloud.md'>Running on the cloud</a><br>
+
+Extras:
+* <a href='g3doc/detection_model_zoo.md'>Tensorflow detection model zoo</a><br>
+* <a href='g3doc/exporting_models.md'>
+      Exporting a trained model for inference</a><br>
+* <a href='g3doc/defining_your_own_model.md'>
+      Defining your own model architecture</a><br>
+
+## Release information
+
+### June 15, 2017
+
+In addition to our base Tensorflow detection model definitions, this
+release includes:
+
+* A selection of trainable detection models, including:
+  * Single Shot Multibox Detector (SSD) with MobileNet,
+  * SSD with Inception V2,
+  * Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
+  * Faster RCNN with Resnet 101,
+  * Faster RCNN with Inception Resnet v2
+* Frozen weights (trained on the COCO dataset) for each of the above models to
+  be used for out-of-the-box inference purposes.
+* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
+  out-of-the-box inference with one of our released models
+* Convenient [local training](g3doc/running_locally.md) scripts as well as
+  distributed training and evaluation pipelines via
+  [Google Cloud](g3doc/running_on_cloud.md).
+
+
+<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow,
+Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings,
+Viacheslav Kovalevskyi, Kevin Murphy
--- a/object_detection/__init__.py
+++ b/object_detection/__init__.py
--- a/object_detection/anchor_generators/BUILD
+++ b/object_detection/anchor_generators/BUILD
+# Tensorflow Object Detection API: Anchor Generator implementations.
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])
+
+# Apache 2.0
+py_library(
+    name = "grid_anchor_generator",
+    srcs = [
+        "grid_anchor_generator.py",
+    ],
+    deps = [
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:anchor_generator",
+        "//tensorflow_models/object_detection/core:box_list",
+        "//tensorflow_models/object_detection/utils:ops",
+    ],
+)
+
+py_test(
+    name = "grid_anchor_generator_test",
+    srcs = [
+        "grid_anchor_generator_test.py",
+    ],
+    deps = [
+        ":grid_anchor_generator",
+        "//tensorflow",
+    ],
+)
+
+py_library(
+    name = "multiple_grid_anchor_generator",
+    srcs = [
+        "multiple_grid_anchor_generator.py",
+    ],
+    deps = [
+        ":grid_anchor_generator",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:anchor_generator",
+        "//tensorflow_models/object_detection/core:box_list_ops",
+    ],
+)
+
+py_test(
+    name = "multiple_grid_anchor_generator_test",
+    srcs = [
+        "multiple_grid_anchor_generator_test.py",
+    ],
+    deps = [
+        ":multiple_grid_anchor_generator",
+        "//third_party/py/numpy",
+    ],
+)
--- a/object_detection/anchor_generators/__init__.py
+++ b/object_detection/anchor_generators/__init__.py
--- a/object_detection/anchor_generators/grid_anchor_generator.py
+++ b/object_detection/anchor_generators/grid_anchor_generator.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Generates grid anchors on the fly as used in Faster RCNN.
+
+Generates grid anchors on the fly as described in:
+"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
+Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import anchor_generator
+from object_detection.core import box_list
+from object_detection.utils import ops
+
+
+class GridAnchorGenerator(anchor_generator.AnchorGenerator):
+  """Generates a grid of anchors at given scales and aspect ratios."""
+
+  def __init__(self,
+               scales=(0.5, 1.0, 2.0),
+               aspect_ratios=(0.5, 1.0, 2.0),
+               base_anchor_size=None,
+               anchor_stride=None,
+               anchor_offset=None):
+    """Constructs a GridAnchorGenerator.
+
+    Args:
+      scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
+      aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
+      base_anchor_size: base anchor size as height, width (
+                        (length-2 float32 list, default=[256, 256])
+      anchor_stride: difference in centers between base anchors for adjacent
+                     grid positions (length-2 float32 list, default=[16, 16])
+      anchor_offset: center of the anchor with scale and aspect ratio 1 for the
+                     upper left element of the grid, this should be zero for
+                     feature networks with only VALID padding and even receptive
+                     field size, but may need additional calculation if other
+                     padding is used (length-2 float32 tensor, default=[0, 0])
+    """
+    # Handle argument defaults
+    if base_anchor_size is None:
+      base_anchor_size = [256, 256]
+    base_anchor_size = tf.constant(base_anchor_size, tf.float32)
+    if anchor_stride is None:
+      anchor_stride = [16, 16]
+    anchor_stride = tf.constant(anchor_stride, dtype=tf.float32)
+    if anchor_offset is None:
+      anchor_offset = [0, 0]
+    anchor_offset = tf.constant(anchor_offset, dtype=tf.float32)
+
+    self._scales = scales
+    self._aspect_ratios = aspect_ratios
+    self._base_anchor_size = base_anchor_size
+    self._anchor_stride = anchor_stride
+    self._anchor_offset = anchor_offset
+
+  def name_scope(self):
+    return 'GridAnchorGenerator'
+
+  def num_anchors_per_location(self):
+    """Returns the number of anchors per spatial location.
+
+    Returns:
+      a list of integers, one for each expected feature map to be passed to
+      the `generate` function.
+    """
+    return [len(self._scales) * len(self._aspect_ratios)]
+
+  def _generate(self, feature_map_shape_list):
+    """Generates a collection of bounding boxes to be used as anchors.
+
+    Args:
+      feature_map_shape_list: list of pairs of convnet layer resolutions in the
+        format [(height_0, width_0)].  For example, setting
+        feature_map_shape_list=[(8, 8)] asks for anchors that correspond
+        to an 8x8 layer.  For this anchor generator, only lists of length 1 are
+        allowed.
+
+    Returns:
+      boxes: a BoxList holding a collection of N anchor boxes
+    Raises:
+      ValueError: if feature_map_shape_list, box_specs_list do not have the same
+        length.
+      ValueError: if feature_map_shape_list does not consist of pairs of
+        integers
+    """
+    if not (isinstance(feature_map_shape_list, list)
+            and len(feature_map_shape_list) == 1):
+      raise ValueError('feature_map_shape_list must be a list of length 1.')
+    if not all([isinstance(list_item, tuple) and len(list_item) == 2
+                for list_item in feature_map_shape_list]):
+      raise ValueError('feature_map_shape_list must be a list of pairs.')
+    grid_height, grid_width = feature_map_shape_list[0]
+    scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
+                                                   self._aspect_ratios)
+    scales_grid = tf.reshape(scales_grid, [-1])
+    aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
+    return tile_anchors(grid_height,
+                        grid_width,
+                        scales_grid,
+                        aspect_ratios_grid,
+                        self._base_anchor_size,
+                        self._anchor_stride,
+                        self._anchor_offset)
+
+
+def tile_anchors(grid_height,
+                 grid_width,
+                 scales,
+                 aspect_ratios,
+                 base_anchor_size,
+                 anchor_stride,
+                 anchor_offset):
+  """Create a tiled set of anchors strided along a grid in image space.
+
+  This op creates a set of anchor boxes by placing a "basis" collection of
+  boxes with user-specified scales and aspect ratios centered at evenly
+  distributed points along a grid.  The basis collection is specified via the
+  scale and aspect_ratios arguments.  For example, setting scales=[.1, .2, .2]
+  and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
+  .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
+  and aspect ratio 1/2.  Each box is multiplied by "base_anchor_size" before
+  placing it over its respective center.
+
+  Grid points are specified via grid_height, grid_width parameters as well as
+  the anchor_stride and anchor_offset parameters.
+
+  Args:
+    grid_height: size of the grid in the y direction (int or int scalar tensor)
+    grid_width: size of the grid in the x direction (int or int scalar tensor)
+    scales: a 1-d  (float) tensor representing the scale of each box in the
+      basis set.
+    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
+      box in the basis set.  The length of the scales and aspect_ratios tensors
+      must be equal.
+    base_anchor_size: base anchor size as [height, width]
+      (float tensor of shape [2])
+    anchor_stride: difference in centers between base anchors for adjacent grid
+                   positions (float tensor of shape [2])
+    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
+                   upper left element of the grid, this should be zero for
+                   feature networks with only VALID padding and even receptive
+                   field size, but may need some additional calculation if other
+                   padding is used (float tensor of shape [2])
+  Returns:
+    a BoxList holding a collection of N anchor boxes
+  """
+  ratio_sqrts = tf.sqrt(aspect_ratios)
+  heights = scales / ratio_sqrts * base_anchor_size[0]
+  widths = scales * ratio_sqrts * base_anchor_size[1]
+
+  # Get a grid of box centers
+  y_centers = tf.to_float(tf.range(grid_height))
+  y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
+  x_centers = tf.to_float(tf.range(grid_width))
+  x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
+  x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
+
+  widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
+  heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
+  bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
+  bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
+  bbox_centers = tf.reshape(bbox_centers, [-1, 2])
+  bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
+  bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
+  return box_list.BoxList(bbox_corners)
+
+
+def _center_size_bbox_to_corners_bbox(centers, sizes):
+  """Converts bbox center-size representation to corners representation.
+
+  Args:
+    centers: a tensor with shape [N, 2] representing bounding box centers
+    sizes: a tensor with shape [N, 2] representing bounding boxes
+
+  Returns:
+    corners: tensor with shape [N, 4] representing bounding boxes in corners
+      representation
+  """
+  return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
--- a/object_detection/anchor_generators/grid_anchor_generator_test.py
+++ b/object_detection/anchor_generators/grid_anchor_generator_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.anchor_generators.grid_anchor_generator."""
+
+import tensorflow as tf
+
+from object_detection.anchor_generators import grid_anchor_generator
+
+
+class GridAnchorGeneratorTest(tf.test.TestCase):
+
+  def test_construct_single_anchor(self):
+    """Builds a 1x1 anchor grid to test the size of the output boxes."""
+    scales = [0.5, 1.0, 2.0]
+    aspect_ratios = [0.25, 1.0, 4.0]
+    anchor_offset = [7, -3]
+    exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
+                          [-505, -131, 519, 125], [-57, -67, 71, 61],
+                          [-121, -131, 135, 125], [-249, -259, 263, 253],
+                          [-25, -131, 39, 125], [-57, -259, 71, 253],
+                          [-121, -515, 135, 509]]
+
+    anchor_generator = grid_anchor_generator.GridAnchorGenerator(
+        scales, aspect_ratios,
+        anchor_offset=anchor_offset)
+    anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
+    anchor_corners = anchors.get()
+
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+  def test_construct_anchor_grid(self):
+    base_anchor_size = [10, 10]
+    anchor_stride = [19, 19]
+    anchor_offset = [0, 0]
+    scales = [0.5, 1.0, 2.0]
+    aspect_ratios = [1.0]
+
+    exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
+                          [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
+                          [-5., 14., 5, 24], [-10., 9., 10, 29],
+                          [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
+                          [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
+                          [14., 14., 24, 24], [9., 9., 29, 29]]
+
+    anchor_generator = grid_anchor_generator.GridAnchorGenerator(
+        scales,
+        aspect_ratios,
+        base_anchor_size=base_anchor_size,
+        anchor_stride=anchor_stride,
+        anchor_offset=anchor_offset)
+
+    anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
+    anchor_corners = anchors.get()
+
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+
+# Run the test cases defined above when this file is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/anchor_generators/multiple_grid_anchor_generator.py
+++ b/object_detection/anchor_generators/multiple_grid_anchor_generator.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Generates grid anchors on the fly corresponding to multiple CNN layers.
+
+Generates grid anchors on the fly corresponding to multiple CNN layers as
+described in:
+"SSD: Single Shot MultiBox Detector"
+Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
+Cheng-Yang Fu, Alexander C. Berg
+(see Section 2.2: Choosing scales and aspect ratios for default boxes)
+"""
+
+import numpy as np
+
+import tensorflow as tf
+
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.core import anchor_generator
+from object_detection.core import box_list_ops
+
+
+class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
+  """Generate a grid of anchors for multiple CNN layers.
+
+  The (scale, aspect ratio) box specifications of every layer are fixed at
+  construction time; grid sizes, and optionally strides and offsets, are
+  supplied when the anchors are generated.
+  """
+
+  def __init__(self,
+               box_specs_list,
+               base_anchor_size=None,
+               clip_window=None):
+    """Constructs a MultipleGridAnchorGenerator.
+
+    To construct anchors at multiple grid resolutions, one must provide a
+    feature_map_shape_list (e.g., [(8, 8), (4, 4)]) at generation time, and for
+    each grid size, a corresponding list of (scale, aspect ratio) box
+    specifications at construction time.
+
+    For example:
+    box_specs_list = [[(.1, 1.0), (.1, 2.0)],  # for 8x8 grid
+                      [(.2, 1.0), (.3, 1.0), (.2, 2.0)]]  # for 4x4 grid
+
+    To support the fully convolutional setting, we pass grid sizes in at
+    generation time, while scale and aspect ratios are fixed at construction
+    time.
+
+    Args:
+      box_specs_list: list of list of (scale, aspect ratio) pairs with the
+        outside list having the same number of entries as feature_map_shape_list
+        (which is passed in at generation time).
+      base_anchor_size: base anchor size as [height, width]
+                        (length-2 float tensor, default=[256, 256]).
+      clip_window: a tensor of shape [4] specifying a window to which all
+        anchors should be clipped. If clip_window is None, then no clipping
+        is performed.
+
+    Raises:
+      ValueError: if box_specs_list is not a list of list of pairs
+      ValueError: if clip_window is not either None or a tensor of shape [4]
+    """
+    # Both the outer container and every per-layer entry must be real lists.
+    if isinstance(box_specs_list, list) and all(
+        [isinstance(list_item, list) for list_item in box_specs_list]):
+      self._box_specs = box_specs_list
+    else:
+      raise ValueError('box_specs_list is expected to be a '
+                       'list of lists of pairs')
+    if base_anchor_size is None:
+      base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
+    self._base_anchor_size = base_anchor_size
+    # The static shape is inspected here, so clip_window has to expose
+    # get_shape() (i.e. be a tensor rather than a plain list).
+    if clip_window is not None and clip_window.get_shape().as_list() != [4]:
+      raise ValueError('clip_window must either be None or a shape [4] tensor')
+    self._clip_window = clip_window
+    # Per-layer tuples of scales / aspect ratios, index-aligned with
+    # self._box_specs.
+    self._scales = []
+    self._aspect_ratios = []
+    for box_spec in self._box_specs:
+      if not all([isinstance(entry, tuple) and len(entry) == 2
+                  for entry in box_spec]):
+        raise ValueError('box_specs_list is expected to be a '
+                         'list of lists of pairs')
+      # Unzip the (scale, aspect ratio) pairs into two parallel tuples.
+      scales, aspect_ratios = zip(*box_spec)
+      self._scales.append(scales)
+      self._aspect_ratios.append(aspect_ratios)
+
+  def name_scope(self):
+    """Returns the fixed name scope string used for this generator."""
+    return 'MultipleGridAnchorGenerator'
+
+  def num_anchors_per_location(self):
+    """Returns the number of anchors per spatial location.
+
+    Returns:
+      a list of integers, one for each expected feature map to be passed to
+      the Generate function. Each integer is the number of box specs that were
+      configured for that feature map.
+    """
+    return [len(box_specs) for box_specs in self._box_specs]
+
+  def _generate(self,
+                feature_map_shape_list,
+                im_height=1,
+                im_width=1,
+                anchor_strides=None,
+                anchor_offsets=None):
+    """Generates a collection of bounding boxes to be used as anchors.
+
+    The number of anchors generated for a single grid with shape MxM where we
+    place k boxes over each grid center is k*M^2 and thus the total number of
+    anchors is the sum over all grids. In our box_specs_list example
+    (see the constructor docstring), we would place two boxes over each grid
+    point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
+    thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
+    output anchors follows the order of how the grid sizes and box_specs are
+    specified (with box_spec index varying the fastest, followed by width
+    index, then height index, then grid index).
+
+    Every returned anchor also carries a 'stddev' field filled with 0.01.
+
+    Args:
+      feature_map_shape_list: list of pairs of convnet layer resolutions in the
+        format [(height_0, width_0), (height_1, width_1), ...]. For example,
+        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
+        correspond to an 8x8 layer followed by a 7x7 layer.
+      im_height: the height of the image to generate the grid for. If both
+        im_height and im_width are 1, the generated anchors default to
+        normalized coordinates, otherwise absolute coordinates are used for the
+        grid.
+      im_width: the width of the image to generate the grid for. If both
+        im_height and im_width are 1, the generated anchors default to
+        normalized coordinates, otherwise absolute coordinates are used for the
+        grid.
+      anchor_strides: list of pairs of strides (in y and x directions
+        respectively). For example, setting
+        anchor_strides=[(.25, .25), (.5, .5)] means that we want the anchors
+        corresponding to the first layer to be strided by .25 and those in the
+        second layer to be strided by .5 in both y and x directions. By
+        default, if anchor_strides=None, then they are set to
+        (im_height / grid height, im_width / grid width), i.e. the reciprocal
+        of the corresponding grid sizes when im_height and im_width are 1. The
+        pairs can also be specified as dynamic tf.int or tf.float numbers,
+        e.g. for variable shape input images.
+      anchor_offsets: list of pairs of offsets (in y and x directions
+        respectively). The offset specifies where we want the center of the
+        (0, 0)-th anchor to lie for each layer. For example, setting
+        anchor_offsets=[(.125, .125), (.25, .25)]) means that we want the
+        (0, 0)-th anchor of the first layer to lie at (.125, .125) in image
+        space and likewise that we want the (0, 0)-th anchor of the second
+        layer to lie at (.25, .25) in image space. By default, if
+        anchor_offsets=None, then they are set to be half of the corresponding
+        anchor stride. The pairs can also be specified as dynamic tf.int or
+        tf.float numbers, e.g. for variable shape input images.
+
+    Returns:
+      boxes: a BoxList holding a collection of N anchor boxes
+    Raises:
+      ValueError: if feature_map_shape_list, box_specs_list do not have the same
+        length.
+      ValueError: if feature_map_shape_list is not a list of 2-tuples.
+      ValueError: if anchor_strides or anchor_offsets is not a list of 2-tuples
+        with the same length as box_specs_list.
+    """
+    if not (isinstance(feature_map_shape_list, list)
+            and len(feature_map_shape_list) == len(self._box_specs)):
+      raise ValueError('feature_map_shape_list must be a list with the same '
+                       'length as self._box_specs')
+    if not all([isinstance(list_item, tuple) and len(list_item) == 2
+                for list_item in feature_map_shape_list]):
+      raise ValueError('feature_map_shape_list must be a list of pairs.')
+    # Default stride: image size divided by the grid size of each layer.
+    if not anchor_strides:
+      anchor_strides = [(tf.to_float(im_height) / tf.to_float(pair[0]),
+                         tf.to_float(im_width) / tf.to_float(pair[1]))
+                        for pair in feature_map_shape_list]
+    # Default offset: half a stride in each direction.
+    if not anchor_offsets:
+      anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
+                        for stride in anchor_strides]
+    # Strides and offsets (given or defaulted) get the same structural checks.
+    for arg, arg_name in zip([anchor_strides, anchor_offsets],
+                             ['anchor_strides', 'anchor_offsets']):
+      if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
+        raise ValueError('%s must be a list with the same length '
+                         'as self._box_specs' % arg_name)
+      if not all([isinstance(list_item, tuple) and len(list_item) == 2
+                  for list_item in arg]):
+        raise ValueError('%s must be a list of pairs.' % arg_name)
+
+    anchor_grid_list = []
+    # The base anchor size is scaled by the shorter image side.
+    min_im_shape = tf.to_float(tf.minimum(im_height, im_width))
+    base_anchor_size = min_im_shape * self._base_anchor_size
+    # One tiled anchor grid per feature map, in the order given.
+    for grid_size, scales, aspect_ratios, stride, offset in zip(
+        feature_map_shape_list, self._scales, self._aspect_ratios,
+        anchor_strides, anchor_offsets):
+      anchor_grid_list.append(
+          grid_anchor_generator.tile_anchors(
+              grid_height=grid_size[0],
+              grid_width=grid_size[1],
+              scales=scales,
+              aspect_ratios=aspect_ratios,
+              base_anchor_size=base_anchor_size,
+              anchor_stride=stride,
+              anchor_offset=offset))
+    concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
+    # Prefer the statically known box count; fall back to the dynamic one.
+    num_anchors = concatenated_anchors.num_boxes_static()
+    if num_anchors is None:
+      num_anchors = concatenated_anchors.num_boxes()
+    if self._clip_window is not None:
+      # Scale the configured window by the image size, ordered as
+      # [im_height, im_width, im_height, im_width].
+      clip_window = tf.multiply(
+          tf.to_float([im_height, im_width, im_height, im_width]),
+          self._clip_window)
+      concatenated_anchors = box_list_ops.clip_to_window(
+          concatenated_anchors, clip_window, filter_nonoverlapping=False)
+      # TODO: make reshape an option for the clip_to_window op
+      # NOTE(review): presumably this reshape restores the [num_anchors, 4]
+      # shape after clipping -- confirm against clip_to_window.
+      concatenated_anchors.set(
+          tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))
+
+    # Attach a constant 0.01 'stddev' value for every anchor coordinate.
+    stddevs_tensor = 0.01 * tf.ones(
+        [num_anchors, 4], dtype=tf.float32, name='stddevs')
+    concatenated_anchors.add_field('stddev', stddevs_tensor)
+
+    return concatenated_anchors
+
+
+def create_ssd_anchors(num_layers=6,
+                       min_scale=0.2,
+                       max_scale=0.95,
+                       aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
+                       base_anchor_size=None,
+                       reduce_boxes_in_lowest_layer=True):
+  """Creates MultipleGridAnchorGenerator for SSD anchors.
+
+  This function instantiates a MultipleGridAnchorGenerator that reproduces
+  ``default box`` construction proposed by Liu et al in the SSD paper.
+  See Section 2.2 for details. Grid sizes are assumed to be passed in
+  at generation time from finest resolution to coarsest resolution --- this is
+  used to (linearly) interpolate scales of anchor boxes corresponding to the
+  intermediate grid sizes.
+
+  Anchors that are returned by calling the `generate` method on the returned
+  MultipleGridAnchorGenerator object are always in normalized coordinates
+  and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
+
+  Args:
+    num_layers: integer number of grid layers to create anchors for (actual
+      grid sizes passed in at generation time)
+    min_scale: scale of anchors corresponding to finest resolution (float)
+    max_scale: scale of anchors corresponding to coarsest resolution (float)
+    aspect_ratios: list or tuple of (float) aspect ratios to place on each
+      grid point.
+    base_anchor_size: base anchor size as [height, width].
+    reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
+      boxes per location is used in the lowest layer.
+
+  Returns:
+    a MultipleGridAnchorGenerator
+  """
+  if base_anchor_size is None:
+    base_anchor_size = [1.0, 1.0]
+  base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
+  box_specs_list = []
+  scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
+            for i in range(num_layers)] + [1.0]
+  for layer, scale, scale_next in zip(
+      range(num_layers), scales[:-1], scales[1:]):
+    layer_box_specs = []
+    if layer == 0 and reduce_boxes_in_lowest_layer:
+      layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
+    else:
+      for aspect_ratio in aspect_ratios:
+        layer_box_specs.append((scale, aspect_ratio))
+        if aspect_ratio == 1.0:
+          layer_box_specs.append((np.sqrt(scale*scale_next), 1.0))
+    box_specs_list.append(layer_box_specs)
+  return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size)
--- a/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
+++ b/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for anchor_generators.multiple_grid_anchor_generator."""
+
+import numpy as np
+
+import tensorflow as tf
+
+from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
+
+
+class MultipleGridAnchorGeneratorTest(tf.test.TestCase):
+
+  def test_construct_single_anchor_grid(self):
+    """Builds a 1x1 anchor grid to test the size of the output boxes."""
+    exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
+                          [-505, -131, 519, 125], [-57, -67, 71, 61],
+                          [-121, -131, 135, 125], [-249, -259, 263, 253],
+                          [-25, -131, 39, 125], [-57, -259, 71, 253],
+                          [-121, -515, 135, 509]]
+
+    base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
+    box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
+                       (.5, 1.0), (1.0, 1.0), (2.0, 1.0),
+                       (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
+    anchor_generator = ag.MultipleGridAnchorGenerator(
+        box_specs_list, base_anchor_size)
+    anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)],
+                                        anchor_strides=[(16, 16)],
+                                        anchor_offsets=[(7, -3)])
+    anchor_corners = anchors.get()
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+  def test_construct_anchor_grid(self):
+    base_anchor_size = tf.constant([10, 10], dtype=tf.float32)
+    box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
+
+    exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
+                          [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
+                          [-5., 14., 5, 24], [-10., 9., 10, 29],
+                          [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
+                          [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
+                          [14., 14., 24, 24], [9., 9., 29, 29]]
+
+    anchor_generator = ag.MultipleGridAnchorGenerator(
+        box_specs_list, base_anchor_size)
+    anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)],
+                                        anchor_strides=[(19, 19)],
+                                        anchor_offsets=[(0, 0)])
+    anchor_corners = anchors.get()
+
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+  def test_construct_anchor_grid_non_square(self):
+    base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
+    box_specs_list = [[(1.0, 1.0)]]
+
+    exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
+
+    anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
+                                                      base_anchor_size)
+    anchors = anchor_generator.generate(feature_map_shape_list=[(tf.constant(
+        1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
+    anchor_corners = anchors.get()
+
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+  def test_construct_anchor_grid_unnormalized(self):
+    base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
+    box_specs_list = [[(1.0, 1.0)]]
+
+    exp_anchor_corners = [[0., 0., 320., 320.], [0., 320., 320., 640.]]
+
+    anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
+                                                      base_anchor_size)
+    anchors = anchor_generator.generate(
+        feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
+            2, dtype=tf.int32))],
+        im_height=320,
+        im_width=640)
+    anchor_corners = anchors.get()
+
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+  def test_construct_multiple_grids(self):
+    base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
+    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+                      [(1.0, 1.0), (1.0, 0.5)]]
+
+    # height and width of box with .5 aspect ratio
+    h = np.sqrt(2)
+    w = 1.0/np.sqrt(2)
+    exp_small_grid_corners = [[-.25, -.25, .75, .75],
+                              [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
+                              [-.25, .25, .75, 1.25],
+                              [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
+                              [.25, -.25, 1.25, .75],
+                              [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
+                              [.25, .25, 1.25, 1.25],
+                              [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
+    # only test first entry of larger set of anchors
+    exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
+                            [.125-1.0, .125-1.0, .125+1.0, .125+1.0],
+                            [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
+
+    anchor_generator = ag.MultipleGridAnchorGenerator(
+        box_specs_list, base_anchor_size)
+    anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
+                                        anchor_strides=[(.25, .25), (.5, .5)],
+                                        anchor_offsets=[(.125, .125),
+                                                        (.25, .25)])
+    anchor_corners = anchors.get()
+
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertEquals(anchor_corners_out.shape, (56, 4))
+      big_grid_corners = anchor_corners_out[0:3, :]
+      small_grid_corners = anchor_corners_out[48:, :]
+      self.assertAllClose(small_grid_corners, exp_small_grid_corners)
+      self.assertAllClose(big_grid_corners, exp_big_grid_corners)
+
+  def test_construct_multiple_grids_with_clipping(self):
+    base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
+    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+                      [(1.0, 1.0), (1.0, 0.5)]]
+
+    # height and width of box with .5 aspect ratio
+    h = np.sqrt(2)
+    w = 1.0/np.sqrt(2)
+    exp_small_grid_corners = [[0, 0, .75, .75],
+                              [0, 0, .25+.5*h, .25+.5*w],
+                              [0, .25, .75, 1],
+                              [0, .75-.5*w, .25+.5*h, 1],
+                              [.25, 0, 1, .75],
+                              [.75-.5*h, 0, 1, .25+.5*w],
+                              [.25, .25, 1, 1],
+                              [.75-.5*h, .75-.5*w, 1, 1]]
+
+    clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
+    anchor_generator = ag.MultipleGridAnchorGenerator(
+        box_specs_list, base_anchor_size, clip_window=clip_window)
+    anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
+    anchor_corners = anchors.get()
+
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      small_grid_corners = anchor_corners_out[48:, :]
+      self.assertAllClose(small_grid_corners, exp_small_grid_corners)
+
+  def test_invalid_box_specs(self):
+    # not all box specs are pairs
+    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+                      [(1.0, 1.0), (1.0, 0.5, .3)]]
+    with self.assertRaises(ValueError):
+      ag.MultipleGridAnchorGenerator(box_specs_list)
+
+    # box_specs_list is not a list of lists
+    box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
+    with self.assertRaises(ValueError):
+      ag.MultipleGridAnchorGenerator(box_specs_list)
+
+  def test_invalid_generate_arguments(self):
+    base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
+    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+                      [(1.0, 1.0), (1.0, 0.5)]]
+    anchor_generator = ag.MultipleGridAnchorGenerator(
+        box_specs_list, base_anchor_size)
+
+    # incompatible lengths with box_specs_list
+    with self.assertRaises(ValueError):
+      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
+                                anchor_strides=[(.25, .25)],
+                                anchor_offsets=[(.125, .125), (.25, .25)])
+    with self.assertRaises(ValueError):
+      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)],
+                                anchor_strides=[(.25, .25), (.5, .5)],
+                                anchor_offsets=[(.125, .125), (.25, .25)])
+    with self.assertRaises(ValueError):
+      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
+                                anchor_strides=[(.5, .5)],
+                                anchor_offsets=[(.25, .25)])
+
+    # not pairs
+    with self.assertRaises(ValueError):
+      anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)],
+                                anchor_strides=[(.25, .25), (.5, .5)],
+                                anchor_offsets=[(.125, .125), (.25, .25)])
+    with self.assertRaises(ValueError):
+      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
+                                anchor_strides=[(.25, .25, .1), (.5, .5)],
+                                anchor_offsets=[(.125, .125),
+                                                (.25, .25)])
+    with self.assertRaises(ValueError):
+      anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)],
+                                anchor_strides=[(.25, .25), (.5, .5)],
+                                anchor_offsets=[(.125), (.25)])
+
+
+class CreateSSDAnchorsTest(tf.test.TestCase):
+
+  def test_create_ssd_anchors_returns_correct_shape(self):
+    anchor_generator = ag.create_ssd_anchors(
+        num_layers=6, min_scale=0.2, max_scale=0.95,
+        aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
+        reduce_boxes_in_lowest_layer=True)
+
+    feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
+                              (5, 5), (3, 3), (1, 1)]
+    anchors = anchor_generator.generate(
+        feature_map_shape_list=feature_map_shape_list)
+    anchor_corners = anchors.get()
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertEquals(anchor_corners_out.shape, (7308, 4))
+
+    anchor_generator = ag.create_ssd_anchors(
+        num_layers=6, min_scale=0.2, max_scale=0.95,
+        aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
+        reduce_boxes_in_lowest_layer=False)
+
+    feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
+                              (5, 5), (3, 3), (1, 1)]
+    anchors = anchor_generator.generate(
+        feature_map_shape_list=feature_map_shape_list)
+    anchor_corners = anchors.get()
+    with self.test_session():
+      anchor_corners_out = anchor_corners.eval()
+      self.assertEquals(anchor_corners_out.shape, (11640, 4))
+
+
+# Run the test cases defined above when this file is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/box_coders/BUILD
+++ b/object_detection/box_coders/BUILD
+# Tensorflow Object Detection API: Box Coder implementations.
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])
+
+# Apache 2.0
+py_library(
+    name = "faster_rcnn_box_coder",
+    srcs = [
+        "faster_rcnn_box_coder.py",
+    ],
+    deps = [
+        "//tensorflow_models/object_detection/core:box_coder",
+        "//tensorflow_models/object_detection/core:box_list",
+    ],
+)
+
+py_test(
+    name = "faster_rcnn_box_coder_test",
+    srcs = [
+        "faster_rcnn_box_coder_test.py",
+    ],
+    deps = [
+        ":faster_rcnn_box_coder",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:box_list",
+    ],
+)
+
+py_library(
+    name = "keypoint_box_coder",
+    srcs = [
+        "keypoint_box_coder.py",
+    ],
+    deps = [
+        "//tensorflow_models/object_detection/core:box_coder",
+        "//tensorflow_models/object_detection/core:box_list",
+        "//tensorflow_models/object_detection/core:standard_fields",
+    ],
+)
+
+py_test(
+    name = "keypoint_box_coder_test",
+    srcs = [
+        "keypoint_box_coder_test.py",
+    ],
+    deps = [
+        ":keypoint_box_coder",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:box_list",
+        "//tensorflow_models/object_detection/core:standard_fields",
+    ],
+)
+
+py_library(
+    name = "mean_stddev_box_coder",
+    srcs = [
+        "mean_stddev_box_coder.py",
+    ],
+    deps = [
+        "//tensorflow_models/object_detection/core:box_coder",
+        "//tensorflow_models/object_detection/core:box_list",
+    ],
+)
+
+# Unit tests for the :mean_stddev_box_coder library.
+py_test(
+    name = "mean_stddev_box_coder_test",
+    srcs = [
+        "mean_stddev_box_coder_test.py",
+    ],
+    deps = [
+        ":mean_stddev_box_coder",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:box_list",
+    ],
+)
+
+# Box coder library built from square_box_coder.py.
+py_library(
+    name = "square_box_coder",
+    srcs = [
+        "square_box_coder.py",
+    ],
+    deps = [
+        "//tensorflow_models/object_detection/core:box_coder",
+        "//tensorflow_models/object_detection/core:box_list",
+    ],
+)
+
+# Unit tests for the :square_box_coder library.
+py_test(
+    name = "square_box_coder_test",
+    srcs = [
+        "square_box_coder_test.py",
+    ],
+    deps = [
+        ":square_box_coder",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:box_list",
+    ],
+)
--- a/object_detection/box_coders/__init__.py
+++ b/object_detection/box_coders/__init__.py
--- a/object_detection/box_coders/faster_rcnn_box_coder.py
+++ b/object_detection/box_coders/faster_rcnn_box_coder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Faster RCNN box coder.
+
+Faster RCNN box coder follows the coding schema described below:
+  ty = (y - ya) / ha
+  tx = (x - xa) / wa
+  th = log(h / ha)
+  tw = log(w / wa)
+  where x, y, w, h denote the box's center coordinates, width and height
+  respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+  coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
+  center, width and height respectively.
+
+  See http://arxiv.org/abs/1506.01497 for details.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import box_coder
+from object_detection.core import box_list
+
+# Small positive offset added to box and anchor sizes before dividing by them
+# or taking their log, so that zero-sized boxes do not produce NaN codes.
+EPSILON = 1e-8
+
+
+class FasterRcnnBoxCoder(box_coder.BoxCoder):
+  """Faster RCNN box coder."""
+
+  def __init__(self, scale_factors=None):
+    """Constructor for FasterRcnnBoxCoder.
+
+    Args:
+      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
+        If set to None, does not perform scaling. For Faster RCNN,
+        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
+    """
+    if scale_factors:
+      assert len(scale_factors) == 4
+      for scalar in scale_factors:
+        assert scalar > 0
+    self._scale_factors = scale_factors
+
+  @property
+  def code_size(self):
+    return 4
+
+  def _encode(self, boxes, anchors):
+    """Encode a box collection with respect to anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded.
+      anchors: BoxList of anchors.
+
+    Returns:
+      a tensor representing N anchor-encoded boxes of the format
+      [ty, tx, th, tw].
+    """
+    # Convert anchors to the center coordinate representation.
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+    # Avoid NaN in division and log below.
+    ha += EPSILON
+    wa += EPSILON
+    h += EPSILON
+    w += EPSILON
+
+    tx = (xcenter - xcenter_a) / wa
+    ty = (ycenter - ycenter_a) / ha
+    tw = tf.log(w / wa)
+    th = tf.log(h / ha)
+    # Scales location targets as used in paper for joint training.
+    if self._scale_factors:
+      ty *= self._scale_factors[0]
+      tx *= self._scale_factors[1]
+      th *= self._scale_factors[2]
+      tw *= self._scale_factors[3]
+    return tf.transpose(tf.stack([ty, tx, th, tw]))
+
+  def _decode(self, rel_codes, anchors):
+    """Decode relative codes to boxes.
+
+    Args:
+      rel_codes: a tensor representing N anchor-encoded boxes.
+      anchors: BoxList of anchors.
+
+    Returns:
+      boxes: BoxList holding N bounding boxes.
+    """
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+
+    ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
+    if self._scale_factors:
+      ty /= self._scale_factors[0]
+      tx /= self._scale_factors[1]
+      th /= self._scale_factors[2]
+      tw /= self._scale_factors[3]
+    w = tf.exp(tw) * wa
+    h = tf.exp(th) * ha
+    ycenter = ty * ha + ycenter_a
+    xcenter = tx * wa + xcenter_a
+    ymin = ycenter - h / 2.
+    xmin = xcenter - w / 2.
+    ymax = ycenter + h / 2.
+    xmax = xcenter + w / 2.
+    return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
--- a/object_detection/box_coders/faster_rcnn_box_coder_test.py
+++ b/object_detection/box_coders/faster_rcnn_box_coder_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.box_coders.faster_rcnn_box_coder."""
+
+import tensorflow as tf
+
+from object_detection.box_coders import faster_rcnn_box_coder
+from object_detection.core import box_list
+
+
+class FasterRcnnBoxCoderTest(tf.test.TestCase):
+
+  def test_get_correct_relative_codes_after_encoding(self):
+    boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+    anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+    expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
+                          [-0.083333, -0.222222, -0.693147, -1.098612]]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+    rel_codes = coder.encode(boxes, anchors)
+    with self.test_session() as sess:
+      rel_codes_out, = sess.run([rel_codes])
+      self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+  def test_get_correct_relative_codes_after_encoding_with_scaling(self):
+    boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+    anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+    scale_factors = [2, 3, 4, 5]
+    expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
+                          [-0.166667, -0.666667, -2.772588, -5.493062]]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+        scale_factors=scale_factors)
+    rel_codes = coder.encode(boxes, anchors)
+    with self.test_session() as sess:
+      rel_codes_out, = sess.run([rel_codes])
+      self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+  def test_get_correct_boxes_after_decoding(self):
+    anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+    rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
+                 [-0.083333, -0.222222, -0.693147, -1.098612]]
+    expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+    boxes = coder.decode(rel_codes, anchors)
+    with self.test_session() as sess:
+      boxes_out, = sess.run([boxes.get()])
+      self.assertAllClose(boxes_out, expected_boxes)
+
+  def test_get_correct_boxes_after_decoding_with_scaling(self):
+    anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+    rel_codes = [[-1., -1.25, -1.62186, -0.911608],
+                 [-0.166667, -0.666667, -2.772588, -5.493062]]
+    scale_factors = [2, 3, 4, 5]
+    expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+        scale_factors=scale_factors)
+    boxes = coder.decode(rel_codes, anchors)
+    with self.test_session() as sess:
+      boxes_out, = sess.run([boxes.get()])
+      self.assertAllClose(boxes_out, expected_boxes)
+
+  def test_very_small_Width_nan_after_encoding(self):
+    boxes = [[10.0, 10.0, 10.0000001, 20.0]]
+    anchors = [[15.0, 12.0, 30.0, 18.0]]
+    expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+    rel_codes = coder.encode(boxes, anchors)
+    with self.test_session() as sess:
+      rel_codes_out, = sess.run([rel_codes])
+      self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+
+# Run the tests through tf.test.main() only when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/box_coders/keypoint_box_coder.py
+++ b/object_detection/box_coders/keypoint_box_coder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Keypoint box coder.
+
+The keypoint box coder follows the coding schema described below (this is
+similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
+to box coordinates):
+  ty = (y - ya) / ha
+  tx = (x - xa) / wa
+  th = log(h / ha)
+  tw = log(w / wa)
+  tky0 = (ky0 - ya) / ha
+  tkx0 = (kx0 - xa) / wa
+  tky1 = (ky1 - ya) / ha
+  tkx1 = (kx1 - xa) / wa
+  ...
+  where x, y, w, h denote the box's center coordinates, width and height
+  respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+  coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
+  center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
+  keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
+  anchor-encoded keypoint coordinates.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import box_coder
+from object_detection.core import box_list
+from object_detection.core import standard_fields as fields
+
+# Small positive offset added to box and anchor sizes before dividing by them
+# or taking their log, so that zero-sized boxes do not produce NaN codes.
+EPSILON = 1e-8
+
+
+class KeypointBoxCoder(box_coder.BoxCoder):
+  """Keypoint box coder."""
+
+  def __init__(self, num_keypoints, scale_factors=None):
+    """Constructor for KeypointBoxCoder.
+
+    Args:
+      num_keypoints: Number of keypoints to encode/decode.
+      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
+        In addition to scaling ty and tx, the first 2 scalars are used to scale
+        the y and x coordinates of the keypoints as well. If set to None, does
+        not perform scaling.
+    """
+    self._num_keypoints = num_keypoints
+
+    if scale_factors:
+      assert len(scale_factors) == 4
+      for scalar in scale_factors:
+        assert scalar > 0
+    self._scale_factors = scale_factors
+    self._keypoint_scale_factors = None
+    if scale_factors is not None:
+      self._keypoint_scale_factors = tf.expand_dims(tf.tile(
+          [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
+          [num_keypoints]), 1)
+
+  @property
+  def code_size(self):
+    return 4 + self._num_keypoints * 2
+
+  def _encode(self, boxes, anchors):
+    """Encode a box and keypoint collection with respect to anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
+        tensors with the shape [N, 4], and keypoints are tensors with the shape
+        [N, num_keypoints, 2].
+      anchors: BoxList of anchors.
+
+    Returns:
+      a tensor representing N anchor-encoded boxes of the format
+      [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
+      represent the y and x coordinates of the first keypoint, tky1 and tkx1
+      represent the y and x coordinates of the second keypoint, and so on.
+    """
+    # Convert anchors to the center coordinate representation.
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+    keypoints = boxes.get_field(fields.BoxListFields.keypoints)
+    keypoints = tf.transpose(tf.reshape(keypoints,
+                                        [-1, self._num_keypoints * 2]))
+    num_boxes = boxes.num_boxes()
+
+    # Avoid NaN in division and log below.
+    ha += EPSILON
+    wa += EPSILON
+    h += EPSILON
+    w += EPSILON
+
+    tx = (xcenter - xcenter_a) / wa
+    ty = (ycenter - ycenter_a) / ha
+    tw = tf.log(w / wa)
+    th = tf.log(h / ha)
+
+    tiled_anchor_centers = tf.tile(
+        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
+    tiled_anchor_sizes = tf.tile(
+        tf.stack([ha, wa]), [self._num_keypoints, 1])
+    tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
+
+    # Scales location targets as used in paper for joint training.
+    if self._scale_factors:
+      ty *= self._scale_factors[0]
+      tx *= self._scale_factors[1]
+      th *= self._scale_factors[2]
+      tw *= self._scale_factors[3]
+      tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
+
+    tboxes = tf.stack([ty, tx, th, tw])
+    return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
+
+  def _decode(self, rel_codes, anchors):
+    """Decode relative codes to boxes and keypoints.
+
+    Args:
+      rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
+        anchor-encoded boxes and keypoints
+      anchors: BoxList of anchors.
+
+    Returns:
+      boxes: BoxList holding N bounding boxes and keypoints.
+    """
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+
+    num_codes = tf.shape(rel_codes)[0]
+    result = tf.unstack(tf.transpose(rel_codes))
+    ty, tx, th, tw = result[:4]
+    tkeypoints = result[4:]
+    if self._scale_factors:
+      ty /= self._scale_factors[0]
+      tx /= self._scale_factors[1]
+      th /= self._scale_factors[2]
+      tw /= self._scale_factors[3]
+      tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
+
+    w = tf.exp(tw) * wa
+    h = tf.exp(th) * ha
+    ycenter = ty * ha + ycenter_a
+    xcenter = tx * wa + xcenter_a
+    ymin = ycenter - h / 2.
+    xmin = xcenter - w / 2.
+    ymax = ycenter + h / 2.
+    xmax = xcenter + w / 2.
+    decoded_boxes_keypoints = box_list.BoxList(
+        tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
+
+    tiled_anchor_centers = tf.tile(
+        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
+    tiled_anchor_sizes = tf.tile(
+        tf.stack([ha, wa]), [self._num_keypoints, 1])
+    keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
+    keypoints = tf.reshape(tf.transpose(keypoints),
+                           [-1, self._num_keypoints, 2])
+    decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
+    return decoded_boxes_keypoints
--- a/object_detection/box_coders/keypoint_box_coder_test.py
+++ b/object_detection/box_coders/keypoint_box_coder_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.box_coders.keypoint_box_coder."""
+
+import tensorflow as tf
+
+from object_detection.box_coders import keypoint_box_coder
+from object_detection.core import box_list
+from object_detection.core import standard_fields as fields
+
+
+class KeypointBoxCoderTest(tf.test.TestCase):
+
+  def test_get_correct_relative_codes_after_encoding(self):
+    boxes = [[10., 10., 20., 15.],
+             [0.2, 0.1, 0.5, 0.4]]
+    keypoints = [[[15., 12.], [10., 15.]],
+                 [[0.5, 0.3], [0.2, 0.4]]]
+    num_keypoints = len(keypoints[0])
+    anchors = [[15., 12., 30., 18.],
+               [0.1, 0.0, 0.7, 0.9]]
+    expected_rel_codes = [
+        [-0.5, -0.416666, -0.405465, -0.182321,
+         -0.5, -0.5, -0.833333, 0.],
+        [-0.083333, -0.222222, -0.693147, -1.098612,
+         0.166667, -0.166667, -0.333333, -0.055556]
+    ]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
+    rel_codes = coder.encode(boxes, anchors)
+    with self.test_session() as sess:
+      rel_codes_out, = sess.run([rel_codes])
+      self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+  def test_get_correct_relative_codes_after_encoding_with_scaling(self):
+    boxes = [[10., 10., 20., 15.],
+             [0.2, 0.1, 0.5, 0.4]]
+    keypoints = [[[15., 12.], [10., 15.]],
+                 [[0.5, 0.3], [0.2, 0.4]]]
+    num_keypoints = len(keypoints[0])
+    anchors = [[15., 12., 30., 18.],
+               [0.1, 0.0, 0.7, 0.9]]
+    scale_factors = [2, 3, 4, 5]
+    expected_rel_codes = [
+        [-1., -1.25, -1.62186, -0.911608,
+         -1.0, -1.5, -1.666667, 0.],
+        [-0.166667, -0.666667, -2.772588, -5.493062,
+         0.333333, -0.5, -0.666667, -0.166667]
+    ]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = keypoint_box_coder.KeypointBoxCoder(
+        num_keypoints, scale_factors=scale_factors)
+    rel_codes = coder.encode(boxes, anchors)
+    with self.test_session() as sess:
+      rel_codes_out, = sess.run([rel_codes])
+      self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+  def test_get_correct_boxes_after_decoding(self):
+    anchors = [[15., 12., 30., 18.],
+               [0.1, 0.0, 0.7, 0.9]]
+    rel_codes = [
+        [-0.5, -0.416666, -0.405465, -0.182321,
+         -0.5, -0.5, -0.833333, 0.],
+        [-0.083333, -0.222222, -0.693147, -1.098612,
+         0.166667, -0.166667, -0.333333, -0.055556]
+    ]
+    expected_boxes = [[10., 10., 20., 15.],
+                      [0.2, 0.1, 0.5, 0.4]]
+    expected_keypoints = [[[15., 12.], [10., 15.]],
+                          [[0.5, 0.3], [0.2, 0.4]]]
+    num_keypoints = len(expected_keypoints[0])
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
+    boxes = coder.decode(rel_codes, anchors)
+    with self.test_session() as sess:
+      boxes_out, keypoints_out = sess.run(
+          [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
+      self.assertAllClose(boxes_out, expected_boxes)
+      self.assertAllClose(keypoints_out, expected_keypoints)
+
+  def test_get_correct_boxes_after_decoding_with_scaling(self):
+    anchors = [[15., 12., 30., 18.],
+               [0.1, 0.0, 0.7, 0.9]]
+    rel_codes = [
+        [-1., -1.25, -1.62186, -0.911608,
+         -1.0, -1.5, -1.666667, 0.],
+        [-0.166667, -0.666667, -2.772588, -5.493062,
+         0.333333, -0.5, -0.666667, -0.166667]
+    ]
+    scale_factors = [2, 3, 4, 5]
+    expected_boxes = [[10., 10., 20., 15.],
+                      [0.2, 0.1, 0.5, 0.4]]
+    expected_keypoints = [[[15., 12.], [10., 15.]],
+                          [[0.5, 0.3], [0.2, 0.4]]]
+    num_keypoints = len(expected_keypoints[0])
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = keypoint_box_coder.KeypointBoxCoder(
+        num_keypoints, scale_factors=scale_factors)
+    boxes = coder.decode(rel_codes, anchors)
+    with self.test_session() as sess:
+      boxes_out, keypoints_out = sess.run(
+          [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
+      self.assertAllClose(boxes_out, expected_boxes)
+      self.assertAllClose(keypoints_out, expected_keypoints)
+
+  def test_very_small_width_nan_after_encoding(self):
+    boxes = [[10., 10., 10.0000001, 20.]]
+    keypoints = [[[10., 10.], [10.0000001, 20.]]]
+    anchors = [[15., 12., 30., 18.]]
+    expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
+                           -0.833333, -0.833333, -0.833333, 0.833333]]
+    boxes = box_list.BoxList(tf.constant(boxes))
+    boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
+    anchors = box_list.BoxList(tf.constant(anchors))
+    coder = keypoint_box_coder.KeypointBoxCoder(2)
+    rel_codes = coder.encode(boxes, anchors)
+    with self.test_session() as sess:
+      rel_codes_out, = sess.run([rel_codes])
+      self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+
+# Run the tests through tf.test.main() only when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/object_detection/box_coders/mean_stddev_box_coder.py
+++ b/object_detection/box_coders/mean_stddev_box_coder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Mean stddev box coder.
+
+This box coder uses the following coding schema to encode boxes:
+rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
+"""
+from object_detection.core import box_coder
+from object_detection.core import box_list
+
+
+class MeanStddevBoxCoder(box_coder.BoxCoder):
+  """Mean stddev box coder."""
+
+  @property
+  def code_size(self):
+    """Returns the length of each encoded box, which is always 4."""
+    return 4
+
+  def _encode(self, boxes, anchors):
+    """Encode a box collection with respect to anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded.
+      anchors: BoxList of N anchors.  We assume that anchors has an associated
+        stddev field.
+
+    Returns:
+      a tensor representing N anchor-encoded boxes
+    Raises:
+      ValueError: if the anchors BoxList does not have a stddev field
+    """
+    if not anchors.has_field('stddev'):
+      raise ValueError('anchors must have a stddev field')
+    box_corners = boxes.get()
+    means = anchors.get()
+    stddev = anchors.get_field('stddev')
+    return (box_corners - means) / stddev
+
+  def _decode(self, rel_codes, anchors):
+    """Decode.
+
+    Args:
+      rel_codes: a tensor representing N anchor-encoded boxes.
+      anchors: BoxList of anchors.  We assume that anchors has an associated
+        stddev field.
+
+    Returns:
+      boxes: BoxList holding N bounding boxes
+    Raises:
+      ValueError: if the anchors BoxList does not have a stddev field
+    """
+    if not anchors.has_field('stddev'):
+      raise ValueError('anchors must have a stddev field')
+    means = anchors.get()
+    stddevs = anchors.get_field('stddev')
+    box_corners = rel_codes * stddevs + means
+    return box_list.BoxList(box_corners)