Commit c308c03c authored by Mehdi Sharifzadeh, committed by Taylor Robie

Mask R-CNN model added to models/research/mlperf_object_detection/Mask_RCNN (#4678)

* Create README.md

* readme changed

* readme changed

* ResNet backbone completed.

* FPN added

* Create README.md

* initial commit

* files removed

* initial commit

* protobuf file removed
parent 32e7d660
Mask R-CNN implementation adapted from models/research/object_detection/
# Mask R-CNN with Resnet-50 (v1), Atrous version
# Configured for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader
# and eval_input_reader. This copy uses absolute example paths that must be
# changed for your environment.
model {
faster_rcnn {
num_classes: 81
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 800
max_dimension: 1365
}
}
number_of_stages: 3
feature_extractor {
type: 'faster_rcnn_resnet50'
first_stage_features_stride: 8
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.125, 0.25, 0.5, 1.0, 2.0]  # base anchor 256x256 => anchor sizes 32, 64, 128, 256, 512
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 8
width_stride: 8
}
}
first_stage_atrous_rate: 2
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 512
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_batch_size: 512
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
predict_instance_masks: true
mask_height: 14
mask_width: 14
mask_prediction_conv_depth: 0
mask_prediction_num_conv_layers: 3  # from the Mask R-CNN mask heads
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 2000
max_total_detections: 2000
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
second_stage_mask_prediction_loss_weight: 4.0
}
}
train_config: {
batch_size: 4
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.01
schedule {
step: 120000
learning_rate: .001
}
schedule {
step: 160000
learning_rate: .0001
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
#fine_tune_checkpoint: "/home/mehdisharif/data/coco/resnet_v1_50.ckpt"
#from_detection_checkpoint: True
# Note: The below line caps the total number of training steps. The training
# script treats this value as the overall step budget and interleaves
# evaluation between chunks of training; if it is removed, the script falls
# back to its default budget.
num_steps: 20000000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "/home/mehdisharif/data/coco/output2017/coco_train.record"
}
label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
}
eval_config: {
metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
num_examples: 50
# Note: The below line limits the evaluation process to a single evaluation.
# Remove the below line to evaluate indefinitely.
max_evals: 1
num_visualizations: 50
eval_interval_secs: 120
}
eval_input_reader: {
tf_record_input_reader {
input_path: "/home/mehdisharif/data/coco/output2017/coco_val.record"
}
label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
shuffle: false
num_readers: 1
}
# Mask R-CNN with Resnet-50 (v1), Atrous version
# Configured for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 90
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 800
max_dimension: 1365
}
}
number_of_stages: 3
feature_extractor {
type: 'faster_rcnn_resnet50'
first_stage_features_stride: 8
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 8
width_stride: 8
}
}
first_stage_atrous_rate: 2
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
predict_instance_masks: true
mask_height: 33
mask_width: 33
mask_prediction_conv_depth: 0
mask_prediction_num_conv_layers: 4
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
second_stage_mask_prediction_loss_weight: 4.0
}
}
train_config: {
batch_size: 2
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
#fine_tune_checkpoint: ""
from_detection_checkpoint: false
# Note: The below line limits the training process to 200K steps. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
#num_steps: 200000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/coco_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
}
eval_config: {
metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
num_examples: 50
# Note: The below line limits the evaluation process to a single evaluation.
# Remove the below line to evaluate indefinitely.
max_evals: 1
num_visualizations: 50
eval_interval_secs: 120
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/coco_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
shuffle: true
num_readers: 1
}
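For reference, a pipeline file such as the two configs above can be parsed into its component protos with the same `config_util` helper that the training script below uses (a minimal sketch; the path is a placeholder):

```python
from object_detection.utils import config_util

# Returns a dict with 'model', 'train_config', 'train_input_config',
# 'eval_config', and 'eval_input_config' entries.
configs = config_util.get_configs_from_pipeline_file(
    'path/to/mask_rcnn_resnet50_atrous_coco.config')  # placeholder path
print(configs['train_config'].batch_size)
```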
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Training and evaluation for Mask_RCNN.
This module repeatedly runs 1 training epoch and then evaluation
##add explanation for all the options!!!!!!!
"""
import functools
import json
import os
from object_detection import evaluator
from object_detection import trainer
from object_detection.builders import dataset_builder
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.utils import config_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.')
flags.DEFINE_integer('task', 0, 'task id')
flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
flags.DEFINE_boolean('clone_on_cpu', False,
'Force clones to be deployed on CPU. Note that even if '
'set to False (allowing ops to run on gpu), some ops may '
'still be run on the CPU if they have no GPU kernel.')
flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
'replicas.')
flags.DEFINE_integer('parameter_server_tasks', 0,
                     'Number of parameter server tasks. If 0, does not use '
                     'a parameter server.')
flags.DEFINE_string('train_dir', '',
'Directory to save the checkpoints and training summaries.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
flags.DEFINE_string('eval_dir', '',
'Directory to write eval summaries to.')
flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
'evaluation. Overrides the `max_evals`'
' parameter in the provided config.')
flags.DEFINE_float('box_min_ap', -1, 'Option to run until the box average '
                   'precision reaches this number')
flags.DEFINE_float('mask_min_ap', -1, 'Option to run until the mask average '
                   'precision reaches this number')
flags.DEFINE_integer('epochs_between_evals', 1, 'Number of training steps to '
                     'run before each evaluation.')
FLAGS = flags.FLAGS
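# Example invocation (a sketch; the script name and paths are placeholders,
# the flags are those defined above):
#   python mask_rcnn_main.py \
#     --pipeline_config_path=path/to/pipeline.config \
#     --train_dir=/tmp/mask_rcnn/train \
#     --eval_dir=/tmp/mask_rcnn/eval \
#     --epochs_between_evals=1000 \
#     --box_min_ap=0.37 --mask_min_ap=0.33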
def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
"""Returns true if both of the min precision criteria are met in the given
evaluation metrics.
Args:
eval_metrics: dict of metrics names as keys and their corresponding values,
containing "DetectionMasks_Precision/mAP", and
"DetectionBoxes_Precision/mAP" fields.
mask_min_ap: minimum desired mask average precision, will be ignored if -1
box_min_ap: minimum desired box average precision, will be ignored if -1
Returns:
    True if every criterion that is not -1 is met, False otherwise. Returns
    False when both criteria are -1.
"""
assert mask_min_ap == -1 or 0 < mask_min_ap < 1
assert box_min_ap == -1 or 0 < box_min_ap < 1
try:
mask_mAP_reached = eval_metrics['DetectionMasks_Precision/mAP']
box_mAP_reached = eval_metrics['DetectionBoxes_Precision/mAP']
except KeyError as err:
raise Exception('eval_metrics dict does not contain the mAP field') from err
  return ((mask_min_ap == -1 or mask_mAP_reached > mask_min_ap) and
          (box_min_ap == -1 or box_mAP_reached > box_min_ap) and
          (mask_min_ap != -1 or box_min_ap != -1))
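# Usage sketch for the helper above (hypothetical metric values): training
# stops only once every requested threshold is exceeded.
#   stopping_criteria_met(
#       {'DetectionMasks_Precision/mAP': 0.34,
#        'DetectionBoxes_Precision/mAP': 0.38},
#       mask_min_ap=0.33, box_min_ap=0.37)  # -> True
#   stopping_criteria_met(same_metrics, mask_min_ap=-1, box_min_ap=-1)  # -> False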
def main(_):
assert FLAGS.train_dir, '`train_dir` is missing.'
assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
assert FLAGS.eval_dir, '`eval_dir` is missing.'
configs = config_util.get_configs_from_pipeline_file(
FLAGS.pipeline_config_path)
if FLAGS.task == 0:
tf.gfile.MakeDirs(FLAGS.train_dir)
tf.gfile.Copy(FLAGS.pipeline_config_path,
os.path.join(FLAGS.train_dir, 'pipeline.config'),
overwrite=True)
tf.gfile.MakeDirs(FLAGS.eval_dir)
tf.gfile.Copy(FLAGS.pipeline_config_path,
os.path.join(FLAGS.eval_dir, 'pipeline.config'),
overwrite=True)
model_config = configs['model']
train_config = configs['train_config']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
if FLAGS.eval_training_data:
eval_input_config = configs['train_input_config']
else:
eval_input_config = configs['eval_input_config']
  # Run evaluation after every FLAGS.epochs_between_evals training steps; the
  # total number of training steps is taken from num_steps in the config.
if train_config.num_steps:
total_num_epochs = train_config.num_steps
train_config.num_steps = FLAGS.epochs_between_evals
total_training_cycle = total_num_epochs // train_config.num_steps
else:
    # TODO(mehdi): make it run indefinitely.
total_num_epochs = 20000000
train_config.num_steps = FLAGS.epochs_between_evals
total_training_cycle = total_num_epochs // train_config.num_steps
train_model_fn = functools.partial(model_builder.build,
model_config=model_config,
is_training=True)
eval_model_fn = functools.partial(model_builder.build,
model_config=model_config,
is_training=False)
def get_next(config):
return dataset_util.make_initializable_iterator(
dataset_builder.build(config)).get_next()
# functions to create a tensor input dictionary for both training & evaluation
train_input_dict_fn = functools.partial(get_next, train_input_config)
eval_input_dict_fn = functools.partial(get_next, eval_input_config)
# If not explicitly specified in the constructor and the TF_CONFIG
# environment variable is present, load cluster_spec from TF_CONFIG.
env = json.loads(os.environ.get('TF_CONFIG', '{}'))
cluster_data = env.get('cluster', None)
cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
task_data = env.get('task', {'type': 'master', 'index': 0})
task_info = type('TaskSpec', (object,), task_data)
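  # A typical TF_CONFIG value looks like the following (an illustration; the
  # hosts are placeholders):
  #   {"cluster": {"master": ["host0:2222"],
  #                "worker": ["host1:2222", "host2:2222"],
  #                "ps": ["host3:2222"]},
  #    "task": {"type": "worker", "index": 0}}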
# Parameters for a single worker.
parameter_server_tasks = 0
worker_replicas = 1
worker_job_name = 'lonely_worker'
task = 0
is_chief = True
master = ''
if cluster_data and 'worker' in cluster_data:
    # Number of total worker replicas includes "worker"s and the "master".
worker_replicas = len(cluster_data['worker']) + 1
if cluster_data and 'ps' in cluster_data:
parameter_server_tasks = len(cluster_data['ps'])
if worker_replicas > 1 and parameter_server_tasks < 1:
raise ValueError('At least 1 ps task is needed for distributed training.')
if worker_replicas >= 1 and parameter_server_tasks > 0:
# Set up distributed training.
server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
job_name=task_info.type,
task_index=task_info.index)
if task_info.type == 'ps':
server.join()
return
worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
task = task_info.index
is_chief = (task_info.type == 'master')
master = server.target
label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes)
if FLAGS.run_once:
eval_config.max_evals = 1
train_graph_rewriter_fn = eval_graph_rewriter_fn = None
if 'graph_rewriter_config' in configs:
train_graph_rewriter_fn = graph_rewriter_builder.build(
configs['graph_rewriter_config'], is_training=True)
    eval_graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)
def train():
return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
create_model_fn=train_model_fn,
train_config=train_config, master=master, task=task,
num_clones=FLAGS.num_clones,
worker_replicas=worker_replicas,
clone_on_cpu=FLAGS.clone_on_cpu,
ps_tasks=parameter_server_tasks,
worker_job_name=worker_job_name,
is_chief=is_chief, train_dir=FLAGS.train_dir,
graph_hook_fn=train_graph_rewriter_fn)
def evaluate():
return evaluator.evaluate(eval_input_dict_fn, eval_model_fn, eval_config,
categories, FLAGS.train_dir, FLAGS.eval_dir,
graph_hook_fn=eval_graph_rewriter_fn)
for cycle_index in range(total_training_cycle):
tf.logging.info('Starting a training cycle: %d/%d',
cycle_index, total_training_cycle)
train()
tf.logging.info('Starting to evaluate.')
eval_metrics = evaluate()
if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap, FLAGS.box_min_ap):
tf.logging.info('Stopping criteria met. Training stopped')
break
if __name__ == '__main__':
tf.app.run()
# Contributing to the Tensorflow Object Detection API
Patches to Tensorflow Object Detection API are welcome!
We require contributors to fill out either the individual or corporate
Contributor License Agreement (CLA).
* If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
Please follow the
[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
when submitting pull requests.
# Tensorflow Object Detection API
Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision.
The TensorFlow Object Detection API is an open source framework built on top of
TensorFlow that makes it easy to construct, train and deploy object detection
models. At Google we’ve certainly found this codebase to be useful for our
computer vision needs, and we hope that you will as well.
<p align="center">
<img src="g3doc/img/kites_detections_output.jpg" width=676 height=450>
</p>
Contributions to the codebase are welcome and we would love to hear back from
you if you find this API useful. Finally if you use the Tensorflow Object
Detection API for a research publication, please consider citing:
```
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
Song Y, Guadarrama S, Murphy K, CVPR 2017
```
\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
<p align="center">
<img src="g3doc/img/tf-od-api-logo.png" width=140 height=195>
</p>
## Maintainers
* Jonathan Huang, github: [jch1](https://github.com/jch1)
* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
* Ronny Votel, github: [ronnyvotel](https://github.com/ronnyvotel)
* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
* Chen Sun, github: [jesu9](https://github.com/jesu9)
* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
* Alireza Fathi, github: [afathi3](https://github.com/afathi3)
* Zhichao Lu, github: [pkulzc](https://github.com/pkulzc)
## Table of contents
Quick Start:
* <a href='object_detection_tutorial.ipynb'>
Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
* <a href="g3doc/running_pets.md">Quick Start: Training a pet detector</a><br>
Setup:
* <a href='g3doc/installation.md'>Installation</a><br>
* <a href='g3doc/configuring_jobs.md'>
Configuring an object detection pipeline</a><br>
* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>
Running:
* <a href='g3doc/running_locally.md'>Running locally</a><br>
* <a href='g3doc/running_on_cloud.md'>Running on the cloud</a><br>
Extras:
* <a href='g3doc/detection_model_zoo.md'>Tensorflow detection model zoo</a><br>
* <a href='g3doc/exporting_models.md'>
Exporting a trained model for inference</a><br>
* <a href='g3doc/defining_your_own_model.md'>
Defining your own model architecture</a><br>
* <a href='g3doc/using_your_own_dataset.md'>
Bringing in your own dataset</a><br>
* <a href='g3doc/evaluation_protocols.md'>
Supported object detection evaluation protocols</a><br>
* <a href='g3doc/oid_inference_and_evaluation.md'>
Inference and evaluation on the Open Images dataset</a><br>
* <a href='g3doc/instance_segmentation.md'>
Run an instance segmentation model</a><br>
## Getting Help
To get help with issues you may encounter using the Tensorflow Object Detection
API, create a new question on [StackOverflow](https://stackoverflow.com/) with
the tags "tensorflow" and "object-detection".
Please report bugs (actually broken code, not usage questions) to the
tensorflow/models GitHub
[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
issue name with "object_detection".
Please check [FAQ](g3doc/faq.md) for frequently asked questions before
reporting an issue.
## Release information
### April 30, 2018
We have released a Faster R-CNN detector with ResNet-101 feature extractor trained on [AVA](https://research.google.com/ava/) v2.1.
Compared with other commonly used object detectors, it changes the action classification loss function to per-class Sigmoid loss to handle boxes with multiple labels.
The model is trained on the training split of AVA v2.1 for 1.5M iterations, achieving a mean AP of 11.25% over 60 classes on the validation split of AVA v2.1.
For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).
<b>Thanks to contributors</b>: Chen Sun, David Ross
### April 2, 2018
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy.
Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset.
<b>Thanks to contributors</b>: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang
### February 9, 2018
We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to
[our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the 2017 Coco + Places Workshop.
Refer to the section on [Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for instructions on how to configure a model
that predicts masks in addition to object bounding boxes.
<b>Thanks to contributors</b>: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny Votel, Jonathan Huang
### November 17, 2017
As a part of the Open Images V3 release we have released:
* An implementation of the Open Images evaluation metric and the [protocol](g3doc/evaluation_protocols.md#open-images).
* Additional tools to separate inference of detection and evaluation (see [this tutorial](g3doc/oid_inference_and_evaluation.md)).
* A new detection model trained on the Open Images V2 data release (see [Open Images model](g3doc/detection_model_zoo.md#open-images-models)).
See more information on the [Open Images website](https://github.com/openimages/dataset)!
<b>Thanks to contributors</b>: Stefan Popov, Alina Kuznetsova
### November 6, 2017
We have re-released faster versions of our (pre-trained) models in the
<a href='g3doc/detection_model_zoo.md'>model zoo</a>. In addition to what
was available before, we are also adding Faster R-CNN models trained on COCO
with Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN
with Resnet-101 model trained on the KITTI dataset.
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow,
Tal Remez, Chen Sun.
### October 31, 2017
We have released a new state-of-the-art model for object detection using
the Faster-RCNN with the
[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This
model achieves mAP of 43.1% on the test-dev validation dataset for COCO,
improving on the best available model in the zoo by 6% in terms
of absolute mAP.
<b>Thanks to contributors</b>: Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc Le
### August 11, 2017
We have released an update to the [Android Detect
demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
which will now run models trained using the Tensorflow Object
Detection API on an Android device. By default, it currently runs a
frozen SSD w/Mobilenet detector trained on COCO, but we encourage
you to try out other detection models!
<b>Thanks to contributors</b>: Jonathan Huang, Andrew Harp
### June 15, 2017
In addition to our base Tensorflow detection model definitions, this
release includes:
* A selection of trainable detection models, including:
* Single Shot Multibox Detector (SSD) with MobileNet,
* SSD with Inception V2,
* Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
* Faster RCNN with Resnet 101,
* Faster RCNN with Inception Resnet v2
* Frozen weights (trained on the COCO dataset) for each of the above models to
be used for out-of-the-box inference purposes.
* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
out-of-the-box inference with one of our released models
* Convenient [local training](g3doc/running_locally.md) scripts as well as
distributed training and evaluation pipelines via
[Google Cloud](g3doc/running_on_cloud.md).
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow,
Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings,
Viacheslav Kovalevskyi, Kevin Murphy
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly as used in Faster RCNN.
Generates grid anchors on the fly as described in:
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
"""
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_list
from object_detection.utils import ops
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generates a grid of anchors at given scales and aspect ratios."""
def __init__(self,
scales=(0.5, 1.0, 2.0),
aspect_ratios=(0.5, 1.0, 2.0),
base_anchor_size=None,
anchor_stride=None,
anchor_offset=None):
"""Constructs a GridAnchorGenerator.
Args:
scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
      base_anchor_size: base anchor size as [height, width]
                        (length-2 float32 list or tensor, default=[256, 256])
anchor_stride: difference in centers between base anchors for adjacent
grid positions (length-2 float32 list or tensor,
default=[16, 16])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need additional calculation if other
padding is used (length-2 float32 list or tensor,
default=[0, 0])
"""
# Handle argument defaults
if base_anchor_size is None:
base_anchor_size = [256, 256]
base_anchor_size = tf.to_float(tf.convert_to_tensor(base_anchor_size))
if anchor_stride is None:
anchor_stride = [16, 16]
anchor_stride = tf.to_float(tf.convert_to_tensor(anchor_stride))
if anchor_offset is None:
anchor_offset = [0, 0]
anchor_offset = tf.to_float(tf.convert_to_tensor(anchor_offset))
self._scales = scales
self._aspect_ratios = aspect_ratios
self._base_anchor_size = base_anchor_size
self._anchor_stride = anchor_stride
self._anchor_offset = anchor_offset
def name_scope(self):
return 'GridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the `generate` function.
"""
return [len(self._scales) * len(self._aspect_ratios)]
def _generate(self, feature_map_shape_list):
"""Generates a collection of bounding boxes to be used as anchors.
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0)]. For example, setting
feature_map_shape_list=[(8, 8)] asks for anchors that correspond
to an 8x8 layer. For this anchor generator, only lists of length 1 are
allowed.
Returns:
boxes_list: a list of BoxLists each holding anchor boxes corresponding to
the input feature map shapes.
    Raises:
      ValueError: if feature_map_shape_list is not a list of length 1, or does
        not consist of pairs of integers.
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == 1):
raise ValueError('feature_map_shape_list must be a list of length 1.')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
grid_height, grid_width = feature_map_shape_list[0]
scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
self._aspect_ratios)
scales_grid = tf.reshape(scales_grid, [-1])
aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
anchors = tile_anchors(grid_height,
grid_width,
scales_grid,
aspect_ratios_grid,
self._base_anchor_size,
self._anchor_stride,
self._anchor_offset)
num_anchors = anchors.num_boxes_static()
if num_anchors is None:
num_anchors = anchors.num_boxes()
anchor_indices = tf.zeros([num_anchors])
anchors.add_field('feature_map_index', anchor_indices)
return [anchors]
def tile_anchors(grid_height,
grid_width,
scales,
aspect_ratios,
base_anchor_size,
anchor_stride,
anchor_offset):
"""Create a tiled set of anchors strided along a grid in image space.
This op creates a set of anchor boxes by placing a "basis" collection of
boxes with user-specified scales and aspect ratios centered at evenly
distributed points along a grid. The basis collection is specified via the
scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
.1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
placing it over its respective center.
Grid points are specified via grid_height, grid_width parameters as well as
the anchor_stride and anchor_offset parameters.
Args:
grid_height: size of the grid in the y direction (int or int scalar tensor)
grid_width: size of the grid in the x direction (int or int scalar tensor)
scales: a 1-d (float) tensor representing the scale of each box in the
basis set.
aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
box in the basis set. The length of the scales and aspect_ratios tensors
must be equal.
base_anchor_size: base anchor size as [height, width]
(float tensor of shape [2])
anchor_stride: difference in centers between base anchors for adjacent grid
positions (float tensor of shape [2])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need some additional calculation if other
padding is used (float tensor of shape [2])
Returns:
a BoxList holding a collection of N anchor boxes
"""
ratio_sqrts = tf.sqrt(aspect_ratios)
heights = scales / ratio_sqrts * base_anchor_size[0]
widths = scales * ratio_sqrts * base_anchor_size[1]
# Get a grid of box centers
y_centers = tf.to_float(tf.range(grid_height))
y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
x_centers = tf.to_float(tf.range(grid_width))
x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
bbox_centers = tf.reshape(bbox_centers, [-1, 2])
bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
return box_list.BoxList(bbox_corners)
def _center_size_bbox_to_corners_bbox(centers, sizes):
"""Converts bbox center-size representation to corners representation.
Args:
centers: a tensor with shape [N, 2] representing bounding box centers
    sizes: a tensor with shape [N, 2] representing bounding box sizes as
      [height, width]
Returns:
corners: tensor with shape [N, 4] representing bounding boxes in corners
representation
"""
return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
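

# Minimal usage sketch (an illustration, not part of the library): tiling the
# three-box basis from the tile_anchors docstring over a 2x2 grid yields
# 3 * 2 * 2 = 12 anchors. Assumes graph-mode (TF1) execution.
if __name__ == '__main__':
  _demo_anchors = tile_anchors(
      grid_height=2, grid_width=2,
      scales=tf.constant([0.1, 0.2, 0.2]),
      aspect_ratios=tf.constant([2.0, 2.0, 0.5]),
      base_anchor_size=tf.constant([256.0, 256.0]),
      anchor_stride=tf.constant([16.0, 16.0]),
      anchor_offset=tf.constant([0.0, 0.0]))
  with tf.Session() as sess:
    # Each row is [ymin, xmin, ymax, xmax]; 3 boxes x 4 grid cells = 12 rows.
    print(sess.run(_demo_anchors.get()).shape)  # (12, 4)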
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.grid_anchor_generator."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.utils import test_case
class GridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
def graph_fn():
scales = [0.5, 1.0, 2.0]
aspect_ratios = [0.25, 1.0, 4.0]
anchor_offset = [7, -3]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales, aspect_ratios, anchor_offset=anchor_offset)
anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
def graph_fn():
base_anchor_size = [10, 10]
anchor_stride = [19, 19]
anchor_offset = [0, 0]
scales = [0.5, 1.0, 2.0]
aspect_ratios = [1.0]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self):
def graph_fn(feature_map_height, feature_map_width):
base_anchor_size = [10, 10]
anchor_stride = [19, 19]
anchor_offset = [0, 0]
scales = [0.5, 1.0, 2.0]
aspect_ratios = [1.0]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
anchors_list = anchor_generator.generate(
feature_map_shape_list=[(feature_map_height, feature_map_width)])
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_corners_out = self.execute_cpu(graph_fn,
[np.array(2, dtype=np.int32),
np.array(2, dtype=np.int32)])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"SSD: Single Shot MultiBox Detector"
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
(see Section 2.2: Choosing scales and aspect ratios for default boxes)
"""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers."""
def __init__(self,
box_specs_list,
base_anchor_size=None,
anchor_strides=None,
anchor_offsets=None,
clip_window=None):
"""Constructs a MultipleGridAnchorGenerator.
    To construct anchors at multiple grid resolutions, one must provide a
    feature_map_shape_list (e.g., [(8, 8), (4, 4)]) and, for each grid size,
    a corresponding list of (scale, aspect ratio) box specifications.
For example:
box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
[(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
To support the fully convolutional setting, we pass grid sizes in at
generation time, while scale and aspect ratios are fixed at construction
time.
Args:
box_specs_list: list of list of (scale, aspect ratio) pairs with the
outside list having the same number of entries as feature_map_shape_list
(which is passed in at generation time).
base_anchor_size: base anchor size as [height, width]
(length-2 float tensor, default=[1.0, 1.0]).
The height and width values are normalized to the
minimum dimension of the input height and width, so that
when the base anchor height equals the base anchor
width, the resulting anchor is square even if the input
image is not square.
anchor_strides: list of pairs of strides in pixels (in y and x directions
respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
means that we want the anchors corresponding to the first layer to be
strided by 25 pixels and those in the second layer to be strided by 50
pixels in both y and x directions. If anchor_strides=None, they are set
to be the reciprocal of the corresponding feature map shapes.
anchor_offsets: list of pairs of offsets in pixels (in y and x directions
respectively). The offset specifies where we want the center of the
(0, 0)-th anchor to lie for each layer. For example, setting
        anchor_offsets=[(10, 10), (20, 20)] means that we want the
        (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
        and likewise that we want the (0, 0)-th anchor of the second layer to
        lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
set to be half of the corresponding anchor stride.
clip_window: a tensor of shape [4] specifying a window to which all
anchors should be clipped. If clip_window is None, then no clipping
is performed.
Raises:
ValueError: if box_specs_list is not a list of list of pairs
ValueError: if clip_window is not either None or a tensor of shape [4]
"""
if isinstance(box_specs_list, list) and all(
[isinstance(list_item, list) for list_item in box_specs_list]):
self._box_specs = box_specs_list
else:
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
if base_anchor_size is None:
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
self._base_anchor_size = base_anchor_size
self._anchor_strides = anchor_strides
self._anchor_offsets = anchor_offsets
if clip_window is not None and clip_window.get_shape().as_list() != [4]:
raise ValueError('clip_window must either be None or a shape [4] tensor')
self._clip_window = clip_window
self._scales = []
self._aspect_ratios = []
for box_spec in self._box_specs:
if not all([isinstance(entry, tuple) and len(entry) == 2
for entry in box_spec]):
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
scales, aspect_ratios = zip(*box_spec)
self._scales.append(scales)
self._aspect_ratios.append(aspect_ratios)
for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
['anchor_strides', 'anchor_offsets']):
if arg and not (isinstance(arg, list) and
len(arg) == len(self._box_specs)):
raise ValueError('%s must be a list with the same length '
'as self._box_specs' % arg_name)
if arg and not all([
isinstance(list_item, tuple) and len(list_item) == 2
for list_item in arg
]):
raise ValueError('%s must be a list of pairs.' % arg_name)
def name_scope(self):
return 'MultipleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return [len(box_specs) for box_specs in self._box_specs]
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
"""Generates a collection of bounding boxes to be used as anchors.
The number of anchors generated for a single grid with shape MxM where we
place k boxes over each grid center is k*M^2 and thus the total number of
anchors is the sum over all grids. In our box_specs_list example
(see the constructor docstring), we would place two boxes over each grid
point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
output anchors follows the order of how the grid sizes and box_specs are
specified (with box_spec index varying the fastest, followed by width
index, then height index, then grid index).
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0), (height_1, width_1), ...]. For example,
setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
correspond to an 8x8 layer followed by a 7x7 layer.
im_height: the height of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
im_width: the width of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
Returns:
boxes_list: a list of BoxLists each holding anchor boxes corresponding to
the input feature map shapes.
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == len(self._box_specs)):
raise ValueError('feature_map_shape_list must be a list with the same '
'length as self._box_specs')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
im_height = tf.to_float(im_height)
im_width = tf.to_float(im_width)
if not self._anchor_strides:
anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
for pair in feature_map_shape_list]
else:
anchor_strides = [(tf.to_float(stride[0]) / im_height,
tf.to_float(stride[1]) / im_width)
for stride in self._anchor_strides]
if not self._anchor_offsets:
anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
for stride in anchor_strides]
else:
anchor_offsets = [(tf.to_float(offset[0]) / im_height,
tf.to_float(offset[1]) / im_width)
for offset in self._anchor_offsets]
for arg, arg_name in zip([anchor_strides, anchor_offsets],
['anchor_strides', 'anchor_offsets']):
if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
raise ValueError('%s must be a list with the same length '
'as self._box_specs' % arg_name)
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in arg]):
raise ValueError('%s must be a list of pairs.' % arg_name)
anchor_grid_list = []
min_im_shape = tf.minimum(im_height, im_width)
scale_height = min_im_shape / im_height
scale_width = min_im_shape / im_width
base_anchor_size = [
scale_height * self._base_anchor_size[0],
scale_width * self._base_anchor_size[1]
]
for feature_map_index, (grid_size, scales, aspect_ratios, stride,
offset) in enumerate(
zip(feature_map_shape_list, self._scales,
self._aspect_ratios, anchor_strides,
anchor_offsets)):
tiled_anchors = grid_anchor_generator.tile_anchors(
grid_height=grid_size[0],
grid_width=grid_size[1],
scales=scales,
aspect_ratios=aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=stride,
anchor_offset=offset)
if self._clip_window is not None:
tiled_anchors = box_list_ops.clip_to_window(
tiled_anchors, self._clip_window, filter_nonoverlapping=False)
num_anchors_in_layer = tiled_anchors.num_boxes_static()
if num_anchors_in_layer is None:
num_anchors_in_layer = tiled_anchors.num_boxes()
anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
tiled_anchors.add_field('feature_map_index', anchor_indices)
anchor_grid_list.append(tiled_anchors)
return anchor_grid_list
def create_ssd_anchors(num_layers=6,
min_scale=0.2,
max_scale=0.95,
scales=None,
aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
interpolated_scale_aspect_ratio=1.0,
base_anchor_size=None,
anchor_strides=None,
anchor_offsets=None,
reduce_boxes_in_lowest_layer=True):
"""Creates MultipleGridAnchorGenerator for SSD anchors.
This function instantiates a MultipleGridAnchorGenerator that reproduces
``default box`` construction proposed by Liu et al in the SSD paper.
See Section 2.2 for details. Grid sizes are assumed to be passed in
at generation time from finest resolution to coarsest resolution --- this is
used to (linearly) interpolate scales of anchor boxes corresponding to the
intermediate grid sizes.
Anchors that are returned by calling the `generate` method on the returned
MultipleGridAnchorGenerator object are always in normalized coordinates
and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
Args:
num_layers: integer number of grid layers to create anchors for (actual
grid sizes passed in at generation time)
min_scale: scale of anchors corresponding to finest resolution (float)
max_scale: scale of anchors corresponding to coarsest resolution (float)
    scales: A list of anchor scales to use. When not None and not empty,
      min_scale and max_scale are ignored.
aspect_ratios: list or tuple of (float) aspect ratios to place on each
grid point.
interpolated_scale_aspect_ratio: An additional anchor is added with this
aspect ratio and a scale interpolated between the scale for a layer
and the scale for the next layer (1.0 for the last layer).
This anchor is not included if this value is 0.
base_anchor_size: base anchor size as [height, width].
The height and width values are normalized to the minimum dimension of the
input height and width, so that when the base anchor height equals the
base anchor width, the resulting anchor is square even if the input image
is not square.
anchor_strides: list of pairs of strides in pixels (in y and x directions
respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
means that we want the anchors corresponding to the first layer to be
strided by 25 pixels and those in the second layer to be strided by 50
pixels in both y and x directions. If anchor_strides=None, they are set to
be the reciprocal of the corresponding feature map shapes.
anchor_offsets: list of pairs of offsets in pixels (in y and x directions
respectively). The offset specifies where we want the center of the
(0, 0)-th anchor to lie for each layer. For example, setting
      anchor_offsets=[(10, 10), (20, 20)] means that we want the
      (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
      and likewise that we want the (0, 0)-th anchor of the second layer to lie
      at (20, 20) in pixel space. If anchor_offsets=None, then they are set to
be half of the corresponding anchor stride.
    reduce_boxes_in_lowest_layer: a boolean indicating whether a fixed set of
      3 boxes per location is used in the lowest layer.
Returns:
a MultipleGridAnchorGenerator
"""
if base_anchor_size is None:
base_anchor_size = [1.0, 1.0]
base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
box_specs_list = []
if scales is None or not scales:
scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
for i in range(num_layers)] + [1.0]
else:
# Add 1.0 to the end, which will only be used in scale_next below and used
# for computing an interpolated scale for the largest scale in the list.
scales += [1.0]
for layer, scale, scale_next in zip(
range(num_layers), scales[:-1], scales[1:]):
layer_box_specs = []
if layer == 0 and reduce_boxes_in_lowest_layer:
layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
else:
for aspect_ratio in aspect_ratios:
layer_box_specs.append((scale, aspect_ratio))
# Add one more anchor, with a scale between the current scale, and the
# scale for the next layer, with a specified aspect ratio (1.0 by
# default).
if interpolated_scale_aspect_ratio > 0.0:
layer_box_specs.append((np.sqrt(scale*scale_next),
interpolated_scale_aspect_ratio))
box_specs_list.append(layer_box_specs)
return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
anchor_strides, anchor_offsets)
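

# Minimal usage sketch (an illustration, not part of the library): with the
# defaults num_layers=6, min_scale=0.2, max_scale=0.95, the linearly
# interpolated scales are 0.2 + 0.15 * i for i in 0..5, i.e.
# [0.2, 0.35, 0.5, 0.65, 0.8, 0.95]. The lowest layer uses the reduced 3-box
# spec; every other layer gets the 5 default aspect ratios plus one
# interpolated-scale box.
if __name__ == '__main__':
  _ssd_generator = create_ssd_anchors()
  print(_ssd_generator.num_anchors_per_location())  # [3, 6, 6, 6, 6, 6]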
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
from object_detection.utils import test_case
class MultipleGridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor_grid(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
def graph_fn():
box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
(.5, 1.0), (1.0, 1.0), (2.0, 1.0),
(.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([256, 256], dtype=tf.float32),
anchor_strides=[(16, 16)],
anchor_offsets=[(7, -3)])
anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
return anchors_list[0].get()
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
def graph_fn():
box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([10, 10], dtype=tf.float32),
anchor_strides=[(19, 19)],
anchor_offsets=[(0, 0)])
anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
return anchors_list[0].get()
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_non_square(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size=tf.constant([1, 1],
dtype=tf.float32))
anchors_list = anchor_generator.generate(feature_map_shape_list=[(
tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
return anchors_list[0].get()
exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_dynamic_size_anchor_grid(self):
def graph_fn(height, width):
box_specs_list = [[(1.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size=tf.constant([1, 1],
dtype=tf.float32))
anchors_list = anchor_generator.generate(feature_map_shape_list=[(height,
width)])
return anchors_list[0].get()
exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
anchor_corners_out = self.execute_cpu(graph_fn,
[np.array(1, dtype=np.int32),
np.array(2, dtype=np.int32)])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_normalized(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size=tf.constant([1, 1],
dtype=tf.float32))
anchors_list = anchor_generator.generate(
feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
2, dtype=tf.int32))],
im_height=320,
im_width=640)
return anchors_list[0].get()
exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_multiple_grids(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
2, 2)])
return [anchors.get() for anchors in anchors_list]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[-.25, -.25, .75, .75],
[.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
[-.25, .25, .75, 1.25],
[.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
[.25, -.25, 1.25, .75],
[.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
[.25, .25, 1.25, 1.25],
[.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
# only test first entry of larger set of anchors
exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
[.125-1.0, .125-1.0, .125+1.0, .125+1.0],
[.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    self.assertEqual(anchor_corners_out.shape, (56, 4))
big_grid_corners = anchor_corners_out[0:3, :]
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
self.assertAllClose(big_grid_corners, exp_big_grid_corners)
def test_construct_multiple_grids_with_clipping(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
clip_window=clip_window)
anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
2, 2)])
return [anchors.get() for anchors in anchors_list]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[0, 0, .75, .75],
[0, 0, .25+.5*h, .25+.5*w],
[0, .25, .75, 1],
[0, .75-.5*w, .25+.5*h, 1],
[.25, 0, 1, .75],
[.75-.5*h, 0, 1, .25+.5*w],
[.25, .25, 1, 1],
[.75-.5*h, .75-.5*w, 1, 1]]
anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
def test_invalid_box_specs(self):
# not all box specs are pairs
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5, .3)]]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
# box_specs_list is not a list of lists
box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
def test_invalid_generate_arguments(self):
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# incompatible lengths with box_specs_list
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.5, .5)],
anchor_offsets=[(.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
# not pairs
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25, .1), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)])
class CreateSSDAnchorsTest(test_case.TestCase):
def test_create_ssd_anchors_returns_correct_shape(self):
def graph_fn1():
anchor_generator = ag.create_ssd_anchors(
num_layers=6,
min_scale=0.2,
max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
reduce_boxes_in_lowest_layer=True)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors_list = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
return [anchors.get() for anchors in anchors_list]
anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0)
self.assertEqual(anchor_corners_out.shape, (7308, 4))
def graph_fn2():
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=False)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors_list = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
return [anchors.get() for anchors in anchors_list]
anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0)
self.assertEqual(anchor_corners_out.shape, (11640, 4))
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002)
T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
"""
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers of different scale."""
def __init__(self, min_level, max_level, anchor_scale, aspect_ratios,
scales_per_octave, normalize_coordinates=True):
"""Constructs a MultiscaleGridAnchorGenerator.
To construct anchors at multiple scale resolutions, one must provide the
minimum and maximum levels of a scale pyramid. The anchor scale determines
the size of the base anchor relative to the stride of the corresponding
feature map. The generator maps each pixel location on a feature map to
multiple anchors with different aspect ratios and intermediate scales.
Args:
min_level: minimum level in feature pyramid.
max_level: maximum level in feature pyramid.
anchor_scale: anchor scale and feature stride define the size of the base
anchor on an image. For example, given a feature pyramid with strides
[2^3, ..., 2^7] and an anchor scale of 4, the base anchor sizes are
4 * [2^3, ..., 2^7].
aspect_ratios: list or tuple of (float) aspect ratios to place on each
grid point.
scales_per_octave: integer number of intermediate scales per scale octave.
normalize_coordinates: whether to produce anchors in normalized
coordinates. (defaults to True).
"""
self._anchor_grid_info = []
self._aspect_ratios = aspect_ratios
self._scales_per_octave = scales_per_octave
self._normalize_coordinates = normalize_coordinates
for level in range(min_level, max_level + 1):
anchor_stride = [2**level, 2**level]
scales = []
aspects = []
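# Intermediate scales subdivide each octave geometrically:
# 2**(i / scales_per_octave) for i in [0, scales_per_octave).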
for scale in range(scales_per_octave):
scales.append(2**(float(scale) / scales_per_octave))
for aspect_ratio in aspect_ratios:
aspects.append(aspect_ratio)
base_anchor_size = [2**level * anchor_scale, 2**level * anchor_scale]
self._anchor_grid_info.append({
'level': level,
'info': [scales, aspects, base_anchor_size, anchor_stride]
})
def name_scope(self):
return 'MultiscaleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return len(self._anchor_grid_info) * [
len(self._aspect_ratios) * self._scales_per_octave]
def _generate(self, feature_map_shape_list, im_height, im_width):
"""Generates a collection of bounding boxes to be used as anchors.
Currently we require the input image shape to be statically defined. That
is, im_height and im_width should be integers rather than tensors.
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0), (height_1, width_1), ...]. For example,
setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
correspond to an 8x8 layer followed by a 7x7 layer.
im_height: the height of the image to generate the grid for.
im_width: the width of the image to generate the grid for.
Returns:
boxes_list: a list of BoxLists each holding anchor boxes corresponding to
the input feature map shapes.
Raises:
ValueError: if im_height and im_width are not integers.
"""
if not isinstance(im_height, int) or not isinstance(im_width, int):
raise ValueError('MultiscaleGridAnchorGenerator currently requires '
'input image shape to be statically defined.')
anchor_grid_list = []
for feat_shape, grid_info in zip(feature_map_shape_list,
self._anchor_grid_info):
# TODO(rathodv) check the feature_map_shape_list is consistent with
# self._anchor_grid_info
level = grid_info['level']
stride = 2**level
scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info']
feat_h = feat_shape[0]
feat_w = feat_shape[1]
anchor_offset = [0, 0]
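# When the image dimension divides evenly by the stride at this level,
# shift the anchors by half a stride so they are centered on feature cells.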
if im_height % 2.0**level == 0:
anchor_offset[0] = stride / 2.0
if im_width % 2.0**level == 0:
anchor_offset[1] = stride / 2.0
ag = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
(anchor_grid,) = ag.generate(feature_map_shape_list=[(feat_h, feat_w)])
if self._normalize_coordinates:
anchor_grid = box_list_ops.to_normalized_coordinates(
anchor_grid, im_height, im_width, check_range=False)
anchor_grid_list.append(anchor_grid)
return anchor_grid_list
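A minimal usage sketch of the generator above (not part of the library; the values mirror the unit tests that follow, assuming a 64x64 image and pyramid levels 5 and 6, i.e. strides 32 and 64 and base anchors of 4 * 32 = 128 and 4 * 64 = 256 pixels):

import tensorflow as tf
from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg

generator = mg.MultiscaleGridAnchorGenerator(
    min_level=5, max_level=6, anchor_scale=4.0, aspect_ratios=[1.0],
    scales_per_octave=1, normalize_coordinates=False)
# One (height, width) pair per pyramid level, finest level first.
anchors_list = generator.generate([(2, 2), (1, 1)], im_height=64, im_width=64)
with tf.Session() as sess:
  corners = sess.run([anchors.get() for anchors in anchors_list])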
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiscale_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg
from object_detection.utils import test_case
class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor(self):
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 64
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_in_normalized_coordinates(self):
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 128
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
[-48./64, -16./128, 80./64, 112./128],
[-16./64, -48./128, 112./64, 80./128],
[-16./64, -16./128, 112./64, 112./128]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=True)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_num_anchors_per_location(self):
min_level = 5
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0, 2.0]
scales_per_octave = 3
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6])
def test_construct_single_anchor_fails_with_tensor_image_size(self):
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = tf.constant(64)
im_width = tf.constant(64)
feature_map_shape_list = [(2, 2)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
with self.assertRaises(ValueError):
anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
def test_construct_single_anchor_with_odd_input_dimension(self):
def graph_fn():
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 65
im_width = 65
feature_map_shape_list = [(3, 3)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
anchor_corners_out = self.execute(graph_fn, [])
exp_anchor_corners = [[-64, -64, 64, 64],
[-64, -32, 64, 96],
[-64, 0, 64, 128],
[-32, -64, 96, 64],
[-32, -32, 96, 96],
[-32, 0, 96, 128],
[0, -64, 128, 64],
[0, -32, 128, 96],
[0, 0, 128, 128]]
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_on_two_feature_maps(self):
def graph_fn():
min_level = 5
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 64
feature_map_shape_list = [(2, 2), (1, 1)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112],
[-96, -96, 160, 160]]
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_with_two_scales_per_octave(self):
def graph_fn():
min_level = 6
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 2
im_height = 64
im_width = 64
feature_map_shape_list = [(1, 1)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
# There are 2 sets of anchors in this configuration. The order is:
# [[2**0.0 intermediate scale + 1.0 aspect],
# [2**0.5 intermediate scale + 1.0 aspect]]
exp_anchor_corners = [[-96., -96., 160., 160.],
[-149.0193, -149.0193, 213.0193, 213.0193]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self):
def graph_fn():
min_level = 6
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0, 2.0]
scales_per_octave = 2
im_height = 64
im_width = 64
feature_map_shape_list = [(1, 1)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
# There are 4 sets of anchors in this configuration. The order is:
# [[2**0.0 intermediate scale + 1.0 aspect],
# [2**0.5 intermediate scale + 1.0 aspect],
# [2**0.0 intermediate scale + 2.0 aspect],
# [2**0.5 intermediate scale + 2.0 aspect]]
exp_anchor_corners = [[-96., -96., 160., 160.],
[-149.0193, -149.0193, 213.0193, 213.0193],
[-58.50967, -149.0193, 122.50967, 213.0193],
[-96., -224., 160., 288.]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self):
def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height,
feature_map2_width):
min_level = 5
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 64
feature_map_shape_list = [(feature_map1_height, feature_map1_width),
(feature_map2_height, feature_map2_width)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
anchor_corners_out = np.concatenate(
self.execute_cpu(graph_fn, [
np.array(2, dtype=np.int32),
np.array(2, dtype=np.int32),
np.array(1, dtype=np.int32),
np.array(1, dtype=np.int32)
]),
axis=0)
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112],
[-96, -96, 160, 160]]
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Faster RCNN box coder."""
def __init__(self, scale_factors=None):
"""Constructor for FasterRcnnBoxCoder.
Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
If set to None, does not perform scaling. For Faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
"""
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
return tf.transpose(tf.stack([ty, tx, th, tw]))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
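As a sanity check on the coding schema above, the encode equations can be reproduced with plain NumPy; the box and anchor values here are borrowed from the tests below:

import numpy as np

def center_size(box):
  # box is [ymin, xmin, ymax, xmax]
  ymin, xmin, ymax, xmax = box
  return (ymin + ymax) / 2., (xmin + xmax) / 2., ymax - ymin, xmax - xmin

ycenter, xcenter, h, w = center_size([10.0, 10.0, 20.0, 15.0])
ycenter_a, xcenter_a, ha, wa = center_size([15.0, 12.0, 30.0, 18.0])
ty = (ycenter - ycenter_a) / ha  # -0.5
tx = (xcenter - xcenter_a) / wa  # -0.416666
th = np.log(h / ha)              # -0.405465
tw = np.log(w / wa)              # -0.182321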
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
class FasterRcnnBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint box coder.
The keypoint box coder follows the coding schema described below (this is
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
to box coordinates):
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
tky0 = (ky0 - ya) / ha
tkx0 = (kx0 - xa) / wa
tky1 = (ky1 - ya) / ha
tkx1 = (kx1 - xa) / wa
...
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
anchor-encoded keypoint coordinates.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
EPSILON = 1e-8
class KeypointBoxCoder(box_coder.BoxCoder):
"""Keypoint box coder."""
def __init__(self, num_keypoints, scale_factors=None):
"""Constructor for KeypointBoxCoder.
Args:
num_keypoints: Number of keypoints to encode/decode.
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
In addition to scaling ty and tx, the first 2 scalars are used to scale
the y and x coordinates of the keypoints as well. If set to None, does
not perform scaling.
"""
self._num_keypoints = num_keypoints
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
self._keypoint_scale_factors = None
if scale_factors is not None:
self._keypoint_scale_factors = tf.expand_dims(tf.tile(
[tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
[num_keypoints]), 1)
@property
def code_size(self):
return 4 + self._num_keypoints * 2
def _encode(self, boxes, anchors):
"""Encode a box and keypoint collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
tensors with the shape [N, 4], and keypoints are tensors with the shape
[N, num_keypoints, 2].
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
represent the y and x coordinates of the first keypoint, tky1 and tkx1
represent the y and x coordinates of the second keypoint, and so on.
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
keypoints = boxes.get_field(fields.BoxListFields.keypoints)
keypoints = tf.transpose(tf.reshape(keypoints,
[-1, self._num_keypoints * 2]))
num_boxes = boxes.num_boxes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
tboxes = tf.stack([ty, tx, th, tw])
return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes and keypoints.
Args:
rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
anchor-encoded boxes and keypoints
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes and keypoints.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
num_codes = tf.shape(rel_codes)[0]
result = tf.unstack(tf.transpose(rel_codes))
ty, tx, th, tw = result[:4]
tkeypoints = result[4:]
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
decoded_boxes_keypoints = box_list.BoxList(
tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
keypoints = tf.reshape(tf.transpose(keypoints),
[-1, self._num_keypoints, 2])
decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
return decoded_boxes_keypoints
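The keypoint terms use the same normalization as the box center. A small NumPy sketch (values borrowed from the tests below) makes the arithmetic concrete:

import numpy as np

anchor = [15., 12., 30., 18.]             # [ymin, xmin, ymax, xmax]
ycenter_a = (anchor[0] + anchor[2]) / 2.  # 22.5
xcenter_a = (anchor[1] + anchor[3]) / 2.  # 15.0
ha = anchor[2] - anchor[0]                # 15.0
wa = anchor[3] - anchor[1]                # 6.0
keypoints = np.array([[15., 12.], [10., 15.]])
tky = (keypoints[:, 0] - ycenter_a) / ha  # [-0.5, -0.833333]
tkx = (keypoints[:, 1] - xcenter_a) / wa  # [-0.5, 0.]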
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.keypoint_box_coder."""
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
class KeypointBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10., 10., 10.0000001, 20.]]
keypoints = [[[10., 10.], [10.0000001, 20.]]]
anchors = [[15., 12., 30., 18.]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
-0.833333, -0.833333, -0.833333, 0.833333]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(2)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mean stddev box coder.
This box coder uses the following coding schema to encode boxes:
rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
"""
from object_detection.core import box_coder
from object_detection.core import box_list
class MeanStddevBoxCoder(box_coder.BoxCoder):
"""Mean stddev box coder."""
def __init__(self, stddev=0.01):
"""Constructor for MeanStddevBoxCoder.
Args:
stddev: The standard deviation used to encode and decode boxes.
"""
self._stddev = stddev
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of N anchors.
Returns:
a tensor representing N anchor-encoded boxes
Raises:
ValueError: if the anchors still have the deprecated stddev field.
"""
box_corners = boxes.get()
if anchors.has_field('stddev'):
raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
"should not be specified in the box list.")
means = anchors.get()
return (box_corners - means) / self._stddev
def _decode(self, rel_codes, anchors):
"""Decode.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes
Raises:
ValueError: if the anchors still have the deprecated stddev field and
expect the decode method to use the stddev value from that field.
"""
means = anchors.get()
if anchors.has_field('stddev'):
raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
"should not be specified in the box list.")
box_corners = rel_codes * self._stddev + means
return box_list.BoxList(box_corners)
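The schema is simple enough to verify by hand; a minimal NumPy sketch, assuming the default stddev of 0.01:

import numpy as np

stddev = 0.01
box = np.array([0.10, 0.10, 0.40, 0.60])
anchor = np.array([0.15, 0.12, 0.30, 0.40])
rel_code = (box - anchor) / stddev    # [-5., -2., 10., 20.]
decoded = rel_code * stddev + anchor  # recovers the original box
assert np.allclose(decoded, box)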