Unverified Commit 3f78f4cf authored by derekjchow, committed by GitHub

Merge pull request #3494 from pkulzc/master

Update object detection with internal changes and remove unused BUILD files.
parents 73748d01 0319908c
......@@ -38,11 +38,17 @@ message WeightedL2LocalizationLoss {
optional bool anchorwise_output = 1 [default=false];
}
// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5
// SmoothL1 (Huber) location loss.
// The smooth L1 loss is defined elementwise as .5 x^2 if |x| <= delta and
// 0.5 delta^2 + delta * (|x| - delta) otherwise, where x is the difference
// between predictions and target.
message WeightedSmoothL1LocalizationLoss {
// DEPRECATED, do not use.
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
// Delta value for huber loss.
optional float delta = 2 [default=1.0];
}
// Intersection over union location loss: 1 - IOU
......
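As a quick numeric illustration of the piecewise definition above, here is a minimal NumPy sketch (not part of the diff; the helper name smooth_l1 is ours):

import numpy as np

def smooth_l1(x, delta=1.0):
  """Elementwise smooth L1 (Huber) loss as defined in losses.proto above."""
  abs_x = np.abs(x)
  return np.where(abs_x <= delta,
                  0.5 * x**2,
                  0.5 * delta**2 + delta * (abs_x - delta))

# With the default delta=1.0: smooth_l1(0.5) == 0.125 and smooth_l1(2.0) == 1.5.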
......@@ -20,4 +20,7 @@ message MultiscaleAnchorGenerator {
// Number of intermediate scales per scale octave.
optional int32 scales_per_octave = 5 [default = 2];
// Whether to produce anchors in normalized coordinates.
optional bool normalize_coordinates = 6 [default = true];
}
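For orientation (an assumption on our part, not stated in this hunk): scales_per_octave expands into fractional-power-of-two multipliers in the usual FPN-style scheme, e.g.:

scales_per_octave = 2
scale_multipliers = [2**(i / float(scales_per_octave))
                     for i in range(scales_per_octave)]
# scale_multipliers == [1.0, 1.4142...], applied on top of each base anchor scale.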
......@@ -388,14 +388,6 @@ message SSDRandomCropPadFixedAspectRatioOperation {
// Probability a crop operation is skipped.
optional float random_coef = 7;
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 8;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 9;
}
// Randomly crops and pads an image to a fixed aspect ratio according to:
......@@ -408,4 +400,12 @@ message SSDRandomCropPadFixedAspectRatio {
// Aspect ratio to pad to. This value is used for all crop and pad operations.
optional float aspect_ratio = 2 [default=1.0];
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 3;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 4;
}
......@@ -36,6 +36,10 @@ message Ssd {
// zeros vector or a one-hot vector (where background is the 0th class).
optional bool encode_background_as_zeros = 12 [default=false];
// Classification weight to be associated with negative
// anchors (default: 1.0). The weight must be in [0., 1.].
optional float negative_class_weight = 13 [default = 1.0];
// Box predictor to attach to the features.
optional BoxPredictor box_predictor = 7;
......@@ -49,6 +53,10 @@ message Ssd {
// the anchors.
optional bool normalize_loss_by_num_matches = 10 [default=true];
// Whether to normalize the localization loss by the code size of the box
// encodings. This is applied along with other normalization factors.
optional bool normalize_loc_loss_by_codesize = 14 [default=false];
// Loss configuration for training.
optional Loss loss = 11;
}
......
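A hedged sketch of setting the two new Ssd fields through the generated Python protos (assuming the object_detection.protos.ssd_pb2 module generated from ssd.proto):

from object_detection.protos import ssd_pb2

ssd_config = ssd_pb2.Ssd()
# Down-weight the classification loss on negative (background) anchors.
ssd_config.negative_class_weight = 0.5
# Also divide the localization loss by the box coder's code size.
ssd_config.normalize_loc_loss_by_codesize = True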
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"faster_rcnn_resnet50_pets.config",
"ssd_inception_v2_pets.config",
"ssd_mobilenet_v1_focal_loss_pets.config",
])
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"pets_examples.record",
])
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"image1.jpg",
"image2.jpg",
])
......@@ -235,7 +235,7 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
train_config.prefetch_queue_capacity, data_augmentation_options)
# Gather initial summaries.
# TODO: See if summaries can be added/extracted from global tf
# TODO(rathodv): See if summaries can be added/extracted from global tf
# collections so that they don't have to be passed around.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
global_summaries = set([])
......@@ -258,17 +258,19 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
sync_optimizer = None
if train_config.sync_replicas:
training_optimizer = tf.SyncReplicasOptimizer(
training_optimizer = tf.train.SyncReplicasOptimizer(
training_optimizer,
replicas_to_aggregate=train_config.replicas_to_aggregate,
total_num_replicas=train_config.worker_replicas)
total_num_replicas=worker_replicas)
sync_optimizer = training_optimizer
# Create ops required to initialize the model from a given checkpoint.
init_fn = None
if train_config.fine_tune_checkpoint:
var_map = detection_model.restore_map(
from_detection_checkpoint=train_config.from_detection_checkpoint)
from_detection_checkpoint=train_config.from_detection_checkpoint,
load_all_detection_checkpoint_vars=(
train_config.load_all_detection_checkpoint_vars))
available_var_map = (variables_helper.
get_variables_available_in_checkpoint(
var_map, train_config.fine_tune_checkpoint))
......
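For reference, a hedged sketch of the train_config fields exercised by the restore_map call above (field names taken from this diff and train.proto; the checkpoint path is a placeholder):

from object_detection.protos import train_pb2

train_config = train_pb2.TrainConfig()
train_config.fine_tune_checkpoint = '/path/to/model.ckpt'  # placeholder path
train_config.from_detection_checkpoint = True
# New in this change: also restore detection variables outside the
# feature extractor when initializing from a detection checkpoint.
train_config.load_all_detection_checkpoint_vars = True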
# Tensorflow Object Detection API: Utility functions.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "test_case",
srcs = ["test_case.py"],
deps = ["//tensorflow"],
)
py_library(
name = "category_util",
srcs = ["category_util.py"],
deps = ["//tensorflow"],
)
py_library(
name = "config_util",
srcs = ["config_util.py"],
deps = [
"//pyglib/logging",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:eval_py_pb2",
"//tensorflow/models/research/object_detection/protos:image_resizer_py_pb2",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/protos:model_py_pb2",
"//tensorflow/models/research/object_detection/protos:pipeline_py_pb2",
"//tensorflow/models/research/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "dataset_util",
srcs = ["dataset_util.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "json_utils",
srcs = ["json_utils.py"],
deps = [],
)
py_test(
name = "json_utils_test",
srcs = ["json_utils_test.py"],
deps = [
":json_utils",
"//tensorflow",
],
)
py_library(
name = "label_map_util",
srcs = ["label_map_util.py"],
deps = [
"//google/protobuf",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:string_int_label_map_py_pb2",
],
)
py_library(
name = "learning_schedules",
srcs = ["learning_schedules.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "metrics",
srcs = ["metrics.py"],
deps = ["//numpy"],
)
py_library(
name = "np_box_list",
srcs = ["np_box_list.py"],
deps = ["//numpy"],
)
py_library(
name = "np_box_mask_list",
srcs = ["np_box_mask_list.py"],
deps = [
":np_box_list",
"//numpy",
],
)
py_library(
name = "np_box_list_ops",
srcs = ["np_box_list_ops.py"],
deps = [
":np_box_list",
":np_box_ops",
"//numpy",
],
)
py_library(
name = "np_box_mask_list_ops",
srcs = ["np_box_mask_list_ops.py"],
deps = [
":np_box_list_ops",
":np_box_mask_list",
":np_mask_ops",
"//numpy",
],
)
py_library(
name = "np_box_ops",
srcs = ["np_box_ops.py"],
deps = ["//tensorflow"],
)
py_library(
name = "np_mask_ops",
srcs = ["np_mask_ops.py"],
deps = ["//numpy"],
)
py_library(
name = "object_detection_evaluation",
srcs = ["object_detection_evaluation.py"],
deps = [
":label_map_util",
":metrics",
":per_image_evaluation",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_library(
name = "ops",
srcs = ["ops.py"],
deps = [
":shape_utils",
":static_shape",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:box_list",
"//tensorflow/models/research/object_detection/core:box_list_ops",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_library(
name = "per_image_evaluation",
srcs = ["per_image_evaluation.py"],
deps = [
":np_box_list",
":np_box_list_ops",
":np_box_mask_list",
":np_box_mask_list_ops",
"//tensorflow",
],
)
py_library(
name = "shape_utils",
srcs = ["shape_utils.py"],
deps = [
":static_shape",
"//tensorflow",
],
)
py_library(
name = "static_shape",
srcs = ["static_shape.py"],
deps = [],
)
py_library(
name = "test_utils",
srcs = ["test_utils.py"],
deps = [
"//tensorflow",
"//tensorflow/models/research/object_detection/core:anchor_generator",
"//tensorflow/models/research/object_detection/core:box_coder",
"//tensorflow/models/research/object_detection/core:box_list",
"//tensorflow/models/research/object_detection/core:box_predictor",
"//tensorflow/models/research/object_detection/core:matcher",
"//tensorflow/models/research/object_detection/utils:shape_utils",
],
)
py_library(
name = "variables_helper",
srcs = ["variables_helper.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "visualization_utils",
srcs = ["visualization_utils.py"],
deps = [
"//PIL:pil",
"//Tkinter", # buildcleaner: keep
"//matplotlib",
"//six",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_test(
name = "category_util_test",
srcs = ["category_util_test.py"],
deps = [
":category_util",
"//tensorflow",
],
)
py_test(
name = "config_util_test",
srcs = ["config_util_test.py"],
deps = [
":config_util",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:image_resizer_py_pb2",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/protos:model_py_pb2",
"//tensorflow/models/research/object_detection/protos:pipeline_py_pb2",
"//tensorflow/models/research/object_detection/protos:train_py_pb2",
],
)
py_test(
name = "dataset_util_test",
srcs = ["dataset_util_test.py"],
deps = [
":dataset_util",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
],
)
py_test(
name = "label_map_util_test",
srcs = ["label_map_util_test.py"],
deps = [
":label_map_util",
"//tensorflow",
],
)
py_test(
name = "learning_schedules_test",
srcs = ["learning_schedules_test.py"],
deps = [
":learning_schedules",
":test_case",
"//tensorflow",
],
)
py_test(
name = "metrics_test",
srcs = ["metrics_test.py"],
deps = [
":metrics",
"//tensorflow",
],
)
py_test(
name = "np_box_list_test",
srcs = ["np_box_list_test.py"],
deps = [
":np_box_list",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_mask_list_test",
srcs = ["np_box_mask_list_test.py"],
deps = [
":np_box_mask_list",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_list_ops_test",
srcs = ["np_box_list_ops_test.py"],
deps = [
":np_box_list",
":np_box_list_ops",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_mask_list_ops_test",
srcs = ["np_box_mask_list_ops_test.py"],
deps = [
":np_box_mask_list",
":np_box_mask_list_ops",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_ops_test",
srcs = ["np_box_ops_test.py"],
deps = [
":np_box_ops",
"//tensorflow",
],
)
py_test(
name = "np_mask_ops_test",
srcs = ["np_mask_ops_test.py"],
deps = [
":np_mask_ops",
"//tensorflow",
],
)
py_test(
name = "object_detection_evaluation_test",
srcs = ["object_detection_evaluation_test.py"],
deps = [
":object_detection_evaluation",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_test(
name = "ops_test",
srcs = ["ops_test.py"],
deps = [
":ops",
":test_case",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_test(
name = "per_image_evaluation_test",
srcs = ["per_image_evaluation_test.py"],
deps = [
":per_image_evaluation",
"//tensorflow",
],
)
py_test(
name = "shape_utils_test",
srcs = ["shape_utils_test.py"],
deps = [
":shape_utils",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "static_shape_test",
srcs = ["static_shape_test.py"],
deps = [
":static_shape",
"//tensorflow",
],
)
py_test(
name = "test_utils_test",
srcs = ["test_utils_test.py"],
deps = [
":test_utils",
"//tensorflow",
],
)
py_test(
name = "variables_helper_test",
srcs = ["variables_helper_test.py"],
deps = [
":variables_helper",
"//tensorflow",
],
)
py_test(
name = "visualization_utils_test",
srcs = ["visualization_utils_test.py"],
data = [
"//tensorflow/models/research/object_detection/test_images:image1.jpg",
],
deps = [
":visualization_utils",
"//pyglib/flags",
"//PIL:pil",
],
)
......@@ -241,6 +241,10 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs):
if hparams:
kwargs.update(hparams.values())
for key, value in kwargs.items():
# pylint: disable=g-explicit-bool-comparison
if value == "" or value is None:
continue
# pylint: enable=g-explicit-bool-comparison
if key == "learning_rate":
_update_initial_learning_rate(configs, value)
tf.logging.info("Overwriting learning rate: %f", value)
......@@ -270,9 +274,8 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs):
_update_input_path(configs["eval_input_config"], value)
tf.logging.info("Overwriting eval input path: %s", value)
if key == "label_map_path":
if value:
_update_label_map_path(configs, value)
tf.logging.info("Overwriting label map path: %s", value)
_update_label_map_path(configs, value)
tf.logging.info("Overwriting label map path: %s", value)
if key == "mask_type":
_update_mask_type(configs, value)
tf.logging.info("Overwritten mask type: %s", value)
......
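A hedged usage sketch of the behavior this hunk introduces (paths are placeholders): empty-string overrides are now skipped generically at the top of the loop, so callers can pass unset hyperparameters without clobbering the pipeline config:

from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
configs = config_util.merge_external_params_with_configs(
    configs,
    learning_rate=0.001,  # applied: overwrites the initial learning rate
    label_map_path='')    # empty string: skipped, original path is kept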
......@@ -397,6 +397,27 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertEqual(new_label_map_path,
configs["eval_input_config"].label_map_path)
def testDontOverwriteEmptyLabelMapPath(self):
"""Tests that label map path will not by overwritten with empty string."""
original_label_map_path = "path/to/original/label_map"
new_label_map_path = ""
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
train_input_reader = pipeline_config.train_input_reader
train_input_reader.label_map_path = original_label_map_path
eval_input_reader = pipeline_config.eval_input_reader
eval_input_reader.label_map_path = original_label_map_path
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs, label_map_path=new_label_map_path)
self.assertEqual(original_label_map_path,
configs["train_input_config"].label_map_path)
self.assertEqual(original_label_map_path,
configs["eval_input_config"].label_map_path)
def testNewMaskType(self):
"""Tests that mask type can be overwritten in input readers."""
original_mask_type = input_reader_pb2.NUMERICAL_MASKS
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for dealing with writing json strings.
json_utils wraps json.dump and json.dumps so that they can be used to safely
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.image.understanding.object_detection.utils.json_utils."""
import os
......
......@@ -142,6 +142,7 @@ def manual_stepping(global_step, boundaries, rates):
if len(rates) != len(boundaries) + 1:
raise ValueError('Number of provided learning rates must exceed '
'number of boundary points by exactly 1.')
if not boundaries:
  return tf.constant(rates[0])
step_boundaries = tf.constant(boundaries, tf.int32)
num_boundaries = len(boundaries)
learning_rates = tf.constant(rates, tf.float32)
......
......@@ -75,5 +75,21 @@ class LearningSchedulesTest(test_case.TestCase):
exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
self.assertAllClose(output_rates, exp_rates)
def testManualSteppingWithZeroBoundaries(self):
def graph_fn(global_step):
boundaries = []
rates = [0.01]
learning_rate = learning_schedules.manual_stepping(
global_step, boundaries, rates)
return (learning_rate,)
output_rates = [
self.execute(graph_fn, [np.array(i).astype(np.int64)])
for i in range(4)
]
exp_rates = [0.01] * 4
self.assertAllClose(output_rates, exp_rates)
if __name__ == '__main__':
tf.test.main()
......@@ -19,7 +19,6 @@ Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list
......
......@@ -19,7 +19,6 @@ Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list_ops
......
......@@ -224,7 +224,7 @@ def padded_one_hot_encoding(indices, depth, left_pad):
ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth` are
either negative or non-integers.
TODO: add runtime checks for depth and indices.
TODO(rathodv): add runtime checks for depth and indices.
"""
if depth < 0 or not isinstance(depth, six.integer_types):
raise ValueError('`depth` must be a non-negative integer.')
......@@ -474,7 +474,7 @@ def normalize_to_target(inputs,
Note that the rank of `inputs` must be known and the dimension to which
normalization is to be applied should be statically defined.
TODO: Add option to scale by L2 norm of the entire input.
TODO(jonathanhuang): Add option to scale by L2 norm of the entire input.
Args:
inputs: A `Tensor` of arbitrary size.
......@@ -704,7 +704,7 @@ def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
Returns:
A tf.float32 tensor of size [num_masks, image_height, image_width].
"""
# TODO: Make this a public function.
# TODO(rathodv): Make this a public function.
def transform_boxes_relative_to_boxes(boxes, reference_boxes):
boxes = tf.reshape(boxes, [-1, 2, 2])
min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
......@@ -820,3 +820,127 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
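# The one-hot indicator matrix turns the gather into a matrix product:
# row i of indicator_matrix selects params2d[indices[i], :].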
gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
return tf.reshape(gathered_result_flattened,
tf.stack(indices_shape + params_shape[1:]))
def matmul_crop_and_resize(image, boxes, crop_size, scope=None):
"""Matrix multiplication based implementation of the crop and resize op.
Extracts crops from the input image tensor and bilinearly resizes them
(possibly with aspect ratio change) to a common output size specified by
crop_size. This is more general than the crop_to_bounding_box op which
extracts a fixed size slice from the input image and does not allow
resizing or aspect ratio change.
Returns a tensor with crops from the input image at positions defined at
the bounding box locations in boxes. The cropped boxes are all resized
(with bilinear interpolation) to a fixed size = `[crop_height, crop_width]`.
The result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`.
Running time complexity:
O((# channels) * (# boxes) * (crop_size)^2 * M), where M is the number
of pixels of the longer edge of the image.
Note that this operation is meant to replicate the behavior of the standard
tf.image.crop_and_resize operation but there are a few differences.
Specifically:
1) The extrapolation value (the values that are interpolated from outside
the bounds of the image window) is always zero
2) Only XLA supported operations are used (e.g., matrix multiplication).
3) There is no `box_indices` argument --- to run this op on multiple images,
one must currently call this op independently on each image.
4) All shapes and the `crop_size` parameter are assumed to be statically
defined. Moreover, the number of boxes must be strictly nonzero.
Args:
image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
`int16`, `int32`, `int64`, `half`, `float32`, `float64`.
A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
Both `image_height` and `image_width` need to be positive.
boxes: A `Tensor` of type `float32`.
A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
specifies the coordinates of a box in the `box_ind[i]` image and is
specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
coordinate value of `y` is mapped to the image coordinate at
`y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
height is mapped to `[0, image_height - 1]` in image height coordinates.
We do allow y1 > y2, in which case the sampled crop is an up-down flipped
version of the original image. The width dimension is treated similarly.
Normalized coordinates outside the `[0, 1]` range are allowed, in which
case the input image values are extrapolated with zeros (see difference 1
above).
crop_size: A list of two integers `[crop_height, crop_width]`. All
cropped image patches are resized to this size. The aspect ratio of the
image content is not preserved. Both `crop_height` and `crop_width` need
to be positive.
scope: A name for the operation (optional).
Returns:
A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`
Raises:
ValueError: if image tensor does not have shape
`[1, image_height, image_width, depth]` and all dimensions statically
defined.
ValueError: if boxes tensor does not have shape `[num_boxes, 4]` where
num_boxes > 0.
ValueError: if crop_size is not a list of two positive integers
"""
img_shape = image.shape.as_list()
boxes_shape = boxes.shape.as_list()
_, img_height, img_width, _ = img_shape
if not isinstance(crop_size, list) or len(crop_size) != 2:
raise ValueError('`crop_size` must be a list of length 2')
dimensions = img_shape + crop_size + boxes_shape
if not all([isinstance(dim, int) for dim in dimensions]):
raise ValueError('all input shapes must be statically defined')
if len(boxes_shape) != 2 or boxes_shape[1] != 4:
  raise ValueError('`boxes` should have shape `[num_boxes, 4]`')
if len(img_shape) != 4 or img_shape[0] != 1:
  raise ValueError('image should have shape '
                   '`[1, image_height, image_width, depth]`')
num_crops = boxes_shape[0]
if not num_crops > 0:
raise ValueError('number of boxes must be > 0')
if not (crop_size[0] > 0 and crop_size[1] > 0):
raise ValueError('`crop_size` must be a list of two positive integers.')
def _lin_space_weights(num, img_size):
if num > 1:
alpha = (img_size - 1) / float(num - 1)
indices = np.reshape(np.arange(num), (1, num))
start_weights = alpha * (num - 1 - indices)
stop_weights = alpha * indices
else:
start_weights = num * [.5 * (img_size - 1)]
stop_weights = num * [.5 * (img_size - 1)]
return (tf.constant(start_weights, dtype=tf.float32),
tf.constant(stop_weights, dtype=tf.float32))
with tf.name_scope(scope, 'MatMulCropAndResize'):
y1_weights, y2_weights = _lin_space_weights(crop_size[0], img_height)
x1_weights, x2_weights = _lin_space_weights(crop_size[1], img_width)
[y1, x1, y2, x2] = tf.split(value=boxes, num_or_size_splits=4, axis=1)
# Pixel centers of input image and grid points along height and width
image_idx_h = tf.constant(
np.reshape(np.arange(img_height), (1, 1, img_height)), dtype=tf.float32)
image_idx_w = tf.constant(
np.reshape(np.arange(img_width), (1, 1, img_width)), dtype=tf.float32)
grid_pos_h = tf.expand_dims(y1 * y1_weights + y2 * y2_weights, 2)
grid_pos_w = tf.expand_dims(x1 * x1_weights + x2 * x2_weights, 2)
# Create kernel matrices of pairwise kernel evaluations between pixel
# centers of image and grid points.
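# relu(1 - |p - g|) is the bilinear 'tent' kernel: each pixel center p gets
# weight 1 - |p - g| when it lies within one pixel of grid point g and zero
# otherwise, so each row of the matmuls below performs linear interpolation
# between the two nearest pixel centers.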
kernel_h = tf.nn.relu(1 - tf.abs(image_idx_h - grid_pos_h))
kernel_w = tf.nn.relu(1 - tf.abs(image_idx_w - grid_pos_w))
# TODO(jonathanhuang): investigate whether all channels can be processed
# without the explicit unstack --- possibly with a permute and map_fn call.
result_channels = []
for channel in tf.unstack(image, axis=3):
result_channels.append(
tf.matmul(
tf.matmul(kernel_h, tf.tile(channel, [num_crops, 1, 1])),
kernel_w, transpose_b=True))
return tf.stack(result_channels, axis=3)
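A hedged sanity-check sketch (TF1-style session; not from the diff): for an in-bounds box on a single image the result should match the standard tf.image.crop_and_resize, with box_ind pinned to image 0 per difference (3) above:

import numpy as np
import tensorflow as tf
from object_detection.utils import ops

image = tf.constant(np.random.rand(1, 8, 8, 3), dtype=tf.float32)
boxes = tf.constant([[0.1, 0.2, 0.7, 0.9]], dtype=tf.float32)
matmul_crops = ops.matmul_crop_and_resize(image, boxes, crop_size=[4, 4])
reference_crops = tf.image.crop_and_resize(
    image, boxes, box_ind=tf.zeros([1], dtype=tf.int32), crop_size=[4, 4])
with tf.Session() as sess:
  ours, reference = sess.run([matmul_crops, reference_crops])
np.testing.assert_allclose(ours, reference, atol=1e-5)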
......@@ -1171,12 +1171,15 @@ class NearestNeighborUpsamplingTest(test_case.TestCase):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
tf_op_output = tf.image.resize_images(
inputs, [4, 4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
return (custom_op_output, tf_op_output)
inputs = np.reshape(np.arange(2**4), [2, 2, 2, 2])
(custom_op_output, tf_op_output) = self.execute(graph_fn, [inputs])
self.assertAllClose(custom_op_output, tf_op_output)
return custom_op_output
inputs = np.reshape(np.arange(4).astype(np.float32), [1, 2, 2, 1])
custom_op_output = self.execute(graph_fn, [inputs])
expected_output = [[[[0], [0], [1], [1]],
[[0], [0], [1], [1]],
[[2], [2], [3], [3]],
[[2], [2], [3], [3]]]]
self.assertAllClose(custom_op_output, expected_output)
class MatmulGatherOnZerothAxis(test_case.TestCase):
......@@ -1190,7 +1193,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[5, 6, 7, 8],
[9, 10, 11, 12],
[0, 1, 0, 0]], dtype=np.float32)
indices = np.array([2, 2, 1])
indices = np.array([2, 2, 1], dtype=np.int32)
expected_output = np.array([[9, 10, 11, 12], [9, 10, 11, 12], [5, 6, 7, 8]])
gather_output = self.execute(graph_fn, [params, indices])
self.assertAllClose(gather_output, expected_output)
......@@ -1204,7 +1207,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[[5, 6], [7, 8]],
[[9, 10], [11, 12]],
[[0, 1], [0, 0]]], dtype=np.float32)
indices = np.array([0, 3, 1])
indices = np.array([0, 3, 1], dtype=np.int32)
expected_output = np.array([[[1, 2], [3, 4]],
[[0, 1], [0, 0]],
[[5, 6], [7, 8]]])
......@@ -1220,7 +1223,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[5, 6, 7, 8],
[9, 10, 11, 12],
[0, 1, 0, 0]], dtype=np.float32)
indices = np.array([0, 0, 0, 0, 0, 0])
indices = np.array([0, 0, 0, 0, 0, 0], dtype=np.int32)
expected_output = np.array(6*[[1, 2, 3, 4]])
gather_output = self.execute(graph_fn, [params, indices])
self.assertAllClose(gather_output, expected_output)
......@@ -1241,5 +1244,109 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
params_placeholder: params, indices_placeholder: indices})
self.assertAllClose(gather_output, expected_output)
class OpsTestMatMulCropAndResize(test_case.TestCase):
def testMatMulCropAndResize2x2To1x1(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
expected_output = [[[[2.5]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize2x2To1x1Flipped(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[1, 1, 0, 0]], dtype=np.float32)
expected_output = [[[[2.5]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize2x2To3x3(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
expected_output = [[[[1.0], [1.5], [2.0]],
[[2.0], [2.5], [3.0]],
[[3.0], [3.5], [4.0]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize2x2To3x3Flipped(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[1, 1, 0, 0]], dtype=np.float32)
expected_output = [[[[4.0], [3.5], [3.0]],
[[3.0], [2.5], [2.0]],
[[2.0], [1.5], [1.0]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize3x3To2x2(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])
image = np.array([[[[1], [2], [3]],
[[4], [5], [6]],
[[7], [8], [9]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5]], dtype=np.float32)
expected_output = [[[[1], [3]], [[7], [9]]],
[[[1], [2]], [[4], [5]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize3x3To2x2MultiChannel(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])
image = np.array([[[[1, 0], [2, 1], [3, 2]],
[[4, 3], [5, 4], [6, 5]],
[[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5]], dtype=np.float32)
expected_output = [[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
[[[1, 0], [2, 1]], [[4, 3], [5, 4]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize3x3To2x2Flipped(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])
image = np.array([[[[1], [2], [3]],
[[4], [5], [6]],
[[7], [8], [9]]]], dtype=np.float32)
boxes = np.array([[1, 1, 0, 0],
[.5, .5, 0, 0]], dtype=np.float32)
expected_output = [[[[9], [7]], [[3], [1]]],
[[[5], [4]], [[2], [1]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testInvalidInputShape(self):
image = tf.constant([[[1], [2]], [[3], [4]]], dtype=tf.float32)
boxes = tf.constant([[-1, -1, 1, 1]], dtype=tf.float32)
crop_size = [4, 4]
with self.assertRaises(ValueError):
_ = ops.matmul_crop_and_resize(image, boxes, crop_size)
if __name__ == '__main__':
tf.test.main()
......@@ -152,7 +152,7 @@ def static_or_dynamic_map_fn(fn, elems, dtype=None,
Tensors or lists of Tensors). Likewise, the output of `fn` can only be a
Tensor or list of Tensors.
TODO: make this function fully interchangeable with tf.map_fn.
TODO(jonathanhuang): make this function fully interchangeable with tf.map_fn.
Args:
fn: The callable to be performed. It accepts one argument, which will have
......