add inference tools for Open Image dataset.

e836fc63 · Vivek Rathod · 11e9c7ad · e836fc63 · e836fc63 · e836fc63
Commit e836fc63 authored Nov 17, 2017 by Vivek Rathod
4 changed files
--- a/research/object_detection/inference/BUILD
+++ b/research/object_detection/inference/BUILD
+# Tensorflow Object Detection API: main runnables.
+# Every target declared in this package is publicly visible by default.
+package(
+    default_visibility = ["//visibility:public"],
+)
+licenses(["notice"])
+# Apache 2.0
+# Library target for detection_inference.py; depends on TensorFlow and the
+# object detection standard_fields target.
+py_library(
+    name = "detection_inference",
+    srcs = ["detection_inference.py"],
+    deps = [
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:standard_fields",
+    ],
+)
+# Unit test for the detection_inference library; additionally depends on PIL,
+# numpy and dataset_util.
+py_test(
+    name = "detection_inference_test",
+    srcs = ["detection_inference_test.py"],
+    deps = [
+        ":detection_inference",
+        "//third_party/py/PIL:pil",
+        "//third_party/py/numpy",
+        "//tensorflow",
+        "//tensorflow_models/object_detection/core:standard_fields",
+        "//tensorflow_models/object_detection/utils:dataset_util",
+    ],
+)
+# Binary target for infer_detections.py, built on the detection_inference
+# library above.
+py_binary(
+    name = "infer_detections",
+    srcs = ["infer_detections.py"],
+    deps = [
+        ":detection_inference",
+        "//tensorflow",
+    ],
+)
--- a/research/object_detection/inference/detection_inference.py
+++ b/research/object_detection/inference/detection_inference.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utility functions for detection inference."""
+from __future__ import division
+import tensorflow as tf
+from object_detection.core import standard_fields
+def build_input(tfrecord_paths):
+  """Builds the graph's input.
+  Args:
+    tfrecord_paths: List of paths to the input TFRecords
+  Returns:
+    serialized_example_tensor: The next serialized example. String scalar Tensor
+    image_tensor: The decoded image of the example. Uint8 tensor,
+        shape=[1, None, None,3]
+  """
+  filename_queue = tf.train.string_input_producer(
+      tfrecord_paths, shuffle=False, num_epochs=1)
+  tf_record_reader = tf.TFRecordReader()
+  _, serialized_example_tensor = tf_record_reader.read(filename_queue)
+  features = tf.parse_single_example(
+      serialized_example_tensor,
+      features={
+          standard_fields.TfExampleFields.image_encoded:
+              tf.FixedLenFeature([], tf.string),
+      })
+  encoded_image = features[standard_fields.TfExampleFields.image_encoded]
+  image_tensor = tf.image.decode_image(encoded_image, channels=3)
+  image_tensor.set_shape([None, None, 3])
+  image_tensor = tf.expand_dims(image_tensor, 0)
+  return serialized_example_tensor, image_tensor
+def build_inference_graph(image_tensor, inference_graph_path):
+  """Loads the inference graph and connects it to the input image.
+  Args:
+    image_tensor: The input image. uint8 tensor, shape=[1, None, None, 3]
+    inference_graph_path: Path to the inference graph with embedded weights
+  Returns:
+    detected_boxes_tensor: Detected boxes. Float tensor,
+        shape=[num_detections, 4]
+    detected_scores_tensor: Detected scores. Float tensor,
+        shape=[num_detections]
+    detected_labels_tensor: Detected labels. Int64 tensor,
+        shape=[num_detections]
+  """
+  with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file:
+    graph_content = graph_def_file.read()
+  graph_def = tf.GraphDef()
+  graph_def.MergeFromString(graph_content)
+  tf.import_graph_def(
+      graph_def, name='', input_map={'image_tensor': image_tensor})
+  g = tf.get_default_graph()
+  num_detections_tensor = tf.squeeze(
+      g.get_tensor_by_name('num_detections:0'), 0)
+  num_detections_tensor = tf.cast(num_detections_tensor, tf.int32)
+  detected_boxes_tensor = tf.squeeze(
+      g.get_tensor_by_name('detection_boxes:0'), 0)
+  detected_boxes_tensor = detected_boxes_tensor[:num_detections_tensor]
+  detected_scores_tensor = tf.squeeze(
+      g.get_tensor_by_name('detection_scores:0'), 0)
+  detected_scores_tensor = detected_scores_tensor[:num_detections_tensor]
+  detected_labels_tensor = tf.squeeze(
+      g.get_tensor_by_name('detection_classes:0'), 0)
+  detected_labels_tensor = tf.cast(detected_labels_tensor, tf.int64)
+  detected_labels_tensor = detected_labels_tensor[:num_detections_tensor]
+  return detected_boxes_tensor, detected_scores_tensor, detected_labels_tensor
+def infer_detections_and_add_to_example(
+    serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor,
+    detected_labels_tensor, discard_image_pixels):
+  """Runs the supplied tensors and adds the inferred detections to the example.
+  Args:
+    serialized_example_tensor: Serialized TF example. Scalar string tensor
+    detected_boxes_tensor: Detected boxes. Float tensor,
+        shape=[num_detections, 4]
+    detected_scores_tensor: Detected scores. Float tensor,
+        shape=[num_detections]
+    detected_labels_tensor: Detected labels. Int64 tensor,
+        shape=[num_detections]
+    discard_image_pixels: If true, discards the image from the result
+  Returns:
+    The de-serialized TF example augmented with the inferred detections.
+  """
+  tf_example = tf.train.Example()
+  (serialized_example, detected_boxes, detected_scores,
+   detected_classes) = tf.get_default_session().run([
+       serialized_example_tensor, detected_boxes_tensor, detected_scores_tensor,
+       detected_labels_tensor
+   ])
+  detected_boxes = detected_boxes.T
+  tf_example.ParseFromString(serialized_example)
+  feature = tf_example.features.feature
+  feature[standard_fields.TfExampleFields.
+          detection_score].float_list.value[:] = detected_scores
+  feature[standard_fields.TfExampleFields.
+          detection_bbox_ymin].float_list.value[:] = detected_boxes[0]
+  feature[standard_fields.TfExampleFields.
+          detection_bbox_xmin].float_list.value[:] = detected_boxes[1]
+  feature[standard_fields.TfExampleFields.
+          detection_bbox_ymax].float_list.value[:] = detected_boxes[2]
+  feature[standard_fields.TfExampleFields.
+          detection_bbox_xmax].float_list.value[:] = detected_boxes[3]
+  feature[standard_fields.TfExampleFields.
+          detection_class_label].int64_list.value[:] = detected_classes
+  if discard_image_pixels:
+    del feature[standard_fields.TfExampleFields.image_encoded]
+  return tf_example
--- a/research/object_detection/inference/detection_inference_test.py
+++ b/research/object_detection/inference/detection_inference_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Tests for detection_inference.py."""
+import os
+import StringIO
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+from object_detection.core import standard_fields
+from object_detection.inference import detection_inference
+from object_detection.utils import dataset_util
+def get_mock_tfrecord_path():
+  return os.path.join(tf.test.get_temp_dir(), 'mock.tfrec')
+def create_mock_tfrecord():
+  pil_image = Image.fromarray(np.array([[[123, 0, 0]]], dtype=np.uint8), 'RGB')
+  image_output_stream = StringIO.StringIO()
+  pil_image.save(image_output_stream, format='png')
+  encoded_image = image_output_stream.getvalue()
+  feature_map = {
+      'test_field':
+          dataset_util.float_list_feature([1, 2, 3, 4]),
+      standard_fields.TfExampleFields.image_encoded:
+          dataset_util.bytes_feature(encoded_image),
+  }
+  tf_example = tf.train.Example(features=tf.train.Features(feature=feature_map))
+  with tf.python_io.TFRecordWriter(get_mock_tfrecord_path()) as writer:
+    writer.write(tf_example.SerializeToString())
+def get_mock_graph_path():
+  return os.path.join(tf.test.get_temp_dir(), 'mock_graph.pb')
+def create_mock_graph():
+  g = tf.Graph()
+  with g.as_default():
+    in_image_tensor = tf.placeholder(
+        tf.uint8, shape=[1, None, None, 3], name='image_tensor')
+    tf.constant([2.0], name='num_detections')
+    tf.constant(
+        [[[0, 0.8, 0.7, 1], [0.1, 0.2, 0.8, 0.9], [0.2, 0.3, 0.4, 0.5]]],
+        name='detection_boxes')
+    tf.constant([[0.1, 0.2, 0.3]], name='detection_scores')
+    tf.identity(
+        tf.constant([[1.0, 2.0, 3.0]]) *
+        tf.reduce_sum(tf.cast(in_image_tensor, dtype=tf.float32)),
+        name='detection_classes')
+    graph_def = g.as_graph_def()
+  with tf.gfile.Open(get_mock_graph_path(), 'w') as fl:
+    fl.write(graph_def.SerializeToString())
+class InferDetectionsTests(tf.test.TestCase):
+  def test_simple(self):
+    create_mock_graph()
+    create_mock_tfrecord()
+    serialized_example_tensor, image_tensor = detection_inference.build_input(
+        [get_mock_tfrecord_path()])
+    self.assertAllEqual(image_tensor.get_shape().as_list(), [1, None, None, 3])
+    (detected_boxes_tensor, detected_scores_tensor,
+     detected_labels_tensor) = detection_inference.build_inference_graph(
+         image_tensor, get_mock_graph_path())
+    with self.test_session(use_gpu=False) as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(tf.local_variables_initializer())
+      tf.train.start_queue_runners()
+      tf_example = detection_inference.infer_detections_and_add_to_example(
+          serialized_example_tensor, detected_boxes_tensor,
+          detected_scores_tensor, detected_labels_tensor, False)
+    self.assertProtoEquals(r"""
+        features {
+          feature {
+            key: "image/detection/bbox/ymin"
+            value { float_list { value: [0.0, 0.1] } } }
+          feature {
+            key: "image/detection/bbox/xmin"
+            value { float_list { value: [0.8, 0.2] } } }
+          feature {
+            key: "image/detection/bbox/ymax"
+            value { float_list { value: [0.7, 0.8] } } }
+          feature {
+            key: "image/detection/bbox/xmax"
+            value { float_list { value: [1.0, 0.9] } } }
+          feature {
+            key: "image/detection/label"
+            value { int64_list { value: [123, 246] } } }
+          feature {
+            key: "image/detection/score"
+            value { float_list { value: [0.1, 0.2] } } }
+          feature {
+            key: "image/encoded"
+            value { bytes_list { value:
+              "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\001\000\000"
+              "\000\001\010\002\000\000\000\220wS\336\000\000\000\022IDATx"
+              "\234b\250f`\000\000\000\000\377\377\003\000\001u\000|gO\242"
+              "\213\000\000\000\000IEND\256B`\202" } } }
+          feature {
+            key: "test_field"
+            value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } }
+    """, tf_example)
+  def test_discard_image(self):
+    create_mock_graph()
+    create_mock_tfrecord()
+    serialized_example_tensor, image_tensor = detection_inference.build_input(
+        [get_mock_tfrecord_path()])
+    (detected_boxes_tensor, detected_scores_tensor,
+     detected_labels_tensor) = detection_inference.build_inference_graph(
+         image_tensor, get_mock_graph_path())
+    with self.test_session(use_gpu=False) as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(tf.local_variables_initializer())
+      tf.train.start_queue_runners()
+      tf_example = detection_inference.infer_detections_and_add_to_example(
+          serialized_example_tensor, detected_boxes_tensor,
+          detected_scores_tensor, detected_labels_tensor, True)
+    self.assertProtoEquals(r"""
+        features {
+          feature {
+            key: "image/detection/bbox/ymin"
+            value { float_list { value: [0.0, 0.1] } } }
+          feature {
+            key: "image/detection/bbox/xmin"
+            value { float_list { value: [0.8, 0.2] } } }
+          feature {
+            key: "image/detection/bbox/ymax"
+            value { float_list { value: [0.7, 0.8] } } }
+          feature {
+            key: "image/detection/bbox/xmax"
+            value { float_list { value: [1.0, 0.9] } } }
+          feature {
+            key: "image/detection/label"
+            value { int64_list { value: [123, 246] } } }
+          feature {
+            key: "image/detection/score"
+            value { float_list { value: [0.1, 0.2] } } }
+          feature {
+            key: "test_field"
+            value { float_list { value: [1.0, 2.0, 3.0, 4.0] } } } }
+    """, tf_example)
+# Hand control to the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/inference/infer_detections.py
+++ b/research/object_detection/inference/infer_detections.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Infers detections on a TFRecord of TFExamples given an inference graph.
+Example usage:
+  ./infer_detections \
+    --input_tfrecord_paths=/path/to/input/tfrecord1,/path/to/input/tfrecord2 \
+    --output_tfrecord_path=/path/to/output/detections.tfrecord \
+    --inference_graph=/path/to/frozen_weights_inference_graph.pb
+The output is a TFRecord of TFExamples. Each TFExample from the input is first
+augmented with detections from the inference graph and then copied to the
+output.
+The input and output nodes of the inference graph are expected to have the same
+types, shapes, and semantics, as the input and output nodes of graphs produced
+by export_inference_graph.py, when run with --input_type=image_tensor.
+The script can also discard the image pixels in the output. This greatly
+reduces the output size and can potentially accelerate reading data in
+subsequent processing steps that don't require the images (e.g. computing
+metrics).
+"""
+import itertools
+import tensorflow as tf
+from object_detection.inference import detection_inference
+tf.flags.DEFINE_string('input_tfrecord_paths', None,
+                       'A comma separated list of paths to input TFRecords.')
+tf.flags.DEFINE_string('output_tfrecord_path', None,
+                       'Path to the output TFRecord.')
+tf.flags.DEFINE_string('inference_graph', None,
+                       'Path to the inference graph with embedded weights.')
+tf.flags.DEFINE_boolean('discard_image_pixels', False,
+                        'Discards the images in the output TFExamples. This'
+                        ' significantly reduces the output size and is useful'
+                        ' if the subsequent tools don\'t need access to the'
+                        ' images (e.g. when computing evaluation measures).')
+FLAGS = tf.flags.FLAGS
+def main(_):
+  tf.logging.set_verbosity(tf.logging.INFO)
+  required_flags = ['input_tfrecord_paths', 'output_tfrecord_path',
+                    'inference_graph']
+  for flag_name in required_flags:
+    if not getattr(FLAGS, flag_name):
+      raise ValueError('Flag --{} is required'.format(flag_name))
+  with tf.Session() as sess:
+    input_tfrecord_paths = [
+        v for v in FLAGS.input_tfrecord_paths.split(',') if v]
+    tf.logging.info('Reading input from %d files', len(input_tfrecord_paths))
+    serialized_example_tensor, image_tensor = detection_inference.build_input(
+        input_tfrecord_paths)
+    tf.logging.info('Reading graph and building model...')
+    (detected_boxes_tensor, detected_scores_tensor,
+     detected_labels_tensor) = detection_inference.build_inference_graph(
+         image_tensor, FLAGS.inference_graph)
+    tf.logging.info('Running inference and writing output to {}'.format(
+        FLAGS.output_tfrecord_path))
+    sess.run(tf.local_variables_initializer())
+    tf.train.start_queue_runners()
+    with tf.python_io.TFRecordWriter(
+        FLAGS.output_tfrecord_path) as tf_record_writer:
+      try:
+        for counter in itertools.count():
+          tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 10,
+                                 counter)
+          tf_example = detection_inference.infer_detections_and_add_to_example(
+              serialized_example_tensor, detected_boxes_tensor,
+              detected_scores_tensor, detected_labels_tensor,
+              FLAGS.discard_image_pixels)
+          tf_record_writer.write(tf_example.SerializeToString())
+      except tf.errors.OutOfRangeError:
+        tf.logging.info('Finished processing records')
+# Script entry point: start the program through tf.app.run().
+if __name__ == '__main__':
+  tf.app.run()