Commit f282f6ef authored by Alexander Gorban's avatar Alexander Gorban
Browse files

Merge branch 'master' of github.com:tensorflow/models

parents 58a5da7b a2970b03
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Tool to export an object detection model for inference.
Prepares an object detection tensorflow graph for inference using model
configuration and an optional trained checkpoint. Outputs either an inference
graph or a SavedModel (https://tensorflow.github.io/serving/serving_basic.html).
The inference graph contains one of three input nodes depending on the user
specified option.
* `image_tensor`: Accepts a uint8 4-D tensor of shape [1, None, None, 3]
* `encoded_image_string_tensor`: Accepts a scalar string tensor of encoded PNG
or JPEG image.
* `tf_example`: Accepts a serialized TFExample proto. The batch size in this
case is always 1.
and the following output nodes returned by the model.postprocess(..):
* `num_detections`: Outputs float32 tensors of the form [batch]
that specifies the number of valid boxes per image in the batch.
* `detection_boxes`: Outputs float32 tensors of the form
[batch, num_boxes, 4] containing detected boxes.
* `detection_scores`: Outputs float32 tensors of the form
[batch, num_boxes] containing class scores for the detections.
* `detection_classes`: Outputs float32 tensors of the form
[batch, num_boxes] containing classes for the detections.
* `detection_masks`: Outputs float32 tensors of the form
[batch, num_boxes, mask_height, mask_width] containing predicted instance
masks for each box if it is present in the dictionary of postprocessed
tensors returned by the model.
Note that currently `batch` is always 1, but we will support `batch` > 1 in
the future.
Optionally, one can freeze the graph by converting the weights in the provided
checkpoint as graph constants thereby eliminating the need to use a checkpoint
file during inference.
Note that this tool uses `use_moving_averages` from eval_config to decide
which weights to freeze.
Example Usage:
--------------
python export_inference_graph.py \
--input_type image_tensor \
--pipeline_config_path path/to/ssd_inception_v2.config \
--checkpoint_path path/to/model-ckpt \
--inference_graph_path path/to/inference_graph.pb
"""
import tensorflow as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2
# Alias for TF-Slim; it does not appear to be used in the visible part of this
# script.
slim = tf.contrib.slim
# Command-line flags read by main() through FLAGS.
flags = tf.app.flags
# Selects which input placeholder the exported graph exposes.
flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
'one of [`image_tensor`, `encoded_image_string_tensor`, '
'`tf_example`]')
# Text-format pipeline config describing the model to export (required).
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
# Optional checkpoint whose weights are frozen into the exported graph.
flags.DEFINE_string('checkpoint_path', '', 'Optional path to checkpoint file. '
'If provided, bakes the weights from the checkpoint into '
'the graph.')
# Destination of the exported graph (required).
flags.DEFINE_string('inference_graph_path', '', 'Path to write the output '
'inference graph.')
# Switches the output format from a GraphDef file to a SavedModel.
flags.DEFINE_bool('export_as_saved_model', False, 'Whether the exported graph '
'should be saved as a SavedModel')
FLAGS = flags.FLAGS
def main(_):
  """Exports an inference graph according to the command-line flags.

  Reads the text-format pipeline config named by --pipeline_config_path and
  forwards it, together with the remaining flags, to
  exporter.export_inference_graph.

  Args:
    _: Unused positional arguments forwarded by tf.app.run().

  Raises:
    ValueError: If --pipeline_config_path, --inference_graph_path or
      --input_type is empty.
  """
  # Explicit checks instead of `assert`, which is stripped when Python runs
  # with -O and would let an empty flag through.
  if not FLAGS.pipeline_config_path:
    raise ValueError('TrainEvalPipelineConfig missing.')
  if not FLAGS.inference_graph_path:
    raise ValueError('Inference graph path missing.')
  if not FLAGS.input_type:
    raise ValueError('Input type missing.')

  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
    text_format.Merge(f.read(), pipeline_config)

  exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
                                  FLAGS.checkpoint_path,
                                  FLAGS.inference_graph_path,
                                  FLAGS.export_as_saved_model)


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to export object detection inference graph."""
import logging
import os
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import importer
from tensorflow.python.platform import gfile
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.training import saver as saver_lib
from object_detection.builders import model_builder
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
# Alias for TF-Slim; no use of it is visible in this part of the module.
slim = tf.contrib.slim
# TODO: Replace with freeze_graph.freeze_graph_with_def_protos when
# newer version of Tensorflow becomes more common.
def freeze_graph_with_def_protos(
    input_graph_def,
    input_saver_def,
    input_checkpoint,
    output_node_names,
    restore_op_name,
    filename_tensor_name,
    clear_devices,
    initializer_nodes,
    variable_names_blacklist=''):
  """Converts all variables in a graph and checkpoint into constants.

  Args:
    input_graph_def: GraphDef whose variables should be frozen. Its nodes are
      modified in place when `clear_devices` is true.
    input_saver_def: Optional SaverDef used to restore the checkpoint. When
      falsy, a saver is built from the checkpoint's variable names instead.
    input_checkpoint: Checkpoint path (or prefix for the Saver V2 format).
    output_node_names: Comma-separated names of the nodes to keep as outputs.
    restore_op_name: Unused; kept for signature compatibility.
    filename_tensor_name: Unused; kept for signature compatibility.
    clear_devices: If true, clears the device field of every node.
    initializer_nodes: Optional initializer ops to run after restoring. Either
      a comma-separated string of node names or an already-split sequence.
    variable_names_blacklist: Comma-separated variable names that must not be
      converted to constants.

  Returns:
    A GraphDef in which the restored variables are replaced by constants.

  Raises:
    ValueError: If the checkpoint does not exist or no output node names were
      supplied.
  """
  del restore_op_name, filename_tensor_name  # Unused by updated loading code.

  # 'input_checkpoint' may be a prefix if we're using Saver V2 format
  if not saver_lib.checkpoint_exists(input_checkpoint):
    raise ValueError(
        'Input checkpoint "' + input_checkpoint + '" does not exist!')

  if not output_node_names:
    raise ValueError(
        'You must supply the name of a node to --output_node_names.')

  # Remove all the explicit device specifications for this node. This helps to
  # make the graph more portable.
  if clear_devices:
    for node in input_graph_def.node:
      node.device = ''

  _ = importer.import_graph_def(input_graph_def, name='')

  with session.Session() as sess:
    if input_saver_def:
      saver = saver_lib.Saver(saver_def=input_saver_def)
      saver.restore(sess, input_checkpoint)
    else:
      var_list = {}
      reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
      var_to_shape_map = reader.get_variable_to_shape_map()
      for key in var_to_shape_map:
        try:
          tensor = sess.graph.get_tensor_by_name(key + ':0')
        except KeyError:
          # This tensor doesn't exist in the graph (for example it's
          # 'global_step' or a similar housekeeping element) so skip it.
          continue
        var_list[key] = tensor
      saver = saver_lib.Saver(var_list=var_list)
      saver.restore(sess, input_checkpoint)
      # NOTE(review): this block is assumed to belong to the saver-less branch,
      # as in TensorFlow's freeze_graph tool -- confirm against the original
      # layout.
      if initializer_nodes:
        # A comma-separated string names several ops; Session.run cannot
        # resolve it as a single fetch, so split it into individual names.
        if hasattr(initializer_nodes, 'split'):
          initializer_nodes = initializer_nodes.replace(' ', '').split(',')
        sess.run(initializer_nodes)

    variable_names_blacklist = (variable_names_blacklist.split(',') if
                                variable_names_blacklist else None)
    output_graph_def = graph_util.convert_variables_to_constants(
        sess,
        input_graph_def,
        output_node_names.split(','),
        variable_names_blacklist=variable_names_blacklist)

  return output_graph_def
def get_frozen_graph_def(inference_graph_def, use_moving_averages,
                         input_checkpoint, output_node_names):
  """Freezes all variables in a graph definition.

  Args:
    inference_graph_def: GraphDef to freeze.
    use_moving_averages: If true, the saver restores through the mapping
      returned by ExponentialMovingAverage.variables_to_restore(); otherwise a
      default saver is used.
    input_checkpoint: Path of the checkpoint to read the weights from.
    output_node_names: Comma-separated names of the output nodes.

  Returns:
    The GraphDef returned by freeze_graph_with_def_protos.
  """
  if use_moving_averages:
    moving_averages = tf.train.ExponentialMovingAverage(0.0)
    restore_saver = tf.train.Saver(moving_averages.variables_to_restore())
  else:
    restore_saver = tf.train.Saver()

  return freeze_graph_with_def_protos(
      input_graph_def=inference_graph_def,
      input_saver_def=restore_saver.as_saver_def(),
      input_checkpoint=input_checkpoint,
      output_node_names=output_node_names,
      restore_op_name='save/restore_all',
      filename_tensor_name='save/Const:0',
      clear_devices=True,
      initializer_nodes='')
# TODO: Support batch tf example inputs.
def _tf_example_input_placeholder():
  """Builds the `tf_example` input node.

  Creates a scalar string placeholder named 'tf_example', decodes it with
  TfExampleDecoder and returns the decoded image with a leading axis added.

  Returns:
    The decoded image tensor expanded along axis 0.
  """
  serialized_example = tf.placeholder(
      tf.string, shape=[], name='tf_example')
  decoder = tf_example_decoder.TfExampleDecoder()
  decoded_fields = decoder.decode(serialized_example)
  return tf.expand_dims(decoded_fields[fields.InputDataFields.image], axis=0)
def _image_tensor_input_placeholder():
  """Builds the `image_tensor` input node.

  Returns:
    A uint8 placeholder named 'image_tensor' of shape [1, None, None, 3].
  """
  single_image_batch_shape = (1, None, None, 3)
  return tf.placeholder(
      dtype=tf.uint8, shape=single_image_batch_shape, name='image_tensor')
def _encoded_image_string_tensor_input_placeholder():
  """Builds the `encoded_image_string_tensor` input node.

  Creates a scalar string placeholder, decodes it as a 3-channel image and
  returns the decoded image with a leading axis added.

  Returns:
    The decoded image tensor expanded along axis 0.
  """
  encoded_image = tf.placeholder(
      dtype=tf.string, shape=[], name='encoded_image_string_tensor')
  decoded_image = tf.image.decode_image(encoded_image, channels=3)
  # The static shape is set explicitly right after decoding: height and width
  # unknown, three channels.
  decoded_image.set_shape((None, None, 3))
  return tf.expand_dims(decoded_image, axis=0)
# Maps each supported `input_type` string to the function that builds the
# corresponding input node.
input_placeholder_fn_map = dict(
    image_tensor=_image_tensor_input_placeholder,
    encoded_image_string_tensor=_encoded_image_string_tensor_input_placeholder,
    tf_example=_tf_example_input_placeholder,
)
def _add_output_tensor_nodes(postprocessed_tensors):
  """Adds named identity nodes for the detection outputs.

  Each output is wrapped in tf.identity so that it can be fetched by a fixed
  name. Class ids are shifted by a label id offset of 1 before being exposed.

  Args:
    postprocessed_tensors: a dictionary containing the following fields
      'detection_boxes': [batch, max_detections, 4]
      'detection_scores': [batch, max_detections]
      'detection_classes': [batch, max_detections]
      'detection_masks': [batch, max_detections, mask_height, mask_width]
        (optional).
      'num_detections': [batch]

  Returns:
    A dict keyed by 'detection_boxes', 'detection_scores',
    'detection_classes', 'num_detections' and, when masks were supplied,
    'detection_masks', holding the added identity tensors.
  """
  label_id_offset = 1
  # The shifted class tensor is created first, before any identity node.
  shifted_classes = (
      postprocessed_tensors.get('detection_classes') + label_id_offset)

  named_tensors = [
      ('detection_boxes', postprocessed_tensors.get('detection_boxes')),
      ('detection_scores', postprocessed_tensors.get('detection_scores')),
      ('detection_classes', shifted_classes),
      ('num_detections', postprocessed_tensors.get('num_detections')),
  ]
  masks = postprocessed_tensors.get('detection_masks')
  if masks is not None:
    named_tensors.append(('detection_masks', masks))

  outputs = {}
  for node_name, tensor in named_tensors:
    outputs[node_name] = tf.identity(tensor, name=node_name)
  return outputs
def _write_inference_graph(inference_graph_path,
                           checkpoint_path=None,
                           use_moving_averages=False,
                           output_node_names=(
                               'num_detections,detection_scores,'
                               'detection_boxes,detection_classes')):
  """Serializes the default graph to disk, optionally freezing its weights.

  Without a checkpoint the default graph is written unchanged in binary form.
  With a checkpoint the variables are first converted to constants via
  get_frozen_graph_def, so no checkpoint is needed at inference time.

  Args:
    inference_graph_path: Path to write inference graph.
    checkpoint_path: Optional path to the checkpoint file.
    use_moving_averages: Whether to export the original or the moving averages
      of the trainable variables from the checkpoint.
    output_node_names: Output tensor names, defaults are: num_detections,
      detection_scores, detection_boxes, detection_classes.
  """
  graph_def = tf.get_default_graph().as_graph_def()

  if not checkpoint_path:
    target_dir, target_name = os.path.split(inference_graph_path)
    tf.train.write_graph(graph_def, target_dir, target_name, as_text=False)
    return

  frozen_graph_def = get_frozen_graph_def(
      inference_graph_def=graph_def,
      use_moving_averages=use_moving_averages,
      input_checkpoint=checkpoint_path,
      output_node_names=output_node_names,
  )
  with gfile.GFile(inference_graph_path, 'wb') as f:
    f.write(frozen_graph_def.SerializeToString())
  logging.info('%d ops in the final graph.', len(frozen_graph_def.node))
def _write_saved_model(inference_graph_path, inputs, outputs,
                       checkpoint_path=None, use_moving_averages=False):
  """Writes SavedModel to disk.

  If checkpoint_path is given, the weights are baked into the graph, so no
  checkpoint files are needed during inference. If the model was trained with
  moving averages, setting use_moving_averages to true restores the moving
  averages; otherwise the original set of variables is restored. Without a
  checkpoint the default graph is exported unfrozen, mirroring
  _write_inference_graph.

  Args:
    inference_graph_path: Path to write inference graph.
    inputs: The input image tensor to use for detection.
    outputs: A tensor dictionary containing the outputs of a DetectionModel.
    checkpoint_path: Optional path to the checkpoint file.
    use_moving_averages: Whether to export the original or the moving averages
      of the trainable variables from the checkpoint.
  """
  # Start from the unfrozen graph so that a missing checkpoint no longer
  # passes None to tf.import_graph_def below.
  graph_def_to_export = tf.get_default_graph().as_graph_def()
  if checkpoint_path:
    output_node_names = ','.join(outputs.keys())
    graph_def_to_export = get_frozen_graph_def(
        inference_graph_def=graph_def_to_export,
        use_moving_averages=use_moving_averages,
        input_checkpoint=checkpoint_path,
        output_node_names=output_node_names
    )

  with tf.Graph().as_default():
    with session.Session() as sess:
      # NOTE(review): no `name` is passed here, and the tests look tensors up
      # under 'import/'. The signature below is built from tensors of the
      # caller's graph, so its tensor names may lack that prefix -- confirm
      # before changing, since the tests depend on the prefix.
      tf.import_graph_def(graph_def_to_export)

      builder = tf.saved_model.builder.SavedModelBuilder(inference_graph_path)

      tensor_info_inputs = {
          'inputs': tf.saved_model.utils.build_tensor_info(inputs)}
      tensor_info_outputs = {}
      for k, v in outputs.items():
        tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v)

      detection_signature = (
          tf.saved_model.signature_def_utils.build_signature_def(
              inputs=tensor_info_inputs,
              outputs=tensor_info_outputs,
              method_name=signature_constants.PREDICT_METHOD_NAME))

      builder.add_meta_graph_and_variables(
          sess, [tf.saved_model.tag_constants.SERVING],
          signature_def_map={
              signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                  detection_signature,
          },
      )
      builder.save()
def _export_inference_graph(input_type,
                            detection_model,
                            use_moving_averages,
                            checkpoint_path,
                            inference_graph_path,
                            export_as_saved_model=False):
  """Builds the inference graph for `detection_model` and writes it out.

  Args:
    input_type: Key into input_placeholder_fn_map selecting the input node.
    detection_model: Model providing preprocess, predict and postprocess.
    use_moving_averages: Forwarded to the writer to choose which weights are
      restored from the checkpoint.
    checkpoint_path: Optional checkpoint whose weights are frozen into the
      export.
    inference_graph_path: Destination path of the export.
    export_as_saved_model: If true, writes a SavedModel instead of a GraphDef
      file.

  Raises:
    ValueError: If `input_type` is not a key of input_placeholder_fn_map.
  """
  placeholder_fn = input_placeholder_fn_map.get(input_type)
  if placeholder_fn is None:
    raise ValueError('Unknown input type: {}'.format(input_type))

  inputs = tf.to_float(placeholder_fn())
  preprocessed = detection_model.preprocess(inputs)
  predictions = detection_model.predict(preprocessed)
  detections = detection_model.postprocess(predictions)
  outputs = _add_output_tensor_nodes(detections)

  if export_as_saved_model:
    _write_saved_model(inference_graph_path, inputs, outputs, checkpoint_path,
                       use_moving_averages)
    return

  _write_inference_graph(inference_graph_path, checkpoint_path,
                         use_moving_averages,
                         output_node_names=','.join(outputs))
def export_inference_graph(input_type, pipeline_config, checkpoint_path,
                           inference_graph_path, export_as_saved_model=False):
  """Exports inference graph for the model specified in the pipeline config.

  Builds the detection model from `pipeline_config.model` in inference mode
  and delegates the export to _export_inference_graph, taking
  `use_moving_averages` from `pipeline_config.eval_config`.

  Args:
    input_type: Type of input for the graph. Can be one of [`image_tensor`,
      `encoded_image_string_tensor`, `tf_example`].
    pipeline_config: pipeline_pb2.TrainEvalPipelineConfig proto.
    checkpoint_path: Path to the checkpoint file to freeze.
    inference_graph_path: Path to write inference graph to.
    export_as_saved_model: If the model should be exported as a SavedModel. If
      false, it is saved as an inference graph.
  """
  detection_model = model_builder.build(
      pipeline_config.model, is_training=False)
  use_moving_averages = pipeline_config.eval_config.use_moving_averages
  _export_inference_graph(
      input_type,
      detection_model,
      use_moving_averages,
      checkpoint_path,
      inference_graph_path,
      export_as_saved_model)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.export_inference_graph."""
import os
import numpy as np
import six
import tensorflow as tf
from object_detection import exporter
from object_detection.builders import model_builder
from object_detection.core import model
from object_detection.protos import pipeline_pb2
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
from unittest import mock # pylint: disable=g-import-not-at-top
class FakeModel(model.DetectionModel):
  """Minimal DetectionModel stand-in that returns fixed detections."""

  def __init__(self, add_detection_masks=False):
    """Stores whether postprocess() should also emit 'detection_masks'."""
    self._add_detection_masks = add_detection_masks

  def preprocess(self, inputs):
    """Returns the inputs unchanged through an identity op."""
    return tf.identity(inputs)

  def predict(self, preprocessed_inputs):
    """Runs a single 1x1 convolution with 3 filters over the inputs."""
    conv_output = tf.layers.conv2d(preprocessed_inputs, 3, 1)
    return {'image': conv_output}

  def postprocess(self, prediction_dict):
    """Returns constant detections that depend on the predictions."""
    # The control dependency ties the constants to the prediction tensors.
    with tf.control_dependencies(prediction_dict.values()):
      detection_boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                                     [0.5, 0.5, 0.8, 0.8]], tf.float32)
      detection_scores = tf.constant([[0.7, 0.6]], tf.float32)
      detection_classes = tf.constant([[0, 1]], tf.float32)
      num_detections = tf.constant([2], tf.float32)
      postprocessed_tensors = {
          'detection_boxes': detection_boxes,
          'detection_scores': detection_scores,
          'detection_classes': detection_classes,
          'num_detections': num_detections,
      }
      if self._add_detection_masks:
        mask_values = np.arange(32).reshape([2, 4, 4])
        postprocessed_tensors['detection_masks'] = tf.constant(
            mask_values, tf.float32)
    return postprocessed_tensors

  def restore_fn(self, checkpoint_path, from_detection_checkpoint):
    """No-op."""
    pass

  def loss(self, prediction_dict):
    """No-op."""
    pass
class ExportInferenceGraphTest(tf.test.TestCase):
def _save_checkpoint_from_mock_model(self, checkpoint_path,
use_moving_averages):
g = tf.Graph()
with g.as_default():
mock_model = FakeModel()
preprocessed_inputs = mock_model.preprocess(
tf.ones([1, 3, 4, 3], tf.float32))
predictions = mock_model.predict(preprocessed_inputs)
mock_model.postprocess(predictions)
if use_moving_averages:
tf.train.ExponentialMovingAverage(0.0).apply()
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init)
saver.save(sess, checkpoint_path)
def _load_inference_graph(self, inference_graph_path):
od_graph = tf.Graph()
with od_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(inference_graph_path) as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
return od_graph
def _create_tf_example(self, image_array):
with self.test_session():
encoded_image = tf.image.encode_jpeg(tf.constant(image_array)).eval()
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': _bytes_feature(encoded_image),
'image/format': _bytes_feature('jpg'),
'image/source_id': _bytes_feature('image_id')
})).SerializeToString()
return example
def test_export_graph_with_image_tensor_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_graph_with_tf_example_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_graph_with_encoded_image_string_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_frozen_graph(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
def test_export_frozen_graph_with_moving_averages(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=True)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = True
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
def test_export_model_with_all_output_nodes(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph):
inference_graph.get_tensor_by_name('image_tensor:0')
inference_graph.get_tensor_by_name('detection_boxes:0')
inference_graph.get_tensor_by_name('detection_scores:0')
inference_graph.get_tensor_by_name('detection_classes:0')
inference_graph.get_tensor_by_name('detection_masks:0')
inference_graph.get_tensor_by_name('num_detections:0')
def test_export_model_with_detection_only_nodes(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=False)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph):
inference_graph.get_tensor_by_name('image_tensor:0')
inference_graph.get_tensor_by_name('detection_boxes:0')
inference_graph.get_tensor_by_name('detection_scores:0')
inference_graph.get_tensor_by_name('detection_classes:0')
inference_graph.get_tensor_by_name('num_detections:0')
with self.assertRaises(KeyError):
inference_graph.get_tensor_by_name('detection_masks:0')
def test_export_and_run_inference_with_image_tensor(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph) as sess:
image_tensor = inference_graph.get_tensor_by_name('image_tensor:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={image_tensor: np.ones((1, 4, 4, 3)).astype(np.uint8)})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
def _create_encoded_image_string(self, image_array_np, encoding_format):
od_graph = tf.Graph()
with od_graph.as_default():
if encoding_format == 'jpg':
encoded_string = tf.image.encode_jpeg(image_array_np)
elif encoding_format == 'png':
encoded_string = tf.image.encode_png(image_array_np)
else:
raise ValueError('Supports only the following formats: `jpg`, `png`')
with self.test_session(graph=od_graph):
return encoded_string.eval()
def test_export_and_run_inference_with_encoded_image_string_tensor(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
jpg_image_str = self._create_encoded_image_string(
np.ones((4, 4, 3)).astype(np.uint8), 'jpg')
png_image_str = self._create_encoded_image_string(
np.ones((4, 4, 3)).astype(np.uint8), 'png')
with self.test_session(graph=inference_graph) as sess:
image_str_tensor = inference_graph.get_tensor_by_name(
'encoded_image_string_tensor:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
for image_str in [jpg_image_str, png_image_str]:
(boxes_np, scores_np, classes_np, masks_np,
num_detections_np) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={image_str_tensor: image_str})
self.assertAllClose(boxes_np, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores_np, [[0.7, 0.6]])
self.assertAllClose(classes_np, [[1, 2]])
self.assertAllClose(masks_np, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections_np, [2])
def test_export_and_run_inference_with_tf_example(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph) as sess:
tf_example = inference_graph.get_tensor_by_name('tf_example:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={tf_example: self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
def test_export_saved_model_and_run_inference(self):
  """Exports a SavedModel with a `tf_example` input and runs it once.

  A checkpoint is written from the mock model, `model_builder.build` is
  patched to hand back a `FakeModel` with detection masks enabled, and the
  exporter is invoked with `export_as_saved_model=True`. The SavedModel is
  loaded into a fresh graph under the SERVING tag, fed the output of
  `self._create_tf_example` for a 4x4x3 all-ones uint8 array, and the five
  detection outputs are compared against fixed expected values.
  """
  temp_dir = self.get_temp_dir()
  checkpoint_path = os.path.join(temp_dir, 'model-ckpt')
  self._save_checkpoint_from_mock_model(checkpoint_path,
                                        use_moving_averages=False)
  saved_model_path = os.path.join(temp_dir, 'saved_model')

  # The config proto does not depend on the patched builder, so it is
  # prepared up front.
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  pipeline_config.eval_config.use_moving_averages = False

  with mock.patch.object(
      model_builder, 'build', autospec=True) as mock_builder:
    mock_builder.return_value = FakeModel(add_detection_masks=True)
    exporter.export_inference_graph(
        input_type='tf_example',
        pipeline_config=pipeline_config,
        checkpoint_path=checkpoint_path,
        inference_graph_path=saved_model_path,
        export_as_saved_model=True)

  # Tensors are looked up under the 'import/' name scope in the loaded graph.
  output_names = (
      'import/detection_boxes:0',
      'import/detection_scores:0',
      'import/detection_classes:0',
      'import/detection_masks:0',
      'import/num_detections:0',
  )
  with tf.Graph().as_default() as od_graph, \
      self.test_session(graph=od_graph) as sess:
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], saved_model_path)
    input_tensor = od_graph.get_tensor_by_name('import/tf_example:0')
    fetches = [od_graph.get_tensor_by_name(name) for name in output_names]
    serialized_example = self._create_tf_example(
        np.ones((4, 4, 3)).astype(np.uint8))
    (boxes_np, scores_np, classes_np, masks_np,
     num_detections_np) = sess.run(
         fetches, feed_dict={input_tensor: serialized_example})

    self.assertAllClose(boxes_np, [[0.0, 0.0, 0.5, 0.5],
                                   [0.5, 0.5, 0.8, 0.8]])
    self.assertAllClose(scores_np, [[0.7, 0.6]])
    self.assertAllClose(classes_np, [[1, 2]])
    self.assertAllClose(masks_np, np.arange(32).reshape([2, 4, 4]))
    self.assertAllClose(num_detections_np, [2])
# Run the test suite only when this file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
  tf.test.main()
# Configuring the Object Detection Training Pipeline
## Overview
The Tensorflow Object Detection API uses protobuf files to configure the
training and evaluation process. The schema for the training pipeline can be
found in object_detection/protos/pipeline.proto. At a high level, the config
file is split into 5 parts:
1. The `model` configuration. This defines what type of model will be trained
(ie. meta-architecture, feature extractor).
2. The `train_config`, which decides what parameters should be used to train
model parameters (ie. SGD parameters, input preprocessing and feature extractor
initialization values).
3. The `eval_config`, which determines what set of metrics will be reported for
evaluation (currently we only support the PASCAL VOC metrics).
4. The `train_input_config`, which defines what dataset the model should be
trained on.
5. The `eval_input_config`, which defines what dataset the model will be
evaluated on. Typically this should be different than the training input
dataset.
A skeleton configuration file is shown below:
```
model {
(... Add model config here...)
}
train_config : {
(... Add train_config here...)
}
train_input_reader: {
(... Add train_input configuration here...)
}
eval_config: {
}
eval_input_reader: {
(... Add eval_input configuration here...)
}
```
## Picking Model Parameters
There are a large number of model parameters to configure. The best settings
will depend on your given application. Faster R-CNN models are better suited to
cases where high accuracy is desired and latency is of lower priority.
Conversely, if processing time is the most important factor, SSD models are
recommended. Read [our paper](https://arxiv.org/abs/1611.10012) for a more
detailed discussion on the speed vs accuracy tradeoff.
To help new users get started, sample model configurations have been provided
in the object_detection/samples/model_configs folder. The contents of these
configuration files can be pasted into the `model` field of the skeleton
configuration. Users should note that the `num_classes` field should be changed
to a value suited for the dataset the user is training on.
## Defining Inputs
The Tensorflow Object Detection API accepts inputs in the TFRecord file format.
Users must specify the locations of both the training and evaluation files.
Additionally, users should specify a label map, which defines the mapping
between a class id and class name. The label map should be identical between
training and evaluation datasets.
An example input configuration looks as follows:
```
tf_record_input_reader {
input_path: "/usr/home/username/data/train.record"
}
label_map_path: "/usr/home/username/data/label_map.pbtxt"
```
Users should substitute the `input_path` and `label_map_path` arguments and
insert the input configuration into the `train_input_reader` and
`eval_input_reader` fields in the skeleton configuration. Note that the paths
can also point to Google Cloud Storage buckets (ie.
"gs://project_bucket/train.record") for use on Google Cloud.
## Configuring the Trainer
The `train_config` defines parts of the training process:
1. Model parameter initialization.
2. Input preprocessing.
3. SGD parameters.
A sample `train_config` is below:
```
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0002
schedule {
step: 0
learning_rate: .0002
}
schedule {
step: 900000
learning_rate: .00002
}
schedule {
step: 1200000
learning_rate: .000002
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
fine_tune_checkpoint: "/usr/home/username/tmp/model.ckpt-#####"
from_detection_checkpoint: true
gradient_clipping_by_norm: 10.0
data_augmentation_options {
random_horizontal_flip {
}
}
```
### Model Parameter Initialization
While optional, it is highly recommended that users utilize other object
detection checkpoints. Training an object detector from scratch can take days.
To speed up the training process, it is recommended that users re-use the
feature extractor parameters from a pre-existing object classification or
detection checkpoint. `train_config` provides two fields to specify
pre-existing checkpoints: `fine_tune_checkpoint` and
`from_detection_checkpoint`. `fine_tune_checkpoint` should provide a path to
the pre-existing checkpoint
(ie:"/usr/home/username/checkpoint/model.ckpt-#####").
`from_detection_checkpoint` is a boolean value. If false, it assumes the
checkpoint was from an object classification checkpoint. Note that starting
from a detection checkpoint will usually result in a faster training job than
a classification checkpoint.
The list of provided checkpoints can be found [here](detection_model_zoo.md).
### Input Preprocessing
The `data_augmentation_options` in `train_config` can be used to specify
how training data can be modified. This field is optional.
### SGD Parameters
The remaining parameters in `train_config` are hyperparameters for gradient
descent. Please note that the optimal learning rates provided in these
configuration files may depend on the specifics of the training setup (e.g.
number of workers, gpu type).
## Configuring the Evaluator
Currently evaluation is fixed to generating metrics as defined by the PASCAL
VOC challenge. The parameters for `eval_config` are set to reasonable defaults
and typically do not need to be configured.
# So you want to create a new model!
In this section, we discuss some of the abstractions that we use
for defining detection models. If you would like to define a new model
architecture for detection and use it in the Tensorflow Detection API,
then this section should also serve as a high level guide to the files that you
will need to edit to get your new model working.
## DetectionModels (`object_detection/core/model.py`)
In order to be trained, evaluated, and exported for serving using our
provided binaries, all models under the Tensorflow Object Detection API must
implement the `DetectionModel` interface (see the full definition in `object_detection/core/model.py`). In particular,
each of these models is responsible for implementing 5 functions:
* `preprocess`: Run any preprocessing (e.g., scaling/shifting/reshaping) of
input values that is necessary prior to running the detector on an input
image.
* `predict`: Produce “raw” prediction tensors that can be passed to loss or
postprocess functions.
* `postprocess`: Convert predicted output tensors to final detections.
* `loss`: Compute scalar loss tensors with respect to provided groundtruth.
* `restore`: Load a checkpoint into the Tensorflow graph.
Given a `DetectionModel` at training time, we pass each image batch through
the following sequence of functions to compute a loss which can be optimized via
SGD:
```
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
```
And at eval time, we pass each image batch through the following sequence of
functions to produce a set of detections:
```
inputs (images tensor) -> preprocess -> predict -> postprocess ->
outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
```
Some conventions to be aware of:
* `DetectionModel`s should make no assumptions about the input size or aspect
ratio --- they are responsible for doing any resize/reshaping necessary
(see docstring for the `preprocess` function).
* Output classes are always integers in the range `[0, num_classes)`.
Any mapping of these integers to semantic labels is to be handled outside
of this class. We never explicitly emit a “background class” --- thus 0 is
the first non-background class and any logic of predicting and removing
implicit background classes must be handled internally by the implementation.
* Detected boxes are to be interpreted as being in
`[y_min, x_min, y_max, x_max]` format and normalized relative to the
image window.
* We do not specifically assume any kind of probabilistic interpretation of the
scores --- the only important thing is their relative ordering. Thus
implementations of the postprocess function are free to output logits,
probabilities, calibrated probabilities, or anything else.
## Defining a new Faster R-CNN or SSD Feature Extractor
In most cases, you probably will not implement a `DetectionModel` from scratch
--- instead you might create a new feature extractor to be used by one of the
SSD or Faster R-CNN meta-architectures. (We think of meta-architectures as
classes that define entire families of models using the `DetectionModel`
abstraction).
Note: For the following discussion to make sense, we recommend first becoming
familiar with the [Faster R-CNN](https://arxiv.org/abs/1506.01497) paper.
Let’s now imagine that you have invented a brand new network architecture
(say, “InceptionV100”) for classification and want to see how InceptionV100
would behave as a feature extractor for detection (say, with Faster R-CNN).
A similar procedure would hold for SSD models, but we’ll discuss Faster R-CNN.
To use InceptionV100, we will have to define a new
`FasterRCNNFeatureExtractor` and pass it to our `FasterRCNNMetaArch`
constructor as input. See
`object_detection/meta_architectures/faster_rcnn_meta_arch.py` for definitions
of `FasterRCNNFeatureExtractor` and `FasterRCNNMetaArch`, respectively.
A `FasterRCNNFeatureExtractor` must define a few
functions:
* `preprocess`: Run any preprocessing of input values that is necessary prior
to running the detector on an input image.
* `_extract_proposal_features`: Extract first stage Region Proposal Network
(RPN) features.
* `_extract_box_classifier_features`: Extract second stage Box Classifier
features.
* `restore_from_classification_checkpoint_fn`: Load a checkpoint into the
Tensorflow graph.
See the `object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py`
definition as one example. Some remarks:
* We typically initialize the weights of this feature extractor
using those from the
[Slim Resnet-101 classification checkpoint](https://github.com/tensorflow/models/tree/master/slim#pre-trained-models),
and we know
that images were preprocessed when training this checkpoint
by subtracting a channel mean from each input
image. Thus, we implement the preprocess function to replicate the same
channel mean subtraction behavior.
* The “full” resnet classification network defined in slim is cut into two
parts --- all but the last “resnet block” is put into the
`_extract_proposal_features` function and the final block is separately
defined in the `_extract_box_classifier_features` function. In general,
some experimentation may be required to decide on an optimal layer at
which to “cut” your feature extractor into these two pieces for Faster R-CNN.
## Register your model for configuration
Assuming that your new feature extractor does not require nonstandard
configuration, you will want to ideally be able to simply change the
“feature_extractor.type” fields in your configuration protos to point to a
new feature extractor. In order for our API to know how to understand this
new type though, you will first have to register your new feature
extractor with the model builder (`object_detection/builders/model_builder.py`),
whose job is to create models from config protos.
Registration is simple --- just add a pointer to the new Feature Extractor
class that you have defined in one of the SSD or Faster R-CNN Feature
Extractor Class maps at the top of the
`object_detection/builders/model_builder.py` file.
We recommend adding a test in `object_detection/builders/model_builder_test.py`
to make sure that parsing your proto will work as expected.
## Taking your new model for a spin
After registration you are ready to go with your model! Some final tips:
* To save time debugging, try running your configuration file locally first
(both training and evaluation).
* Do a sweep of learning rates to figure out which learning rate is best
for your model.
* A small but often important detail: you may find it necessary to disable
batchnorm training (that is, load the batch norm parameters from the
classification checkpoint, but do not update them during gradient descent).
# Tensorflow detection model zoo
We provide a collection of detection models pre-trained on the
[COCO dataset](http://mscoco.org).
These models can be useful for out-of-the-box inference if you are interested
in categories already in COCO (e.g., humans, cars, etc).
They are also useful for initializing your models when training on novel
datasets.
In the table below, we list each such pre-trained model including:
* a model name that corresponds to a config file that was used to train this
model in the `samples/configs` directory,
* a download link to a tar.gz file containing the pre-trained model,
* model speed (one of {slow, medium, fast}),
* detector performance on COCO data as measured by the COCO mAP measure.
Here, higher is better, and we only report bounding box mAP rounded to the
nearest integer.
* Output types (currently only `Boxes`)
You can un-tar each tar.gz file via, e.g.,:
```
tar -xzvf ssd_mobilenet_v1_coco.tar.gz
```
Inside the un-tar'ed directory, you will find:
* a graph proto (`graph.pbtxt`)
* a checkpoint
(`model.ckpt.data-00000-of-00001`, `model.ckpt.index`, `model.ckpt.meta`)
* a frozen graph proto with weights baked into the graph as constants
(`frozen_inference_graph.pb`) to be used for out of the box inference
(try this out in the Jupyter notebook!)
| Model name | Speed | COCO mAP | Outputs |
| ------------ | :--------------: | :--------------: | :-------------: |
| [ssd_mobilenet_v1_coco](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz) | fast | 21 | Boxes |
| [ssd_inception_v2_coco](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_11_06_2017.tar.gz) | fast | 24 | Boxes |
| [rfcn_resnet101_coco](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_11_06_2017.tar.gz) | medium | 30 | Boxes |
| [faster_rcnn_resnet101_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz) | medium | 32 | Boxes |
| [faster_rcnn_inception_resnet_v2_atrous_coco](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_11_06_2017.tar.gz) | slow | 37 | Boxes |
# Exporting a trained model for inference
After your model has been trained, you should export it to a Tensorflow
graph proto. A checkpoint will typically consist of three files:
* model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001,
* model.ckpt-${CHECKPOINT_NUMBER}.index
* model.ckpt-${CHECKPOINT_NUMBER}.meta
After you've identified a candidate checkpoint to export, run the following
command from tensorflow/models:
``` bash
# From tensorflow/models
python object_detection/export_inference_graph.py \
--input_type image_tensor \
--pipeline_config_path ${PIPELINE_CONFIG_PATH} \
--checkpoint_path model.ckpt-${CHECKPOINT_NUMBER} \
--inference_graph_path output_inference_graph.pb
```
Afterwards, you should see a graph named output_inference_graph.pb.
# Installation
## Dependencies
Tensorflow Object Detection API depends on the following libraries:
* Protobuf 2.6
* Pillow 1.0
* lxml
* tf Slim (which is included in the "tensorflow/models" checkout)
* Jupyter notebook
* Matplotlib
* Tensorflow
For detailed steps to install Tensorflow, follow the
[Tensorflow installation instructions](https://www.tensorflow.org/install/).
A typical user can install Tensorflow using one of the following commands:
``` bash
# For CPU
pip install tensorflow
# For GPU
pip install tensorflow-gpu
```
The remaining libraries can be installed on Ubuntu 16.04 via apt-get:
``` bash
sudo apt-get install protobuf-compiler python-pil python-lxml
sudo pip install jupyter
sudo pip install matplotlib
```
Alternatively, users can install dependencies using pip:
``` bash
sudo pip install pillow
sudo pip install lxml
sudo pip install jupyter
sudo pip install matplotlib
```
## Protobuf Compilation
The Tensorflow Object Detection API uses Protobufs to configure model and
training parameters. Before the framework can be used, the Protobuf libraries
must be compiled. This should be done by running the following command from
the tensorflow/models directory:
``` bash
# From tensorflow/models/
protoc object_detection/protos/*.proto --python_out=.
```
## Add Libraries to PYTHONPATH
When running locally, the tensorflow/models/ and slim directories should be
appended to PYTHONPATH. This can be done by running the following from
tensorflow/models/:
``` bash
# From tensorflow/models/
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
```
Note: This command needs to run from every new terminal you start. If you wish
to avoid running this manually, you can add it as a new line to the end of your
~/.bashrc file.
# Testing the Installation
You can test that you have correctly installed the Tensorflow Object Detection
API by running the following command:
``` bash
python object_detection/builders/model_builder_test.py
```
# Preparing Inputs
Tensorflow Object Detection API reads data using the TFRecord file format. Two
sample scripts (`create_pascal_tf_record.py` and `create_pet_tf_record.py`) are
provided to convert from the PASCAL VOC dataset and Oxford-IIIT Pet dataset to
TFRecords.
## Generating the PASCAL VOC TFRecord files.
The raw 2012 PASCAL VOC data set can be downloaded
[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
Extract the tar file and run the `create_pascal_tf_record` script:
```bash
# From tensorflow/models/object_detection
tar -xvf VOCtrainval_11-May-2012.tar
python create_pascal_tf_record.py --data_dir=VOCdevkit \
--year=VOC2012 --set=train --output_path=pascal_train.record
python create_pascal_tf_record.py --data_dir=VOCdevkit \
--year=VOC2012 --set=val --output_path=pascal_val.record
```
You should end up with two TFRecord files named `pascal_train.record` and
`pascal_val.record` in the `tensorflow/models/object_detection` directory.
The label map for the PASCAL VOC data set can be found at
`data/pascal_label_map.pbtxt`.
## Generating the Oxford-IIIT Pet TFRecord files.
The Oxford-IIIT Pet data set can be downloaded from
[their website](http://www.robots.ox.ac.uk/~vgg/data/pets/). Extract the tar
file and run the `create_pet_tf_record` script to generate TFRecords.
```bash
# From tensorflow/models/object_detection
tar -xvf annotations.tar.gz
tar -xvf images.tar.gz
python create_pet_tf_record.py --data_dir=`pwd` --output_dir=`pwd`
```
You should end up with two TFRecord files named `pet_train.record` and
`pet_val.record` in the `tensorflow/models/object_detection` directory.
The label map for the Pet dataset can be found at `data/pet_label_map.pbtxt`.
# Running Locally
This page walks through the steps required to train an object detection model
on a local machine. It assumes the reader has completed the
following prerequisites:
1. The Tensorflow Object Detection API has been installed as documented in the
[installation instructions](installation.md). This includes installing library
dependencies, compiling the configuration protobufs and setting up the Python
environment.
2. A valid data set has been created. See [this page](preparing_inputs.md) for
instructions on how to generate a dataset for the PASCAL VOC challenge or the
Oxford-IIIT Pet dataset.
3. An Object Detection pipeline configuration has been written. See
[this page](configuring_jobs.md) for details on how to write a pipeline configuration.
## Recommended Directory Structure for Training and Evaluation
```
+data
-label_map file
-train TFRecord file
-eval TFRecord file
+models
+ model
-pipeline config file
+train
+eval
```
## Running the Training Job
A local training job can be run with the following command:
```bash
# From the tensorflow/models/ directory
python object_detection/train.py \
--logtostderr \
--pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
--train_dir=${PATH_TO_TRAIN_DIR}
```
where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config and
`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints
and events will be written to. By default, the training job will
run indefinitely until the user kills it.
## Running the Evaluation Job
Evaluation is run as a separate job. The eval job will periodically poll the
train directory for new checkpoints and evaluate them on a test dataset. The
job can be run using the following command:
```bash
# From the tensorflow/models/ directory
python object_detection/eval.py \
--logtostderr \
--pipeline_config_path=${PATH_TO_YOUR_PIPELINE_CONFIG} \
--checkpoint_dir=${PATH_TO_TRAIN_DIR} \
--eval_dir=${PATH_TO_EVAL_DIR}
```
where `${PATH_TO_YOUR_PIPELINE_CONFIG}` points to the pipeline config,
`${PATH_TO_TRAIN_DIR}` points to the directory in which training checkpoints
were saved (same as the training job) and `${PATH_TO_EVAL_DIR}` points to the
directory in which evaluation events will be saved. As with the training job,
the eval job will run until terminated by default.
## Running Tensorboard
Progress for training and eval jobs can be inspected using Tensorboard. If
using the recommended directory structure, Tensorboard can be run using the
following command:
```bash
tensorboard --logdir=${PATH_TO_MODEL_DIRECTORY}
```
where `${PATH_TO_MODEL_DIRECTORY}` points to the directory that contains the
train and eval directories. Please note it may take Tensorboard a couple of
minutes to populate with data.
# Quick Start: Jupyter notebook for off-the-shelf inference
If you'd like to hit the ground running and run detection on a few example
images right out of the box, we recommend trying out the Jupyter notebook demo.
To run the Jupyter notebook, run the following command from
`tensorflow/models/object_detection`:
```
# From tensorflow/models/object_detection
jupyter notebook
```
The notebook should open in your favorite web browser. Click the
[`object_detection_tutorial.ipynb`](../object_detection_tutorial.ipynb) link
to open the demo.
# Running on Google Cloud Platform
The Tensorflow Object Detection API supports distributed training on Google
Cloud ML Engine. This section documents instructions on how to train and
evaluate your model using Cloud ML. The reader should complete the following
prerequisites:
1. The reader has created and configured a project on Google Cloud Platform.
See [the Cloud ML quick start guide](https://cloud.google.com/ml-engine/docs/quickstarts/command-line).
2. The reader has installed the Tensorflow Object Detection API as documented
in the [installation instructions](installation.md).
3. The reader has a valid data set and stored it in a Google Cloud Storage
bucket. See [this page](preparing_inputs.md) for instructions on how to generate
a dataset for the PASCAL VOC challenge or the Oxford-IIIT Pet dataset.
4. The reader has configured a valid Object Detection pipeline, and stored it
in a Google Cloud Storage bucket. See [this page](configuring_jobs.md) for
details on how to write a pipeline configuration.
Additionally, it is recommended users test their job by running training and
evaluation jobs for a few iterations
[locally on their own machines](running_locally.md).
## Packaging
In order to run the Tensorflow Object Detection API on Cloud ML, it must be
packaged (along with its TF-Slim dependency). The required packages can be
created with the following command
``` bash
# From tensorflow/models/
python setup.py sdist
(cd slim && python setup.py sdist)
```
This will create python packages in dist/object_detection-0.1.tar.gz and
slim/dist/slim-0.1.tar.gz.
## Running a Multiworker Training Job
Google Cloud ML requires a YAML configuration file for a multiworker training
job using GPUs. A sample YAML file is given below:
```
trainingInput:
runtimeVersion: "1.0"
scaleTier: CUSTOM
masterType: standard_gpu
workerCount: 9
workerType: standard_gpu
parameterServerCount: 3
parameterServerType: standard
```
Please keep the following guidelines in mind when writing the YAML
configuration:
* A job with n workers will have n + 1 training machines (n workers + 1 master).
* The number of parameter servers used should be an odd number to prevent
a parameter server from storing only weight variables or only bias variables
(due to round robin parameter scheduling).
* The learning rate in the training config should be decreased when using a
larger number of workers. Some experimentation is required to find the
optimal learning rate.
The YAML file should be saved on the local machine (not on GCP). Once it has
been written, a user can start a training job on Cloud ML Engine using the
following command:
``` bash
# From tensorflow/models/
gcloud ml-engine jobs submit training object_detection_`date +%s` \
--job-dir=gs://${TRAIN_DIR} \
--packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
--module-name object_detection.train \
--region us-central1 \
--config ${PATH_TO_LOCAL_YAML_FILE} \
-- \
--train_dir=gs://${TRAIN_DIR} \
--pipeline_config_path=gs://${PIPELINE_CONFIG_PATH}
```
Where `${PATH_TO_LOCAL_YAML_FILE}` is the local path to the YAML configuration,
`gs://${TRAIN_DIR}` specifies the directory on Google Cloud Storage where the
training checkpoints and events will be written to and
`gs://${PIPELINE_CONFIG_PATH}` points to the pipeline configuration stored on
Google Cloud Storage.
Users can monitor the progress of their training job on the [ML Engine
Dashboard](https://console.cloud.google.com/mlengine/jobs).
## Running an Evaluation Job on Cloud
Evaluation jobs run on a single machine, so it is not necessary to write a YAML
configuration for evaluation. Run the following command to start the evaluation
job:
``` bash
gcloud ml-engine jobs submit training object_detection_eval_`date +%s` \
--job-dir=gs://${TRAIN_DIR} \
--packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
--module-name object_detection.eval \
--region us-central1 \
--scale-tier BASIC_GPU \
-- \
--checkpoint_dir=gs://${TRAIN_DIR} \
--eval_dir=gs://${EVAL_DIR} \
--pipeline_config_path=gs://${PIPELINE_CONFIG_PATH}
```
Where `gs://${TRAIN_DIR}` points to the directory on Google Cloud Storage where
training checkpoints are saved (same as the training job), `gs://${EVAL_DIR}`
points to where evaluation events will be saved on Google Cloud Storage and
`gs://${PIPELINE_CONFIG_PATH}` points to where the pipeline configuration is
stored on Google Cloud Storage.
## Running Tensorboard
You can run Tensorboard locally on your own machine to view progress of your
training and eval jobs on Google Cloud ML. Run the following command to start
Tensorboard:
``` bash
tensorboard --logdir=gs://${YOUR_CLOUD_BUCKET}
```
Note it may take Tensorboard a few minutes to populate with results.
# Quick Start: Distributed Training on the Oxford-IIIT Pets Dataset on Google Cloud
This page is a walkthrough for training an object detector using the Tensorflow
Object Detection API. In this tutorial, we'll be training on the Oxford-IIIT Pets
dataset to build a system to detect various breeds of cats and dogs. The output
of the detector will look like the following:
![](img/oxford_pet.png)
## Setting up a Project on Google Cloud
To accelerate the process, we'll run training and evaluation on [Google Cloud
ML Engine](https://cloud.google.com/ml-engine/) to leverage multiple GPUs. To
begin, you will have to set up Google Cloud via the following steps (if you have
already done this, feel free to skip to the next section):
1. [Create a GCP project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).
2. [Install the Google Cloud SDK](https://cloud.google.com/sdk/downloads) on
your workstation or laptop.
This will provide the tools you need to upload files to Google Cloud Storage and
start ML training jobs.
3. [Enable the ML Engine
APIs](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component&_ga=1.73374291.1570145678.1496689256).
By default, a new GCP project does not enable APIs to start ML Engine training
jobs. Use the above link to explicitly enable them.
4. [Set up a Google Cloud Storage (GCS)
bucket](https://cloud.google.com/storage/docs/creating-buckets). ML Engine
training jobs can only access files on a Google Cloud Storage bucket. In this
tutorial, we'll be required to upload our dataset and configuration to GCS.
Please remember the name of your GCS bucket, as we will reference it multiple
times in this document. Substitute `${YOUR_GCS_BUCKET}` with the name of
your bucket in this document. For your convenience, you should define the
environment variable below:
``` bash
export YOUR_GCS_BUCKET=${YOUR_GCS_BUCKET}
```
## Installing Tensorflow and the Tensorflow Object Detection API
Please run through the [installation instructions](installation.md) to install
Tensorflow and all its dependencies. Ensure the Protobuf libraries are
compiled and the library directories are added to `PYTHONPATH`.
## Getting the Oxford-IIIT Pets Dataset and Uploading it to Google Cloud Storage
In order to train a detector, we require a dataset of images, bounding boxes and
classifications. For this demo, we'll use the Oxford-IIIT Pets dataset. The raw
dataset for Oxford-IIIT Pets lives
[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). You will need to download
both the image dataset [`images.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz)
and the groundtruth data [`annotations.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz)
to the `tensorflow/models` directory. This may take some time. After downloading
the tarballs, your `object_detection` directory should appear as follows:
```lang-none
+ object_detection/
+ data/
- images.tar.gz
- annotations.tar.gz
- create_pet_tf_record.py
... other files and directories
```
The Tensorflow Object Detection API expects data to be in the TFRecord format,
so we'll now run the `create_pet_tf_record` script to convert from the raw
Oxford-IIIT Pet dataset into TFRecords. Run the following commands from the
`tensorflow/models` directory:
``` bash
# From tensorflow/models/
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
tar -xvf annotations.tar.gz
tar -xvf images.tar.gz
python object_detection/create_pet_tf_record.py \
--label_map_path=object_detection/data/pet_label_map.pbtxt \
--data_dir=`pwd` \
--output_dir=`pwd`
```
Note: It is normal to see some warnings when running this script. You may ignore
them.
Two TFRecord files named `pet_train.record` and `pet_val.record` should be generated
in the `tensorflow/models` directory.
Now that the data has been generated, we'll need to upload it to Google Cloud
Storage so the data can be accessed by ML Engine. Run the following command to
copy the files into your GCS bucket (substituting `${YOUR_GCS_BUCKET}`):
``` bash
# From tensorflow/models/
gsutil cp pet_train.record gs://${YOUR_GCS_BUCKET}/data/pet_train.record
gsutil cp pet_val.record gs://${YOUR_GCS_BUCKET}/data/pet_val.record
gsutil cp object_detection/data/pet_label_map.pbtxt gs://${YOUR_GCS_BUCKET}/data/pet_label_map.pbtxt
```
Please remember the path where you upload the data to, as we will need this
information when configuring the pipeline in a following step.
## Downloading a COCO-pretrained Model for Transfer Learning
Training a state of the art object detector from scratch can take days, even
when using multiple GPUs! In order to speed up training, we'll take an object
detector trained on a different dataset (COCO), and reuse some of its
parameters to initialize our new model.
Download our [COCO-pretrained Faster R-CNN with Resnet-101
model](http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz).
Unzip the contents of the folder and copy the `model.ckpt*` files into your GCS
Bucket.
``` bash
wget http://storage.googleapis.com/download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_11_06_2017.tar.gz
tar -xvf faster_rcnn_resnet101_coco_11_06_2017.tar.gz
gsutil cp faster_rcnn_resnet101_coco_11_06_2017/model.ckpt.* gs://${YOUR_GCS_BUCKET}/data/
```
Remember the path where you uploaded the model checkpoint to, as we will need it
in the following step.
## Configuring the Object Detection Pipeline
In the Tensorflow Object Detection API, the model parameters, training
parameters and eval parameters are all defined by a config file. More details
can be found [here](configuring_jobs.md). For this tutorial, we will use some
predefined templates provided with the source code. In the
`object_detection/samples/configs` folder, there are skeleton object_detection
configuration files. We will use `faster_rcnn_resnet101_pets.config` as a
starting point for configuring the pipeline. Open the file with your favourite
text editor.
We'll need to configure some paths in order for the template to work. Search the
file for instances of `PATH_TO_BE_CONFIGURED` and replace them with the
appropriate value (typically `gs://${YOUR_GCS_BUCKET}/data/`). Afterwards
upload your edited file onto GCS, making note of the path it was uploaded to
(we'll need it when starting the training/eval jobs).
``` bash
# From tensorflow/models/
# Edit the faster_rcnn_resnet101_pets.config template. Please note that there
# are multiple places where PATH_TO_BE_CONFIGURED needs to be set.
sed -i "s|PATH_TO_BE_CONFIGURED|"gs://${YOUR_GCS_BUCKET}"/data|g" \
object_detection/samples/configs/faster_rcnn_resnet101_pets.config
# Copy edited template to cloud.
gsutil cp object_detection/samples/configs/faster_rcnn_resnet101_pets.config \
gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
```
## Checking Your Google Cloud Storage Bucket
At this point in the tutorial, you should have uploaded the training/validation
datasets (including label map), our COCO trained FasterRCNN finetune checkpoint and your job
configuration to your Google Cloud Storage Bucket. Your bucket should look like
the following:
```lang-none
+ ${YOUR_GCS_BUCKET}/
+ data/
- faster_rcnn_resnet101_pets.config
- model.ckpt.index
- model.ckpt.meta
- model.ckpt.data-00000-of-00001
- pet_label_map.pbtxt
- pet_train.record
- pet_val.record
```
You can inspect your bucket using the [Google Cloud Storage
browser](https://console.cloud.google.com/storage/browser).
## Starting Training and Evaluation Jobs on Google Cloud ML Engine
Before we can start a job on Google Cloud ML Engine, we must:
1. Package the Tensorflow Object Detection code.
2. Write a cluster configuration for our Google Cloud ML job.
To package the Tensorflow Object Detection code, run the following commands from
the `tensorflow/models/` directory:
``` bash
# From tensorflow/models/
python setup.py sdist
(cd slim && python setup.py sdist)
```
You should see two tar.gz files created at `dist/object_detection-0.1.tar.gz`
and `slim/dist/slim-0.1.tar.gz`.
For running the training Cloud ML job, we'll configure the cluster to use 10
training jobs (1 master + 9 workers) and three parameter servers. The
configuration file can be found at `object_detection/samples/cloud/cloud.yml`.
To start training, execute the following command from the `tensorflow/models/`
directory:
``` bash
# From tensorflow/models/
gcloud ml-engine jobs submit training `whoami`_object_detection_`date +%s` \
--job-dir=gs://${YOUR_GCS_BUCKET}/train \
--packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
--module-name object_detection.train \
--region us-central1 \
--config object_detection/samples/cloud/cloud.yml \
-- \
--train_dir=gs://${YOUR_GCS_BUCKET}/train \
--pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
```
Once training has started, we can run an evaluation concurrently:
``` bash
# From tensorflow/models/
gcloud ml-engine jobs submit training `whoami`_object_detection_eval_`date +%s` \
--job-dir=gs://${YOUR_GCS_BUCKET}/train \
--packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz \
--module-name object_detection.eval \
--region us-central1 \
--scale-tier BASIC_GPU \
-- \
--checkpoint_dir=gs://${YOUR_GCS_BUCKET}/train \
--eval_dir=gs://${YOUR_GCS_BUCKET}/eval \
--pipeline_config_path=gs://${YOUR_GCS_BUCKET}/data/faster_rcnn_resnet101_pets.config
```
Note: Even though we're running an evaluation job, the `gcloud ml-engine jobs
submit training` command is correct. ML Engine does not distinguish between
training and evaluation jobs.
Users can monitor and stop training and evaluation jobs on the [ML Engine
Dashboard](https://console.cloud.google.com/mlengine/jobs).
## Monitoring Progress with Tensorboard
You can monitor progress of the training and eval jobs by running Tensorboard on
your local machine:
``` bash
# This command needs to be run once to allow your local machine to access your
# GCS bucket.
gcloud auth application-default login
tensorboard --logdir=gs://${YOUR_GCS_BUCKET}
```
Once Tensorboard is running, navigate to `localhost:6006` from your favourite
web browser. You should see something similar to the following:
![](img/tensorboard.png)
You will also want to click on the images tab to see example detections made by
the model while it trains. After about an hour and a half of training, you can
expect to see something like this:
![](img/tensorboard2.png)
Note: It takes roughly 10 minutes for a job to get started on ML Engine, and
roughly an hour for the system to evaluate the validation dataset. It may take
some time to populate the dashboards. If you do not see any entries after half
an hour, check the logs from the [ML Engine
Dashboard](https://console.cloud.google.com/mlengine/jobs).
## Exporting the Tensorflow Graph
After your model has been trained, you should export it to a Tensorflow
graph proto. First, you need to identify a candidate checkpoint to export. You
can search your bucket using the [Google Cloud Storage
Browser](https://console.cloud.google.com/storage/browser). The file should be
stored under `${YOUR_GCS_BUCKET}/train`. The checkpoint will typically consist of
three files:
* `model.ckpt-${CHECKPOINT_NUMBER}.data-00000-of-00001`
* `model.ckpt-${CHECKPOINT_NUMBER}.index`
* `model.ckpt-${CHECKPOINT_NUMBER}.meta`
After you've identified a candidate checkpoint to export, run the following
command from `tensorflow/models`:
``` bash
# From tensorflow/models
gsutil cp gs://${YOUR_GCS_BUCKET}/train/model.ckpt-${CHECKPOINT_NUMBER}.* .
python object_detection/export_inference_graph.py \
--input_type image_tensor \
--pipeline_config_path object_detection/samples/configs/faster_rcnn_resnet101_pets.config \
--checkpoint_path model.ckpt-${CHECKPOINT_NUMBER} \
--inference_graph_path output_inference_graph.pb
```
Afterwards, you should see a graph named `output_inference_graph.pb`.
## What's Next
Congratulations, you have now trained an object detector for various cats and
dogs! There are different things you can do now:
1. [Test your exported model using the provided Jupyter notebook.](running_notebook.md)
2. [Experiment with different model configurations.](configuring_jobs.md)
3. Train an object detector using your own data.
# Tensorflow Object Detection API: Matcher implementations.
# Package-wide defaults: every target declared in this BUILD file is visible
# to all other packages unless a target overrides its own visibility.
package(
default_visibility = ["//visibility:public"],
)
# License declaration for the targets in this package.
licenses(["notice"])
# Apache 2.0
# Python library target built from argmax_matcher.py.
# Depends on TensorFlow and on the `matcher` target of the object_detection
# core package.
py_library(
name = "argmax_matcher",
srcs = [
"argmax_matcher.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:matcher",
],
)
# Test target built from argmax_matcher_test.py.
# Depends on the local :argmax_matcher library and on TensorFlow.
py_test(
name = "argmax_matcher_test",
srcs = ["argmax_matcher_test.py"],
deps = [
":argmax_matcher",
"//tensorflow",
],
)
# Python library target built from bipartite_matcher.py.
# In addition to TensorFlow and the core `matcher` target, this library
# depends on the tensorflow/contrib/image Python target (image_py).
py_library(
name = "bipartite_matcher",
srcs = [
"bipartite_matcher.py",
],
deps = [
"//tensorflow",
"//tensorflow/contrib/image:image_py",
"//tensorflow_models/object_detection/core:matcher",
],
)
# Test target built from bipartite_matcher_test.py.
# Depends on the local :bipartite_matcher library and on TensorFlow.
py_test(
name = "bipartite_matcher_test",
srcs = [
"bipartite_matcher_test.py",
],
deps = [
":bipartite_matcher",
"//tensorflow",
],
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment