Commit 748eceae authored by Marianne Linhares Monteiro's avatar Marianne Linhares Monteiro Committed by GitHub
Browse files

Merge branch 'master' into cifar10_experiment

parents 40e906d2 ed65b632
......@@ -1255,6 +1255,82 @@ def random_resize_method(image, target_size):
return resized_image
def _compute_new_static_size(image,
min_dimension,
max_dimension):
"""Compute new static shape for resize_to_range method."""
image_shape = image.get_shape().as_list()
orig_height = image_shape[0]
orig_width = image_shape[1]
orig_min_dim = min(orig_height, orig_width)
# Calculates the larger of the possible sizes
large_scale_factor = min_dimension / float(orig_min_dim)
# Scaling orig_(height|width) by large_scale_factor will make the smaller
# dimension equal to min_dimension, save for floating point rounding errors.
# For reasonably-sized images, taking the nearest integer will reliably
# eliminate this error.
large_height = int(round(orig_height * large_scale_factor))
large_width = int(round(orig_width * large_scale_factor))
large_size = [large_height, large_width]
if max_dimension:
# Calculates the smaller of the possible sizes, use that if the larger
# is too big.
orig_max_dim = max(orig_height, orig_width)
small_scale_factor = max_dimension / float(orig_max_dim)
# Scaling orig_(height|width) by small_scale_factor will make the larger
# dimension equal to max_dimension, save for floating point rounding
# errors. For reasonably-sized images, taking the nearest integer will
# reliably eliminate this error.
small_height = int(round(orig_height * small_scale_factor))
small_width = int(round(orig_width * small_scale_factor))
small_size = [small_height, small_width]
new_size = large_size
if max(large_size) > max_dimension:
new_size = small_size
else:
new_size = large_size
return tf.constant(new_size)
def _compute_new_dynamic_size(image,
min_dimension,
max_dimension):
"""Compute new dynamic shape for resize_to_range method."""
image_shape = tf.shape(image)
orig_height = tf.to_float(image_shape[0])
orig_width = tf.to_float(image_shape[1])
orig_min_dim = tf.minimum(orig_height, orig_width)
# Calculates the larger of the possible sizes
min_dimension = tf.constant(min_dimension, dtype=tf.float32)
large_scale_factor = min_dimension / orig_min_dim
# Scaling orig_(height|width) by large_scale_factor will make the smaller
# dimension equal to min_dimension, save for floating point rounding errors.
# For reasonably-sized images, taking the nearest integer will reliably
# eliminate this error.
large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
large_size = tf.stack([large_height, large_width])
if max_dimension:
# Calculates the smaller of the possible sizes, use that if the larger
# is too big.
orig_max_dim = tf.maximum(orig_height, orig_width)
max_dimension = tf.constant(max_dimension, dtype=tf.float32)
small_scale_factor = max_dimension / orig_max_dim
# Scaling orig_(height|width) by small_scale_factor will make the larger
# dimension equal to max_dimension, save for floating point rounding
# errors. For reasonably-sized images, taking the nearest integer will
# reliably eliminate this error.
small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
small_size = tf.stack([small_height, small_width])
new_size = tf.cond(
tf.to_float(tf.reduce_max(large_size)) > max_dimension,
lambda: small_size, lambda: large_size)
else:
new_size = large_size
return new_size
def resize_to_range(image,
masks=None,
min_dimension=None,
......@@ -1295,64 +1371,22 @@ def resize_to_range(image,
raise ValueError('Image should be 3D tensor')
with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
image_shape = tf.shape(image)
orig_height = tf.to_float(image_shape[0])
orig_width = tf.to_float(image_shape[1])
orig_min_dim = tf.minimum(orig_height, orig_width)
# Calculates the larger of the possible sizes
min_dimension = tf.constant(min_dimension, dtype=tf.float32)
large_scale_factor = min_dimension / orig_min_dim
# Scaling orig_(height|width) by large_scale_factor will make the smaller
# dimension equal to min_dimension, save for floating point rounding errors.
# For reasonably-sized images, taking the nearest integer will reliably
# eliminate this error.
large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
large_size = tf.stack([large_height, large_width])
if max_dimension:
# Calculates the smaller of the possible sizes, use that if the larger
# is too big.
orig_max_dim = tf.maximum(orig_height, orig_width)
max_dimension = tf.constant(max_dimension, dtype=tf.float32)
small_scale_factor = max_dimension / orig_max_dim
# Scaling orig_(height|width) by small_scale_factor will make the larger
# dimension equal to max_dimension, save for floating point rounding
# errors. For reasonably-sized images, taking the nearest integer will
# reliably eliminate this error.
small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
small_size = tf.stack([small_height, small_width])
new_size = tf.cond(
tf.to_float(tf.reduce_max(large_size)) > max_dimension,
lambda: small_size, lambda: large_size)
if image.get_shape().is_fully_defined():
new_size = _compute_new_static_size(image, min_dimension,
max_dimension)
else:
new_size = large_size
new_size = _compute_new_dynamic_size(image, min_dimension,
max_dimension)
new_image = tf.image.resize_images(image, new_size,
align_corners=align_corners)
result = new_image
if masks is not None:
num_instances = tf.shape(masks)[0]
def resize_masks_branch():
new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor(
new_masks, new_size, align_corners=align_corners)
new_masks = tf.squeeze(new_masks, axis=3)
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0,
resize_masks_branch,
reshape_masks_branch)
result = [new_image, masks]
new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor(new_masks, new_size,
align_corners=align_corners)
new_masks = tf.squeeze(new_masks, 3)
result = [new_image, new_masks]
return result
......
......@@ -1395,7 +1395,7 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(expected_images_shape_,
resized_images_shape_)
def testResizeToRange(self):
def testResizeToRangePreservesStaticSpatialShape(self):
"""Tests image resizing, checking output sizes."""
in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
min_dim = 50
......@@ -1406,13 +1406,27 @@ class PreprocessorTest(tf.test.TestCase):
in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
def testResizeToRangeWithDynamicSpatialShape(self):
"""Tests image resizing, checking output sizes."""
in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
min_dim = 50
max_dim = 100
expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
out_image = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
out_image_shape = sess.run(out_image_shape)
out_image_shape = sess.run(out_image_shape,
feed_dict={in_image:
np.random.randn(*in_shape)})
self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithMasks(self):
def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
......@@ -1430,30 +1444,25 @@ class PreprocessorTest(tf.test.TestCase):
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
def testResizeToRangeWithNoInstanceMask(self):
def testResizeToRangeWithMasksAndDynamicSpatialShape(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
min_dim = 50
max_dim = 100
expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
......@@ -1462,38 +1471,15 @@ class PreprocessorTest(tf.test.TestCase):
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithMasks(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
height = 50
width = 100
expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
[out_image_shape, out_masks_shape],
feed_dict={
in_image: np.random.randn(*in_image_shape),
in_masks: np.random.randn(*in_masks_shape)
})
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithNoInstanceMask(self):
def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
......
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'aeroplane'
......
item {
id: 0
name: 'none_of_the_above'
}
item {
id: 1
name: 'Abyssinian'
......
......@@ -16,16 +16,19 @@
r"""Tool to export an object detection model for inference.
Prepares an object detection tensorflow graph for inference using model
configuration and an optional trained checkpoint. Outputs either an inference
graph or a SavedModel (https://tensorflow.github.io/serving/serving_basic.html).
configuration and an optional trained checkpoint. Outputs inference
graph, associated checkpoint files, a frozen inference graph and a
SavedModel (https://tensorflow.github.io/serving/serving_basic.html).
The inference graph contains one of three input nodes depending on the user
specified option.
* `image_tensor`: Accepts a uint8 4-D tensor of shape [1, None, None, 3]
* `encoded_image_string_tensor`: Accepts a scalar string tensor of encoded PNG
or JPEG image.
* `tf_example`: Accepts a serialized TFExample proto. The batch size in this
case is always 1.
* `image_tensor`: Accepts a uint8 4-D tensor of shape [None, None, None, 3]
* `encoded_image_string_tensor`: Accepts a 1-D string tensor of shape [None]
containing encoded PNG or JPEG images. Image resolutions are expected to be
the same if more than 1 image is provided.
* `tf_example`: Accepts a 1-D string tensor of shape [None] containing
serialized TFExample protos. Image resolutions are expected to be the same
if more than 1 image is provided.
and the following output nodes returned by the model.postprocess(..):
* `num_detections`: Outputs float32 tensors of the form [batch]
......@@ -41,23 +44,27 @@ and the following output nodes returned by the model.postprocess(..):
masks for each box if its present in the dictionary of postprocessed
tensors returned by the model.
Note that currently `batch` is always 1, but we will support `batch` > 1 in
the future.
Optionally, one can freeze the graph by converting the weights in the provided
checkpoint as graph constants thereby eliminating the need to use a checkpoint
file during inference.
Note that this tool uses `use_moving_averages` from eval_config to decide
which weights to freeze.
Notes:
* This tool uses `use_moving_averages` from eval_config to decide which
weights to freeze.
Example Usage:
--------------
python export_inference_graph \
--input_type image_tensor \
--pipeline_config_path path/to/ssd_inception_v2.config \
--checkpoint_path path/to/model-ckpt \
--inference_graph_path path/to/inference_graph.pb
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
The expected output would be in the directory
path/to/exported_model_directory (which is created if it does not exist)
with contents:
- graph.pbtxt
- model.ckpt.data-00000-of-00001
- model.ckpt.info
- model.ckpt.meta
- frozen_inference_graph.pb
+ saved_model (a directory)
"""
import tensorflow as tf
from google.protobuf import text_format
......@@ -70,31 +77,29 @@ flags = tf.app.flags
flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
'one of [`image_tensor`, `encoded_image_string_tensor`, '
'`tf_example`]')
flags.DEFINE_string('pipeline_config_path', '',
flags.DEFINE_string('pipeline_config_path', None,
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
flags.DEFINE_string('checkpoint_path', '', 'Optional path to checkpoint file. '
'If provided, bakes the weights from the checkpoint into '
'the graph.')
flags.DEFINE_string('inference_graph_path', '', 'Path to write the output '
'inference graph.')
flags.DEFINE_bool('export_as_saved_model', False, 'Whether the exported graph '
'should be saved as a SavedModel')
flags.DEFINE_string('trained_checkpoint_prefix', None,
'Path to trained checkpoint, typically of the form '
'path/to/model.ckpt')
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
FLAGS = flags.FLAGS
def main(_):
assert FLAGS.pipeline_config_path, 'TrainEvalPipelineConfig missing.'
assert FLAGS.inference_graph_path, 'Inference graph path missing.'
assert FLAGS.input_type, 'Input type missing.'
assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
assert FLAGS.trained_checkpoint_prefix, (
'`trained_checkpoint_prefix` is missing')
assert FLAGS.output_directory, '`output_directory` is missing'
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
exporter.export_inference_graph(FLAGS.input_type, pipeline_config,
FLAGS.checkpoint_path,
FLAGS.inference_graph_path,
FLAGS.export_as_saved_model)
exporter.export_inference_graph(
FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_prefix,
FLAGS.output_directory)
if __name__ == '__main__':
......
......@@ -17,6 +17,7 @@
import logging
import os
import tensorflow as tf
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python import pywrap_tensorflow
from tensorflow.python.client import session
from tensorflow.python.framework import graph_util
......@@ -42,6 +43,7 @@ def freeze_graph_with_def_protos(
filename_tensor_name,
clear_devices,
initializer_nodes,
optimize_graph=False,
variable_names_blacklist=''):
"""Converts all variables in a graph and checkpoint into constants."""
del restore_op_name, filename_tensor_name # Unused by updated loading code.
......@@ -61,86 +63,106 @@ def freeze_graph_with_def_protos(
for node in input_graph_def.node:
node.device = ''
_ = importer.import_graph_def(input_graph_def, name='')
with session.Session() as sess:
if input_saver_def:
saver = saver_lib.Saver(saver_def=input_saver_def)
saver.restore(sess, input_checkpoint)
with tf.Graph().as_default():
tf.import_graph_def(input_graph_def, name='')
if optimize_graph:
logging.info('Graph Rewriter optimizations enabled')
rewrite_options = rewriter_config_pb2.RewriterConfig(
optimize_tensor_layout=True)
rewrite_options.optimizers.append('pruning')
rewrite_options.optimizers.append('constfold')
rewrite_options.optimizers.append('layout')
graph_options = tf.GraphOptions(
rewrite_options=rewrite_options, infer_shapes=True)
else:
var_list = {}
reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
var_to_shape_map = reader.get_variable_to_shape_map()
for key in var_to_shape_map:
try:
tensor = sess.graph.get_tensor_by_name(key + ':0')
except KeyError:
# This tensor doesn't exist in the graph (for example it's
# 'global_step' or a similar housekeeping element) so skip it.
continue
var_list[key] = tensor
saver = saver_lib.Saver(var_list=var_list)
saver.restore(sess, input_checkpoint)
if initializer_nodes:
sess.run(initializer_nodes)
variable_names_blacklist = (variable_names_blacklist.split(',') if
variable_names_blacklist else None)
output_graph_def = graph_util.convert_variables_to_constants(
sess,
input_graph_def,
output_node_names.split(','),
variable_names_blacklist=variable_names_blacklist)
logging.info('Graph Rewriter optimizations disabled')
graph_options = tf.GraphOptions()
config = tf.ConfigProto(graph_options=graph_options)
with session.Session(config=config) as sess:
if input_saver_def:
saver = saver_lib.Saver(saver_def=input_saver_def)
saver.restore(sess, input_checkpoint)
else:
var_list = {}
reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
var_to_shape_map = reader.get_variable_to_shape_map()
for key in var_to_shape_map:
try:
tensor = sess.graph.get_tensor_by_name(key + ':0')
except KeyError:
# This tensor doesn't exist in the graph (for example it's
# 'global_step' or a similar housekeeping element) so skip it.
continue
var_list[key] = tensor
saver = saver_lib.Saver(var_list=var_list)
saver.restore(sess, input_checkpoint)
if initializer_nodes:
sess.run(initializer_nodes)
variable_names_blacklist = (variable_names_blacklist.split(',') if
variable_names_blacklist else None)
output_graph_def = graph_util.convert_variables_to_constants(
sess,
input_graph_def,
output_node_names.split(','),
variable_names_blacklist=variable_names_blacklist)
return output_graph_def
def get_frozen_graph_def(inference_graph_def, use_moving_averages,
input_checkpoint, output_node_names):
"""Freezes all variables in a graph definition."""
saver = None
if use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
else:
saver = tf.train.Saver()
frozen_graph_def = freeze_graph_with_def_protos(
input_graph_def=inference_graph_def,
input_saver_def=saver.as_saver_def(),
input_checkpoint=input_checkpoint,
output_node_names=output_node_names,
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
clear_devices=True,
initializer_nodes='')
return frozen_graph_def
def _image_tensor_input_placeholder():
"""Returns placeholder and input node that accepts a batch of uint8 images."""
input_tensor = tf.placeholder(dtype=tf.uint8,
shape=(None, None, None, 3),
name='image_tensor')
return input_tensor, input_tensor
# TODO: Support batch tf example inputs.
def _tf_example_input_placeholder():
tf_example_placeholder = tf.placeholder(
tf.string, shape=[], name='tf_example')
tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
tf_example_placeholder)
image = tensor_dict[fields.InputDataFields.image]
return tf.expand_dims(image, axis=0)
"""Returns input that accepts a batch of strings with tf examples.
def _image_tensor_input_placeholder():
return tf.placeholder(dtype=tf.uint8,
shape=(1, None, None, 3),
name='image_tensor')
Returns:
a tuple of placeholder and input nodes that output decoded images.
"""
batch_tf_example_placeholder = tf.placeholder(
tf.string, shape=[None], name='tf_example')
def decode(tf_example_string_tensor):
tensor_dict = tf_example_decoder.TfExampleDecoder().decode(
tf_example_string_tensor)
image_tensor = tensor_dict[fields.InputDataFields.image]
return image_tensor
return (batch_tf_example_placeholder,
tf.map_fn(decode,
elems=batch_tf_example_placeholder,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False))
def _encoded_image_string_tensor_input_placeholder():
image_str = tf.placeholder(dtype=tf.string,
shape=[],
name='encoded_image_string_tensor')
image_tensor = tf.image.decode_image(image_str, channels=3)
image_tensor.set_shape((None, None, 3))
return tf.expand_dims(image_tensor, axis=0)
"""Returns input that accepts a batch of PNG or JPEG strings.
Returns:
a tuple of placeholder and input nodes that output decoded images.
"""
batch_image_str_placeholder = tf.placeholder(
dtype=tf.string,
shape=[None],
name='encoded_image_string_tensor')
def decode(encoded_image_string_tensor):
image_tensor = tf.image.decode_image(encoded_image_string_tensor,
channels=3)
image_tensor.set_shape((None, None, 3))
return image_tensor
return (batch_image_str_placeholder,
tf.map_fn(
decode,
elems=batch_image_str_placeholder,
dtype=tf.uint8,
parallel_iterations=32,
back_prop=False))
input_placeholder_fn_map = {
......@@ -151,7 +173,8 @@ input_placeholder_fn_map = {
}
def _add_output_tensor_nodes(postprocessed_tensors):
def _add_output_tensor_nodes(postprocessed_tensors,
output_collection_name='inference_op'):
"""Adds output nodes for detection boxes and scores.
Adds the following nodes for output tensors -
......@@ -174,6 +197,7 @@ def _add_output_tensor_nodes(postprocessed_tensors):
'detection_masks': [batch, max_detections, mask_height, mask_width]
(optional).
'num_detections': [batch]
output_collection_name: Name of collection to add output tensors to.
Returns:
A tensor dict containing the added output tensor nodes.
......@@ -191,53 +215,29 @@ def _add_output_tensor_nodes(postprocessed_tensors):
outputs['num_detections'] = tf.identity(num_detections, name='num_detections')
if masks is not None:
outputs['detection_masks'] = tf.identity(masks, name='detection_masks')
for output_key in outputs:
tf.add_to_collection(output_collection_name, outputs[output_key])
if masks is not None:
tf.add_to_collection(output_collection_name, outputs['detection_masks'])
return outputs
def _write_inference_graph(inference_graph_path,
checkpoint_path=None,
use_moving_averages=False,
output_node_names=(
'num_detections,detection_scores,'
'detection_boxes,detection_classes')):
"""Writes inference graph to disk with the option to bake in weights.
If checkpoint_path is not None bakes the weights into the graph thereby
eliminating the need of checkpoint files during inference. If the model
was trained with moving averages, setting use_moving_averages to true
restores the moving averages, otherwise the original set of variables
is restored.
def _write_frozen_graph(frozen_graph_path, frozen_graph_def):
"""Writes frozen graph to disk.
Args:
inference_graph_path: Path to write inference graph.
checkpoint_path: Optional path to the checkpoint file.
use_moving_averages: Whether to export the original or the moving averages
of the trainable variables from the checkpoint.
output_node_names: Output tensor names, defaults are: num_detections,
detection_scores, detection_boxes, detection_classes.
frozen_graph_path: Path to write inference graph.
frozen_graph_def: tf.GraphDef holding frozen graph.
"""
inference_graph_def = tf.get_default_graph().as_graph_def()
if checkpoint_path:
output_graph_def = get_frozen_graph_def(
inference_graph_def=inference_graph_def,
use_moving_averages=use_moving_averages,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names,
)
with gfile.GFile(inference_graph_path, 'wb') as f:
f.write(output_graph_def.SerializeToString())
logging.info('%d ops in the final graph.', len(output_graph_def.node))
return
tf.train.write_graph(inference_graph_def,
os.path.dirname(inference_graph_path),
os.path.basename(inference_graph_path),
as_text=False)
def _write_saved_model(inference_graph_path, inputs, outputs,
checkpoint_path=None, use_moving_averages=False):
with gfile.GFile(frozen_graph_path, 'wb') as f:
f.write(frozen_graph_def.SerializeToString())
logging.info('%d ops in the final graph.', len(frozen_graph_def.node))
def _write_saved_model(saved_model_path,
frozen_graph_def,
inputs,
outputs):
"""Writes SavedModel to disk.
If checkpoint_path is not None bakes the weights into the graph thereby
......@@ -247,30 +247,17 @@ def _write_saved_model(inference_graph_path, inputs, outputs,
is restored.
Args:
inference_graph_path: Path to write inference graph.
saved_model_path: Path to write SavedModel.
frozen_graph_def: tf.GraphDef holding frozen graph.
inputs: The input image tensor to use for detection.
outputs: A tensor dictionary containing the outputs of a DetectionModel.
checkpoint_path: Optional path to the checkpoint file.
use_moving_averages: Whether to export the original or the moving averages
of the trainable variables from the checkpoint.
"""
inference_graph_def = tf.get_default_graph().as_graph_def()
checkpoint_graph_def = None
if checkpoint_path:
output_node_names = ','.join(outputs.keys())
checkpoint_graph_def = get_frozen_graph_def(
inference_graph_def=inference_graph_def,
use_moving_averages=use_moving_averages,
input_checkpoint=checkpoint_path,
output_node_names=output_node_names
)
with tf.Graph().as_default():
with session.Session() as sess:
tf.import_graph_def(checkpoint_graph_def)
tf.import_graph_def(frozen_graph_def, name='')
builder = tf.saved_model.builder.SavedModelBuilder(inference_graph_path)
builder = tf.saved_model.builder.SavedModelBuilder(saved_model_path)
tensor_info_inputs = {
'inputs': tf.saved_model.utils.build_tensor_info(inputs)}
......@@ -294,46 +281,96 @@ def _write_saved_model(inference_graph_path, inputs, outputs,
builder.save()
def _write_graph_and_checkpoint(inference_graph_def,
model_path,
input_saver_def,
trained_checkpoint_prefix):
for node in inference_graph_def.node:
node.device = ''
with tf.Graph().as_default():
tf.import_graph_def(inference_graph_def, name='')
with session.Session() as sess:
saver = saver_lib.Saver(saver_def=input_saver_def,
save_relative_paths=True)
saver.restore(sess, trained_checkpoint_prefix)
saver.save(sess, model_path)
def _export_inference_graph(input_type,
detection_model,
use_moving_averages,
checkpoint_path,
inference_graph_path,
export_as_saved_model=False):
trained_checkpoint_prefix,
output_directory,
optimize_graph=False,
output_collection_name='inference_op'):
"""Export helper."""
tf.gfile.MakeDirs(output_directory)
frozen_graph_path = os.path.join(output_directory,
'frozen_inference_graph.pb')
saved_model_path = os.path.join(output_directory, 'saved_model')
model_path = os.path.join(output_directory, 'model.ckpt')
if input_type not in input_placeholder_fn_map:
raise ValueError('Unknown input type: {}'.format(input_type))
inputs = tf.to_float(input_placeholder_fn_map[input_type]())
placeholder_tensor, input_tensors = input_placeholder_fn_map[input_type]()
inputs = tf.to_float(input_tensors)
preprocessed_inputs = detection_model.preprocess(inputs)
output_tensors = detection_model.predict(preprocessed_inputs)
postprocessed_tensors = detection_model.postprocess(output_tensors)
outputs = _add_output_tensor_nodes(postprocessed_tensors)
out_node_names = list(outputs.keys())
if export_as_saved_model:
_write_saved_model(inference_graph_path, inputs, outputs, checkpoint_path,
use_moving_averages)
outputs = _add_output_tensor_nodes(postprocessed_tensors,
output_collection_name)
saver = None
if use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
else:
_write_inference_graph(inference_graph_path, checkpoint_path,
use_moving_averages,
output_node_names=','.join(out_node_names))
saver = tf.train.Saver()
input_saver_def = saver.as_saver_def()
_write_graph_and_checkpoint(
inference_graph_def=tf.get_default_graph().as_graph_def(),
model_path=model_path,
input_saver_def=input_saver_def,
trained_checkpoint_prefix=trained_checkpoint_prefix)
frozen_graph_def = freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=trained_checkpoint_prefix,
output_node_names=','.join(outputs.keys()),
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
clear_devices=True,
optimize_graph=optimize_graph,
initializer_nodes='')
_write_frozen_graph(frozen_graph_path, frozen_graph_def)
_write_saved_model(saved_model_path, frozen_graph_def, placeholder_tensor,
outputs)
def export_inference_graph(input_type, pipeline_config, checkpoint_path,
inference_graph_path, export_as_saved_model=False):
def export_inference_graph(input_type,
pipeline_config,
trained_checkpoint_prefix,
output_directory,
optimize_graph=False,
output_collection_name='inference_op'):
"""Exports inference graph for the model specified in the pipeline config.
Args:
input_type: Type of input for the graph. Can be one of [`image_tensor`,
`tf_example`].
pipeline_config: pipeline_pb2.TrainAndEvalPipelineConfig proto.
checkpoint_path: Path to the checkpoint file to freeze.
inference_graph_path: Path to write inference graph to.
export_as_saved_model: If the model should be exported as a SavedModel. If
false, it is saved as an inference graph.
trained_checkpoint_prefix: Path to the trained checkpoint file.
output_directory: Path to write outputs.
optimize_graph: Whether to optimize graph using Grappler.
output_collection_name: Name of collection to add output tensors to.
If None, does not add output tensors to a collection.
"""
detection_model = model_builder.build(pipeline_config.model,
is_training=False)
_export_inference_graph(input_type, detection_model,
pipeline_config.eval_config.use_moving_averages,
checkpoint_path, inference_graph_path,
export_as_saved_model)
trained_checkpoint_prefix, output_directory,
optimize_graph, output_collection_name)
......@@ -43,18 +43,22 @@ class FakeModel(model.DetectionModel):
def postprocess(self, prediction_dict):
with tf.control_dependencies(prediction_dict.values()):
postprocessed_tensors = {
'detection_boxes': tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]], tf.float32),
'detection_scores': tf.constant([[0.7, 0.6]], tf.float32),
'detection_classes': tf.constant([[0, 1]], tf.float32),
'num_detections': tf.constant([2], tf.float32)
'detection_boxes': tf.constant([[[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]],
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.0, 0.0, 0.0]]], tf.float32),
'detection_scores': tf.constant([[0.7, 0.6],
[0.9, 0.0]], tf.float32),
'detection_classes': tf.constant([[0, 1],
[1, 0]], tf.float32),
'num_detections': tf.constant([2, 1], tf.float32)
}
if self._add_detection_masks:
postprocessed_tensors['detection_masks'] = tf.constant(
np.arange(32).reshape([2, 4, 4]), tf.float32)
np.arange(64).reshape([2, 2, 4, 4]), tf.float32)
return postprocessed_tensors
def restore_fn(self, checkpoint_path, from_detection_checkpoint):
def restore_map(self, checkpoint_path, from_detection_checkpoint):
pass
def loss(self, prediction_dict):
......@@ -69,7 +73,7 @@ class ExportInferenceGraphTest(tf.test.TestCase):
with g.as_default():
mock_model = FakeModel()
preprocessed_inputs = mock_model.preprocess(
tf.ones([1, 3, 4, 3], tf.float32))
tf.placeholder(tf.float32, shape=[None, None, None, 3]))
predictions = mock_model.predict(preprocessed_inputs)
mock_model.postprocess(predictions)
if use_moving_averages:
......@@ -103,71 +107,62 @@ class ExportInferenceGraphTest(tf.test.TestCase):
return example
def test_export_graph_with_image_tensor_input(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=False)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
output_directory = os.path.join(tmp_dir, 'output')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
def test_export_graph_with_tf_example_input(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=False)
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
output_directory = os.path.join(tmp_dir, 'output')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
def test_export_graph_with_encoded_image_string_input(self):
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pbtxt')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
checkpoint_path=None,
inference_graph_path=inference_graph_path)
def test_export_frozen_graph(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
output_directory = os.path.join(tmp_dir, 'output')
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='image_tensor',
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
def test_export_frozen_graph_with_moving_averages(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
def test_export_graph_with_moving_averages(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
output_directory = os.path.join(tmp_dir, 'output')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel()
......@@ -176,15 +171,17 @@ class ExportInferenceGraphTest(tf.test.TestCase):
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
def test_export_model_with_all_output_nodes(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
inference_graph_path = os.path.join(output_directory,
'frozen_inference_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
......@@ -192,8 +189,8 @@ class ExportInferenceGraphTest(tf.test.TestCase):
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph):
inference_graph.get_tensor_by_name('image_tensor:0')
......@@ -204,11 +201,13 @@ class ExportInferenceGraphTest(tf.test.TestCase):
inference_graph.get_tensor_by_name('num_detections:0')
def test_export_model_with_detection_only_nodes(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
inference_graph_path = os.path.join(output_directory,
'frozen_inference_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=False)
......@@ -216,8 +215,8 @@ class ExportInferenceGraphTest(tf.test.TestCase):
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph):
inference_graph.get_tensor_by_name('image_tensor:0')
......@@ -229,11 +228,13 @@ class ExportInferenceGraphTest(tf.test.TestCase):
inference_graph.get_tensor_by_name('detection_masks:0')
def test_export_and_run_inference_with_image_tensor(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
inference_graph_path = os.path.join(output_directory,
'frozen_inference_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
......@@ -242,8 +243,8 @@ class ExportInferenceGraphTest(tf.test.TestCase):
exporter.export_inference_graph(
input_type='image_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
inference_graph = self._load_inference_graph(inference_graph_path)
with self.test_session(graph=inference_graph) as sess:
......@@ -253,15 +254,19 @@ class ExportInferenceGraphTest(tf.test.TestCase):
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
(boxes_np, scores_np, classes_np, masks_np, num_detections_np) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={image_tensor: np.ones((1, 4, 4, 3)).astype(np.uint8)})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
feed_dict={image_tensor: np.ones((2, 4, 4, 3)).astype(np.uint8)})
self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]],
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.0, 0.0, 0.0]]])
self.assertAllClose(scores_np, [[0.7, 0.6],
[0.9, 0.0]])
self.assertAllClose(classes_np, [[1, 2],
[2, 1]])
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
def _create_encoded_image_string(self, image_array_np, encoding_format):
od_graph = tf.Graph()
......@@ -276,11 +281,13 @@ class ExportInferenceGraphTest(tf.test.TestCase):
return encoded_string.eval()
def test_export_and_run_inference_with_encoded_image_string_tensor(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
inference_graph_path = os.path.join(output_directory,
'frozen_inference_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
......@@ -289,8 +296,8 @@ class ExportInferenceGraphTest(tf.test.TestCase):
exporter.export_inference_graph(
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
inference_graph = self._load_inference_graph(inference_graph_path)
jpg_image_str = self._create_encoded_image_string(
......@@ -306,23 +313,69 @@ class ExportInferenceGraphTest(tf.test.TestCase):
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
for image_str in [jpg_image_str, png_image_str]:
image_str_batch_np = np.hstack([image_str]* 2)
(boxes_np, scores_np, classes_np, masks_np,
num_detections_np) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={image_str_tensor: image_str})
self.assertAllClose(boxes_np, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores_np, [[0.7, 0.6]])
self.assertAllClose(classes_np, [[1, 2]])
self.assertAllClose(masks_np, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections_np, [2])
feed_dict={image_str_tensor: image_str_batch_np})
self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]],
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.0, 0.0, 0.0]]])
self.assertAllClose(scores_np, [[0.7, 0.6],
[0.9, 0.0]])
self.assertAllClose(classes_np, [[1, 2],
[2, 1]])
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
def test_raise_runtime_error_on_images_with_different_sizes(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
inference_graph_path = os.path.join(output_directory,
'frozen_inference_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='encoded_image_string_tensor',
pipeline_config=pipeline_config,
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
inference_graph = self._load_inference_graph(inference_graph_path)
large_image = self._create_encoded_image_string(
np.ones((4, 4, 3)).astype(np.uint8), 'jpg')
small_image = self._create_encoded_image_string(
np.ones((2, 2, 3)).astype(np.uint8), 'jpg')
image_str_batch_np = np.hstack([large_image, small_image])
with self.test_session(graph=inference_graph) as sess:
image_str_tensor = inference_graph.get_tensor_by_name(
'encoded_image_string_tensor:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
scores = inference_graph.get_tensor_by_name('detection_scores:0')
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
'^TensorArray has inconsistent shapes.'):
sess.run([boxes, scores, classes, masks, num_detections],
feed_dict={image_str_tensor: image_str_batch_np})
def test_export_and_run_inference_with_tf_example(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'exported_graph.pb')
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=True)
output_directory = os.path.join(tmp_dir, 'output')
inference_graph_path = os.path.join(output_directory,
'frozen_inference_graph.pb')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
......@@ -331,10 +384,12 @@ class ExportInferenceGraphTest(tf.test.TestCase):
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
inference_graph = self._load_inference_graph(inference_graph_path)
tf_example_np = np.expand_dims(self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8)), axis=0)
with self.test_session(graph=inference_graph) as sess:
tf_example = inference_graph.get_tensor_by_name('tf_example:0')
boxes = inference_graph.get_tensor_by_name('detection_boxes:0')
......@@ -342,23 +397,27 @@ class ExportInferenceGraphTest(tf.test.TestCase):
classes = inference_graph.get_tensor_by_name('detection_classes:0')
masks = inference_graph.get_tensor_by_name('detection_masks:0')
num_detections = inference_graph.get_tensor_by_name('num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
(boxes_np, scores_np, classes_np, masks_np, num_detections_np) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={tf_example: self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
feed_dict={tf_example: tf_example_np})
self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]],
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.0, 0.0, 0.0]]])
self.assertAllClose(scores_np, [[0.7, 0.6],
[0.9, 0.0]])
self.assertAllClose(classes_np, [[1, 2],
[2, 1]])
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
def test_export_saved_model_and_run_inference(self):
checkpoint_path = os.path.join(self.get_temp_dir(), 'model-ckpt')
self._save_checkpoint_from_mock_model(checkpoint_path,
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=False)
inference_graph_path = os.path.join(self.get_temp_dir(),
'saved_model')
output_directory = os.path.join(tmp_dir, 'output')
saved_model_path = os.path.join(output_directory, 'saved_model')
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
......@@ -368,30 +427,84 @@ class ExportInferenceGraphTest(tf.test.TestCase):
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
checkpoint_path=checkpoint_path,
inference_graph_path=inference_graph_path,
export_as_saved_model=True)
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
tf_example_np = np.hstack([self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))] * 2)
with tf.Graph().as_default() as od_graph:
with self.test_session(graph=od_graph) as sess:
tf.saved_model.loader.load(
sess, [tf.saved_model.tag_constants.SERVING], inference_graph_path)
tf_example = od_graph.get_tensor_by_name('import/tf_example:0')
boxes = od_graph.get_tensor_by_name('import/detection_boxes:0')
scores = od_graph.get_tensor_by_name('import/detection_scores:0')
classes = od_graph.get_tensor_by_name('import/detection_classes:0')
masks = od_graph.get_tensor_by_name('import/detection_masks:0')
num_detections = od_graph.get_tensor_by_name('import/num_detections:0')
(boxes, scores, classes, masks, num_detections) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={tf_example: self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))})
self.assertAllClose(boxes, [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]])
self.assertAllClose(scores, [[0.7, 0.6]])
self.assertAllClose(classes, [[1, 2]])
self.assertAllClose(masks, np.arange(32).reshape([2, 4, 4]))
self.assertAllClose(num_detections, [2])
sess, [tf.saved_model.tag_constants.SERVING], saved_model_path)
tf_example = od_graph.get_tensor_by_name('tf_example:0')
boxes = od_graph.get_tensor_by_name('detection_boxes:0')
scores = od_graph.get_tensor_by_name('detection_scores:0')
classes = od_graph.get_tensor_by_name('detection_classes:0')
masks = od_graph.get_tensor_by_name('detection_masks:0')
num_detections = od_graph.get_tensor_by_name('num_detections:0')
(boxes_np, scores_np, classes_np, masks_np,
num_detections_np) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={tf_example: tf_example_np})
self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]],
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.0, 0.0, 0.0]]])
self.assertAllClose(scores_np, [[0.7, 0.6],
[0.9, 0.0]])
self.assertAllClose(classes_np, [[1, 2],
[2, 1]])
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
def test_export_checkpoint_and_run_inference(self):
tmp_dir = self.get_temp_dir()
trained_checkpoint_prefix = os.path.join(tmp_dir, 'model.ckpt')
self._save_checkpoint_from_mock_model(trained_checkpoint_prefix,
use_moving_averages=False)
output_directory = os.path.join(tmp_dir, 'output')
model_path = os.path.join(output_directory, 'model.ckpt')
meta_graph_path = model_path + '.meta'
with mock.patch.object(
model_builder, 'build', autospec=True) as mock_builder:
mock_builder.return_value = FakeModel(add_detection_masks=True)
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.eval_config.use_moving_averages = False
exporter.export_inference_graph(
input_type='tf_example',
pipeline_config=pipeline_config,
trained_checkpoint_prefix=trained_checkpoint_prefix,
output_directory=output_directory)
tf_example_np = np.hstack([self._create_tf_example(
np.ones((4, 4, 3)).astype(np.uint8))] * 2)
with tf.Graph().as_default() as od_graph:
with self.test_session(graph=od_graph) as sess:
new_saver = tf.train.import_meta_graph(meta_graph_path)
new_saver.restore(sess, model_path)
tf_example = od_graph.get_tensor_by_name('tf_example:0')
boxes = od_graph.get_tensor_by_name('detection_boxes:0')
scores = od_graph.get_tensor_by_name('detection_scores:0')
classes = od_graph.get_tensor_by_name('detection_classes:0')
masks = od_graph.get_tensor_by_name('detection_masks:0')
num_detections = od_graph.get_tensor_by_name('num_detections:0')
(boxes_np, scores_np, classes_np, masks_np,
num_detections_np) = sess.run(
[boxes, scores, classes, masks, num_detections],
feed_dict={tf_example: tf_example_np})
self.assertAllClose(boxes_np, [[[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.8, 0.8]],
[[0.5, 0.5, 1.0, 1.0],
[0.0, 0.0, 0.0, 0.0]]])
self.assertAllClose(scores_np, [[0.7, 0.6],
[0.9, 0.0]])
self.assertAllClose(classes_np, [[1, 2],
[2, 1]])
self.assertAllClose(masks_np, np.arange(64).reshape([2, 2, 4, 4]))
self.assertAllClose(num_detections_np, [2, 1])
if __name__ == '__main__':
tf.test.main()
......@@ -157,6 +157,6 @@ number of workers, gpu type).
## Configuring the Evaluator
Currently evaluation is fixed to generating metrics as defined by the PASCAL
VOC challenge. The parameters for `eval_config` are set to reasonable defaults
and typically do not need to be configured.
Currently evaluation is fixed to generating metrics as defined by the PASCAL VOC
challenge. The parameters for `eval_config` are set to reasonable defaults and
typically do not need to be configured.
......@@ -74,6 +74,6 @@ to avoid running this manually, you can add it as a new line to the end of your
You can test that you have correctly installed the Tensorflow Object Detection\
API by running the following command:
``` bash
```bash
python object_detection/builders/model_builder_test.py
```
......@@ -7,39 +7,51 @@ TFRecords.
## Generating the PASCAL VOC TFRecord files.
The raw 2012 PASCAL VOC data set can be downloaded
The raw 2012 PASCAL VOC data set is located
[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar).
Extract the tar file and run the `create_pascal_tf_record` script:
To download, extract and convert it to TFRecords, run the following commands
below:
```bash
# From tensorflow/models/object_detection
# From tensorflow/models
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
tar -xvf VOCtrainval_11-May-2012.tar
python create_pascal_tf_record.py --data_dir=VOCdevkit \
--year=VOC2012 --set=train --output_path=pascal_train.record
python create_pascal_tf_record.py --data_dir=VOCdevkit \
--year=VOC2012 --set=val --output_path=pascal_val.record
python object_detection/create_pascal_tf_record.py \
--label_map_path=object_detection/data/pascal_label_map.pbtxt \
--data_dir=VOCdevkit --year=VOC2012 --set=train \
--output_path=pascal_train.record
python object_detection/create_pascal_tf_record.py \
--label_map_path=object_detection/data/pascal_label_map.pbtxt \
--data_dir=VOCdevkit --year=VOC2012 --set=val \
--output_path=pascal_val.record
```
You should end up with two TFRecord files named `pascal_train.record` and
`pascal_val.record` in the `tensorflow/models/object_detection` directory.
`pascal_val.record` in the `tensorflow/models` directory.
The label map for the PASCAL VOC data set can be found at
`data/pascal_label_map.pbtxt`.
`object_detection/data/pascal_label_map.pbtxt`.
## Generating the Oxford-IIIT Pet TFRecord files.
The Oxford-IIIT Pet data set can be downloaded from
[their website](http://www.robots.ox.ac.uk/~vgg/data/pets/). Extract the tar
file and run the `create_pet_tf_record` script to generate TFRecords.
The Oxford-IIIT Pet data set is located
[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). To download, extract and
convert it to TFRecrods, run the following commands below:
```bash
# From tensorflow/models/object_detection
# From tensorflow/models
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
tar -xvf annotations.tar.gz
tar -xvf images.tar.gz
python create_pet_tf_record.py --data_dir=`pwd` --output_dir=`pwd`
python object_detection/create_pet_tf_record.py \
--label_map_path=object_detection/data/pet_label_map.pbtxt \
--data_dir=`pwd` \
--output_dir=`pwd`
```
You should end up with two TFRecord files named `pet_train.record` and
`pet_val.record` in the `tensorflow/models/object_detection` directory.
`pet_val.record` in the `tensorflow/models` directory.
The label map for the Pet dataset can be found at `data/pet_label_map.pbtxt`.
The label map for the Pet dataset can be found at
`object_detection/data/pet_label_map.pbtxt`.
......@@ -77,5 +77,5 @@ tensorboard --logdir=${PATH_TO_MODEL_DIRECTORY}
```
where `${PATH_TO_MODEL_DIRECTORY}` points to the directory that contains the
train and eval directories. Please note it make take Tensorboard a couple
minutes to populate with data.
train and eval directories. Please note it may take Tensorboard a couple minutes
to populate with data.
......@@ -11,5 +11,5 @@ jupyter notebook
```
The notebook should open in your favorite web browser. Click the
[`object_detection_tutorial.ipynb`](../object_detection_tutorial.ipynb) link
to open the demo.
[`object_detection_tutorial.ipynb`](../object_detection_tutorial.ipynb) link to
open the demo.
......@@ -88,7 +88,7 @@ training checkpoints and events will be written to and
Google Cloud Storage.
Users can monitor the progress of their training job on the [ML Engine
Dashboard](https://pantheon.corp.google.com/mlengine/jobs).
Dashboard](https://console.cloud.google.com/mlengine/jobs).
## Running an Evaluation Job on Cloud
......
......@@ -51,29 +51,35 @@ dataset for Oxford-IIIT Pets lives
[here](http://www.robots.ox.ac.uk/~vgg/data/pets/). You will need to download
both the image dataset [`images.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz)
and the groundtruth data [`annotations.tar.gz`](http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz)
to the `tensorflow/models` directory. This may take some time. After downloading
the tarballs, your `object_detection` directory should appear as follows:
to the `tensorflow/models` directory and unzip them. This may take some time.
``` bash
# From tensorflow/models/
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
tar -xvf images.tar.gz
tar -xvf annotations.tar.gz
```
After downloading the tarballs, your `tensorflow/models` directory should appear
as follows:
```lang-none
- images.tar.gz
- annotations.tar.gz
+ images/
+ annotations/
+ object_detection/
+ data/
- images.tar.gz
- annotations.tar.gz
- create_pet_tf_record.py
... other files and directories
... other files and directories
```
The Tensorflow Object Detection API expects data to be in the TFRecord format,
so we'll now run the `create_pet_tf_record` script to convert from the raw
Oxford-IIIT Pet dataset into TFRecords. Run the following commands from the
`object_detection` directory:
`tensorflow/models` directory:
``` bash
# From tensorflow/models/
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
tar -xvf annotations.tar.gz
tar -xvf images.tar.gz
python object_detection/create_pet_tf_record.py \
--label_map_path=object_detection/data/pet_label_map.pbtxt \
--data_dir=`pwd` \
......@@ -83,8 +89,8 @@ python object_detection/create_pet_tf_record.py \
Note: It is normal to see some warnings when running this script. You may ignore
them.
Two TFRecord files named `pet_train.record` and `pet_val.record` should be generated
in the `object_detection` directory.
Two TFRecord files named `pet_train.record` and `pet_val.record` should be
generated in the `tensorflow/models` directory.
Now that the data has been generated, we'll need to upload it to Google Cloud
Storage so the data can be accessed by ML Engine. Run the following command to
......@@ -263,7 +269,10 @@ Note: It takes roughly 10 minutes for a job to get started on ML Engine, and
roughly an hour for the system to evaluate the validation dataset. It may take
some time to populate the dashboards. If you do not see any entries after half
an hour, check the logs from the [ML Engine
Dashboard](https://console.cloud.google.com/mlengine/jobs).
Dashboard](https://console.cloud.google.com/mlengine/jobs). Note that by default
the training jobs are configured to go for much longer than is necessary for
convergence. To save money, we recommend killing your jobs once you've seen
that they've converged.
## Exporting the Tensorflow Graph
......@@ -279,7 +288,7 @@ three files:
* `model.ckpt-${CHECKPOINT_NUMBER}.meta`
After you've identified a candidate checkpoint to export, run the following
command from `tensorflow/models/object_detection`:
command from `tensorflow/models`:
``` bash
# From tensorflow/models
......
# Preparing Inputs
To use your own dataset in Tensorflow Object Detection API, you must convert it
into the [TFRecord file format](https://www.tensorflow.org/api_guides/python/python_io#tfrecords_format_details).
This document outlines how to write a script to generate the TFRecord file.
## Label Maps
Each dataset is required to have a label map associated with it. This label map
defines a mapping from string class names to integer class Ids. The label map
should be a `StringIntLabelMap` text protobuf. Sample label maps can be found in
object_detection/data. Label maps should always start from id 1.
## Dataset Requirements
For every example in your dataset, you should have the following information:
1. An RGB image for the dataset encoded as jpeg or png.
2. A list of bounding boxes for the image. Each bounding box should contain:
1. A bounding box coordinates (with origin in top left corner) defined by 4
floating point numbers [ymin, xmin, ymax, xmax]. Note that we store the
_normalized_ coordinates (x / width, y / height) in the TFRecord dataset.
2. The class of the object in the bounding box.
# Example Image
Consider the following image:
![Example Image](img/example_cat.jpg "Example Image")
with the following label map:
```
item {
id: 1
name: 'Cat'
}
item {
id: 2
name: 'Dog'
}
```
We can generate a tf.Example proto for this image using the following code:
```python
def create_cat_tf_example(encoded_cat_image_data):
"""Creates a tf.Example proto from sample cat image.
Args:
encoded_cat_image_data: The jpg encoded data of the cat image.
Returns:
example: The created tf.Example.
"""
height = 1032.0
width = 1200.0
filename = 'example_cat.jpg'
image_format = b'jpg'
xmins = [322.0 / 1200.0]
xmaxs = [1062.0 / 1200.0]
ymins = [174.0 / 1032.0]
ymaxs = [761.0 / 1032.0]
classes_text = ['Cat']
classes = [1]
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/encoded': dataset_util.bytes_feature(encoded_image_data),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
```
## Conversion Script Outline
A typical conversion script will look like the following:
```python
import tensorflow as tf
from object_detection.utils import dataset_util
flags = tf.app.flags
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
def create_tf_example(example):
# TODO(user): Populate the following variables from your example.
height = None # Image height
width = None # Image width
filename = None # Filename of the image. Empty if image is not from file
encoded_image_data = None # Encoded image bytes
image_format = None # b'jpeg' or b'png'
xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
xmaxs = [] # List of normalized right x coordinates in bounding box
# (1 per box)
ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
ymaxs = [] # List of normalized bottom y coordinates in bounding box
# (1 per box)
classes_text = [] # List of string class name of bounding box (1 per box)
classes = [] # List of integer class id of bounding box (1 per box)
tf_example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(filename),
'image/source_id': dataset_util.bytes_feature(filename),
'image/encoded': dataset_util.bytes_feature(encoded_image_data),
'image/format': dataset_util.bytes_feature(image_format),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
}))
return tf_example
def main(_):
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
# TODO(user): Write code to read in your dataset to examples variable
for example in examples:
tf_example = create_tf_example(example)
writer.write(tf_example.SerializeToString())
writer.close()
if __name__ == '__main__':
tf.app.run()
```
Note: You may notice additional fields in some other datasets. They are
currently unused by the API and are optional.
......@@ -13,12 +13,11 @@ py_library(
srcs = ["ssd_meta_arch.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_predictor",
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/core:target_assigner",
"//tensorflow_models/object_detection/utils:variables_helper",
"//tensorflow_models/object_detection/utils:shape_utils",
],
)
......@@ -56,7 +55,7 @@ py_library(
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/core:target_assigner",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/object_detection/utils:variables_helper",
"//tensorflow_models/object_detection/utils:shape_utils",
],
)
......
......@@ -80,7 +80,7 @@ from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import ops
from object_detection.utils import variables_helper
from object_detection.utils import shape_utils
slim = tf.contrib.slim
......@@ -159,21 +159,19 @@ class FasterRCNNFeatureExtractor(object):
def restore_from_classification_checkpoint_fn(
self,
checkpoint_path,
first_stage_feature_extractor_scope,
second_stage_feature_extractor_scope):
"""Returns callable for loading a checkpoint into the tensorflow graph.
"""Returns a map of variables to load from a foreign checkpoint.
Args:
checkpoint_path: path to checkpoint to restore.
first_stage_feature_extractor_scope: A scope name for the first stage
feature extractor.
second_stage_feature_extractor_scope: A scope name for the second stage
feature extractor.
Returns:
a callable which takes a tf.Session as input and loads a checkpoint when
run.
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.global_variables():
......@@ -182,13 +180,7 @@ class FasterRCNNFeatureExtractor(object):
if variable.op.name.startswith(scope_name):
var_name = variable.op.name.replace(scope_name + '/', '')
variables_to_restore[var_name] = variable
variables_to_restore = (
variables_helper.get_variables_available_in_checkpoint(
variables_to_restore, checkpoint_path))
saver = tf.train.Saver(variables_to_restore)
def restore(sess):
saver.restore(sess, checkpoint_path)
return restore
return variables_to_restore
class FasterRCNNMetaArch(model.DetectionModel):
......@@ -774,10 +766,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
A float tensor with shape [A * B, ..., depth] (where the first and last
dimension are statically defined.
"""
inputs_shape = inputs.get_shape().as_list()
flattened_shape = tf.concat([
[inputs_shape[0]*inputs_shape[1]], tf.shape(inputs)[2:-1],
[inputs_shape[-1]]], 0)
combined_shape = shape_utils.combined_static_and_dynamic_shape(inputs)
flattened_shape = tf.stack([combined_shape[0] * combined_shape[1]] +
combined_shape[2:])
return tf.reshape(inputs, flattened_shape)
def postprocess(self, prediction_dict):
......@@ -875,52 +866,128 @@ class FasterRCNNMetaArch(model.DetectionModel):
representing the number of proposals predicted for each image in
the batch.
"""
rpn_box_encodings_batch = tf.expand_dims(rpn_box_encodings_batch, axis=2)
rpn_encodings_shape = shape_utils.combined_static_and_dynamic_shape(
rpn_box_encodings_batch)
tiled_anchor_boxes = tf.tile(
tf.expand_dims(anchors, 0), [rpn_encodings_shape[0], 1, 1])
proposal_boxes = self._batch_decode_boxes(rpn_box_encodings_batch,
tiled_anchor_boxes)
proposal_boxes = tf.squeeze(proposal_boxes, axis=2)
rpn_objectness_softmax_without_background = tf.nn.softmax(
rpn_objectness_predictions_with_background_batch)[:, :, 1]
clip_window = tf.to_float(tf.stack([0, 0, image_shape[1], image_shape[2]]))
(proposal_boxes, proposal_scores, _, _,
num_proposals) = post_processing.batch_multiclass_non_max_suppression(
tf.expand_dims(proposal_boxes, axis=2),
tf.expand_dims(rpn_objectness_softmax_without_background,
axis=2),
self._first_stage_nms_score_threshold,
self._first_stage_nms_iou_threshold,
self._first_stage_max_proposals,
self._first_stage_max_proposals,
clip_window=clip_window)
if self._is_training:
(groundtruth_boxlists, groundtruth_classes_with_background_list
) = self._format_groundtruth_data(image_shape)
proposal_boxes_list = []
proposal_scores_list = []
num_proposals_list = []
for (batch_index,
(rpn_box_encodings,
rpn_objectness_predictions_with_background)) in enumerate(zip(
tf.unstack(rpn_box_encodings_batch),
tf.unstack(rpn_objectness_predictions_with_background_batch))):
decoded_boxes = self._box_coder.decode(
rpn_box_encodings, box_list.BoxList(anchors))
objectness_scores = tf.unstack(
tf.nn.softmax(rpn_objectness_predictions_with_background), axis=1)[1]
proposal_boxlist = post_processing.multiclass_non_max_suppression(
tf.expand_dims(decoded_boxes.get(), 1),
tf.expand_dims(objectness_scores, 1),
self._first_stage_nms_score_threshold,
self._first_stage_nms_iou_threshold, self._first_stage_max_proposals,
clip_window=clip_window)
if self._is_training:
proposal_boxlist.set(tf.stop_gradient(proposal_boxlist.get()))
if not self._hard_example_miner:
proposal_boxlist = self._sample_box_classifier_minibatch(
proposal_boxlist, groundtruth_boxlists[batch_index],
groundtruth_classes_with_background_list[batch_index])
normalized_proposals = box_list_ops.to_normalized_coordinates(
proposal_boxlist, image_shape[1], image_shape[2],
check_range=False)
# pad proposals to max_num_proposals
padded_proposals = box_list_ops.pad_or_clip_box_list(
normalized_proposals, num_boxes=self.max_num_proposals)
proposal_boxes_list.append(padded_proposals.get())
proposal_scores_list.append(
padded_proposals.get_field(fields.BoxListFields.scores))
num_proposals_list.append(tf.minimum(normalized_proposals.num_boxes(),
self.max_num_proposals))
return (tf.stack(proposal_boxes_list), tf.stack(proposal_scores_list),
tf.stack(num_proposals_list))
proposal_boxes = tf.stop_gradient(proposal_boxes)
if not self._hard_example_miner:
(groundtruth_boxlists, groundtruth_classes_with_background_list,
) = self._format_groundtruth_data(image_shape)
(proposal_boxes, proposal_scores,
num_proposals) = self._unpad_proposals_and_sample_box_classifier_batch(
proposal_boxes, proposal_scores, num_proposals,
groundtruth_boxlists, groundtruth_classes_with_background_list)
# normalize proposal boxes
proposal_boxes_reshaped = tf.reshape(proposal_boxes, [-1, 4])
normalized_proposal_boxes_reshaped = box_list_ops.to_normalized_coordinates(
box_list.BoxList(proposal_boxes_reshaped),
image_shape[1], image_shape[2], check_range=False).get()
proposal_boxes = tf.reshape(normalized_proposal_boxes_reshaped,
[-1, proposal_boxes.shape[1].value, 4])
return proposal_boxes, proposal_scores, num_proposals
def _unpad_proposals_and_sample_box_classifier_batch(
self,
proposal_boxes,
proposal_scores,
num_proposals,
groundtruth_boxlists,
groundtruth_classes_with_background_list):
"""Unpads proposals and samples a minibatch for second stage.
Args:
proposal_boxes: A float tensor with shape
[batch_size, num_proposals, 4] representing the (potentially zero
padded) proposal boxes for all images in the batch. These boxes are
represented as normalized coordinates.
proposal_scores: A float tensor with shape
[batch_size, num_proposals] representing the (potentially zero
padded) proposal objectness scores for all images in the batch.
num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
representing the number of proposals predicted for each image in
the batch.
groundtruth_boxlists: A list of BoxLists containing (absolute) coordinates
of the groundtruth boxes.
groundtruth_classes_with_background_list: A list of 2-D one-hot
(or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
class targets with the 0th index assumed to map to the background class.
Returns:
proposal_boxes: A float tensor with shape
[batch_size, second_stage_batch_size, 4] representing the (potentially
zero padded) proposal boxes for all images in the batch. These boxes
are represented as normalized coordinates.
proposal_scores: A float tensor with shape
[batch_size, second_stage_batch_size] representing the (potentially zero
padded) proposal objectness scores for all images in the batch.
num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
representing the number of proposals predicted for each image in
the batch.
"""
single_image_proposal_box_sample = []
single_image_proposal_score_sample = []
single_image_num_proposals_sample = []
for (single_image_proposal_boxes,
single_image_proposal_scores,
single_image_num_proposals,
single_image_groundtruth_boxlist,
single_image_groundtruth_classes_with_background) in zip(
tf.unstack(proposal_boxes),
tf.unstack(proposal_scores),
tf.unstack(num_proposals),
groundtruth_boxlists,
groundtruth_classes_with_background_list):
static_shape = single_image_proposal_boxes.get_shape()
sliced_static_shape = tf.TensorShape([tf.Dimension(None),
static_shape.dims[-1]])
single_image_proposal_boxes = tf.slice(
single_image_proposal_boxes,
[0, 0],
[single_image_num_proposals, -1])
single_image_proposal_boxes.set_shape(sliced_static_shape)
single_image_proposal_scores = tf.slice(single_image_proposal_scores,
[0],
[single_image_num_proposals])
single_image_boxlist = box_list.BoxList(single_image_proposal_boxes)
single_image_boxlist.add_field(fields.BoxListFields.scores,
single_image_proposal_scores)
sampled_boxlist = self._sample_box_classifier_minibatch(
single_image_boxlist,
single_image_groundtruth_boxlist,
single_image_groundtruth_classes_with_background)
sampled_padded_boxlist = box_list_ops.pad_or_clip_box_list(
sampled_boxlist,
num_boxes=self._second_stage_batch_size)
single_image_num_proposals_sample.append(tf.minimum(
sampled_boxlist.num_boxes(),
self._second_stage_batch_size))
bb = sampled_padded_boxlist.get()
single_image_proposal_box_sample.append(bb)
single_image_proposal_score_sample.append(
sampled_padded_boxlist.get_field(fields.BoxListFields.scores))
return (tf.stack(single_image_proposal_box_sample),
tf.stack(single_image_proposal_score_sample),
tf.stack(single_image_num_proposals_sample))
def _format_groundtruth_data(self, image_shape):
"""Helper function for preparing groundtruth data for target assignment.
......@@ -1074,7 +1141,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
class_predictions_with_background,
[-1, self.max_num_proposals, self.num_classes + 1]
)
refined_decoded_boxes_batch = self._batch_decode_refined_boxes(
refined_decoded_boxes_batch = self._batch_decode_boxes(
refined_box_encodings_batch, proposal_boxes)
class_predictions_with_background_batch = (
self._second_stage_score_conversion_fn(
......@@ -1092,19 +1159,26 @@ class FasterRCNNMetaArch(model.DetectionModel):
mask_predictions_batch = tf.reshape(
mask_predictions, [-1, self.max_num_proposals,
self.num_classes, mask_height, mask_width])
detections = self._second_stage_nms_fn(
refined_decoded_boxes_batch,
class_predictions_batch,
clip_window=clip_window,
change_coordinate_frame=True,
num_valid_boxes=num_proposals,
masks=mask_predictions_batch)
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = self._second_stage_nms_fn(
refined_decoded_boxes_batch,
class_predictions_batch,
clip_window=clip_window,
change_coordinate_frame=True,
num_valid_boxes=num_proposals,
masks=mask_predictions_batch)
detections = {'detection_boxes': nmsed_boxes,
'detection_scores': nmsed_scores,
'detection_classes': nmsed_classes,
'num_detections': tf.to_float(num_detections)}
if nmsed_masks is not None:
detections['detection_masks'] = nmsed_masks
if mask_predictions is not None:
detections['detection_masks'] = tf.to_float(
tf.greater_equal(detections['detection_masks'], mask_threshold))
return detections
def _batch_decode_refined_boxes(self, refined_box_encodings, proposal_boxes):
def _batch_decode_boxes(self, box_encodings, anchor_boxes):
"""Decode tensor of refined box encodings.
Args:
......@@ -1119,15 +1193,33 @@ class FasterRCNNMetaArch(model.DetectionModel):
float tensor representing (padded) refined bounding box predictions
(for each image in batch, proposal and class).
"""
tiled_proposal_boxes = tf.tile(
tf.expand_dims(proposal_boxes, 2), [1, 1, self.num_classes, 1])
tiled_proposals_boxlist = box_list.BoxList(
tf.reshape(tiled_proposal_boxes, [-1, 4]))
"""Decodes box encodings with respect to the anchor boxes.
Args:
box_encodings: a 4-D tensor with shape
[batch_size, num_anchors, num_classes, self._box_coder.code_size]
representing box encodings.
anchor_boxes: [batch_size, num_anchors, 4] representing
decoded bounding boxes.
Returns:
decoded_boxes: a [batch_size, num_anchors, num_classes, 4]
float tensor representing bounding box predictions
(for each image in batch, proposal and class).
"""
combined_shape = shape_utils.combined_static_and_dynamic_shape(
box_encodings)
num_classes = combined_shape[2]
tiled_anchor_boxes = tf.tile(
tf.expand_dims(anchor_boxes, 2), [1, 1, num_classes, 1])
tiled_anchors_boxlist = box_list.BoxList(
tf.reshape(tiled_anchor_boxes, [-1, 4]))
decoded_boxes = self._box_coder.decode(
tf.reshape(refined_box_encodings, [-1, self._box_coder.code_size]),
tiled_proposals_boxlist)
tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
tiled_anchors_boxlist)
return tf.reshape(decoded_boxes.get(),
[-1, self.max_num_proposals, self.num_classes, 4])
tf.stack([combined_shape[0], combined_shape[1],
num_classes, 4]))
def loss(self, prediction_dict, scope=None):
"""Compute scalar loss tensors given prediction tensors.
......@@ -1413,25 +1505,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
cls_losses=tf.expand_dims(single_image_cls_loss, 0),
decoded_boxlist_list=[proposal_boxlist])
def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
"""Returns callable for loading a checkpoint into the tensorflow graph.
def restore_map(self, from_detection_checkpoint=True):
"""Returns a map of variables to load from a foreign checkpoint.
See parent class for details.
Args:
checkpoint_path: path to checkpoint to restore.
from_detection_checkpoint: whether to restore from a detection checkpoint
(with compatible variable names) or to restore from a classification
checkpoint for initialization prior to training. Note that when
from_detection_checkpoint=True, the current implementation only
supports restoration from an (exactly) identical model (with exception
of the num_classes parameter).
from_detection_checkpoint: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Returns:
a callable which takes a tf.Session as input and loads a checkpoint when
run.
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
if not from_detection_checkpoint:
return self._feature_extractor.restore_from_classification_checkpoint_fn(
checkpoint_path,
self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope)
......@@ -1439,13 +1528,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
variables_to_restore.append(slim.get_or_create_global_step())
# Only load feature extractor variables to be consistent with loading from
# a classification checkpoint.
first_stage_variables = tf.contrib.framework.filter_variables(
feature_extractor_variables = tf.contrib.framework.filter_variables(
variables_to_restore,
include_patterns=[self.first_stage_feature_extractor_scope,
self.second_stage_feature_extractor_scope])
saver = tf.train.Saver(first_stage_variables)
def restore(sess):
saver.restore(sess, checkpoint_path)
return restore
return {var.op.name: var for var in feature_extractor_variables}
......@@ -226,61 +226,47 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
return self._get_model(self._get_second_stage_box_predictor(
num_classes=num_classes, is_training=is_training), **common_kwargs)
def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only(
def test_predict_correct_shapes_in_inference_mode_both_stages(
self):
test_graph = tf.Graph()
with test_graph.as_default():
model = self._build_model(
is_training=False, first_stage_only=True, second_stage_batch_size=2)
batch_size = 2
height = 10
width = 12
input_image_shape = (batch_size, height, width, 3)
preprocessed_inputs = tf.placeholder(dtype=tf.float32,
shape=(batch_size, None, None, 3))
prediction_dict = model.predict(preprocessed_inputs)
# In inference mode, anchors are clipped to the image window, but not
# pruned. Since MockFasterRCNN.extract_proposal_features returns a
# tensor with the same shape as its input, the expected number of anchors
# is height * width * the number of anchors per location (i.e. 3x3).
expected_num_anchors = height * width * 3 * 3
expected_output_keys = set([
'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape',
'rpn_box_encodings', 'rpn_objectness_predictions_with_background',
'anchors'])
expected_output_shapes = {
'rpn_box_predictor_features': (batch_size, height, width, 512),
'rpn_features_to_crop': (batch_size, height, width, 3),
'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
'rpn_objectness_predictions_with_background':
(batch_size, expected_num_anchors, 2),
'anchors': (expected_num_anchors, 4)
}
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
batch_size = 2
image_size = 10
input_shapes = [(batch_size, image_size, image_size, 3),
(None, image_size, image_size, 3),
(batch_size, None, None, 3),
(None, None, None, 3)]
expected_num_anchors = image_size * image_size * 3 * 3
expected_shapes = {
'rpn_box_predictor_features':
(2, image_size, image_size, 512),
'rpn_features_to_crop': (2, image_size, image_size, 3),
'image_shape': (4,),
'rpn_box_encodings': (2, expected_num_anchors, 4),
'rpn_objectness_predictions_with_background':
(2, expected_num_anchors, 2),
'anchors': (expected_num_anchors, 4),
'refined_box_encodings': (2 * 8, 2, 4),
'class_predictions_with_background': (2 * 8, 2 + 1),
'num_proposals': (2,),
'proposal_boxes': (2, 8, 4),
}
for input_shape in input_shapes:
test_graph = tf.Graph()
with test_graph.as_default():
model = self._build_model(
is_training=False, first_stage_only=False,
second_stage_batch_size=2)
preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
result_tensor_dict = model.predict(preprocessed_inputs)
init_op = tf.global_variables_initializer()
with self.test_session(graph=test_graph) as sess:
sess.run(init_op)
prediction_out = sess.run(prediction_dict,
feed_dict={
preprocessed_inputs:
np.zeros(input_image_shape)
})
self.assertEqual(set(prediction_out.keys()), expected_output_keys)
self.assertAllEqual(prediction_out['image_shape'], input_image_shape)
for output_key, expected_shape in expected_output_shapes.iteritems():
self.assertAllEqual(prediction_out[output_key].shape, expected_shape)
# Check that anchors are clipped to window.
anchors = prediction_out['anchors']
self.assertTrue(np.all(np.greater_equal(anchors, 0)))
self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
preprocessed_inputs:
np.zeros((batch_size, image_size, image_size, 3))})
self.assertEqual(set(tensor_dict_out.keys()),
set(expected_shapes.keys()))
for key in expected_shapes:
self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
def test_predict_gives_valid_anchors_in_training_mode_first_stage_only(self):
test_graph = tf.Graph()
......@@ -535,35 +521,67 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
expected_num_proposals)
def test_postprocess_second_stage_only_inference_mode(self):
model = self._build_model(
is_training=False, first_stage_only=False, second_stage_batch_size=6)
num_proposals_shapes = [(2), (None)]
refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)]
class_predictions_with_background_shapes = [(16, 3), (None, 3)]
proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)]
batch_size = 2
total_num_padded_proposals = batch_size * model.max_num_proposals
proposal_boxes = tf.constant(
[[[1, 1, 2, 3],
[0, 0, 1, 1],
[.5, .5, .6, .6],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
[[2, 3, 6, 8],
[1, 2, 5, 3],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
num_proposals = tf.constant([3, 2], dtype=tf.int32)
refined_box_encodings = tf.zeros(
[total_num_padded_proposals, model.num_classes, 4], dtype=tf.float32)
class_predictions_with_background = tf.ones(
[total_num_padded_proposals, model.num_classes+1], dtype=tf.float32)
image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32)
detections = model.postprocess({
'refined_box_encodings': refined_box_encodings,
'class_predictions_with_background': class_predictions_with_background,
'num_proposals': num_proposals,
'proposal_boxes': proposal_boxes,
'image_shape': image_shape
})
with self.test_session() as sess:
detections_out = sess.run(detections)
image_shape = np.array((2, 36, 48, 3), dtype=np.int32)
for (num_proposals_shape, refined_box_encoding_shape,
class_predictions_with_background_shape,
proposal_boxes_shape) in zip(num_proposals_shapes,
refined_box_encodings_shapes,
class_predictions_with_background_shapes,
proposal_boxes_shapes):
tf_graph = tf.Graph()
with tf_graph.as_default():
model = self._build_model(
is_training=False, first_stage_only=False,
second_stage_batch_size=6)
total_num_padded_proposals = batch_size * model.max_num_proposals
proposal_boxes = np.array(
[[[1, 1, 2, 3],
[0, 0, 1, 1],
[.5, .5, .6, .6],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
[[2, 3, 6, 8],
[1, 2, 5, 3],
4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]])
num_proposals = np.array([3, 2], dtype=np.int32)
refined_box_encodings = np.zeros(
[total_num_padded_proposals, model.num_classes, 4])
class_predictions_with_background = np.ones(
[total_num_padded_proposals, model.num_classes+1])
num_proposals_placeholder = tf.placeholder(tf.int32,
shape=num_proposals_shape)
refined_box_encodings_placeholder = tf.placeholder(
tf.float32, shape=refined_box_encoding_shape)
class_predictions_with_background_placeholder = tf.placeholder(
tf.float32, shape=class_predictions_with_background_shape)
proposal_boxes_placeholder = tf.placeholder(
tf.float32, shape=proposal_boxes_shape)
image_shape_placeholder = tf.placeholder(tf.int32, shape=(4))
detections = model.postprocess({
'refined_box_encodings': refined_box_encodings_placeholder,
'class_predictions_with_background':
class_predictions_with_background_placeholder,
'num_proposals': num_proposals_placeholder,
'proposal_boxes': proposal_boxes_placeholder,
'image_shape': image_shape_placeholder,
})
with self.test_session(graph=tf_graph) as sess:
detections_out = sess.run(
detections,
feed_dict={
refined_box_encodings_placeholder: refined_box_encodings,
class_predictions_with_background_placeholder:
class_predictions_with_background,
num_proposals_placeholder: num_proposals,
proposal_boxes_placeholder: proposal_boxes,
image_shape_placeholder: image_shape
})
self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
self.assertAllClose(detections_out['detection_scores'],
[[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
......@@ -571,6 +589,17 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
self.assertAllClose(detections_out['num_detections'], [5, 4])
def test_preprocess_preserves_input_shapes(self):
image_shapes = [(3, None, None, 3),
(None, 10, 10, 3),
(None, None, None, 3)]
for image_shape in image_shapes:
model = self._build_model(
is_training=False, first_stage_only=False, second_stage_batch_size=6)
image_placeholder = tf.placeholder(tf.float32, shape=image_shape)
preprocessed_inputs = model.preprocess(image_placeholder)
self.assertAllEqual(preprocessed_inputs.shape.as_list(), image_shape)
def test_loss_first_stage_only_mode(self):
model = self._build_model(
is_training=True, first_stage_only=True, second_stage_batch_size=6)
......@@ -957,7 +986,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
exp_loc_loss)
self.assertAllClose(loss_dict_out['second_stage_classification_loss'], 0)
def test_restore_fn_classification(self):
def test_restore_map_for_classification_ckpt(self):
# Define mock tensorflow classification graph and save variables.
test_graph_classification = tf.Graph()
with test_graph_classification.as_default():
......@@ -986,12 +1015,17 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
preprocessed_inputs = model.preprocess(inputs)
prediction_dict = model.predict(preprocessed_inputs)
model.postprocess(prediction_dict)
restore_fn = model.restore_fn(saved_model_path,
from_detection_checkpoint=False)
var_map = model.restore_map(from_detection_checkpoint=False)
self.assertIsInstance(var_map, dict)
saver = tf.train.Saver(var_map)
with self.test_session() as sess:
restore_fn(sess)
saver.restore(sess, saved_model_path)
for var in sess.run(tf.report_uninitialized_variables()):
self.assertNotIn(model.first_stage_feature_extractor_scope, var.name)
self.assertNotIn(model.second_stage_feature_extractor_scope,
var.name)
def test_restore_fn_detection(self):
def test_restore_map_for_detection_ckpt(self):
# Define first detection graph and save variables.
test_graph_detection1 = tf.Graph()
with test_graph_detection1.as_default():
......@@ -1022,10 +1056,11 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
preprocessed_inputs2 = model2.preprocess(inputs2)
prediction_dict2 = model2.predict(preprocessed_inputs2)
model2.postprocess(prediction_dict2)
restore_fn = model2.restore_fn(saved_model_path,
from_detection_checkpoint=True)
var_map = model2.restore_map(from_detection_checkpoint=True)
self.assertIsInstance(var_map, dict)
saver = tf.train.Saver(var_map)
with self.test_session() as sess:
restore_fn(sess)
saver.restore(sess, saved_model_path)
for var in sess.run(tf.report_uninitialized_variables()):
self.assertNotIn(model2.first_stage_feature_extractor_scope, var.name)
self.assertNotIn(model2.second_stage_feature_extractor_scope,
......
......@@ -23,13 +23,12 @@ from abc import abstractmethod
import re
import tensorflow as tf
from object_detection.core import box_coder as bcoder
from object_detection.core import box_list
from object_detection.core import box_predictor as bpredictor
from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import variables_helper
from object_detection.utils import shape_utils
slim = tf.contrib.slim
......@@ -324,7 +323,8 @@ class SSDMetaArch(model.DetectionModel):
a list of pairs (height, width) for each feature map in feature_maps
"""
feature_map_shapes = [
feature_map.get_shape().as_list() for feature_map in feature_maps
shape_utils.combined_static_and_dynamic_shape(
feature_map) for feature_map in feature_maps
]
return [(shape[1], shape[2]) for shape in feature_map_shapes]
......@@ -365,8 +365,7 @@ class SSDMetaArch(model.DetectionModel):
with tf.name_scope('Postprocessor'):
box_encodings = prediction_dict['box_encodings']
class_predictions = prediction_dict['class_predictions_with_background']
detection_boxes = bcoder.batch_decode(box_encodings, self._box_coder,
self.anchors)
detection_boxes = self._batch_decode(box_encodings)
detection_boxes = tf.expand_dims(detection_boxes, axis=2)
class_predictions_without_background = tf.slice(class_predictions,
......@@ -375,10 +374,14 @@ class SSDMetaArch(model.DetectionModel):
detection_scores = self._score_conversion_fn(
class_predictions_without_background)
clip_window = tf.constant([0, 0, 1, 1], tf.float32)
detections = self._non_max_suppression_fn(detection_boxes,
detection_scores,
clip_window=clip_window)
return detections
(nmsed_boxes, nmsed_scores, nmsed_classes, _,
num_detections) = self._non_max_suppression_fn(detection_boxes,
detection_scores,
clip_window=clip_window)
return {'detection_boxes': nmsed_boxes,
'detection_scores': nmsed_scores,
'detection_classes': nmsed_classes,
'num_detections': tf.to_float(num_detections)}
def loss(self, prediction_dict, scope=None):
"""Compute scalar loss tensors with respect to provided groundtruth.
......@@ -546,8 +549,7 @@ class SSDMetaArch(model.DetectionModel):
tf.slice(prediction_dict['class_predictions_with_background'],
[0, 0, 1], class_pred_shape), class_pred_shape)
decoded_boxes = bcoder.batch_decode(prediction_dict['box_encodings'],
self._box_coder, self.anchors)
decoded_boxes = self._batch_decode(prediction_dict['box_encodings'])
decoded_box_tensors_list = tf.unstack(decoded_boxes)
class_prediction_list = tf.unstack(class_predictions)
decoded_boxlist_list = []
......@@ -562,33 +564,51 @@ class SSDMetaArch(model.DetectionModel):
decoded_boxlist_list=decoded_boxlist_list,
match_list=match_list)
def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
"""Return callable for loading a checkpoint into the tensorflow graph.
def _batch_decode(self, box_encodings):
"""Decodes a batch of box encodings with respect to the anchors.
Args:
box_encodings: A float32 tensor of shape
[batch_size, num_anchors, box_code_size] containing box encodings.
Returns:
decoded_boxes: A float32 tensor of shape
[batch_size, num_anchors, 4] containing the decoded boxes.
"""
combined_shape = shape_utils.combined_static_and_dynamic_shape(
box_encodings)
batch_size = combined_shape[0]
tiled_anchor_boxes = tf.tile(
tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1])
tiled_anchors_boxlist = box_list.BoxList(
tf.reshape(tiled_anchor_boxes, [-1, self._box_coder.code_size]))
decoded_boxes = self._box_coder.decode(
tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
tiled_anchors_boxlist)
return tf.reshape(decoded_boxes.get(),
tf.stack([combined_shape[0], combined_shape[1],
4]))
def restore_map(self, from_detection_checkpoint=True):
"""Returns a map of variables to load from a foreign checkpoint.
See parent class for details.
Args:
checkpoint_path: path to checkpoint to restore.
from_detection_checkpoint: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Returns:
a callable which takes a tf.Session as input and loads a checkpoint when
run.
A dict mapping variable names (to load from a checkpoint) to variables in
the model graph.
"""
variables_to_restore = {}
for variable in tf.all_variables():
if variable.op.name.startswith(self._extract_features_scope):
var_name = variable.op.name
if not from_detection_checkpoint:
var_name = (
re.split('^' + self._extract_features_scope + '/', var_name)[-1])
var_name = (re.split('^' + self._extract_features_scope + '/',
var_name)[-1])
variables_to_restore[var_name] = variable
# TODO: Load variables selectively using scopes.
variables_to_restore = (
variables_helper.get_variables_available_in_checkpoint(
variables_to_restore, checkpoint_path))
saver = tf.train.Saver(variables_to_restore)
def restore(sess):
saver.restore(sess, checkpoint_path)
return restore
return variables_to_restore
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment