Commit 6766e6dd authored by pkulzc's avatar pkulzc Committed by Sergio Guadarrama
Browse files

Merged commit includes the following changes: (#7690)

275538818  by Sergio Guadarrama:

    Support grayscale input images in Slim model training

--
275355841  by Sergio Guadarrama:

    Fixed cases where tf.TensorShape was constructed with float dimensions

    This is a prerequisite for making TensorShape and Dimension more strict
    about the types of their arguments.

--
275131829  by Sergio Guadarrama:

    updates mobilenet/README.md to be github compatible adds V2+ reference to mobilenet_v1.md file and fixes invalid markdown

--

PiperOrigin-RevId: 275538818
parent 800d1dbb
...@@ -39,7 +39,7 @@ _FILE_PATTERN = '%s.record-*' ...@@ -39,7 +39,7 @@ _FILE_PATTERN = '%s.record-*'
_SPLITS_TO_SIZES = { _SPLITS_TO_SIZES = {
'train': 82783, 'train': 82783,
'validation': 40504, 'val': 40504,
} }
......
...@@ -82,6 +82,9 @@ tf.app.flags.DEFINE_integer( ...@@ -82,6 +82,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_bool( tf.app.flags.DEFINE_bool(
'quantize', False, 'whether to use quantized graph or not.') 'quantize', False, 'whether to use quantized graph or not.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
...@@ -124,7 +127,8 @@ def main(_): ...@@ -124,7 +127,8 @@ def main(_):
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
image_preprocessing_fn = preprocessing_factory.get_preprocessing( image_preprocessing_fn = preprocessing_factory.get_preprocessing(
preprocessing_name, preprocessing_name,
is_training=False) is_training=False,
use_grayscale=FLAGS.use_grayscale)
eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size
......
...@@ -110,6 +110,9 @@ tf.app.flags.DEFINE_integer( ...@@ -110,6 +110,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_bool('write_text_graphdef', False, tf.app.flags.DEFINE_bool('write_text_graphdef', False,
'Whether to write a text version of graphdef.') 'Whether to write a text version of graphdef.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
...@@ -128,11 +131,14 @@ def main(_): ...@@ -128,11 +131,14 @@ def main(_):
num_classes=(dataset.num_classes - FLAGS.labels_offset), num_classes=(dataset.num_classes - FLAGS.labels_offset),
is_training=FLAGS.is_training) is_training=FLAGS.is_training)
image_size = FLAGS.image_size or network_fn.default_image_size image_size = FLAGS.image_size or network_fn.default_image_size
num_channels = 1 if FLAGS.use_grayscale else 3
if FLAGS.is_video_model: if FLAGS.is_video_model:
input_shape = [FLAGS.batch_size, FLAGS.num_frames, input_shape = [
image_size, image_size, 3] FLAGS.batch_size, FLAGS.num_frames, image_size, image_size,
num_channels
]
else: else:
input_shape = [FLAGS.batch_size, image_size, image_size, 3] input_shape = [FLAGS.batch_size, image_size, image_size, num_channels]
placeholder = tf.placeholder(name='input', dtype=tf.float32, placeholder = tf.placeholder(name='input', dtype=tf.float32,
shape=input_shape) shape=input_shape)
network_fn(placeholder) network_fn(placeholder)
......
...@@ -157,7 +157,8 @@ class MobilenetV2Test(tf.test.TestCase): ...@@ -157,7 +157,8 @@ class MobilenetV2Test(tf.test.TestCase):
new_def = copy.deepcopy(mobilenet_v2.V2_DEF) new_def = copy.deepcopy(mobilenet_v2.V2_DEF)
def inverse_multiplier(output_params, multiplier): def inverse_multiplier(output_params, multiplier):
output_params['num_outputs'] /= multiplier output_params['num_outputs'] = int(
output_params['num_outputs'] / multiplier)
new_def['spec'][0] = op( new_def['spec'][0] = op(
slim.conv2d, slim.conv2d,
......
...@@ -128,9 +128,9 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): ...@@ -128,9 +128,9 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
network_fn: A function that applies the model to a batch of images. It has network_fn: A function that applies the model to a batch of images. It has
the following signature: the following signature:
net, end_points = network_fn(images) net, end_points = network_fn(images)
The `images` input is a tensor of shape [batch_size, height, width, 3] The `images` input is a tensor of shape [batch_size, height, width, 3 or
with height = width = network_fn.default_image_size. (The permissibility 1] with height = width = network_fn.default_image_size. (The
and treatment of other sizes depends on the network_fn.) permissibility and treatment of other sizes depends on the network_fn.)
The returned `end_points` are a dictionary of intermediate activations. The returned `end_points` are a dictionary of intermediate activations.
The returned `net` is the topmost layer, depending on `num_classes`: The returned `net` is the topmost layer, depending on `num_classes`:
If `num_classes` was a non-zero integer, `net` is a logits tensor If `num_classes` was a non-zero integer, `net` is a logits tensor
......
...@@ -31,7 +31,8 @@ def preprocess_for_train(image, ...@@ -31,7 +31,8 @@ def preprocess_for_train(image,
output_height, output_height,
output_width, output_width,
padding=_PADDING, padding=_PADDING,
add_image_summaries=True): add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image for training. """Preprocesses the given image for training.
Note that the actual resizing scale is sampled from Note that the actual resizing scale is sampled from
...@@ -43,6 +44,7 @@ def preprocess_for_train(image, ...@@ -43,6 +44,7 @@ def preprocess_for_train(image,
output_width: The width of the image after preprocessing. output_width: The width of the image after preprocessing.
padding: The amount of padding before and after each dimension of the image. padding: The amount of padding before and after each dimension of the image.
add_image_summaries: Enable image summaries. add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
A preprocessed image. A preprocessed image.
...@@ -52,6 +54,8 @@ def preprocess_for_train(image, ...@@ -52,6 +54,8 @@ def preprocess_for_train(image,
# Transform the image to floats. # Transform the image to floats.
image = tf.to_float(image) image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
if padding > 0: if padding > 0:
image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]]) image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
# Randomly crop a [height, width] section of the image. # Randomly crop a [height, width] section of the image.
...@@ -74,8 +78,11 @@ def preprocess_for_train(image, ...@@ -74,8 +78,11 @@ def preprocess_for_train(image,
return tf.image.per_image_standardization(distorted_image) return tf.image.per_image_standardization(distorted_image)
def preprocess_for_eval(image, output_height, output_width, def preprocess_for_eval(image,
add_image_summaries=True): output_height,
output_width,
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image for evaluation. """Preprocesses the given image for evaluation.
Args: Args:
...@@ -83,6 +90,7 @@ def preprocess_for_eval(image, output_height, output_width, ...@@ -83,6 +90,7 @@ def preprocess_for_eval(image, output_height, output_width,
output_height: The height of the image after preprocessing. output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing. output_width: The width of the image after preprocessing.
add_image_summaries: Enable image summaries. add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
A preprocessed image. A preprocessed image.
...@@ -91,6 +99,8 @@ def preprocess_for_eval(image, output_height, output_width, ...@@ -91,6 +99,8 @@ def preprocess_for_eval(image, output_height, output_width,
tf.summary.image('image', tf.expand_dims(image, 0)) tf.summary.image('image', tf.expand_dims(image, 0))
# Transform the image to floats. # Transform the image to floats.
image = tf.to_float(image) image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
# Resize and crop if needed. # Resize and crop if needed.
resized_image = tf.image.resize_image_with_crop_or_pad(image, resized_image = tf.image.resize_image_with_crop_or_pad(image,
...@@ -103,8 +113,12 @@ def preprocess_for_eval(image, output_height, output_width, ...@@ -103,8 +113,12 @@ def preprocess_for_eval(image, output_height, output_width,
return tf.image.per_image_standardization(resized_image) return tf.image.per_image_standardization(resized_image)
def preprocess_image(image, output_height, output_width, is_training=False, def preprocess_image(image,
add_image_summaries=True): output_height,
output_width,
is_training=False,
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image. """Preprocesses the given image.
Args: Args:
...@@ -114,15 +128,22 @@ def preprocess_image(image, output_height, output_width, is_training=False, ...@@ -114,15 +128,22 @@ def preprocess_image(image, output_height, output_width, is_training=False,
is_training: `True` if we're preprocessing the image for training and is_training: `True` if we're preprocessing the image for training and
`False` otherwise. `False` otherwise.
add_image_summaries: Enable image summaries. add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
A preprocessed image. A preprocessed image.
""" """
if is_training: if is_training:
return preprocess_for_train( return preprocess_for_train(
image, output_height, output_width, image,
add_image_summaries=add_image_summaries) output_height,
output_width,
add_image_summaries=add_image_summaries,
use_grayscale=use_grayscale)
else: else:
return preprocess_for_eval( return preprocess_for_eval(
image, output_height, output_width, image,
add_image_summaries=add_image_summaries) output_height,
output_width,
add_image_summaries=add_image_summaries,
use_grayscale=use_grayscale)
...@@ -160,7 +160,8 @@ def preprocess_for_train(image, ...@@ -160,7 +160,8 @@ def preprocess_for_train(image,
fast_mode=True, fast_mode=True,
scope=None, scope=None,
add_image_summaries=True, add_image_summaries=True,
random_crop=True): random_crop=True,
use_grayscale=False):
"""Distort one image for training a network. """Distort one image for training a network.
Distorting images provides a useful technique for augmenting the data Distorting images provides a useful technique for augmenting the data
...@@ -186,6 +187,7 @@ def preprocess_for_train(image, ...@@ -186,6 +187,7 @@ def preprocess_for_train(image,
add_image_summaries: Enable image summaries. add_image_summaries: Enable image summaries.
random_crop: Enable random cropping of images during preprocessing for random_crop: Enable random cropping of images during preprocessing for
training. training.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
3-D float Tensor of distorted image used for training with range [-1, 1]. 3-D float Tensor of distorted image used for training with range [-1, 1].
""" """
...@@ -242,6 +244,9 @@ def preprocess_for_train(image, ...@@ -242,6 +244,9 @@ def preprocess_for_train(image,
lambda x, ordering: distort_color(x, ordering, fast_mode), lambda x, ordering: distort_color(x, ordering, fast_mode),
num_cases=num_distort_cases) num_cases=num_distort_cases)
if use_grayscale:
distorted_image = tf.image.rgb_to_grayscale(distorted_image)
if add_image_summaries: if add_image_summaries:
tf.summary.image('final_distorted_image', tf.summary.image('final_distorted_image',
tf.expand_dims(distorted_image, 0)) tf.expand_dims(distorted_image, 0))
...@@ -255,7 +260,8 @@ def preprocess_for_eval(image, ...@@ -255,7 +260,8 @@ def preprocess_for_eval(image,
width, width,
central_fraction=0.875, central_fraction=0.875,
scope=None, scope=None,
central_crop=True): central_crop=True,
use_grayscale=False):
"""Prepare one image for evaluation. """Prepare one image for evaluation.
If height and width are specified it would output an image with that size by If height and width are specified it would output an image with that size by
...@@ -275,12 +281,15 @@ def preprocess_for_eval(image, ...@@ -275,12 +281,15 @@ def preprocess_for_eval(image,
scope: Optional scope for name_scope. scope: Optional scope for name_scope.
central_crop: Enable central cropping of images during preprocessing for central_crop: Enable central cropping of images during preprocessing for
evaluation. evaluation.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
3-D float Tensor of prepared image. 3-D float Tensor of prepared image.
""" """
with tf.name_scope(scope, 'eval_image', [image, height, width]): with tf.name_scope(scope, 'eval_image', [image, height, width]):
if image.dtype != tf.float32: if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32) image = tf.image.convert_image_dtype(image, dtype=tf.float32)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
# Crop the central region of the image with an area containing 87.5% of # Crop the central region of the image with an area containing 87.5% of
# the original image. # the original image.
if central_crop and central_fraction: if central_crop and central_fraction:
...@@ -304,7 +313,8 @@ def preprocess_image(image, ...@@ -304,7 +313,8 @@ def preprocess_image(image,
bbox=None, bbox=None,
fast_mode=True, fast_mode=True,
add_image_summaries=True, add_image_summaries=True,
crop_image=True): crop_image=True,
use_grayscale=False):
"""Pre-process one image for training or evaluation. """Pre-process one image for training or evaluation.
Args: Args:
...@@ -324,6 +334,7 @@ def preprocess_image(image, ...@@ -324,6 +334,7 @@ def preprocess_image(image,
add_image_summaries: Enable image summaries. add_image_summaries: Enable image summaries.
crop_image: Whether to enable cropping of images during preprocessing for crop_image: Whether to enable cropping of images during preprocessing for
both training and evaluation. both training and evaluation.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
3-D float Tensor containing an appropriately scaled image 3-D float Tensor containing an appropriately scaled image
...@@ -339,6 +350,12 @@ def preprocess_image(image, ...@@ -339,6 +350,12 @@ def preprocess_image(image,
bbox, bbox,
fast_mode, fast_mode,
add_image_summaries=add_image_summaries, add_image_summaries=add_image_summaries,
random_crop=crop_image) random_crop=crop_image,
use_grayscale=use_grayscale)
else: else:
return preprocess_for_eval(image, height, width, central_crop=crop_image) return preprocess_for_eval(
image,
height,
width,
central_crop=crop_image,
use_grayscale=use_grayscale)
...@@ -23,7 +23,11 @@ import tensorflow as tf ...@@ -23,7 +23,11 @@ import tensorflow as tf
slim = tf.contrib.slim slim = tf.contrib.slim
def preprocess_image(image, output_height, output_width, is_training): def preprocess_image(image,
output_height,
output_width,
is_training,
use_grayscale=False):
"""Preprocesses the given image. """Preprocesses the given image.
Args: Args:
...@@ -32,11 +36,15 @@ def preprocess_image(image, output_height, output_width, is_training): ...@@ -32,11 +36,15 @@ def preprocess_image(image, output_height, output_width, is_training):
output_width: The width of the image after preprocessing. output_width: The width of the image after preprocessing.
is_training: `True` if we're preprocessing the image for training and is_training: `True` if we're preprocessing the image for training and
`False` otherwise. `False` otherwise.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
A preprocessed image. A preprocessed image.
""" """
del is_training # Unused argument
image = tf.to_float(image) image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
image = tf.image.resize_image_with_crop_or_pad( image = tf.image.resize_image_with_crop_or_pad(
image, output_width, output_height) image, output_width, output_height)
image = tf.subtract(image, 128.0) image = tf.subtract(image, 128.0)
......
...@@ -28,13 +28,14 @@ from preprocessing import vgg_preprocessing ...@@ -28,13 +28,14 @@ from preprocessing import vgg_preprocessing
slim = tf.contrib.slim slim = tf.contrib.slim
def get_preprocessing(name, is_training=False): def get_preprocessing(name, is_training=False, use_grayscale=False):
"""Returns preprocessing_fn(image, height, width, **kwargs). """Returns preprocessing_fn(image, height, width, **kwargs).
Args: Args:
name: The name of the preprocessing function. name: The name of the preprocessing function.
is_training: `True` if the model is being used for training and `False` is_training: `True` if the model is being used for training and `False`
otherwise. otherwise.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
preprocessing_fn: A function that preprocesses a single image (pre-batch). preprocessing_fn: A function that preprocesses a single image (pre-batch).
...@@ -80,6 +81,11 @@ def get_preprocessing(name, is_training=False): ...@@ -80,6 +81,11 @@ def get_preprocessing(name, is_training=False):
def preprocessing_fn(image, output_height, output_width, **kwargs): def preprocessing_fn(image, output_height, output_width, **kwargs):
return preprocessing_fn_map[name].preprocess_image( return preprocessing_fn_map[name].preprocess_image(
image, output_height, output_width, is_training=is_training, **kwargs) image,
output_height,
output_width,
is_training=is_training,
use_grayscale=use_grayscale,
**kwargs)
return preprocessing_fn return preprocessing_fn
...@@ -287,7 +287,8 @@ def preprocess_for_train(image, ...@@ -287,7 +287,8 @@ def preprocess_for_train(image,
output_height, output_height,
output_width, output_width,
resize_side_min=_RESIZE_SIDE_MIN, resize_side_min=_RESIZE_SIDE_MIN,
resize_side_max=_RESIZE_SIDE_MAX): resize_side_max=_RESIZE_SIDE_MAX,
use_grayscale=False):
"""Preprocesses the given image for training. """Preprocesses the given image for training.
Note that the actual resizing scale is sampled from Note that the actual resizing scale is sampled from
...@@ -301,6 +302,7 @@ def preprocess_for_train(image, ...@@ -301,6 +302,7 @@ def preprocess_for_train(image,
aspect-preserving resizing. aspect-preserving resizing.
resize_side_max: The upper bound for the smallest side of the image for resize_side_max: The upper bound for the smallest side of the image for
aspect-preserving resizing. aspect-preserving resizing.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
A preprocessed image. A preprocessed image.
...@@ -312,11 +314,17 @@ def preprocess_for_train(image, ...@@ -312,11 +314,17 @@ def preprocess_for_train(image,
image = _random_crop([image], output_height, output_width)[0] image = _random_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3]) image.set_shape([output_height, output_width, 3])
image = tf.to_float(image) image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
image = tf.image.random_flip_left_right(image) image = tf.image.random_flip_left_right(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_for_eval(image, output_height, output_width, resize_side): def preprocess_for_eval(image,
output_height,
output_width,
resize_side,
use_grayscale=False):
"""Preprocesses the given image for evaluation. """Preprocesses the given image for evaluation.
Args: Args:
...@@ -324,6 +332,7 @@ def preprocess_for_eval(image, output_height, output_width, resize_side): ...@@ -324,6 +332,7 @@ def preprocess_for_eval(image, output_height, output_width, resize_side):
output_height: The height of the image after preprocessing. output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing. output_width: The width of the image after preprocessing.
resize_side: The smallest side of the image for aspect-preserving resizing. resize_side: The smallest side of the image for aspect-preserving resizing.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
A preprocessed image. A preprocessed image.
...@@ -332,12 +341,18 @@ def preprocess_for_eval(image, output_height, output_width, resize_side): ...@@ -332,12 +341,18 @@ def preprocess_for_eval(image, output_height, output_width, resize_side):
image = _central_crop([image], output_height, output_width)[0] image = _central_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3]) image.set_shape([output_height, output_width, 3])
image = tf.to_float(image) image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_image(image, output_height, output_width, is_training=False, def preprocess_image(image,
output_height,
output_width,
is_training=False,
resize_side_min=_RESIZE_SIDE_MIN, resize_side_min=_RESIZE_SIDE_MIN,
resize_side_max=_RESIZE_SIDE_MAX): resize_side_max=_RESIZE_SIDE_MAX,
use_grayscale=False):
"""Preprocesses the given image. """Preprocesses the given image.
Args: Args:
...@@ -353,13 +368,15 @@ def preprocess_image(image, output_height, output_width, is_training=False, ...@@ -353,13 +368,15 @@ def preprocess_image(image, output_height, output_width, is_training=False,
aspect-preserving resizing. If `is_training` is `False`, this value is aspect-preserving resizing. If `is_training` is `False`, this value is
ignored. Otherwise, the resize side is sampled from ignored. Otherwise, the resize side is sampled from
[resize_size_min, resize_size_max]. [resize_size_min, resize_size_max].
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns: Returns:
A preprocessed image. A preprocessed image.
""" """
if is_training: if is_training:
return preprocess_for_train(image, output_height, output_width, return preprocess_for_train(image, output_height, output_width,
resize_side_min, resize_side_max) resize_side_min, resize_side_max,
use_grayscale)
else: else:
return preprocess_for_eval(image, output_height, output_width, return preprocess_for_eval(image, output_height, output_width,
resize_side_min) resize_side_min, use_grayscale)
...@@ -206,6 +206,9 @@ tf.app.flags.DEFINE_integer( ...@@ -206,6 +206,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_integer('max_number_of_steps', None, tf.app.flags.DEFINE_integer('max_number_of_steps', None,
'The maximum number of training steps.') 'The maximum number of training steps.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
##################### #####################
# Fine-Tuning Flags # # Fine-Tuning Flags #
##################### #####################
...@@ -433,7 +436,8 @@ def main(_): ...@@ -433,7 +436,8 @@ def main(_):
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
image_preprocessing_fn = preprocessing_factory.get_preprocessing( image_preprocessing_fn = preprocessing_factory.get_preprocessing(
preprocessing_name, preprocessing_name,
is_training=True) is_training=True,
use_grayscale=FLAGS.use_grayscale)
############################################################## ##############################################################
# Create a dataset provider that loads data from the dataset # # Create a dataset provider that loads data from the dataset #
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment