Commit 6766e6dd authored by pkulzc's avatar pkulzc Committed by Sergio Guadarrama
Browse files

Merged commit includes the following changes: (#7690)

275538818  by Sergio Guadarrama:

    Support grayscale input images in Slim model training

--
275355841  by Sergio Guadarrama:

    Fixed cases where tf.TensorShape was constructed with float dimensions

    This is a prerequisite for making TensorShape and Dimension more strict
    about the types of their arguments.

--
275131829  by Sergio Guadarrama:

    Updates mobilenet/README.md to be GitHub compatible, adds a V2+ reference to the mobilenet_v1.md file, and fixes invalid markdown.

--

PiperOrigin-RevId: 275538818
parent 800d1dbb
......@@ -39,7 +39,7 @@ _FILE_PATTERN = '%s.record-*'
_SPLITS_TO_SIZES = {
'train': 82783,
'validation': 40504,
'val': 40504,
}
......
......@@ -82,6 +82,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_bool(
'quantize', False, 'whether to use quantized graph or not.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS
......@@ -124,7 +127,8 @@ def main(_):
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
image_preprocessing_fn = preprocessing_factory.get_preprocessing(
preprocessing_name,
is_training=False)
is_training=False,
use_grayscale=FLAGS.use_grayscale)
eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size
......
......@@ -110,6 +110,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_bool('write_text_graphdef', False,
'Whether to write a text version of graphdef.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS
......@@ -128,11 +131,14 @@ def main(_):
num_classes=(dataset.num_classes - FLAGS.labels_offset),
is_training=FLAGS.is_training)
image_size = FLAGS.image_size or network_fn.default_image_size
num_channels = 1 if FLAGS.use_grayscale else 3
if FLAGS.is_video_model:
input_shape = [FLAGS.batch_size, FLAGS.num_frames,
image_size, image_size, 3]
input_shape = [
FLAGS.batch_size, FLAGS.num_frames, image_size, image_size,
num_channels
]
else:
input_shape = [FLAGS.batch_size, image_size, image_size, 3]
input_shape = [FLAGS.batch_size, image_size, image_size, num_channels]
placeholder = tf.placeholder(name='input', dtype=tf.float32,
shape=input_shape)
network_fn(placeholder)
......
......@@ -157,7 +157,8 @@ class MobilenetV2Test(tf.test.TestCase):
new_def = copy.deepcopy(mobilenet_v2.V2_DEF)
def inverse_multiplier(output_params, multiplier):
output_params['num_outputs'] /= multiplier
output_params['num_outputs'] = int(
output_params['num_outputs'] / multiplier)
new_def['spec'][0] = op(
slim.conv2d,
......
......@@ -128,9 +128,9 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
network_fn: A function that applies the model to a batch of images. It has
the following signature:
net, end_points = network_fn(images)
The `images` input is a tensor of shape [batch_size, height, width, 3]
with height = width = network_fn.default_image_size. (The permissibility
and treatment of other sizes depends on the network_fn.)
The `images` input is a tensor of shape [batch_size, height, width, 3 or
1] with height = width = network_fn.default_image_size. (The
permissibility and treatment of other sizes depends on the network_fn.)
The returned `end_points` are a dictionary of intermediate activations.
The returned `net` is the topmost layer, depending on `num_classes`:
If `num_classes` was a non-zero integer, `net` is a logits tensor
......
......@@ -31,7 +31,8 @@ def preprocess_for_train(image,
output_height,
output_width,
padding=_PADDING,
add_image_summaries=True):
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image for training.
Note that the actual resizing scale is sampled from
......@@ -43,6 +44,7 @@ def preprocess_for_train(image,
output_width: The width of the image after preprocessing.
padding: The amount of padding before and after each dimension of the image.
add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -52,6 +54,8 @@ def preprocess_for_train(image,
# Transform the image to floats.
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
if padding > 0:
image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
# Randomly crop a [height, width] section of the image.
......@@ -74,8 +78,11 @@ def preprocess_for_train(image,
return tf.image.per_image_standardization(distorted_image)
def preprocess_for_eval(image, output_height, output_width,
add_image_summaries=True):
def preprocess_for_eval(image,
output_height,
output_width,
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image for evaluation.
Args:
......@@ -83,6 +90,7 @@ def preprocess_for_eval(image, output_height, output_width,
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -91,6 +99,8 @@ def preprocess_for_eval(image, output_height, output_width,
tf.summary.image('image', tf.expand_dims(image, 0))
# Transform the image to floats.
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
# Resize and crop if needed.
resized_image = tf.image.resize_image_with_crop_or_pad(image,
......@@ -103,8 +113,12 @@ def preprocess_for_eval(image, output_height, output_width,
return tf.image.per_image_standardization(resized_image)
def preprocess_image(image, output_height, output_width, is_training=False,
add_image_summaries=True):
def preprocess_image(image,
output_height,
output_width,
is_training=False,
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image.
Args:
......@@ -114,15 +128,22 @@ def preprocess_image(image, output_height, output_width, is_training=False,
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
"""
if is_training:
return preprocess_for_train(
image, output_height, output_width,
add_image_summaries=add_image_summaries)
image,
output_height,
output_width,
add_image_summaries=add_image_summaries,
use_grayscale=use_grayscale)
else:
return preprocess_for_eval(
image, output_height, output_width,
add_image_summaries=add_image_summaries)
image,
output_height,
output_width,
add_image_summaries=add_image_summaries,
use_grayscale=use_grayscale)
......@@ -160,7 +160,8 @@ def preprocess_for_train(image,
fast_mode=True,
scope=None,
add_image_summaries=True,
random_crop=True):
random_crop=True,
use_grayscale=False):
"""Distort one image for training a network.
Distorting images provides a useful technique for augmenting the data
......@@ -186,6 +187,7 @@ def preprocess_for_train(image,
add_image_summaries: Enable image summaries.
random_crop: Enable random cropping of images during preprocessing for
training.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
3-D float Tensor of distorted image used for training with range [-1, 1].
"""
......@@ -242,6 +244,9 @@ def preprocess_for_train(image,
lambda x, ordering: distort_color(x, ordering, fast_mode),
num_cases=num_distort_cases)
if use_grayscale:
distorted_image = tf.image.rgb_to_grayscale(distorted_image)
if add_image_summaries:
tf.summary.image('final_distorted_image',
tf.expand_dims(distorted_image, 0))
......@@ -255,7 +260,8 @@ def preprocess_for_eval(image,
width,
central_fraction=0.875,
scope=None,
central_crop=True):
central_crop=True,
use_grayscale=False):
"""Prepare one image for evaluation.
If height and width are specified it would output an image with that size by
......@@ -275,12 +281,15 @@ def preprocess_for_eval(image,
scope: Optional scope for name_scope.
central_crop: Enable central cropping of images during preprocessing for
evaluation.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
3-D float Tensor of prepared image.
"""
with tf.name_scope(scope, 'eval_image', [image, height, width]):
if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
# Crop the central region of the image with an area containing 87.5% of
# the original image.
if central_crop and central_fraction:
......@@ -304,7 +313,8 @@ def preprocess_image(image,
bbox=None,
fast_mode=True,
add_image_summaries=True,
crop_image=True):
crop_image=True,
use_grayscale=False):
"""Pre-process one image for training or evaluation.
Args:
......@@ -324,6 +334,7 @@ def preprocess_image(image,
add_image_summaries: Enable image summaries.
crop_image: Whether to enable cropping of images during preprocessing for
both training and evaluation.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
3-D float Tensor containing an appropriately scaled image
......@@ -339,6 +350,12 @@ def preprocess_image(image,
bbox,
fast_mode,
add_image_summaries=add_image_summaries,
random_crop=crop_image)
random_crop=crop_image,
use_grayscale=use_grayscale)
else:
return preprocess_for_eval(image, height, width, central_crop=crop_image)
return preprocess_for_eval(
image,
height,
width,
central_crop=crop_image,
use_grayscale=use_grayscale)
......@@ -23,7 +23,11 @@ import tensorflow as tf
slim = tf.contrib.slim
def preprocess_image(image, output_height, output_width, is_training):
def preprocess_image(image,
output_height,
output_width,
is_training,
use_grayscale=False):
"""Preprocesses the given image.
Args:
......@@ -32,11 +36,15 @@ def preprocess_image(image, output_height, output_width, is_training):
output_width: The width of the image after preprocessing.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
"""
del is_training # Unused argument
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
image = tf.image.resize_image_with_crop_or_pad(
image, output_width, output_height)
image = tf.subtract(image, 128.0)
......
......@@ -28,13 +28,14 @@ from preprocessing import vgg_preprocessing
slim = tf.contrib.slim
def get_preprocessing(name, is_training=False):
def get_preprocessing(name, is_training=False, use_grayscale=False):
"""Returns preprocessing_fn(image, height, width, **kwargs).
Args:
name: The name of the preprocessing function.
is_training: `True` if the model is being used for training and `False`
otherwise.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
preprocessing_fn: A function that preprocessing a single image (pre-batch).
......@@ -80,6 +81,11 @@ def get_preprocessing(name, is_training=False):
def preprocessing_fn(image, output_height, output_width, **kwargs):
return preprocessing_fn_map[name].preprocess_image(
image, output_height, output_width, is_training=is_training, **kwargs)
image,
output_height,
output_width,
is_training=is_training,
use_grayscale=use_grayscale,
**kwargs)
return preprocessing_fn
......@@ -287,7 +287,8 @@ def preprocess_for_train(image,
output_height,
output_width,
resize_side_min=_RESIZE_SIDE_MIN,
resize_side_max=_RESIZE_SIDE_MAX):
resize_side_max=_RESIZE_SIDE_MAX,
use_grayscale=False):
"""Preprocesses the given image for training.
Note that the actual resizing scale is sampled from
......@@ -301,6 +302,7 @@ def preprocess_for_train(image,
aspect-preserving resizing.
resize_side_max: The upper bound for the smallest side of the image for
aspect-preserving resizing.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -312,11 +314,17 @@ def preprocess_for_train(image,
image = _random_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3])
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
image = tf.image.random_flip_left_right(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_for_eval(image, output_height, output_width, resize_side):
def preprocess_for_eval(image,
output_height,
output_width,
resize_side,
use_grayscale=False):
"""Preprocesses the given image for evaluation.
Args:
......@@ -324,6 +332,7 @@ def preprocess_for_eval(image, output_height, output_width, resize_side):
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
resize_side: The smallest side of the image for aspect-preserving resizing.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -332,12 +341,18 @@ def preprocess_for_eval(image, output_height, output_width, resize_side):
image = _central_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3])
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_image(image, output_height, output_width, is_training=False,
def preprocess_image(image,
output_height,
output_width,
is_training=False,
resize_side_min=_RESIZE_SIDE_MIN,
resize_side_max=_RESIZE_SIDE_MAX):
resize_side_max=_RESIZE_SIDE_MAX,
use_grayscale=False):
"""Preprocesses the given image.
Args:
......@@ -353,13 +368,15 @@ def preprocess_image(image, output_height, output_width, is_training=False,
aspect-preserving resizing. If `is_training` is `False`, this value is
ignored. Otherwise, the resize side is sampled from
[resize_size_min, resize_size_max].
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
"""
if is_training:
return preprocess_for_train(image, output_height, output_width,
resize_side_min, resize_side_max)
resize_side_min, resize_side_max,
use_grayscale)
else:
return preprocess_for_eval(image, output_height, output_width,
resize_side_min)
resize_side_min, use_grayscale)
......@@ -206,6 +206,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_integer('max_number_of_steps', None,
'The maximum number of training steps.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
#####################
# Fine-Tuning Flags #
#####################
......@@ -433,7 +436,8 @@ def main(_):
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
image_preprocessing_fn = preprocessing_factory.get_preprocessing(
preprocessing_name,
is_training=True)
is_training=True,
use_grayscale=FLAGS.use_grayscale)
##############################################################
# Create a dataset provider that loads data from the dataset #
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment