Commit 6766e6dd authored by pkulzc's avatar pkulzc Committed by Sergio Guadarrama
Browse files

Merged commit includes the following changes: (#7690)

275538818  by Sergio Guadarrama:

    Support grayscale input images in Slim model training

--
275355841  by Sergio Guadarrama:

    Fixed cases where tf.TensorShape was constructed with float dimensions

    This is a prerequisite for making TensorShape and Dimension more strict
    about the types of their arguments.

--
275131829  by Sergio Guadarrama:

    Updates mobilenet/README.md to be GitHub compatible, adds a V2+ reference to the mobilenet_v1.md file, and fixes invalid markdown.

--

PiperOrigin-RevId: 275538818
parent 800d1dbb
......@@ -39,7 +39,7 @@ _FILE_PATTERN = '%s.record-*'
_SPLITS_TO_SIZES = {
'train': 82783,
'validation': 40504,
'val': 40504,
}
......
......@@ -82,6 +82,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_bool(
'quantize', False, 'whether to use quantized graph or not.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS
......@@ -124,7 +127,8 @@ def main(_):
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
image_preprocessing_fn = preprocessing_factory.get_preprocessing(
preprocessing_name,
is_training=False)
is_training=False,
use_grayscale=FLAGS.use_grayscale)
eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size
......
......@@ -110,6 +110,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_bool('write_text_graphdef', False,
'Whether to write a text version of graphdef.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS
......@@ -128,11 +131,14 @@ def main(_):
num_classes=(dataset.num_classes - FLAGS.labels_offset),
is_training=FLAGS.is_training)
image_size = FLAGS.image_size or network_fn.default_image_size
num_channels = 1 if FLAGS.use_grayscale else 3
if FLAGS.is_video_model:
input_shape = [FLAGS.batch_size, FLAGS.num_frames,
image_size, image_size, 3]
input_shape = [
FLAGS.batch_size, FLAGS.num_frames, image_size, image_size,
num_channels
]
else:
input_shape = [FLAGS.batch_size, image_size, image_size, 3]
input_shape = [FLAGS.batch_size, image_size, image_size, num_channels]
placeholder = tf.placeholder(name='input', dtype=tf.float32,
shape=input_shape)
network_fn(placeholder)
......
......@@ -157,7 +157,8 @@ class MobilenetV2Test(tf.test.TestCase):
new_def = copy.deepcopy(mobilenet_v2.V2_DEF)
def inverse_multiplier(output_params, multiplier):
output_params['num_outputs'] /= multiplier
output_params['num_outputs'] = int(
output_params['num_outputs'] / multiplier)
new_def['spec'][0] = op(
slim.conv2d,
......
......@@ -128,9 +128,9 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
network_fn: A function that applies the model to a batch of images. It has
the following signature:
net, end_points = network_fn(images)
The `images` input is a tensor of shape [batch_size, height, width, 3]
with height = width = network_fn.default_image_size. (The permissibility
and treatment of other sizes depends on the network_fn.)
The `images` input is a tensor of shape [batch_size, height, width, 3 or
1] with height = width = network_fn.default_image_size. (The
permissibility and treatment of other sizes depends on the network_fn.)
The returned `end_points` are a dictionary of intermediate activations.
The returned `net` is the topmost layer, depending on `num_classes`:
If `num_classes` was a non-zero integer, `net` is a logits tensor
......
......@@ -31,7 +31,8 @@ def preprocess_for_train(image,
output_height,
output_width,
padding=_PADDING,
add_image_summaries=True):
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image for training.
Note that the actual resizing scale is sampled from
......@@ -43,6 +44,7 @@ def preprocess_for_train(image,
output_width: The width of the image after preprocessing.
padding: The amount of padding before and after each dimension of the image.
add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -52,6 +54,8 @@ def preprocess_for_train(image,
# Transform the image to floats.
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
if padding > 0:
image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
# Randomly crop a [height, width] section of the image.
......@@ -74,8 +78,11 @@ def preprocess_for_train(image,
return tf.image.per_image_standardization(distorted_image)
def preprocess_for_eval(image, output_height, output_width,
add_image_summaries=True):
def preprocess_for_eval(image,
output_height,
output_width,
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image for evaluation.
Args:
......@@ -83,6 +90,7 @@ def preprocess_for_eval(image, output_height, output_width,
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -91,6 +99,8 @@ def preprocess_for_eval(image, output_height, output_width,
tf.summary.image('image', tf.expand_dims(image, 0))
# Transform the image to floats.
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
# Resize and crop if needed.
resized_image = tf.image.resize_image_with_crop_or_pad(image,
......@@ -103,8 +113,12 @@ def preprocess_for_eval(image, output_height, output_width,
return tf.image.per_image_standardization(resized_image)
def preprocess_image(image, output_height, output_width, is_training=False,
add_image_summaries=True):
def preprocess_image(image,
output_height,
output_width,
is_training=False,
add_image_summaries=True,
use_grayscale=False):
"""Preprocesses the given image.
Args:
......@@ -114,15 +128,22 @@ def preprocess_image(image, output_height, output_width, is_training=False,
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
add_image_summaries: Enable image summaries.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
"""
if is_training:
return preprocess_for_train(
image, output_height, output_width,
add_image_summaries=add_image_summaries)
image,
output_height,
output_width,
add_image_summaries=add_image_summaries,
use_grayscale=use_grayscale)
else:
return preprocess_for_eval(
image, output_height, output_width,
add_image_summaries=add_image_summaries)
image,
output_height,
output_width,
add_image_summaries=add_image_summaries,
use_grayscale=use_grayscale)
......@@ -160,7 +160,8 @@ def preprocess_for_train(image,
fast_mode=True,
scope=None,
add_image_summaries=True,
random_crop=True):
random_crop=True,
use_grayscale=False):
"""Distort one image for training a network.
Distorting images provides a useful technique for augmenting the data
......@@ -186,6 +187,7 @@ def preprocess_for_train(image,
add_image_summaries: Enable image summaries.
random_crop: Enable random cropping of images during preprocessing for
training.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
3-D float Tensor of distorted image used for training with range [-1, 1].
"""
......@@ -242,6 +244,9 @@ def preprocess_for_train(image,
lambda x, ordering: distort_color(x, ordering, fast_mode),
num_cases=num_distort_cases)
if use_grayscale:
distorted_image = tf.image.rgb_to_grayscale(distorted_image)
if add_image_summaries:
tf.summary.image('final_distorted_image',
tf.expand_dims(distorted_image, 0))
......@@ -255,7 +260,8 @@ def preprocess_for_eval(image,
width,
central_fraction=0.875,
scope=None,
central_crop=True):
central_crop=True,
use_grayscale=False):
"""Prepare one image for evaluation.
If height and width are specified it would output an image with that size by
......@@ -275,12 +281,15 @@ def preprocess_for_eval(image,
scope: Optional scope for name_scope.
central_crop: Enable central cropping of images during preprocessing for
evaluation.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
3-D float Tensor of prepared image.
"""
with tf.name_scope(scope, 'eval_image', [image, height, width]):
if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
# Crop the central region of the image with an area containing 87.5% of
# the original image.
if central_crop and central_fraction:
......@@ -304,7 +313,8 @@ def preprocess_image(image,
bbox=None,
fast_mode=True,
add_image_summaries=True,
crop_image=True):
crop_image=True,
use_grayscale=False):
"""Pre-process one image for training or evaluation.
Args:
......@@ -324,6 +334,7 @@ def preprocess_image(image,
add_image_summaries: Enable image summaries.
crop_image: Whether to enable cropping of images during preprocessing for
both training and evaluation.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
3-D float Tensor containing an appropriately scaled image
......@@ -339,6 +350,12 @@ def preprocess_image(image,
bbox,
fast_mode,
add_image_summaries=add_image_summaries,
random_crop=crop_image)
random_crop=crop_image,
use_grayscale=use_grayscale)
else:
return preprocess_for_eval(image, height, width, central_crop=crop_image)
return preprocess_for_eval(
image,
height,
width,
central_crop=crop_image,
use_grayscale=use_grayscale)
......@@ -23,7 +23,11 @@ import tensorflow as tf
slim = tf.contrib.slim
def preprocess_image(image, output_height, output_width, is_training):
def preprocess_image(image,
output_height,
output_width,
is_training,
use_grayscale=False):
"""Preprocesses the given image.
Args:
......@@ -32,11 +36,15 @@ def preprocess_image(image, output_height, output_width, is_training):
output_width: The width of the image after preprocessing.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
"""
del is_training # Unused argument
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
image = tf.image.resize_image_with_crop_or_pad(
image, output_width, output_height)
image = tf.subtract(image, 128.0)
......
......@@ -28,13 +28,14 @@ from preprocessing import vgg_preprocessing
slim = tf.contrib.slim
def get_preprocessing(name, is_training=False):
def get_preprocessing(name, is_training=False, use_grayscale=False):
"""Returns preprocessing_fn(image, height, width, **kwargs).
Args:
name: The name of the preprocessing function.
is_training: `True` if the model is being used for training and `False`
otherwise.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
preprocessing_fn: A function that preprocessing a single image (pre-batch).
......@@ -80,6 +81,11 @@ def get_preprocessing(name, is_training=False):
def preprocessing_fn(image, output_height, output_width, **kwargs):
return preprocessing_fn_map[name].preprocess_image(
image, output_height, output_width, is_training=is_training, **kwargs)
image,
output_height,
output_width,
is_training=is_training,
use_grayscale=use_grayscale,
**kwargs)
return preprocessing_fn
......@@ -287,7 +287,8 @@ def preprocess_for_train(image,
output_height,
output_width,
resize_side_min=_RESIZE_SIDE_MIN,
resize_side_max=_RESIZE_SIDE_MAX):
resize_side_max=_RESIZE_SIDE_MAX,
use_grayscale=False):
"""Preprocesses the given image for training.
Note that the actual resizing scale is sampled from
......@@ -301,6 +302,7 @@ def preprocess_for_train(image,
aspect-preserving resizing.
resize_side_max: The upper bound for the smallest side of the image for
aspect-preserving resizing.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -312,11 +314,17 @@ def preprocess_for_train(image,
image = _random_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3])
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
image = tf.image.random_flip_left_right(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_for_eval(image, output_height, output_width, resize_side):
def preprocess_for_eval(image,
output_height,
output_width,
resize_side,
use_grayscale=False):
"""Preprocesses the given image for evaluation.
Args:
......@@ -324,6 +332,7 @@ def preprocess_for_eval(image, output_height, output_width, resize_side):
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
resize_side: The smallest side of the image for aspect-preserving resizing.
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
......@@ -332,12 +341,18 @@ def preprocess_for_eval(image, output_height, output_width, resize_side):
image = _central_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3])
image = tf.to_float(image)
if use_grayscale:
image = tf.image.rgb_to_grayscale(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_image(image, output_height, output_width, is_training=False,
def preprocess_image(image,
output_height,
output_width,
is_training=False,
resize_side_min=_RESIZE_SIDE_MIN,
resize_side_max=_RESIZE_SIDE_MAX):
resize_side_max=_RESIZE_SIDE_MAX,
use_grayscale=False):
"""Preprocesses the given image.
Args:
......@@ -353,13 +368,15 @@ def preprocess_image(image, output_height, output_width, is_training=False,
aspect-preserving resizing. If `is_training` is `False`, this value is
ignored. Otherwise, the resize side is sampled from
[resize_size_min, resize_size_max].
use_grayscale: Whether to convert the image from RGB to grayscale.
Returns:
A preprocessed image.
"""
if is_training:
return preprocess_for_train(image, output_height, output_width,
resize_side_min, resize_side_max)
resize_side_min, resize_side_max,
use_grayscale)
else:
return preprocess_for_eval(image, output_height, output_width,
resize_side_min)
resize_side_min, use_grayscale)
......@@ -206,6 +206,9 @@ tf.app.flags.DEFINE_integer(
tf.app.flags.DEFINE_integer('max_number_of_steps', None,
'The maximum number of training steps.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
#####################
# Fine-Tuning Flags #
#####################
......@@ -433,7 +436,8 @@ def main(_):
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
image_preprocessing_fn = preprocessing_factory.get_preprocessing(
preprocessing_name,
is_training=True)
is_training=True,
use_grayscale=FLAGS.use_grayscale)
##############################################################
# Create a dataset provider that loads data from the dataset #
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment