"git@developer.sourcefind.cn:change/sglang.git" did not exist on "18317ddc13bc403749fe9f99ef5726796f855b0e"
Unverified commit 67c403fc authored by pkulzc, committed by GitHub

Add VisualWakeWords Dataset to Slim dataset_factory (#6661)

* Merged commit includes the following changes:
244869387  by Sergio Guadarrama:

    This CL adds a script to generate Visual WakeWords dataset annotation files and TFRecords from the COCO dataset.

--
244866660  by Sergio Guadarrama:

    Add VisualWakeWords Dataset to Slim dataset_factory to train MobileNets on it.

--
244836000  by Sergio Guadarrama:

    n/a

--
244104396  by Sergio Guadarrama:

    Add an option to enable/disable image cropping in inception_preprocessing.

--
242040128  by Sergio Guadarrama:

    Internal change

241793677  by Sergio Guadarrama:

    Internal change

241073081  by Sergio Guadarrama:

    Internal change

240131189  by Sergio Guadarrama:

    Internal change

PiperOrigin-RevId: 244869387

* Merged commit includes the following changes:
245431876  by Sergio Guadarrama:

    Internal cleanup

--

PiperOrigin-RevId: 245431876

* Merged commit includes the following changes:
245454983  by Sergio Guadarrama:

    Internal Cleanup

--

PiperOrigin-RevId: 245454983
parent 4a1fba0b
...@@ -13,6 +13,7 @@ py_library( ...@@ -13,6 +13,7 @@ py_library(
name = "dataset_utils", name = "dataset_utils",
srcs = ["datasets/dataset_utils.py"], srcs = ["datasets/dataset_utils.py"],
deps = [ deps = [
"//third_party/py/six",
# "//tensorflow", # "//tensorflow",
], ],
) )
...@@ -34,6 +35,7 @@ sh_binary( ...@@ -34,6 +35,7 @@ sh_binary(
py_binary( py_binary(
name = "build_imagenet_data", name = "build_imagenet_data",
srcs = ["datasets/build_imagenet_data.py"], srcs = ["datasets/build_imagenet_data.py"],
python_version = "PY2",
deps = [ deps = [
# "//numpy", # "//numpy",
# "//tensorflow", # "//tensorflow",
...@@ -72,6 +74,7 @@ py_library( ...@@ -72,6 +74,7 @@ py_library(
py_binary( py_binary(
name = "download_and_convert_data", name = "download_and_convert_data",
srcs = ["download_and_convert_data.py"], srcs = ["download_and_convert_data.py"],
python_version = "PY2",
deps = [ deps = [
":download_and_convert_cifar10", ":download_and_convert_cifar10",
":download_and_convert_flowers", ":download_and_convert_flowers",
...@@ -80,6 +83,31 @@ py_binary( ...@@ -80,6 +83,31 @@ py_binary(
], ],
) )
sh_binary(
name = "download_mscoco",
srcs = ["datasets/download_mscoco.sh"],
)
py_binary(
name = "build_visualwakewords_data",
srcs = ["datasets/build_visualwakewords_data.py"],
deps = [
":build_visualwakewords_data_lib",
# "//tensorflow",
],
)
py_library(
name = "build_visualwakewords_data_lib",
srcs = ["datasets/build_visualwakewords_data_lib.py"],
deps = [
":dataset_utils",
"//third_party/py/PIL:pil",
"//third_party/py/contextlib2",
# "//tensorflow",
],
)
py_library( py_library(
name = "cifar10", name = "cifar10",
srcs = ["datasets/cifar10.py"], srcs = ["datasets/cifar10.py"],
...@@ -116,6 +144,15 @@ py_library( ...@@ -116,6 +144,15 @@ py_library(
], ],
) )
py_library(
name = "visualwakewords",
srcs = ["datasets/visualwakewords.py"],
deps = [
":dataset_utils",
# "//tensorflow",
],
)
py_library( py_library(
name = "dataset_factory", name = "dataset_factory",
srcs = ["datasets/dataset_factory.py"], srcs = ["datasets/dataset_factory.py"],
...@@ -124,6 +161,7 @@ py_library( ...@@ -124,6 +161,7 @@ py_library(
":flowers", ":flowers",
":imagenet", ":imagenet",
":mnist", ":mnist",
":visualwakewords",
], ],
) )
...@@ -138,6 +176,7 @@ py_library( ...@@ -138,6 +176,7 @@ py_library(
py_test( py_test(
name = "model_deploy_test", name = "model_deploy_test",
srcs = ["deployment/model_deploy_test.py"], srcs = ["deployment/model_deploy_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":model_deploy", ":model_deploy",
...@@ -227,6 +266,7 @@ py_test( ...@@ -227,6 +266,7 @@ py_test(
name = "alexnet_test", name = "alexnet_test",
size = "medium", size = "medium",
srcs = ["nets/alexnet_test.py"], srcs = ["nets/alexnet_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":alexnet", ":alexnet",
...@@ -254,6 +294,7 @@ py_library( ...@@ -254,6 +294,7 @@ py_library(
py_test( py_test(
name = "cyclegan_test", name = "cyclegan_test",
srcs = ["nets/cyclegan_test.py"], srcs = ["nets/cyclegan_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -273,6 +314,7 @@ py_library( ...@@ -273,6 +314,7 @@ py_library(
py_test( py_test(
name = "dcgan_test", name = "dcgan_test",
srcs = ["nets/dcgan_test.py"], srcs = ["nets/dcgan_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -296,6 +338,7 @@ py_test( ...@@ -296,6 +338,7 @@ py_test(
name = "i3d_test", name = "i3d_test",
size = "large", size = "large",
srcs = ["nets/i3d_test.py"], srcs = ["nets/i3d_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -388,6 +431,7 @@ py_test( ...@@ -388,6 +431,7 @@ py_test(
name = "inception_v1_test", name = "inception_v1_test",
size = "large", size = "large",
srcs = ["nets/inception_v1_test.py"], srcs = ["nets/inception_v1_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -401,6 +445,7 @@ py_test( ...@@ -401,6 +445,7 @@ py_test(
name = "inception_v2_test", name = "inception_v2_test",
size = "large", size = "large",
srcs = ["nets/inception_v2_test.py"], srcs = ["nets/inception_v2_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -414,6 +459,7 @@ py_test( ...@@ -414,6 +459,7 @@ py_test(
name = "inception_v3_test", name = "inception_v3_test",
size = "large", size = "large",
srcs = ["nets/inception_v3_test.py"], srcs = ["nets/inception_v3_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -427,6 +473,7 @@ py_test( ...@@ -427,6 +473,7 @@ py_test(
name = "inception_v4_test", name = "inception_v4_test",
size = "large", size = "large",
srcs = ["nets/inception_v4_test.py"], srcs = ["nets/inception_v4_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -439,6 +486,7 @@ py_test( ...@@ -439,6 +486,7 @@ py_test(
name = "inception_resnet_v2_test", name = "inception_resnet_v2_test",
size = "large", size = "large",
srcs = ["nets/inception_resnet_v2_test.py"], srcs = ["nets/inception_resnet_v2_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -476,6 +524,7 @@ py_library( ...@@ -476,6 +524,7 @@ py_library(
py_test( py_test(
name = "mobilenet_v2_test", name = "mobilenet_v2_test",
srcs = ["nets/mobilenet/mobilenet_v2_test.py"], srcs = ["nets/mobilenet/mobilenet_v2_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":mobilenet", ":mobilenet",
...@@ -495,6 +544,7 @@ py_test( ...@@ -495,6 +544,7 @@ py_test(
name = "mobilenet_v1_test", name = "mobilenet_v1_test",
size = "large", size = "large",
srcs = ["nets/mobilenet_v1_test.py"], srcs = ["nets/mobilenet_v1_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -507,6 +557,7 @@ py_test( ...@@ -507,6 +557,7 @@ py_test(
py_binary( py_binary(
name = "mobilenet_v1_train", name = "mobilenet_v1_train",
srcs = ["nets/mobilenet_v1_train.py"], srcs = ["nets/mobilenet_v1_train.py"],
python_version = "PY2",
deps = [ deps = [
":dataset_factory", ":dataset_factory",
":mobilenet_v1", ":mobilenet_v1",
...@@ -518,6 +569,7 @@ py_binary( ...@@ -518,6 +569,7 @@ py_binary(
py_binary( py_binary(
name = "mobilenet_v1_eval", name = "mobilenet_v1_eval",
srcs = ["nets/mobilenet_v1_eval.py"], srcs = ["nets/mobilenet_v1_eval.py"],
python_version = "PY2",
deps = [ deps = [
":dataset_factory", ":dataset_factory",
":mobilenet_v1", ":mobilenet_v1",
...@@ -549,6 +601,7 @@ py_test( ...@@ -549,6 +601,7 @@ py_test(
name = "nasnet_utils_test", name = "nasnet_utils_test",
size = "medium", size = "medium",
srcs = ["nets/nasnet/nasnet_utils_test.py"], srcs = ["nets/nasnet/nasnet_utils_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":nasnet_utils", ":nasnet_utils",
...@@ -560,6 +613,7 @@ py_test( ...@@ -560,6 +613,7 @@ py_test(
name = "nasnet_test", name = "nasnet_test",
size = "large", size = "large",
srcs = ["nets/nasnet/nasnet_test.py"], srcs = ["nets/nasnet/nasnet_test.py"],
python_version = "PY2",
shard_count = 10, shard_count = 10,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -583,6 +637,7 @@ py_test( ...@@ -583,6 +637,7 @@ py_test(
name = "pnasnet_test", name = "pnasnet_test",
size = "large", size = "large",
srcs = ["nets/nasnet/pnasnet_test.py"], srcs = ["nets/nasnet/pnasnet_test.py"],
python_version = "PY2",
shard_count = 4, shard_count = 4,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -604,6 +659,7 @@ py_test( ...@@ -604,6 +659,7 @@ py_test(
name = "overfeat_test", name = "overfeat_test",
size = "medium", size = "medium",
srcs = ["nets/overfeat_test.py"], srcs = ["nets/overfeat_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":overfeat", ":overfeat",
...@@ -623,6 +679,7 @@ py_library( ...@@ -623,6 +679,7 @@ py_library(
py_test( py_test(
name = "pix2pix_test", name = "pix2pix_test",
srcs = ["nets/pix2pix_test.py"], srcs = ["nets/pix2pix_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":pix2pix", ":pix2pix",
...@@ -653,6 +710,7 @@ py_test( ...@@ -653,6 +710,7 @@ py_test(
name = "resnet_v1_test", name = "resnet_v1_test",
size = "medium", size = "medium",
srcs = ["nets/resnet_v1_test.py"], srcs = ["nets/resnet_v1_test.py"],
python_version = "PY2",
shard_count = 2, shard_count = 2,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -677,6 +735,7 @@ py_test( ...@@ -677,6 +735,7 @@ py_test(
name = "resnet_v2_test", name = "resnet_v2_test",
size = "medium", size = "medium",
srcs = ["nets/resnet_v2_test.py"], srcs = ["nets/resnet_v2_test.py"],
python_version = "PY2",
shard_count = 2, shard_count = 2,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -701,6 +760,7 @@ py_test( ...@@ -701,6 +760,7 @@ py_test(
name = "s3dg_test", name = "s3dg_test",
size = "large", size = "large",
srcs = ["nets/s3dg_test.py"], srcs = ["nets/s3dg_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -722,6 +782,7 @@ py_test( ...@@ -722,6 +782,7 @@ py_test(
name = "vgg_test", name = "vgg_test",
size = "medium", size = "medium",
srcs = ["nets/vgg_test.py"], srcs = ["nets/vgg_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":vgg", ":vgg",
...@@ -742,6 +803,7 @@ py_test( ...@@ -742,6 +803,7 @@ py_test(
name = "nets_factory_test", name = "nets_factory_test",
size = "large", size = "large",
srcs = ["nets/nets_factory_test.py"], srcs = ["nets/nets_factory_test.py"],
python_version = "PY2",
shard_count = 3, shard_count = 3,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
...@@ -767,6 +829,7 @@ py_binary( ...@@ -767,6 +829,7 @@ py_binary(
srcs = ["train_image_classifier.py"], srcs = ["train_image_classifier.py"],
# WARNING: not supported in bazel; will be commented out by copybara. # WARNING: not supported in bazel; will be commented out by copybara.
# paropts = ["--compress"], # paropts = ["--compress"],
python_version = "PY2",
deps = [ deps = [
":train_image_classifier_lib", ":train_image_classifier_lib",
], ],
...@@ -786,6 +849,7 @@ py_library( ...@@ -786,6 +849,7 @@ py_library(
py_binary( py_binary(
name = "eval_image_classifier", name = "eval_image_classifier",
srcs = ["eval_image_classifier.py"], srcs = ["eval_image_classifier.py"],
python_version = "PY2",
deps = [ deps = [
":eval_image_classifier_lib", ":eval_image_classifier_lib",
], ],
...@@ -796,6 +860,7 @@ py_binary( ...@@ -796,6 +860,7 @@ py_binary(
srcs = ["export_inference_graph.py"], srcs = ["export_inference_graph.py"],
# WARNING: not supported in bazel; will be commented out by copybara. # WARNING: not supported in bazel; will be commented out by copybara.
# paropts = ["--compress"], # paropts = ["--compress"],
python_version = "PY2",
deps = [":export_inference_graph_lib"], deps = [":export_inference_graph_lib"],
) )
...@@ -814,6 +879,7 @@ py_test( ...@@ -814,6 +879,7 @@ py_test(
name = "export_inference_graph_test", name = "export_inference_graph_test",
size = "medium", size = "medium",
srcs = ["export_inference_graph_test.py"], srcs = ["export_inference_graph_test.py"],
python_version = "PY2",
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
tags = [ tags = [
"manual", "manual",
......
...@@ -96,6 +96,7 @@ Flowers|2500 | 2500 | 5 | Various sizes (source: Flickr)
[Cifar10](https://www.cs.toronto.edu/~kriz/cifar.html) | 60k| 10k | 10 |32x32 color
[MNIST](http://yann.lecun.com/exdb/mnist/)| 60k | 10k | 10 | 28x28 gray
[ImageNet](http://www.image-net.org/challenges/LSVRC/2012/)|1.2M| 50k | 1000 | Various sizes
VisualWakeWords|82783 | 40504 | 2 | Various sizes (source: MS COCO)

## Downloading and converting to TFRecord format
...@@ -135,6 +136,9 @@ However, for ImageNet, you have to follow the instructions
[here](https://github.com/tensorflow/models/blob/master/research/inception/README.md#getting-started).
Note that you first have to sign up for an account at image-net.org.
Also, the download can take several hours, and could use up to 500GB.
For the Visual WakeWords dataset, download the MSCOCO dataset with the script
[here](https://github.com/tensorflow/models/blob/master/research/slim/datasets/download_mscoco.sh)
and build the TFRecords following the instructions
[here](https://github.com/tensorflow/models/blob/master/research/slim/datasets/build_visualwakewords_data.py).

## Creating a TF-Slim Dataset Descriptor.
...@@ -148,6 +152,7 @@ for
[Cifar10](https://github.com/tensorflow/models/blob/master/research/slim/datasets/cifar10.py),
[ImageNet](https://github.com/tensorflow/models/blob/master/research/slim/datasets/imagenet.py),
[Flowers](https://github.com/tensorflow/models/blob/master/research/slim/datasets/flowers.py),
[VisualWakeWords](https://github.com/tensorflow/models/blob/master/research/slim/datasets/visualwakewords.py),
and
[MNIST](https://github.com/tensorflow/models/blob/master/research/slim/datasets/mnist.py).
An example of how to load data using a TF-Slim dataset descriptor using a
......
...@@ -314,7 +314,7 @@ def _process_image(filename, coder):
    width: integer, image width in pixels.
  """
  # Read the image file.
-  image_data = tf.gfile.FastGFile(filename, 'r').read()
+  image_data = tf.gfile.GFile(filename, 'r').read()
  # Clean the dirty data.
  if _is_png(filename):
...@@ -497,8 +497,9 @@ def _find_image_files(data_dir, labels_file):
    labels: list of integer; each integer identifies the ground truth.
  """
  print('Determining list of input files and labels from %s.' % data_dir)
-  challenge_synsets = [l.strip() for l in
-                       tf.gfile.FastGFile(labels_file, 'r').readlines()]
+  challenge_synsets = [
+      l.strip() for l in tf.gfile.GFile(labels_file, 'r').readlines()
+  ]
  labels = []
  filenames = []
...@@ -621,7 +622,7 @@ def _build_synset_lookup(imagenet_metadata_file):
    Dictionary of synset to human labels, such as:
      'n02119022' --> 'red fox, Vulpes vulpes'
  """
-  lines = tf.gfile.FastGFile(imagenet_metadata_file, 'r').readlines()
+  lines = tf.gfile.GFile(imagenet_metadata_file, 'r').readlines()
  synset_to_human = {}
  for l in lines:
    if l:
...@@ -655,7 +656,7 @@ def _build_bounding_box_lookup(bounding_box_file):
    Dictionary mapping image file names to a list of bounding boxes. This list
    contains 0+ bounding boxes.
  """
-  lines = tf.gfile.FastGFile(bounding_box_file, 'r').readlines()
+  lines = tf.gfile.GFile(bounding_box_file, 'r').readlines()
  images_to_bboxes = {}
  num_bbox = 0
  num_image = 0
......
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Build Visual WakeWords Dataset with images and labels for person/not-person.
This script generates the Visual WakeWords dataset annotations from
the raw COCO dataset and converts them to TFRecord.
Visual WakeWords Dataset derives from the COCO dataset to design tiny models
classifying two classes, such as person/not-person. The COCO annotations
are filtered to two classes: foreground_class_of_interest and background
(e.g., person and not-person). Bounding boxes for small objects
with area less than 5% of the image area are filtered out.
The resulting annotations file has the following fields, where
the image and categories fields are the same as in the COCO dataset, while the annotation
field corresponds to the foreground_class_of_interest/background class and
bounding boxes for the foreground_class_of_interest class.
images{"id", "width", "height", "file_name", "license", "flickr_url",
"coco_url", "date_captured",}
annotations{
"image_id", object[{"category_id", "area", "bbox" : [x,y,width,height],}]
"count",
"label"
}
categories[{
"id", "name", "supercategory",
}]
The TFRecord file contains the following features:
{ image/height, image/width, image/source_id, image/encoded,
image/class/label_text, image/class/label,
image/object/class/text,
image/object/bbox/ymin, image/object/bbox/xmin, image/object/bbox/ymax,
image/object/bbox/xmax, image/object/area
image/filename, image/format, image/key/sha256}
For classification models, you need the image/encoded and image/class/label.
Please note that this tool creates sharded output files.
Example usage:
Add folder tensorflow/models/research/slim to your PYTHONPATH,
and from this folder, run the following commands:
bash download_mscoco.sh path-to-mscoco-dataset
TRAIN_IMAGE_DIR="path-to-mscoco-dataset/train2014"
VAL_IMAGE_DIR="path-to-mscoco-dataset/val2014"
TRAIN_ANNOTATIONS_FILE="path-to-mscoco-dataset/annotations/instances_train2014.json"
VAL_ANNOTATIONS_FILE="path-to-mscoco-dataset/annotations/instances_val2014.json"
python datasets/build_visualwakewords_data.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}" \
--small_object_area_threshold=0.005 \
--foreground_class_of_interest='person'
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from datasets import build_visualwakewords_data_lib
flags = tf.app.flags
tf.flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
tf.flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
tf.flags.DEFINE_string('train_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '',
'Validation annotations JSON file.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
tf.flags.DEFINE_float(
'small_object_area_threshold', 0.005,
'Threshold of fraction of image area below which small'
'objects are filtered')
tf.flags.DEFINE_string(
'foreground_class_of_interest', 'person',
'Build a binary classifier based on the presence or absence'
'of this object in the scene (default is person/not-person)')
FLAGS = flags.FLAGS
tf.logging.set_verbosity(tf.logging.INFO)
def main(unused_argv):
# Path to COCO dataset images and annotations
assert FLAGS.train_image_dir, '`train_image_dir` missing.'
assert FLAGS.val_image_dir, '`val_image_dir` missing.'
assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
visualwakewords_annotations_train = os.path.join(
FLAGS.output_dir, 'instances_visualwakewords_train2014.json')
visualwakewords_annotations_val = os.path.join(
FLAGS.output_dir, 'instances_visualwakewords_val2014.json')
visualwakewords_labels_filename = os.path.join(FLAGS.output_dir,
'labels.txt')
small_object_area_threshold = FLAGS.small_object_area_threshold
foreground_class_of_interest = FLAGS.foreground_class_of_interest
# Create the Visual WakeWords annotations from COCO annotations
if not tf.gfile.IsDirectory(FLAGS.output_dir):
tf.gfile.MakeDirs(FLAGS.output_dir)
build_visualwakewords_data_lib.create_visual_wakeword_annotations(
FLAGS.train_annotations_file, visualwakewords_annotations_train,
small_object_area_threshold, foreground_class_of_interest,
visualwakewords_labels_filename)
build_visualwakewords_data_lib.create_visual_wakeword_annotations(
FLAGS.val_annotations_file, visualwakewords_annotations_val,
small_object_area_threshold, foreground_class_of_interest,
visualwakewords_labels_filename)
# Create the TF Records for Visual WakeWords Dataset
if not tf.gfile.IsDirectory(FLAGS.output_dir):
tf.gfile.MakeDirs(FLAGS.output_dir)
train_output_path = os.path.join(FLAGS.output_dir, 'train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'val.record')
build_visualwakewords_data_lib.create_tf_record_for_visualwakewords_dataset(
visualwakewords_annotations_train,
FLAGS.train_image_dir,
train_output_path,
num_shards=100)
build_visualwakewords_data_lib.create_tf_record_for_visualwakewords_dataset(
visualwakewords_annotations_val,
FLAGS.val_image_dir,
val_output_path,
num_shards=10)
if __name__ == '__main__':
tf.app.run()
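For orientation (illustrative, not part of this commit): with the default flags the script writes `instances_visualwakewords_train2014.json`, `instances_visualwakewords_val2014.json`, `labels.txt`, and sharded TFRecords into `--output_dir`. A minimal sketch of the shard names it produces, reusing the format string from `dataset_utils.open_sharded_output_tfrecords` further below:

```python
from __future__ import print_function
import os

# Shard names follow the '{}-{:05d}-of-{:05d}' pattern used by
# dataset_utils.open_sharded_output_tfrecords.
output_dir = '/tmp/'  # default --output_dir; a placeholder here
for base, num_shards in [('train.record', 100), ('val.record', 10)]:
  base_path = os.path.join(output_dir, base)
  shards = ['{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
            for idx in range(num_shards)]
  print(shards[0], '...', shards[-1])
```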
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Generate Visual Wakewords Dataset.
Helper functions to generate the Visual WakeWords dataset. It filters raw
COCO annotations file to Visual WakeWords Dataset annotations.
The resulting annotations and COCO images are then
converted to TF records.
See build_visualwakewords_data.py for the sample usage.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import os
import contextlib2
import PIL.Image
import tensorflow as tf
from datasets import dataset_utils
tf.logging.set_verbosity(tf.logging.INFO)
def create_visual_wakeword_annotations(annotations_file,
visualwakewords_annotations_path,
small_object_area_threshold,
foreground_class_of_interest,
visualwakewords_labels_filename):
"""Generate visual wakewords annotations file.
Loads COCO annotation json files and filters to person/not-person
class (or user-specified class) to generate visual wakewords annotations file.
Each image is assigned a label 1 or 0. The label 1 is assigned as long
as it has at least one foreground_class_of_interest (e.g. person)
bounding box greater than 5% of the image area.
Args:
annotations_file: JSON file containing COCO bounding box annotations
visualwakewords_annotations_path: output path to annotations file
small_object_area_threshold: threshold on fraction of image area below which
small object bounding boxes are filtered
foreground_class_of_interest: category from COCO dataset that is filtered by
the visual wakewords dataset
visualwakewords_labels_filename: The filename to write the visual wakewords
label file
"""
# default object of interest is person
foreground_class_of_interest_id = 1
with tf.gfile.GFile(annotations_file, 'r') as fid:
groundtruth_data = json.load(fid)
images = groundtruth_data['images']
# Create category index
category_index = {}
for category in groundtruth_data['categories']:
if category['name'] == foreground_class_of_interest:
foreground_class_of_interest_id = category['id']
category_index[category['id']] = category
# Create annotations index
annotations_index = {}
annotations_index_filtered = {}
if 'annotations' in groundtruth_data:
tf.logging.info(
'Found groundtruth annotations. Building annotations index.')
for annotation in groundtruth_data['annotations']:
image_id = annotation['image_id']
if image_id not in annotations_index:
annotations_index[image_id] = []
annotations_index_filtered[image_id] = []
annotations_index[image_id].append(annotation)
missing_annotation_count = 0
for image in images:
image_id = image['id']
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
annotations_index_filtered[image_id] = []
tf.logging.info('%d images are missing annotations.',
missing_annotation_count)
# Create filtered annotations index
for idx, image in enumerate(images):
if idx % 100 == 0:
tf.logging.info('On image %d of %d', idx, len(images))
annotations_list = annotations_index[image['id']]
annotations_list_filtered = _filter_annotations_list(
annotations_list, image, small_object_area_threshold,
foreground_class_of_interest_id)
annotations_index_filtered[image['id']].append(annotations_list_filtered)
# Output Visual WakeWords annotations and labels
labels_to_class_names = {0: 'background', 1: foreground_class_of_interest}
with open(visualwakewords_labels_filename, 'w') as fp:
for label in labels_to_class_names:
fp.write(str(label) + ':' + str(labels_to_class_names[label]) + '\n')
with open(visualwakewords_annotations_path, 'w') as fp:
json.dump(
{
'images': images,
'annotations': annotations_index_filtered,
'categories': category_index
}, fp)
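Illustrative sketch (not part of this commit): the labels file written above maps integer labels to class names and can be read back with the existing `dataset_utils.read_label_file`, assuming the default `--output_dir` of `/tmp/`:

```python
# Read back the labels file written by create_visual_wakeword_annotations.
from datasets import dataset_utils

labels_to_class_names = dataset_utils.read_label_file('/tmp/', 'labels.txt')
print(labels_to_class_names)
# With the default foreground class this prints {0: 'background', 1: 'person'}.
```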
def _filter_annotations_list(annotations_list, image,
small_object_area_threshold,
foreground_class_of_interest_id):
"""Filters COCO annotations_list to visual wakewords annotations_list.
Each image is assigned a label 1 or 0. The label 1 is assigned as long
as it has at least one foreground_class_of_interest (e.g. person)
bounding box greater than 5% of the image area.
Args:
annotations_list: list of dicts with keys: [ u'id', u'image_id',
u'category_id', u'segmentation', u'area', u'bbox' : [x,y,width,height],
u'iscrowd']. Notice that bounding box coordinates in the official COCO
dataset are given as [x, y, width, height] tuples using absolute
coordinates where x, y represent the top-left (0-indexed) corner.
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
small_object_area_threshold: threshold on fraction of image area below which
small objects are filtered
foreground_class_of_interest_id: category of COCO dataset which visual
wakewords filters
Returns:
filtered_annotations_list: list of dicts with keys: [ u'image_id',
u'label', u'category_id', u'count',
u'object':[{"category_id", "area", "bbox" : [x,y,width,height],}]
"""
category_ids = []
area = []
flag_small_object = []
num_ann = 0
image_height = image['height']
image_width = image['width']
image_area = image_height * image_width
bbox = []
# count of filtered object
count = 0
for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
obj_area = object_annotations['area']
normalized_object_area = obj_area / image_area
# Filter small object bounding boxes
if category_id == foreground_class_of_interest_id:
if normalized_object_area < small_object_area_threshold:
flag_small_object.append(True)
else:
flag_small_object.append(False)
bbox.append({
u'bbox': [x, y, width, height],
u'area': obj_area,
u'category_id': category_id
})
count = count + 1
area.append(obj_area)
num_ann = num_ann + 1
# Filtered annotations_list with two classes corresponding to
# foreground_class_of_interest_id (e.g. person) and
# background (e.g. not-person)
if (foreground_class_of_interest_id in category_ids) and (
False in flag_small_object):
return {
u'image_id': image['id'],
u'label': 1,
u'object': bbox,
u'count': count
}
else:
return {u'image_id': image['id'], u'label': 0, u'object': [], u'count': 0}
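Illustrative sketch (not part of this commit) of how `_filter_annotations_list` assigns the image-level label; all values below are made up:

```python
from datasets.build_visualwakewords_data_lib import _filter_annotations_list

image = {'id': 42, 'height': 100, 'width': 100}  # image area = 10000
annotations_list = [
    # Person box covering 20% of the image: kept, so the image gets label 1.
    {'category_id': 1, 'area': 2000.0, 'bbox': [10, 10, 40, 50]},
    # Person box covering 0.2% of the image: flagged as a small object.
    {'category_id': 1, 'area': 20.0, 'bbox': [0, 0, 4, 5]},
]
filtered = _filter_annotations_list(
    annotations_list, image,
    small_object_area_threshold=0.005,
    foreground_class_of_interest_id=1)
# filtered == {'image_id': 42, 'label': 1, 'count': 1,
#              'object': [{'bbox': [10, 10, 40, 50], 'area': 2000.0,
#                          'category_id': 1}]}
```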
def create_tf_record_for_visualwakewords_dataset(annotations_file, image_dir,
output_path, num_shards):
"""Loads Visual WakeWords annotations/images and converts to tf.Record format.
Args:
annotations_file: JSON file containing bounding box annotations.
image_dir: Directory containing the image files.
output_path: Path to output tf.Record file.
num_shards: number of output file shards.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
output_tfrecords = dataset_utils.open_sharded_output_tfrecords(
tf_record_close_stack, output_path, num_shards)
groundtruth_data = json.load(fid)
images = groundtruth_data['images']
category_index = {}
for category in groundtruth_data['categories'].values():
# if not background class
if category['id'] != 0:
category_index[category['id']] = category
annotations_index = {}
if 'annotations' in groundtruth_data:
tf.logging.info(
'Found groundtruth annotations. Building annotations index.')
for annotation in groundtruth_data['annotations'].values():
image_id = annotation[0]['image_id']
if image_id not in annotations_index:
annotations_index[image_id] = []
annotations_index[image_id].append(annotation[0])
missing_annotation_count = 0
for image in images:
image_id = image['id']
if image_id not in annotations_index:
missing_annotation_count += 1
annotations_index[image_id] = []
tf.logging.info('%d images are missing annotations.',
missing_annotation_count)
total_num_annotations_skipped = 0
for idx, image in enumerate(images):
if idx % 100 == 0:
tf.logging.info('On image %d of %d', idx, len(images))
annotations_list = annotations_index[image['id']]
_, tf_example, num_annotations_skipped = _create_tf_example(
image, annotations_list[0], image_dir)
total_num_annotations_skipped += num_annotations_skipped
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
tf.logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
def _create_tf_example(image, annotations_list, image_dir):
"""Converts image and annotations to a tf.Example proto.
Args:
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
annotations_list:
list of dicts with keys: [u'image_id', u'bbox', u'label',
object[{"category_id", "area", "bbox" : [x,y,width,height],}]]. Notice
that bounding box coordinates in the COCO dataset are given as [x, y,
width, height] tuples using absolute coordinates where x, y represent
the top-left (0-indexed) corner. This function converts to the format
that can be used by the Tensorflow Object Detection API (which is [ymin,
xmin, ymax, xmax] with coordinates normalized relative to image size).
image_dir: directory containing the image files.
Returns:
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
full_path = os.path.join(image_dir, filename)
with tf.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
key = hashlib.sha256(encoded_jpg).hexdigest()
xmin = []
xmax = []
ymin = []
ymax = []
category_ids = []
area = []
num_annotations_skipped = 0
label = annotations_list['label']
for object_annotations in annotations_list['object']:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
num_annotations_skipped += 1
continue
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
area.append(object_annotations['area'])
feature_dict = {
'image/height':
dataset_utils.int64_feature(image_height),
'image/width':
dataset_utils.int64_feature(image_width),
'image/filename':
dataset_utils.bytes_feature(filename.encode('utf8')),
'image/source_id':
dataset_utils.bytes_feature(str(image_id).encode('utf8')),
'image/key/sha256':
dataset_utils.bytes_feature(key.encode('utf8')),
'image/encoded':
dataset_utils.bytes_feature(encoded_jpg),
'image/format':
dataset_utils.bytes_feature('jpeg'.encode('utf8')),
'image/class/label':
dataset_utils.int64_feature(label),
'image/object/bbox/xmin':
dataset_utils.float_list_feature(xmin),
'image/object/bbox/xmax':
dataset_utils.float_list_feature(xmax),
'image/object/bbox/ymin':
dataset_utils.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_utils.float_list_feature(ymax),
'image/object/class/label':
dataset_utils.int64_feature(label),
'image/object/area':
dataset_utils.float_list_feature(area),
}
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped
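Illustrative sketch (not part of this commit): one way to spot-check a generated shard, assuming TF 1.x (matching the `tf.python_io` and `tf.gfile` APIs used above) and a placeholder shard path:

```python
from __future__ import print_function
import tensorflow as tf

record_path = '/tmp/train.record-00000-of-00100'  # placeholder shard path
for serialized in tf.python_io.tf_record_iterator(record_path):
  example = tf.train.Example()
  example.ParseFromString(serialized)
  features = example.features.feature
  print('label:', features['image/class/label'].int64_list.value[0])
  print('source_id:', features['image/source_id'].bytes_list.value[0])
  break
```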
...@@ -22,12 +22,14 @@ from datasets import cifar10
from datasets import flowers
from datasets import imagenet
from datasets import mnist
from datasets import visualwakewords

datasets_map = {
    'cifar10': cifar10,
    'flowers': flowers,
    'imagenet': imagenet,
    'mnist': mnist,
    'visualwakewords': visualwakewords,
}
......
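Illustrative sketch (not part of this commit): with the factory entry above, the new dataset is selected by name through the existing `dataset_factory.get_dataset` call; the `dataset_dir` below is a placeholder for wherever the TFRecords and `labels.txt` were written:

```python
from __future__ import print_function
from datasets import dataset_factory

dataset = dataset_factory.get_dataset(
    'visualwakewords', 'train', '/tmp/visualwakewords')
print(dataset.num_samples, dataset.num_classes)  # 82783 2
```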
...@@ -41,6 +41,30 @@ def int64_feature(values):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
def bytes_list_feature(values):
"""Returns a TF-Feature of list of bytes.
Args:
values: A string or list of strings.
Returns:
A TF-Feature.
"""
return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))
def float_list_feature(values):
"""Returns a TF-Feature of list of floats.
Args:
values: A float or list of floats.
Returns:
A TF-Feature.
"""
return tf.train.Feature(float_list=tf.train.FloatList(value=values))
def bytes_feature(values):
  """Returns a TF-Feature of bytes.
...@@ -148,3 +172,28 @@ def read_label_file(dataset_dir, filename=LABELS_FILENAME):
    index = line.index(':')
    labels_to_class_names[int(line[:index])] = line[index+1:]
  return labels_to_class_names
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""Opens all TFRecord shards for writing and adds them to an exit stack.
Args:
exit_stack: A context2.ExitStack used to automatically closed the TFRecords
opened in this function.
base_path: The base path for all shards
num_shards: The number of shards
Returns:
The list of opened TFRecords. Position k in the list corresponds to shard k.
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in range(num_shards)
]
tfrecords = [
exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
for file_name in tf_record_output_filenames
]
return tfrecords
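Illustrative usage (not part of this commit) of `open_sharded_output_tfrecords`, mirroring how `build_visualwakewords_data_lib` uses it above:

```python
import contextlib2
import tensorflow as tf

from datasets import dataset_utils

with contextlib2.ExitStack() as tf_record_close_stack:
  writers = dataset_utils.open_sharded_output_tfrecords(
      tf_record_close_stack, '/tmp/example.record', num_shards=2)
  empty_example = tf.train.Example()  # empty Example, for illustration only
  for idx in range(10):
    writers[idx % 2].write(empty_example.SerializeToString())
# Both shard writers are closed when the ExitStack exits.
```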
...@@ -136,7 +136,7 @@ def _convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir):
            sys.stdout.flush()

            # Read the filename:
-            image_data = tf.gfile.FastGFile(filenames[i], 'rb').read()
+            image_data = tf.gfile.GFile(filenames[i], 'rb').read()
            height, width = image_reader.read_image_dims(sess, image_data)

            class_name = os.path.basename(os.path.dirname(filenames[i]))
......
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download the COCO dataset. See
# http://cocodataset.org/#overview for an overview of the dataset.
#
# usage:
# bash datasets/download_mscoco.sh path-to-COCO-dataset
#
set -e
if [ -z "$1" ]; then
echo "usage download_mscoco.sh [data dir]"
exit
fi
if [ "$(uname)" == "Darwin" ]; then
UNZIP="tar -xf"
else
UNZIP="unzip -nq"
fi
# Create the output directories.
OUTPUT_DIR="${1%/}"
SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${SCRATCH_DIR}"
CURRENT_DIR=$(pwd)
# Helper function to download and unpack a .zip file.
function download_and_unzip() {
local BASE_URL=${1}
local FILENAME=${2}
if [ ! -f ${FILENAME} ]; then
echo "Downloading ${FILENAME} to $(pwd)"
wget -nd -c "${BASE_URL}/${FILENAME}"
else
echo "Skipping download of ${FILENAME}"
fi
echo "Unzipping ${FILENAME}"
${UNZIP} ${FILENAME}
}
cd ${SCRATCH_DIR}
# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"
TRAIN_IMAGE_FILE="train2014.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2014"
VAL_IMAGE_FILE="val2014.zip"
download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
VAL_IMAGE_DIR="${SCRATCH_DIR}/val2014"
# Download the annotations.
BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
INSTANCES_FILE="annotations_trainval2014.zip"
download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for Visual WakeWords Dataset with images+labels.
Visual WakeWords Dataset derives from the COCO dataset to design tiny models
classifying two classes, such as person/not-person. The COCO annotations
are filtered to two classes: person and not-person (or another user-defined
category). Bounding boxes for small objects with area less than 5% of the image
area are filtered out.
See build_visualwakewords_data.py which generates the Visual WakeWords dataset
annotations from the raw COCO dataset and converts them to TFRecord.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from datasets import dataset_utils
slim = tf.contrib.slim
_FILE_PATTERN = '%s.record-*'
_SPLITS_TO_SIZES = {
'train': 82783,
'validation': 40504,
}
_ITEMS_TO_DESCRIPTIONS = {
'image': 'A color image of varying height and width.',
'label': 'The label id of the image, an integer in {0, 1}',
'object/bbox': 'A list of bounding boxes.',
'object/label': 'A list of labels, all objects belong to the same class.',
}
_NUM_CLASSES = 2
# labels file
LABELS_FILENAME = 'labels.txt'
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading ImageNet.
Args:
split_name: A train/test split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources. It
is assumed that the pattern contains a '%s' string so that the split name
can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/test split.
"""
if split_name not in _SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)
if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if reader is None:
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded':
tf.FixedLenFeature((), tf.string, default_value=''),
'image/format':
tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/class/label':
tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
'image/object/bbox/xmin':
tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin':
tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax':
tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax':
tf.VarLenFeature(dtype=tf.float32),
'image/object/class/label':
tf.VarLenFeature(dtype=tf.int64),
}
items_to_handlers = {
'image':
slim.tfexample_decoder.Image('image/encoded', 'image/format'),
'label':
slim.tfexample_decoder.Tensor('image/class/label'),
'object/bbox':
slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
'image/object/bbox/'),
'object/label':
slim.tfexample_decoder.Tensor('image/object/class/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
items_to_handlers)
labels_to_names = None
labels_file = os.path.join(dataset_dir, LABELS_FILENAME)
if tf.gfile.Exists(labels_file):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=_SPLITS_TO_SIZES[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES,
labels_to_names=labels_to_names)
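Illustrative sketch (not part of this commit): reading images and labels through this descriptor with TF-Slim's `DatasetDataProvider`, assuming TF 1.x with `tf.contrib.slim` and a placeholder `dataset_dir`:

```python
import tensorflow as tf

from datasets import visualwakewords

slim = tf.contrib.slim

dataset = visualwakewords.get_split('train', '/tmp/visualwakewords')
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset, num_readers=4, shuffle=True)
image, label = provider.get(['image', 'label'])
```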
...@@ -171,6 +171,7 @@ def expanded_conv(input_tensor,
                  project_activation_fn=tf.identity,
                  split_projection=1,
                  split_expansion=1,
                  split_divisible_by=8,
                  expansion_transform=None,
                  depthwise_location='expansion',
                  depthwise_channel_multiplier=1,
...@@ -202,6 +203,7 @@ def expanded_conv(input_tensor,
    split_expansion: how many ways to split expansion op
      (that is conv bottleneck->expansion) ops will keep depth divisible
      by this value.
    split_divisible_by: make sure every split group is divisible by this number.
    expansion_transform: Optional function that takes expansion
      as a single input and returns output.
    depthwise_location: where to put depthwise covnvolutions supported
...@@ -268,6 +270,7 @@ def expanded_conv(input_tensor,
          inner_size,
          num_ways=split_expansion,
          scope='expand',
          divisible_by=split_divisible_by,
          stride=1,
          normalizer_fn=normalizer_fn)
      net = tf.identity(net, 'expansion_output')
...@@ -292,6 +295,7 @@ def expanded_conv(input_tensor,
          num_ways=split_projection,
          stride=1,
          scope='project',
          divisible_by=split_divisible_by,
          normalizer_fn=normalizer_fn,
          activation_fn=project_activation_fn)
      if endpoints is not None:
......
...@@ -110,7 +110,7 @@ _Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func'])
def op(opfunc, multiplier_func=depth_multiplier, **params):
-  multiplier = params.pop('multiplier_transorm', multiplier_func)
+  multiplier = params.pop('multiplier_transform', multiplier_func)
  return _Op(opfunc, params=params, multiplier_func=multiplier)
......
...@@ -105,8 +105,7 @@ def mobilenet(input_tensor,
    input_tensor: The input tensor
    num_classes: number of classes
    depth_multiplier: The multiplier applied to scale number of
-      channels in each layer. Note: this is called depth multiplier in the
-      paper but the name is kept for consistency with slim's model builder.
+      channels in each layer.
    scope: Scope of the operator
    conv_defs: Allows to override default conv def.
    finegrain_classification_mode: When set to True, the model
......
...@@ -153,10 +153,14 @@ def distorted_bounding_box_crop(image,
    return cropped_image, distort_bbox


-def preprocess_for_train(image, height, width, bbox,
+def preprocess_for_train(image,
+                         height,
+                         width,
+                         bbox,
                         fast_mode=True,
                         scope=None,
-                         add_image_summaries=True):
+                         add_image_summaries=True,
+                         random_crop=True):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
...@@ -180,6 +184,8 @@ def preprocess_for_train(image, height, width, bbox,
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.
    add_image_summaries: Enable image summaries.
+    random_crop: Enable random cropping of images during preprocessing for
+      training.

  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
...@@ -197,15 +203,18 @@ def preprocess_for_train(image, height, width, bbox,
    if add_image_summaries:
      tf.summary.image('image_with_bounding_boxes', image_with_box)

-    distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
-    # Restore the shape since the dynamic slice based upon the bbox_size loses
-    # the third dimension.
-    distorted_image.set_shape([None, None, 3])
-    image_with_distorted_box = tf.image.draw_bounding_boxes(
-        tf.expand_dims(image, 0), distorted_bbox)
-    if add_image_summaries:
-      tf.summary.image('images_with_distorted_bounding_box',
-                       image_with_distorted_box)
+    if not random_crop:
+      distorted_image = image
+    else:
+      distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
+      # Restore the shape since the dynamic slice based upon the bbox_size loses
+      # the third dimension.
+      distorted_image.set_shape([None, None, 3])
+      image_with_distorted_box = tf.image.draw_bounding_boxes(
+          tf.expand_dims(image, 0), distorted_bbox)
+      if add_image_summaries:
+        tf.summary.image('images_with_distorted_bounding_box',
+                         image_with_distorted_box)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
...@@ -220,7 +229,7 @@ def preprocess_for_train(image, height, width, bbox,
        num_cases=num_resize_cases)

    if add_image_summaries:
-      tf.summary.image('cropped_resized_image',
+      tf.summary.image(('cropped_' if random_crop else '') + 'resized_image',
                       tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
...@@ -241,8 +250,12 @@ def preprocess_for_train(image, height, width, bbox,
    return distorted_image


-def preprocess_for_eval(image, height, width,
-                        central_fraction=0.875, scope=None):
+def preprocess_for_eval(image,
+                        height,
+                        width,
+                        central_fraction=0.875,
+                        scope=None,
+                        central_crop=True):
  """Prepare one image for evaluation.

  If height and width are specified it would output an image with that size by
...@@ -260,6 +273,8 @@ def preprocess_for_eval(image, height, width,
    width: integer
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.
+    central_crop: Enable central cropping of images during preprocessing for
+      evaluation.

  Returns:
    3-D float Tensor of prepared image.
  """
...@@ -268,7 +283,7 @@ def preprocess_for_eval(image, height, width,
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Crop the central region of the image with an area containing 87.5% of
    # the original image.
-    if central_fraction:
+    if central_crop and central_fraction:
      image = tf.image.central_crop(image, central_fraction=central_fraction)

    if height and width:
...@@ -282,11 +297,14 @@ def preprocess_for_eval(image, height, width,
    return image


-def preprocess_image(image, height, width,
+def preprocess_image(image,
+                     height,
+                     width,
                     is_training=False,
                     bbox=None,
                     fast_mode=True,
-                     add_image_summaries=True):
+                     add_image_summaries=True,
+                     crop_image=True):
  """Pre-process one image for training or evaluation.

  Args:
...@@ -304,6 +322,8 @@ def preprocess_image(image, height, width,
      [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations.
    add_image_summaries: Enable image summaries.
+    crop_image: Whether to enable cropping of images during preprocessing for
+      both training and evaluation.

  Returns:
    3-D float Tensor containing an appropriately scaled image
...@@ -312,7 +332,13 @@ def preprocess_image(image, height, width,
    ValueError: if user does not provide bounding box
  """
  if is_training:
-    return preprocess_for_train(image, height, width, bbox, fast_mode,
-                                add_image_summaries=add_image_summaries)
+    return preprocess_for_train(
+        image,
+        height,
+        width,
+        bbox,
+        fast_mode,
+        add_image_summaries=add_image_summaries,
+        random_crop=crop_image)
  else:
-    return preprocess_for_eval(image, height, width)
+    return preprocess_for_eval(image, height, width, central_crop=crop_image)
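Illustrative sketch (not part of this commit) of the new cropping switch: `crop_image=False` disables random cropping during training and the central crop during evaluation, as wired through `preprocess_image` above; the input tensor below is just a placeholder:

```python
import tensorflow as tf

from preprocessing import inception_preprocessing

raw_image = tf.zeros([300, 300, 3], dtype=tf.uint8)  # stand-in for a decoded JPEG
processed = inception_preprocessing.preprocess_image(
    raw_image, height=224, width=224, is_training=False, crop_image=False)
```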