Commit ba87e2c6 authored by Mark Sandler, committed by Sergio Guadarrama

Merged commit includes the following changes: (#7797)

279978375  by Sergio Guadarrama:

    Pass s=2 to the expanded_conv block so it can apply the residual correctly in the case of fused convolutions. (Before, it relied on the channel mismatch only.)
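A minimal sketch of why the stride matters, assuming a simplified expanded_conv-style block (illustrative only, not the actual slim implementation): the residual can only be added when the block keeps the spatial resolution (s=1) and the channel count, so checking the channel mismatch alone misses stride-2 fused blocks that preserve channels.

```python
import tensorflow as tf

def expanded_conv_sketch(input_tensor, num_outputs, stride=1):
  """Illustrative fused block: add the residual only when shapes are preserved."""
  net = tf.layers.conv2d(input_tensor, num_outputs, 3, strides=stride,
                         padding='SAME', activation=tf.nn.relu6)
  in_channels = input_tensor.shape.as_list()[-1]
  # Channels alone are not enough: a stride-2 block can keep the channel
  # count while halving the spatial dims, so the residual also requires s=1.
  if stride == 1 and in_channels == num_outputs:
    net += input_tensor
  return net
```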

--
279788358  by Sergio Guadarrama:

    Update README to add mobilenet-edgetpu details

--
279774392  by Sergio Guadarrama:

    Adds MobilenetV3-EdgeTpu definition.
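A hedged usage sketch of building the new model from the slim mobilenet_v3 module; the spec name V3_EDGETPU, the placeholder shapes, and the keyword arguments below follow the existing V3_LARGE/V3_SMALL pattern and are assumptions, not confirmed by this diff.

```python
import tensorflow as tf
from nets.mobilenet import mobilenet_v3  # research/slim layout

images = tf.placeholder(tf.float32, [1, 224, 224, 3])
# V3_EDGETPU is an assumed conv_defs name mirroring V3_LARGE / V3_SMALL.
logits, endpoints = mobilenet_v3.mobilenet(
    images,
    num_classes=1001,
    depth_multiplier=0.75,
    conv_defs=mobilenet_v3.V3_EDGETPU)
```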

--
278917344  by Sergio Guadarrama:

    Create visualwakewords dataset using slim scripts instead of custom scripts.
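A hedged sketch of consuming the generated TFRecords; it assumes 'visualwakewords' is registered under that name in slim's dataset_factory and that the records live in /tmp/visualwakewords.

```python
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from datasets import dataset_factory  # research/slim

# Assumes the TFRecords were produced by the slim conversion scripts and that
# 'visualwakewords' is a registered dataset name in dataset_factory.
dataset = dataset_factory.get_dataset(
    'visualwakewords', 'train', '/tmp/visualwakewords')
provider = contrib_slim.dataset_data_provider.DatasetDataProvider(dataset)
image, label = provider.get(['image', 'label'])
```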

--
277940048  by Sergio Guadarrama:

    Internal changes to tf.contrib symbols
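The pattern applied throughout the diff below, shown as a minimal before/after sketch: attribute access on the lazily loaded tf.contrib namespace is replaced with explicit submodule imports, while the existing short aliases are kept so the rest of each file is unchanged.

```python
# Before: resolved through the tf.contrib attribute.
import tensorflow as tf
slim = tf.contrib.slim
layers = tf.contrib.layers

# After: explicit contrib imports; downstream code keeps using slim / layers.
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
layers = contrib_layers
```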

--

PiperOrigin-RevId: 279978375
parent c2902cfb
......@@ -20,8 +20,11 @@ from __future__ import print_function
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import util as contrib_util
layers = tf.contrib.layers
layers = contrib_layers
def cyclegan_arg_scope(instance_norm_center=True,
......@@ -54,7 +57,7 @@ def cyclegan_arg_scope(instance_norm_center=True,
if weight_decay and weight_decay > 0.0:
weights_regularizer = layers.l2_regularizer(weight_decay)
with tf.contrib.framework.arg_scope(
with contrib_framework.arg_scope(
[layers.conv2d],
normalizer_fn=layers.instance_norm,
normalizer_params=instance_norm_params,
......@@ -124,7 +127,7 @@ def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose',
def _dynamic_or_static_shape(tensor):
shape = tf.shape(tensor)
static_shape = tf.contrib.util.constant_value(shape)
static_shape = contrib_util.constant_value(shape)
return static_shape if static_shape is not None else shape
......@@ -196,7 +199,7 @@ def cyclegan_generator_resnet(images,
dtype=np.int32)
spatial_pad_3 = np.array([[0, 0], [3, 3], [3, 3], [0, 0]])
with tf.contrib.framework.arg_scope(arg_scope_fn()):
with contrib_framework.arg_scope(arg_scope_fn()):
###########
# Encoder #
......@@ -208,12 +211,11 @@ def cyclegan_generator_resnet(images,
end_points['encoder_0'] = net
with tf.variable_scope('encoder'):
with tf.contrib.framework.arg_scope(
[layers.conv2d],
kernel_size=kernel_size,
stride=2,
activation_fn=tf.nn.relu,
padding='VALID'):
with contrib_framework.arg_scope([layers.conv2d],
kernel_size=kernel_size,
stride=2,
activation_fn=tf.nn.relu,
padding='VALID'):
net = tf.pad(net, paddings, 'REFLECT')
net = layers.conv2d(net, num_filters * 2)
......@@ -226,12 +228,11 @@ def cyclegan_generator_resnet(images,
# Residual Blocks #
###################
with tf.variable_scope('residual_blocks'):
with tf.contrib.framework.arg_scope(
[layers.conv2d],
kernel_size=kernel_size,
stride=1,
activation_fn=tf.nn.relu,
padding='VALID'):
with contrib_framework.arg_scope([layers.conv2d],
kernel_size=kernel_size,
stride=1,
activation_fn=tf.nn.relu,
padding='VALID'):
for block_id in xrange(num_resnet_blocks):
with tf.variable_scope('block_{}'.format(block_id)):
res_net = tf.pad(net, paddings, 'REFLECT')
......@@ -248,11 +249,10 @@ def cyclegan_generator_resnet(images,
###########
with tf.variable_scope('decoder'):
with tf.contrib.framework.arg_scope(
[layers.conv2d],
kernel_size=kernel_size,
stride=1,
activation_fn=tf.nn.relu):
with contrib_framework.arg_scope([layers.conv2d],
kernel_size=kernel_size,
stride=1,
activation_fn=tf.nn.relu):
with tf.variable_scope('decoder1'):
net = upsample_fn(net, num_outputs=num_filters * 2, stride=[2, 2])
......
......@@ -21,8 +21,9 @@ from math import log
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = tf.contrib.slim
slim = contrib_slim
def _validate_image_inputs(inputs):
......
......@@ -25,11 +25,12 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import i3d_utils
from nets import s3dg
slim = tf.contrib.slim
slim = contrib_slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
conv3d_spatiotemporal = i3d_utils.conv3d_spatiotemporal
......
......@@ -20,12 +20,14 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
# Originally, add_arg_scope = slim.add_arg_scope and layers = slim; now switch
# to the more up-to-date tf.contrib.* API.
add_arg_scope = tf.contrib.framework.add_arg_scope
layers = tf.contrib.layers
add_arg_scope = contrib_framework.add_arg_scope
layers = contrib_layers
def center_initializer():
......
......@@ -26,8 +26,9 @@ from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = tf.contrib.slim
slim = contrib_slim
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
......
......@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
......@@ -310,7 +311,7 @@ class InceptionTest(tf.test.TestCase):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
with contrib_slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
......@@ -319,7 +320,7 @@ class InceptionTest(tf.test.TestCase):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(
with contrib_slim.arg_scope(
inception.inception_resnet_v2_arg_scope(batch_norm_scale=True)):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
......
......@@ -25,8 +25,9 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = tf.contrib.slim
slim = contrib_slim
def inception_arg_scope(weight_decay=0.00004,
......
......@@ -19,10 +19,11 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = tf.contrib.slim
slim = contrib_slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
......
......@@ -20,10 +20,11 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
slim = tf.contrib.slim
slim = contrib_slim
class InceptionV1Test(tf.test.TestCase):
......
......@@ -19,10 +19,11 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = tf.contrib.slim
slim = contrib_slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
......
......@@ -20,10 +20,11 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
slim = tf.contrib.slim
slim = contrib_slim
class InceptionV2Test(tf.test.TestCase):
......
......@@ -19,10 +19,11 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = tf.contrib.slim
slim = contrib_slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
......
......@@ -20,10 +20,11 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
slim = tf.contrib.slim
slim = contrib_slim
class InceptionV3Test(tf.test.TestCase):
......
......@@ -25,10 +25,11 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = tf.contrib.slim
slim = contrib_slim
def block_inception_a(inputs, scope=None, reuse=None):
......
......@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
......@@ -259,7 +260,7 @@ class InceptionTest(tf.test.TestCase):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(inception.inception_v4_arg_scope()):
with contrib_slim.arg_scope(inception.inception_v4_arg_scope()):
inception.inception_v4(inputs, num_classes, is_training=False)
self.assertEqual(tf.global_variables('.*/BatchNorm/gamma:0$'), [])
......@@ -268,7 +269,7 @@ class InceptionTest(tf.test.TestCase):
height, width = 299, 299
num_classes = 1000
inputs = tf.placeholder(tf.float32, (1, height, width, 3))
with tf.contrib.slim.arg_scope(
with contrib_slim.arg_scope(
inception.inception_v4_arg_scope(batch_norm_scale=True)):
inception.inception_v4(inputs, num_classes, is_training=False)
......
......@@ -19,8 +19,9 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = tf.contrib.slim
slim = contrib_slim
def lenet(images, num_classes=10, is_training=False,
......
# MobilenNet
# MobileNet
This folder contains building code for
[MobileNetV2](https://arxiv.org/abs/1801.04381) and
......@@ -8,6 +8,12 @@ definition for each model is located in [mobilenet_v2.py](mobilenet_v2.py) and
For MobilenetV1 please refer to this [page](../mobilenet_v1.md)
We have also introduced a family of MobileNets customized for the Edge TPU
accelerator found in
[Google Pixel4](https://blog.google/products/pixel/pixel-4/) devices. The
architectural definition for MobileNetEdgeTPU is located in
[mobilenet_v3.py](mobilenet_v3.py).
## Performance
### Mobilenet V3 latency
......@@ -33,6 +39,14 @@ numbers. We estimate it to be comparable to MobileNetV2 numbers.
![madds_top1_accuracy](g3doc/madds_top1_accuracy.png)
### Mobilenet EdgeTPU latency
The figure below shows the Pixel 4 Edge TPU latency of int8-quantized Mobilenet
EdgeTPU compared with MobilenetV2 and the minimalistic variants of MobilenetV3
(see below).
![Mobilenet Edge TPU latency for Pixel 4 Edge TPU](g3doc/edgetpu_latency.png)
## Pretrained models
### Mobilenet V3 Imagenet Checkpoints
......@@ -43,7 +57,7 @@ large and small models this page also contains so-called minimalistic models,
these models have the same per-layer dimensions characteristic as MobilenetV3
however, they don't utilize any of the advanced blocks (squeeze-and-excite
units, hard-swish, and 5x5 convolutions). While these models are less efficient
on CPU, we find that they are much more performant on GPU/DSP/EdgeTpu.
on CPU, we find that they are much more performant on GPU/DSP.
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 1 | Pixel 2 | Pixel 3 |
| ------------------ | -------- | ---------- | ---- | ------- | ------- | ------- |
......@@ -62,6 +76,20 @@ on CPU, we find that they are much more performant on GPU/DSP/EdgeTpu.
| [Large minimalistic (8-bit)][lm8] | 209 | 3.9 | 71.3 | 37 | 35 | 27 |
| [Small minimalistic (float)] | 65 | 2.0 | 61.9 | 12.2 | 15.1 | 11 |
#### Edge TPU checkpoints:
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 4 EdgeTPU | Pixel 4 CPU |
| ------------------- | -------- | ---------- | ---- | --------------- | ----------- |
| [MobilenetEdgeTPU dm=0.75 (8-bit)] | 624 | 2.9 | 73.5 | 3.1 | 13.8 |
| [MobilenetEdgeTPU dm=1 (8-bit)] | 990 | 4.0 | 75.6 | 3.6 | 20.6 |
Note: the 8-bit quantized versions of the MobilenetEdgeTPU models were obtained
using TensorFlow Lite's
[post-training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization)
tool.
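A hedged sketch of such a conversion with the TF 1.x TFLiteConverter; the graph file, tensor names, and input size are placeholders, and full-integer quantization additionally needs a real representative dataset.

```python
import numpy as np
import tensorflow as tf

# Placeholder graph path and tensor names; substitute the exported frozen graph.
converter = tf.lite.TFLiteConverter.from_frozen_graph(
    'mobilenet_edgetpu_frozen.pb',
    input_arrays=['input'],
    output_arrays=['MobilenetEdgeTPU/Predictions/Softmax'],
    input_shapes={'input': [1, 224, 224, 3]})
converter.optimizations = [tf.lite.Optimize.DEFAULT]

def representative_dataset():
  # Replace the random tensors with real calibration images when doing
  # full-integer quantization.
  for _ in range(100):
    yield [np.random.uniform(-1, 1, size=(1, 224, 224, 3)).astype(np.float32)]

converter.representative_dataset = representative_dataset

tflite_model = converter.convert()
with open('mobilenet_edgetpu_quant.tflite', 'wb') as f:
  f.write(tflite_model)
```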
[Small minimalistic (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small-minimalistic_224_1.0_float.tgz
[Large minimalistic (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large-minimalistic_224_1.0_float.tgz
......@@ -72,6 +100,8 @@ on CPU, we find that they are much more performant on GPU/DSP/EdgeTpu.
[Small dm=1 (8-bit)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_1.0_uint8.tgz
[Large dm=0.75 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_0.75_float.tgz
[Small dm=0.75 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_0.75_float.tgz
[MobilenetEdgeTPU dm=0.75 (8-bit)]: https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/mobilenet_edgetpu_224_0.75.tgz
[MobilenetEdgeTPU dm=1 (8-bit)]: https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/mobilenet_edgetpu_224_1.0.tgz
### Mobilenet V2 Imagenet Checkpoints
......
......@@ -17,8 +17,9 @@ import contextlib
import functools
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = tf.contrib.slim
slim = contrib_slim
def _fixed_padding(inputs, kernel_size, rate=1):
......@@ -463,4 +464,3 @@ def squeeze_excite(input_tensor,
align_corners=True)
result = input_tensor * excite
return result
......@@ -23,9 +23,9 @@ import copy
import os
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = tf.contrib.slim
slim = contrib_slim
@slim.add_arg_scope
......