Commit 376dc8dd authored by Mark Sandler's avatar Mark Sandler Committed by Sergio Guadarrama
Browse files

Pulling internal changes to github (#3583)

* Internal change.

PiperOrigin-RevId: 187042423

* Internal change.

PiperOrigin-RevId: 187072380

* Opensource float and eight-bit fixed-point mobilenet_v1 training and eval scripts.

PiperOrigin-RevId: 187106140

* Initial check-in for Mobilenet V2

PiperOrigin-RevId: 187213595

* Allow configuring batch normalization decay and epsilon in MobileNet v1

PiperOrigin-RevId: 187425294

* Allow overriding NASNet model HParams.

This is a change to the API that will allow users to pass in their own configs
to the building functions, which should make these APIs much more customizable
for end-user cases.

This change removes the use_aux_head argument from the model construction
functions, which is no longer necessary given that the use_aux_head option is
configurable in the model config. For example, for the mobile ImageNet model,
the auxiliary head can be disabled using:

config = nasnet.mobile_imagenet_config()
config.set_hparam('use_aux_head', 0)
logits, endpoints = nasnet.build_nasnet_mobile(
    inputs, num_classes, config=config)
PiperOrigin-RevId: 188617685

* Automated g4 rollback of changelist 188617685

PiperOrigin-RevId: 188619139

* Removes spurious comment
parent 81a34cbe
...@@ -201,7 +201,7 @@ py_library( ...@@ -201,7 +201,7 @@ py_library(
":cyclegan", ":cyclegan",
":inception", ":inception",
":lenet", ":lenet",
":mobilenet_v1", ":mobilenet",
":nasnet", ":nasnet",
":overfeat", ":overfeat",
":pix2pix", ":pix2pix",
...@@ -429,25 +429,31 @@ py_library( ...@@ -429,25 +429,31 @@ py_library(
], ],
) )
py_binary( py_library(
name = "mobilenet_v1_train", name = "mobilenet_v2",
srcs = ["nets/mobilenet_v1_train.py"], srcs = glob(["nets/mobilenet/*.py"]),
srcs_version = "PY2AND3",
deps = [ deps = [
":dataset_factory", "//third_party/py/contextlib2",
":mobilenet_v1", # "//tensorflow",
":preprocessing_factory",
# "//tensorflow"
], ],
) )
py_binary( py_test(
name = "mobilenet_v1_eval", name = "mobilenet_v2_test",
srcs = ["nets/mobilenet_v1_eval.py"], srcs = ["nets/mobilenet/mobilenet_v2_test.py"],
srcs_version = "PY2AND3",
deps = [
":mobilenet",
# "//tensorflow",
],
)
py_library(
name = "mobilenet",
deps = [ deps = [
":dataset_factory",
":mobilenet_v1", ":mobilenet_v1",
":preprocessing_factory", ":mobilenet_v2",
# "//tensorflow"
], ],
) )
...@@ -464,6 +470,28 @@ py_test( ...@@ -464,6 +470,28 @@ py_test(
], ],
) )
py_binary(
name = "mobilenet_v1_train",
srcs = ["nets/mobilenet_v1_train.py"],
deps = [
":dataset_factory",
":mobilenet_v1",
":preprocessing_factory",
# "//tensorflow",
],
)
py_binary(
name = "mobilenet_v1_eval",
srcs = ["nets/mobilenet_v1_eval.py"],
deps = [
":dataset_factory",
":mobilenet_v1",
":preprocessing_factory",
# "//tensorflow",
],
)
py_library( py_library(
name = "nasnet_utils", name = "nasnet_utils",
srcs = ["nets/nasnet/nasnet_utils.py"], srcs = ["nets/nasnet/nasnet_utils.py"],
......
...@@ -259,6 +259,7 @@ Model | TF-Slim File | Checkpoint | Top-1 Accuracy| Top-5 Accuracy | ...@@ -259,6 +259,7 @@ Model | TF-Slim File | Checkpoint | Top-1 Accuracy| Top-5 Accuracy |
[MobileNet_v1_1.0_224](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_1.0_224.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz)|70.9|89.9| [MobileNet_v1_1.0_224](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_1.0_224.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz)|70.9|89.9|
[MobileNet_v1_0.50_160](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.50_160.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_160.tgz)|59.1|81.9| [MobileNet_v1_0.50_160](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.50_160.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_160.tgz)|59.1|81.9|
[MobileNet_v1_0.25_128](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.25_128.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_128.tgz)|41.5|66.3| [MobileNet_v1_0.25_128](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.25_128.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_128.tgz)|41.5|66.3|
[MobileNet_v2_1.0_224^*](https://arxiv.org/abs/1801.04381)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py)|[Checkpoint TBA]()|72.2|91.0|
[NASNet-A_Mobile_224](https://arxiv.org/abs/1707.07012)#|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py)|[nasnet-a_mobile_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_mobile_04_10_2017.tar.gz)|74.0|91.6| [NASNet-A_Mobile_224](https://arxiv.org/abs/1707.07012)#|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py)|[nasnet-a_mobile_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_mobile_04_10_2017.tar.gz)|74.0|91.6|
[NASNet-A_Large_331](https://arxiv.org/abs/1707.07012)#|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py)|[nasnet-a_large_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_large_04_10_2017.tar.gz)|82.7|96.2| [NASNet-A_Large_331](https://arxiv.org/abs/1707.07012)#|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py)|[nasnet-a_large_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_large_04_10_2017.tar.gz)|82.7|96.2|
...@@ -273,6 +274,8 @@ All 16 float MobileNet V1 models reported in the [MobileNet Paper](https://arxiv ...@@ -273,6 +274,8 @@ All 16 float MobileNet V1 models reported in the [MobileNet Paper](https://arxiv
16 quantized [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) compatible MobileNet V1 models can be found 16 quantized [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) compatible MobileNet V1 models can be found
[here](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet_v1.md). [here](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet_v1.md).
(^#) More details on Mobilenet V2 models can be found [here](nets/mobilenet/README.md).
(\*): Results quoted from the [paper](https://arxiv.org/abs/1603.05027). (\*): Results quoted from the [paper](https://arxiv.org/abs/1603.05027).
Here is an example of how to download the Inception V3 checkpoint: Here is an example of how to download the Inception V3 checkpoint:
...@@ -517,4 +520,3 @@ image_preprocessing_fn = preprocessing_factory.get_preprocessing( ...@@ -517,4 +520,3 @@ image_preprocessing_fn = preprocessing_factory.get_preprocessing(
See See
[Hardware Specifications](https://github.com/tensorflow/models/tree/master/research/inception#what-hardware-specification-are-these-hyper-parameters-targeted-for). [Hardware Specifications](https://github.com/tensorflow/models/tree/master/research/inception#what-hardware-specification-are-these-hyper-parameters-targeted-for).
...@@ -93,7 +93,6 @@ import sys ...@@ -93,7 +93,6 @@ import sys
import threading import threading
import numpy as np import numpy as np
from six.moves import xrange
import tensorflow as tf import tensorflow as tf
tf.app.flags.DEFINE_string('train_directory', '/tmp/', tf.app.flags.DEFINE_string('train_directory', '/tmp/',
......
...@@ -43,7 +43,7 @@ SYNSETS_FILE="${2:-./synsets.txt}" ...@@ -43,7 +43,7 @@ SYNSETS_FILE="${2:-./synsets.txt}"
echo "Saving downloaded files to $OUTDIR" echo "Saving downloaded files to $OUTDIR"
mkdir -p "${OUTDIR}" mkdir -p "${OUTDIR}"
INITIAL_DIR=$(pwd) CURRENT_DIR=$(pwd)
BBOX_DIR="${OUTDIR}bounding_boxes" BBOX_DIR="${OUTDIR}bounding_boxes"
mkdir -p "${BBOX_DIR}" mkdir -p "${BBOX_DIR}"
cd "${OUTDIR}" cd "${OUTDIR}"
...@@ -96,4 +96,4 @@ while read SYNSET; do ...@@ -96,4 +96,4 @@ while read SYNSET; do
rm -f "${SYNSET}.tar" rm -f "${SYNSET}.tar"
echo "Finished processing: ${SYNSET}" echo "Finished processing: ${SYNSET}"
done < "${INITIAL_DIR}/${SYNSETS_FILE}" done < "${SYNSETS_FILE}"
...@@ -52,8 +52,6 @@ import os ...@@ -52,8 +52,6 @@ import os
import os.path import os.path
import sys import sys
from six.moves import xrange
if __name__ == '__main__': if __name__ == '__main__':
if len(sys.argv) < 3: if len(sys.argv) < 3:
......
...@@ -86,8 +86,6 @@ import os.path ...@@ -86,8 +86,6 @@ import os.path
import sys import sys
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from six.moves import xrange
class BoundingBox(object): class BoundingBox(object):
pass pass
......
...@@ -18,7 +18,7 @@ from __future__ import division ...@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
from six.moves import xrange
import tensorflow as tf import tensorflow as tf
layers = tf.contrib.layers layers = tf.contrib.layers
......
...@@ -19,8 +19,6 @@ from __future__ import print_function ...@@ -19,8 +19,6 @@ from __future__ import print_function
from math import log from math import log
from six.moves import xrange
import tensorflow as tf import tensorflow as tf
slim = tf.contrib.slim slim = tf.contrib.slim
......
...@@ -18,7 +18,6 @@ from __future__ import absolute_import ...@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from six.moves import xrange
import tensorflow as tf import tensorflow as tf
from nets import dcgan from nets import dcgan
......
# Mobilenet V2
This folder contains building code for Mobilenet V2, based on
[Inverted Residuals and Linear Bottlenecks: Mobile Networks for Classification, Detection and Segmentation]
(https://arxiv.org/abs/1801.04381)
# Pretrained model
TODO
# Example
TODO
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Convolution blocks for mobilenet."""
import functools
import contextlib2
import tensorflow as tf
slim = tf.contrib.slim
def _fixed_padding(inputs, kernel_size, rate=1):
"""Pads the input along the spatial dimensions independently of input size.
Pads the input such that if it was used in a convolution with 'VALID' padding,
the output would have the same dimensions as if the unpadded input was used
in a convolution with 'SAME' padding.
Args:
inputs: A tensor of size [batch, height_in, width_in, channels].
kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
rate: An integer, rate for atrous convolution.
Returns:
output: A tensor of size [batch, height_out, width_out, channels] with the
input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
"""
kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)]
pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
[pad_beg[1], pad_end[1]], [0, 0]])
return padded_inputs
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
def _split_divisible(num, num_ways, divisible_by=8):
"""Evenly splits num, num_ways so each piece is a multiple of divisible_by."""
assert num % divisible_by == 0
assert num / num_ways >= divisible_by
# Note: want to round down, we adjust each split to match the total.
base = num // num_ways // divisible_by * divisible_by
result = []
accumulated = 0
for i in range(num_ways):
r = base
while accumulated + r < num * (i + 1) / num_ways:
r += divisible_by
result.append(r)
accumulated += r
assert accumulated == num
return result
@slim.add_arg_scope
def split_separable_conv2d(input_tensor,
num_outputs,
scope=None,
normalizer_fn=None,
stride=1,
rate=1,
endpoints=None,
use_explicit_padding=False):
"""Separable mobilenet V1 style convolution.
Depthwise convolution, with default non-linearity,
followed by 1x1 depthwise convolution. This is similar to
slim.separable_conv2d, but differs in tha it applies batch
normalization and non-linearity to depthwise. This matches
the basic building of Mobilenet Paper
(https://arxiv.org/abs/1704.04861)
Args:
input_tensor: input
num_outputs: number of outputs
scope: optional name of the scope. Note if provided it will use
scope_depthwise for deptwhise, and scope_pointwise for pointwise.
normalizer_fn: which normalizer function to use for depthwise/pointwise
stride: stride
rate: output rate (also known as dilation rate)
endpoints: optional, if provided, will export additional tensors to it.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
Returns:
output tesnor
"""
with contextlib2.ExitStack() as stack:
if scope is None: # Create uniqified separable blocks.
s = stack.enter_context(tf.variable_scope(None, default_name='separable'))
stack.enter_context(tf.name_scope(s.original_name_scope))
scope = ''
else:
# We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts.
scope += '_'
dw_scope = scope + 'depthwise'
endpoints = endpoints if endpoints is not None else {}
kernel_size = [3, 3]
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
net = slim.separable_conv2d(
input_tensor,
None,
kernel_size,
depth_multiplier=1,
stride=stride,
rate=rate,
normalizer_fn=normalizer_fn,
padding=padding,
scope=dw_scope)
endpoints[dw_scope] = net
pw_scope = scope + 'pointwise'
net = slim.conv2d(
net,
num_outputs, [1, 1],
stride=1,
normalizer_fn=normalizer_fn,
scope=pw_scope)
endpoints[pw_scope] = net
return net
def expand_input_by_factor(n, divisible_by=8):
return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by)
@slim.add_arg_scope
def expanded_conv(input_tensor,
num_outputs,
expansion_size=expand_input_by_factor(6),
stride=1,
rate=1,
kernel_size=(3, 3),
residual=True,
normalizer_fn=None,
split_projection=1,
split_expansion=1,
expansion_transform=None,
depthwise_location='expansion',
depthwise_channel_multiplier=1,
endpoints=None,
use_explicit_padding=False,
scope=None):
"""Depthwise Convolution Block with expansion.
Builds a composite convolution that has the following structure
expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)
Args:
input_tensor: input
num_outputs: number of outputs in the final layer.
expansion_size: the size of expansion, could be a constant or a callable.
If latter it will be provided 'num_inputs' as an input. For forward
compatibility it should accept arbitrary keyword arguments.
Default will expand the input by factor of 6.
stride: depthwise stride
rate: depthwise rate
kernel_size: depthwise kernel
residual: whether to include residual connection between input
and output.
normalizer_fn: batchnorm or otherwise
split_projection: how many ways to split projection operator
(that is conv expansion->bottleneck)
split_expansion: how many ways to split expansion op
(that is conv bottleneck->expansion) ops will keep depth divisible
by this value.
expansion_transform: Optional function that takes expansion
as a single input and returns output.
depthwise_location: where to put depthwise covnvolutions supported
values None, 'input', 'output', 'expansion'
depthwise_channel_multiplier: depthwise channel multiplier:
each input will replicated (with different filters)
that many times. So if input had c channels,
output will have c x depthwise_channel_multpilier.
endpoints: An optional dictionary into which intermediate endpoints are
placed. The keys "expansion_output", "depthwise_output",
"projection_output" and "expansion_transform" are always populated, even
if the corresponding functions are not invoked.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
scope: optional scope.
Returns:
Tensor of depth num_outputs
Raises:
TypeError: on inval
"""
with tf.variable_scope(scope, default_name='expanded_conv') as s, \
tf.name_scope(s.original_name_scope):
prev_depth = input_tensor.get_shape().as_list()[3]
if depthwise_location not in [None, 'input', 'output', 'expansion']:
raise TypeError('%r is unknown value for depthwise_location' %
depthwise_location)
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
depthwise_func = functools.partial(
slim.separable_conv2d,
num_outputs=None,
kernel_size=kernel_size,
depth_multiplier=depthwise_channel_multiplier,
stride=stride,
rate=rate,
normalizer_fn=normalizer_fn,
padding=padding,
scope='depthwise')
# b1 -> b2 * r -> b2
# i -> (o * r) (bottleneck) -> o
input_tensor = tf.identity(input_tensor, 'input')
net = input_tensor
if depthwise_location == 'input':
if use_explicit_padding:
net = _fixed_padding(net, kernel_size, rate)
net = depthwise_func(net, activation_fn=None)
if callable(expansion_size):
inner_size = expansion_size(num_inputs=prev_depth)
else:
inner_size = expansion_size
if inner_size > net.shape[3]:
net = split_conv(
net,
inner_size,
num_ways=split_expansion,
scope='expand',
stride=1,
normalizer_fn=normalizer_fn)
net = tf.identity(net, 'expansion_output')
if endpoints is not None:
endpoints['expansion_output'] = net
if depthwise_location == 'expansion':
if use_explicit_padding:
net = _fixed_padding(net, kernel_size, rate)
net = depthwise_func(net)
net = tf.identity(net, name='depthwise_output')
if endpoints is not None:
endpoints['depthwise_output'] = net
if expansion_transform:
net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
# Note in contrast with expansion, we always have
# projection to produce the desired output size.
net = split_conv(
net,
num_outputs,
num_ways=split_projection,
stride=1,
scope='project',
normalizer_fn=normalizer_fn,
activation_fn=tf.identity)
if endpoints is not None:
endpoints['projection_output'] = net
if depthwise_location == 'output':
if use_explicit_padding:
net = _fixed_padding(net, kernel_size, rate)
net = depthwise_func(net, activation_fn=None)
if callable(residual): # custom residual
net = residual(input_tensor=input_tensor, output_tensor=net)
elif (residual and
# stride check enforces that we don't add residuals when spatial
# dimensions are None
stride == 1 and
# Depth matches
net.get_shape().as_list()[3] ==
input_tensor.get_shape().as_list()[3]):
net += input_tensor
return tf.identity(net, name='output')
def split_conv(input_tensor,
num_outputs,
num_ways,
scope,
divisible_by=8,
**kwargs):
"""Creates a split convolution.
Split convolution splits the input and output into
'num_blocks' blocks of approximately the same size each,
and only connects $i$-th input to $i$ output.
Args:
input_tensor: input tensor
num_outputs: number of output filters
num_ways: num blocks to split by.
scope: scope for all the operators.
divisible_by: make sure that every part is divisiable by this.
**kwargs: will be passed directly into conv2d operator
Returns:
tensor
"""
b = input_tensor.get_shape().as_list()[3]
if num_ways == 1 or min(b // num_ways,
num_outputs // num_ways) < divisible_by:
# Don't do any splitting if we end up with less than 8 filters
# on either side.
return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)
outs = []
input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by)
output_splits = _split_divisible(
num_outputs, num_ways, divisible_by=divisible_by)
inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
base = scope
for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)):
scope = base + '_part_%d' % (i,)
n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs)
n = tf.identity(n, scope + '_output')
outs.append(n)
return tf.concat(outs, 3, name=scope + '_concat')
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mobilenet Base Class."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import contextlib
import copy
import os
import contextlib2
import tensorflow as tf
slim = tf.contrib.slim
@slim.add_arg_scope
def apply_activation(x, name=None, activation_fn=None):
return activation_fn(x, name=name) if activation_fn else x
def _fixed_padding(inputs, kernel_size, rate=1):
"""Pads the input along the spatial dimensions independently of input size.
Pads the input such that if it was used in a convolution with 'VALID' padding,
the output would have the same dimensions as if the unpadded input was used
in a convolution with 'SAME' padding.
Args:
inputs: A tensor of size [batch, height_in, width_in, channels].
kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
rate: An integer, rate for atrous convolution.
Returns:
output: A tensor of size [batch, height_out, width_out, channels] with the
input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
"""
kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)]
pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
[pad_beg[1], pad_end[1]], [0, 0]])
return padded_inputs
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
@contextlib.contextmanager
def _set_arg_scope_defaults(defaults):
"""Sets arg scope defaults for all items present in defaults.
Args:
defaults: dictionary mapping function to default_dict
Yields:
context manager
"""
with contextlib2.ExitStack() as stack:
_ = [
stack.enter_context(slim.arg_scope(func, **default_arg))
for func, default_arg in defaults.items()
]
yield
@slim.add_arg_scope
def depth_multiplier(output_params,
multiplier,
divisible_by=8,
min_depth=8,
**unused_kwargs):
if 'num_outputs' not in output_params:
return
d = output_params['num_outputs']
output_params['num_outputs'] = _make_divisible(d * multiplier, divisible_by,
min_depth)
_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func'])
def op(opfunc, **params):
multiplier = params.pop('multiplier_transorm', depth_multiplier)
return _Op(opfunc, params=params, multiplier_func=multiplier)
@slim.add_arg_scope
def mobilenet_base( # pylint: disable=invalid-name
inputs,
conv_defs,
multiplier=1.0,
final_endpoint=None,
output_stride=None,
use_explicit_padding=False,
scope=None,
is_training=False):
"""Mobilenet base network.
Constructs a network from inputs to the given final endpoint. By default
the network is constructed in inference mode. To create network
in training mode use:
with slim.arg_scope(mobilenet.training_scope()):
logits, endpoints = mobilenet_base(...)
Args:
inputs: a tensor of shape [batch_size, height, width, channels].
conv_defs: A list of op(...) layers specifying the net architecture.
multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
final_endpoint: The name of last layer, for early termination for
for V1-based networks: last layer is "layer_14", for V2: "layer_20"
output_stride: An integer that specifies the requested ratio of input to
output spatial resolution. If not None, then we invoke atrous convolution
if necessary to prevent the network from reducing the spatial resolution
of the activation maps. Allowed values are 1 or any even number, excluding
zero. Typical values are 8 (accurate fully convolutional mode), 16
(fast fully convolutional mode), and 32 (classification mode).
NOTE- output_stride relies on all consequent operators to support dilated
operators via "rate" parameter. This might require wrapping non-conv
operators to operate properly.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
scope: optional variable scope.
is_training: How to setup batch_norm and other ops. Note: most of the time
this does not need be set directly. Use mobilenet.training_scope() to set
up training instead. This parameter is here for backward compatibility
only. It is safe to set it to the value matching
training_scope(is_training=...). It is also safe to explicitly set
it to False, even if there is outer training_scope set to to training.
(The network will be built in inference mode).
Returns:
tensor_out: output tensor.
end_points: a set of activations for external use, for example summaries or
losses.
Raises:
ValueError: depth_multiplier <= 0, or the target output_stride is not
allowed.
"""
if multiplier <= 0:
raise ValueError('multiplier is not greater than zero.')
# Set conv defs defaults and overrides.
conv_defs_defaults = conv_defs.get('defaults', {})
conv_defs_overrides = conv_defs.get('overrides', {})
if use_explicit_padding:
conv_defs_overrides = copy.deepcopy(conv_defs_overrides)
conv_defs_overrides[
(slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'}
if output_stride is not None:
if output_stride == 0 or (output_stride > 1 and output_stride % 2):
raise ValueError('Output stride must be None, 1 or a multiple of 2.')
# a) Set the tensorflow scope
# b) set padding to default: note we might consider removing this
# since it is also set by mobilenet_scope
# c) set all defaults
# d) set all extra overrides.
with _scope_all(scope, default_scope='Mobilenet'), \
slim.arg_scope([slim.batch_norm], is_training=is_training), \
_set_arg_scope_defaults(conv_defs_defaults), \
_set_arg_scope_defaults(conv_defs_overrides):
# The current_stride variable keeps track of the output stride of the
# activations, i.e., the running product of convolution strides up to the
# current network layer. This allows us to invoke atrous convolution
# whenever applying the next convolution would result in the activations
# having output stride larger than the target output_stride.
current_stride = 1
# The atrous convolution rate parameter.
rate = 1
net = inputs
# Insert default parameters before the base scope which includes
# any custom overrides set in mobilenet.
end_points = {}
scopes = {}
for i, opdef in enumerate(conv_defs['spec']):
params = dict(opdef.params)
opdef.multiplier_func(params, multiplier)
stride = params.get('stride', 1)
if output_stride is not None and current_stride == output_stride:
# If we have reached the target output_stride, then we need to employ
# atrous convolution with stride=1 and multiply the atrous rate by the
# current unit's stride for use in subsequent layers.
layer_stride = 1
layer_rate = rate
rate *= stride
else:
layer_stride = stride
layer_rate = 1
current_stride *= stride
# Update params.
params['stride'] = layer_stride
# Only insert rate to params if rate > 1.
if layer_rate > 1:
params['rate'] = layer_rate
# Set padding
if use_explicit_padding:
if 'kernel_size' in params:
net = _fixed_padding(net, params['kernel_size'], layer_rate)
else:
params['use_explicit_padding'] = True
end_point = 'layer_%d' % (i + 1)
try:
net = opdef.op(net, **params)
except Exception:
print('Failed to create op %i: %r params: %r' % (i, opdef, params))
raise
end_points[end_point] = net
scope = os.path.dirname(net.name)
scopes[scope] = end_point
if final_endpoint is not None and end_point == final_endpoint:
break
# Add all tensors that end with 'output' to
# endpoints
for t in net.graph.get_operations():
scope = os.path.dirname(t.name)
bn = os.path.basename(t.name)
if scope in scopes and t.name.endswith('output'):
end_points[scopes[scope] + '/' + bn] = t.outputs[0]
return net, end_points
@contextlib.contextmanager
def _scope_all(scope, default_scope=None):
with tf.variable_scope(scope, default_name=default_scope) as s,\
tf.name_scope(s.original_name_scope):
yield s
@slim.add_arg_scope
def mobilenet(inputs,
num_classes=1001,
prediction_fn=slim.softmax,
reuse=None,
scope='Mobilenet',
base_only=False,
**mobilenet_args):
"""Mobilenet model for classification, supports both V1 and V2.
Note: default mode is inference, use mobilenet.training_scope to create
training network.
Args:
inputs: a tensor of shape [batch_size, height, width, channels].
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
prediction_fn: a function to get predictions out of logits
(default softmax).
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
base_only: if True will only create the base of the network (no pooling
and no logits).
**mobilenet_args: passed to mobilenet_base verbatim.
- conv_defs: list of conv defs
- multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
- output_stride: will ensure that the last layer has at most total stride.
If the architecture calls for more stride than that provided
(e.g. output_stride=16, but the architecture has 5 stride=2 operators),
it will replace output_stride with fractional convolutions using Atrous
Convolutions.
Returns:
logits: the pre-softmax activations, a tensor of size
[batch_size, num_classes]
end_points: a dictionary from components of the network to the corresponding
activation tensor.
Raises:
ValueError: Input rank is invalid.
"""
is_training = mobilenet_args.get('is_training', False)
input_shape = inputs.get_shape().as_list()
if len(input_shape) != 4:
raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))
with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope:
inputs = tf.identity(inputs, 'input')
net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args)
if base_only:
return net, end_points
net = tf.identity(net, name='embedding')
with tf.variable_scope('Logits'):
net = global_pool(net)
end_points['global_pool'] = net
if not num_classes:
return net, end_points
net = slim.dropout(net, scope='Dropout', is_training=is_training)
# 1 x 1 x num_classes
# Note: legacy scope name.
logits = slim.conv2d(
net,
num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
biases_initializer=tf.zeros_initializer(),
scope='Conv2d_1c_1x1')
logits = tf.squeeze(logits, [1, 2])
logits = tf.identity(logits, name='output')
end_points['Logits'] = logits
if prediction_fn:
end_points['Predictions'] = prediction_fn(logits, 'Predictions')
return logits, end_points
def global_pool(input_tensor, pool_op=tf.nn.avg_pool):
"""Applies avg pool to produce 1x1 output.
NOTE: This function is funcitonally equivalenet to reduce_mean, but it has
baked in average pool which has better support across hardware.
Args:
input_tensor: input tensor
pool_op: pooling op (avg pool is default)
Returns:
a tensor batch_size x 1 x 1 x depth.
"""
shape = input_tensor.get_shape().as_list()
if shape[1] is None or shape[2] is None:
kernel_size = tf.convert_to_tensor(
[1, tf.shape(input_tensor)[1],
tf.shape(input_tensor)[2], 1])
else:
kernel_size = [1, shape[1], shape[2], 1]
output = pool_op(
input_tensor, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID')
# Recover output shape, for unknown shape.
output.set_shape([None, 1, 1, None])
return output
def training_scope(is_training=True,
weight_decay=0.00004,
stddev=0.09,
dropout_keep_prob=0.8,
bn_decay=0.997):
"""Defines Mobilenet training scope.
Usage:
with tf.contrib.slim.arg_scope(mobilenet.training_scope()):
logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
# the network created will be trainble with dropout/batch norm
# initialized appropriately.
Args:
is_training: if set to False this will ensure that all customizations are
set to non-training mode. This might be helpful for code that is reused
across both training/evaluation, but most of the time training_scope with
value False is not needed.
weight_decay: The weight decay to use for regularizing the model.
stddev: Standard deviation for initialization, if negative uses xavier.
dropout_keep_prob: dropout keep probability
bn_decay: decay for the batch norm moving averages.
Returns:
An argument scope to use via arg_scope.
"""
# Note: do not introduce parameters that would change the inference
# model here (for example whether to use bias), modify conv_def instead.
batch_norm_params = {
'is_training': is_training,
'decay': bn_decay,
}
if stddev < 0:
weight_intitializer = slim.initializers.xavier_initializer()
else:
weight_intitializer = tf.truncated_normal_initializer(stddev=stddev)
# Set weight_decay for weights in Conv and FC layers.
with slim.arg_scope(
[slim.conv2d, slim.fully_connected, slim.separable_conv2d],
weights_initializer=weight_intitializer,
normalizer_fn=slim.batch_norm), \
slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training),\
slim.arg_scope([slim.batch_norm], **batch_norm_params), \
slim.arg_scope([slim.dropout], is_training=is_training,
keep_prob=dropout_keep_prob), \
slim.arg_scope([slim.conv2d], \
weights_regularizer=slim.l2_regularizer(weight_decay)), \
slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s:
return s
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of Mobilenet V2.
Architecture: https://arxiv.org/abs/1801.04381
The base model gives 72.2% accuracy on ImageNet, with 300MMadds,
3.4 M parameters.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import tensorflow as tf
from nets.mobilenet import conv_blocks as ops
from nets.mobilenet import mobilenet as lib
slim = tf.contrib.slim
op = lib.op
expand_input = ops.expand_input_by_factor
# pyformat: disable
# Architecture: https://arxiv.org/abs/1801.04381
V2_DEF = dict(
defaults={
# Note: these parameters of batch norm affect the architecture
# that's why they are here and not in training_scope.
(slim.batch_norm,): {'center': True, 'scale': True},
(slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
},
(ops.expanded_conv,): {
'expansion_size': expand_input(6),
'split_expansion': 1,
'normalizer_fn': slim.batch_norm,
'residual': True
},
(slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
},
spec=[
op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
op(ops.expanded_conv,
expansion_size=expand_input(1, divisible_by=1),
num_outputs=16),
op(ops.expanded_conv, stride=2, num_outputs=24),
op(ops.expanded_conv, stride=1, num_outputs=24),
op(ops.expanded_conv, stride=2, num_outputs=32),
op(ops.expanded_conv, stride=1, num_outputs=32),
op(ops.expanded_conv, stride=1, num_outputs=32),
op(ops.expanded_conv, stride=2, num_outputs=64),
op(ops.expanded_conv, stride=1, num_outputs=64),
op(ops.expanded_conv, stride=1, num_outputs=64),
op(ops.expanded_conv, stride=1, num_outputs=64),
op(ops.expanded_conv, stride=1, num_outputs=96),
op(ops.expanded_conv, stride=1, num_outputs=96),
op(ops.expanded_conv, stride=1, num_outputs=96),
op(ops.expanded_conv, stride=2, num_outputs=160),
op(ops.expanded_conv, stride=1, num_outputs=160),
op(ops.expanded_conv, stride=1, num_outputs=160),
op(ops.expanded_conv, stride=1, num_outputs=320),
op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280)
],
)
# pyformat: enable
@slim.add_arg_scope
def mobilenet(input_tensor,
num_classes=1001,
depth_multiplier=1.0,
scope='MobilenetV2',
conv_defs=None,
finegrain_classification_mode=False,
min_depth=None,
divisible_by=None,
**kwargs):
"""Creates mobilenet V2 network.
Inference mode is created by default. To create training use training_scope
below.
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
Args:
input_tensor: The input tensor
num_classes: number of classes
depth_multiplier: The multiplier applied to scale number of
channels in each layer. Note: this is called depth multiplier in the
paper but the name is kept for consistency with slim's model builder.
scope: Scope of the operator
conv_defs: Allows to override default conv def.
finegrain_classification_mode: When set to True, the model
will keep the last layer large even for small multipliers. Following
https://arxiv.org/abs/1801.04381
suggests that it improves performance for ImageNet-type of problems.
*Note* ignored if final_endpoint makes the builder exit earlier.
min_depth: If provided, will ensure that all layers will have that
many channels after application of depth multiplier.
divisible_by: If provided will ensure that all layers # channels
will be divisible by this number.
**kwargs: passed directly to mobilenet.mobilenet:
prediciton_fn- what prediction function to use.
reuse-: whether to reuse variables (if reuse set to true, scope
must be given).
Returns:
logits/endpoints pair
Raises:
ValueError: On invalid arguments
"""
if conv_defs is None:
conv_defs = V2_DEF
if 'multiplier' in kwargs:
raise ValueError('mobilenetv2 doesn\'t support generic '
'multiplier parameter use "depth_multiplier" instead.')
if finegrain_classification_mode:
conv_defs = copy.deepcopy(conv_defs)
if depth_multiplier < 1:
conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier
depth_args = {}
# NB: do not set depth_args unless they are provided to avoid overriding
# whatever default depth_multiplier might have thanks to arg_scope.
if min_depth is not None:
depth_args['min_depth'] = min_depth
if divisible_by is not None:
depth_args['divisible_by'] = divisible_by
with slim.arg_scope((lib.depth_multiplier,), **depth_args):
return lib.mobilenet(
input_tensor,
num_classes=num_classes,
conv_defs=conv_defs,
scope=scope,
multiplier=depth_multiplier,
**kwargs)
@slim.add_arg_scope
def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
"""Creates base of the mobilenet (no pooling and no logits) ."""
return mobilenet(input_tensor,
depth_multiplier=depth_multiplier,
base_only=True, **kwargs)
def training_scope(**kwargs):
"""Defines MobilenetV2 training scope.
Usage:
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
with slim.
Args:
**kwargs: Passed to mobilenet.training_scope. The following parameters
are supported:
weight_decay- The weight decay to use for regularizing the model.
stddev- Standard deviation for initialization, if negative uses xavier.
dropout_keep_prob- dropout keep probability
bn_decay- decay for the batch norm moving averages.
Returns:
An `arg_scope` to use for the mobilenet v2 model.
"""
return lib.training_scope(**kwargs)
__all__ = ['training_scope', 'mobilenet_base', 'mobilenet', 'V2_DEF']
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mobilenet_v2."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import tensorflow as tf
from nets.mobilenet import conv_blocks as ops
from nets.mobilenet import mobilenet
from nets.mobilenet import mobilenet_v2
slim = tf.contrib.slim
def find_ops(optype):
"""Find ops of a given type in graphdef or a graph.
Args:
optype: operation type (e.g. Conv2D)
Returns:
List of operations.
"""
gd = tf.get_default_graph()
return [var for var in gd.get_operations() if var.type == optype]
class MobilenetV2Test(tf.test.TestCase):
def setUp(self):
tf.reset_default_graph()
def testCreation(self):
spec = dict(mobilenet_v2.V2_DEF)
_, ep = mobilenet.mobilenet(
tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
num_convs = len(find_ops('Conv2D'))
# This is mostly a sanity test. No deep reason for these particular
# constants.
#
# All but first 2 and last one have two convolutions, and there is one
# extra conv that is not in the spec. (logits)
self.assertEqual(num_convs, len(spec['spec']) * 2 - 2)
# Check that depthwise are exposed.
for i in range(2, 17):
self.assertIn('layer_%d/depthwise_output' % i, ep)
def testCreationNoClasses(self):
spec = copy.deepcopy(mobilenet_v2.V2_DEF)
net, ep = mobilenet.mobilenet(
tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec,
num_classes=None)
self.assertIs(net, ep['global_pool'])
def testImageSizes(self):
for input_size, output_size in [(224, 7), (192, 6), (160, 5),
(128, 4), (96, 3)]:
tf.reset_default_graph()
_, ep = mobilenet_v2.mobilenet(
tf.placeholder(tf.float32, (10, input_size, input_size, 3)))
self.assertEqual(ep['layer_18/output'].get_shape().as_list()[1:3],
[output_size] * 2)
def testWithSplits(self):
spec = copy.deepcopy(mobilenet_v2.V2_DEF)
spec['overrides'] = {
(ops.expanded_conv,): dict(split_expansion=2),
}
_, _ = mobilenet.mobilenet(
tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
num_convs = len(find_ops('Conv2D'))
# All but 3 op has 3 conv operatore, the remainign 3 have one
# and there is one unaccounted.
self.assertEqual(num_convs, len(spec['spec']) * 3 - 5)
def testWithOutputStride8(self):
out, _ = mobilenet.mobilenet_base(
tf.placeholder(tf.float32, (10, 224, 224, 16)),
conv_defs=mobilenet_v2.V2_DEF,
output_stride=8,
scope='MobilenetV2')
self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
def testDivisibleBy(self):
tf.reset_default_graph()
mobilenet_v2.mobilenet(
tf.placeholder(tf.float32, (10, 224, 224, 16)),
conv_defs=mobilenet_v2.V2_DEF,
divisible_by=16,
min_depth=32)
s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
s = set(s)
self.assertSameElements([32, 64, 96, 160, 192, 320, 384, 576, 960, 1280,
1001], s)
def testDivisibleByWithArgScope(self):
tf.reset_default_graph()
# Verifies that depth_multiplier arg scope actually works
# if no default min_depth is provided.
with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
mobilenet_v2.mobilenet(
tf.placeholder(tf.float32, (10, 224, 224, 2)),
conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
s = set(s)
self.assertSameElements(s, [32, 192, 128, 1001])
def testFineGrained(self):
tf.reset_default_graph()
# Verifies that depth_multiplier arg scope actually works
# if no default min_depth is provided.
mobilenet_v2.mobilenet(
tf.placeholder(tf.float32, (10, 224, 224, 2)),
conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.01,
finegrain_classification_mode=True)
s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
s = set(s)
# All convolutions will be 8->48, except for the last one.
self.assertSameElements(s, [8, 48, 1001, 1280])
def testMobilenetBase(self):
tf.reset_default_graph()
# Verifies that mobilenet_base returns pre-pooling layer.
with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
net, _ = mobilenet_v2.mobilenet_base(
tf.placeholder(tf.float32, (10, 224, 224, 16)),
conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
self.assertEqual(net.get_shape().as_list(), [10, 7, 7, 128])
def testWithOutputStride16(self):
tf.reset_default_graph()
out, _ = mobilenet.mobilenet_base(
tf.placeholder(tf.float32, (10, 224, 224, 16)),
conv_defs=mobilenet_v2.V2_DEF,
output_stride=16)
self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
def testWithOutputStride8AndExplicitPadding(self):
tf.reset_default_graph()
out, _ = mobilenet.mobilenet_base(
tf.placeholder(tf.float32, (10, 224, 224, 16)),
conv_defs=mobilenet_v2.V2_DEF,
output_stride=8,
use_explicit_padding=True,
scope='MobilenetV2')
self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
def testWithOutputStride16AndExplicitPadding(self):
tf.reset_default_graph()
out, _ = mobilenet.mobilenet_base(
tf.placeholder(tf.float32, (10, 224, 224, 16)),
conv_defs=mobilenet_v2.V2_DEF,
output_stride=16,
use_explicit_padding=True)
self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
if __name__ == '__main__':
tf.test.main()
...@@ -55,7 +55,7 @@ The linked model tar files contain the following: ...@@ -55,7 +55,7 @@ The linked model tar files contain the following:
* Converted [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) flatbuffer model * Converted [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) flatbuffer model
Note that quantized model GraphDefs are still float models, they just have FakeQuantization Note that quantized model GraphDefs are still float models, they just have FakeQuantization
operation embedded to simulate quantization. These are converted by [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) operation embedded to simulate quantization. These are converted by [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/)
to be fully quantized. The final effect of quantization can be seen by comparing the frozen fake to be fully quantized. The final effect of quantization can be seen by comparing the frozen fake
quantized graph to the size of the TFLite flatbuffer, i.e. The TFLite flatbuffer is about 1/4 quantized graph to the size of the TFLite flatbuffer, i.e. The TFLite flatbuffer is about 1/4
the size. the size.
...@@ -127,4 +127,3 @@ $ ./bazel-bin/mobilenet_v1_eval --dataset_dir "path/to/dataset" --checkpoint_dir ...@@ -127,4 +127,3 @@ $ ./bazel-bin/mobilenet_v1_eval --dataset_dir "path/to/dataset" --checkpoint_dir
``` ```
The resulting float and quantized models can be run on-device via [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/). The resulting float and quantized models can be run on-device via [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/).
...@@ -428,7 +428,9 @@ def _reduced_kernel_size_for_small_input(input_tensor, kernel_size): ...@@ -428,7 +428,9 @@ def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
def mobilenet_v1_arg_scope(is_training=True, def mobilenet_v1_arg_scope(is_training=True,
weight_decay=0.00004, weight_decay=0.00004,
stddev=0.09, stddev=0.09,
regularize_depthwise=False): regularize_depthwise=False,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001):
"""Defines the default MobilenetV1 arg scope. """Defines the default MobilenetV1 arg scope.
Args: Args:
...@@ -436,6 +438,9 @@ def mobilenet_v1_arg_scope(is_training=True, ...@@ -436,6 +438,9 @@ def mobilenet_v1_arg_scope(is_training=True,
weight_decay: The weight decay to use for regularizing the model. weight_decay: The weight decay to use for regularizing the model.
stddev: The standard deviation of the trunctated normal weight initializer. stddev: The standard deviation of the trunctated normal weight initializer.
regularize_depthwise: Whether or not apply regularization on depthwise. regularize_depthwise: Whether or not apply regularization on depthwise.
batch_norm_decay: Decay for batch norm moving average.
batch_norm_epsilon: Small float added to variance to avoid dividing by zero
in batch norm.
Returns: Returns:
An `arg_scope` to use for the mobilenet v1 model. An `arg_scope` to use for the mobilenet v1 model.
...@@ -444,8 +449,8 @@ def mobilenet_v1_arg_scope(is_training=True, ...@@ -444,8 +449,8 @@ def mobilenet_v1_arg_scope(is_training=True,
'is_training': is_training, 'is_training': is_training,
'center': True, 'center': True,
'scale': True, 'scale': True,
'decay': 0.9997, 'decay': batch_norm_decay,
'epsilon': 0.001, 'epsilon': batch_norm_epsilon,
} }
# Set weight_decay for weights in Conv and DepthSepConv layers. # Set weight_decay for weights in Conv and DepthSepConv layers.
......
...@@ -150,4 +150,3 @@ def main(unused_arg): ...@@ -150,4 +150,3 @@ def main(unused_arg):
if __name__ == '__main__': if __name__ == '__main__':
tf.app.run(main) tf.app.run(main)
...@@ -210,4 +210,3 @@ def main(unused_arg): ...@@ -210,4 +210,3 @@ def main(unused_arg):
if __name__ == '__main__': if __name__ == '__main__':
tf.app.run(main) tf.app.run(main)
...@@ -399,7 +399,7 @@ class NasNetABaseCell(object): ...@@ -399,7 +399,7 @@ class NasNetABaseCell(object):
@tf.contrib.framework.add_arg_scope # No public API. For internal use only. @tf.contrib.framework.add_arg_scope # No public API. For internal use only.
def _apply_drop_path(self, net, current_step=None, def _apply_drop_path(self, net, current_step=None,
use_summaries=True, drop_connect_version='v3'): use_summaries=False, drop_connect_version='v3'):
"""Apply drop_path regularization. """Apply drop_path regularization.
Args: Args:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment