Unverified Commit 57014e4c authored by gunan's avatar gunan Committed by GitHub
Browse files

Merge pull request #2627 from sguada/nasnet

Bring tensorflow/models slim up to date.
parents c46caa56 cbb62479
# Description: # Description:
# Contains files for loading, training and evaluating TF-Slim-based models. # Contains files for loading, training and evaluating TF-Slim-based models.
package(default_visibility = ["//visibility:public"]) package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"]) exports_files(["LICENSE"])
package_group(name = "internal")
py_library( py_library(
name = "dataset_utils", name = "dataset_utils",
srcs = ["datasets/dataset_utils.py"], srcs = ["datasets/dataset_utils.py"],
...@@ -35,6 +35,7 @@ py_binary( ...@@ -35,6 +35,7 @@ py_binary(
name = "build_imagenet_data", name = "build_imagenet_data",
srcs = ["datasets/build_imagenet_data.py"], srcs = ["datasets/build_imagenet_data.py"],
deps = [ deps = [
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -44,6 +45,7 @@ py_library( ...@@ -44,6 +45,7 @@ py_library(
srcs = ["datasets/download_and_convert_cifar10.py"], srcs = ["datasets/download_and_convert_cifar10.py"],
deps = [ deps = [
":dataset_utils", ":dataset_utils",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -62,6 +64,7 @@ py_library( ...@@ -62,6 +64,7 @@ py_library(
srcs = ["datasets/download_and_convert_mnist.py"], srcs = ["datasets/download_and_convert_mnist.py"],
deps = [ deps = [
":dataset_utils", ":dataset_utils",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -73,6 +76,7 @@ py_binary( ...@@ -73,6 +76,7 @@ py_binary(
":download_and_convert_cifar10", ":download_and_convert_cifar10",
":download_and_convert_flowers", ":download_and_convert_flowers",
":download_and_convert_mnist", ":download_and_convert_mnist",
"//tensorflow",
], ],
) )
...@@ -137,6 +141,7 @@ py_test( ...@@ -137,6 +141,7 @@ py_test(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":model_deploy", ":model_deploy",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -154,6 +159,7 @@ py_library( ...@@ -154,6 +159,7 @@ py_library(
srcs = ["preprocessing/inception_preprocessing.py"], srcs = ["preprocessing/inception_preprocessing.py"],
deps = [ deps = [
"//tensorflow", "//tensorflow",
"//tensorflow/python:control_flow_ops",
], ],
) )
...@@ -192,10 +198,13 @@ py_library( ...@@ -192,10 +198,13 @@ py_library(
deps = [ deps = [
":alexnet", ":alexnet",
":cifarnet", ":cifarnet",
":cyclegan",
":inception", ":inception",
":lenet", ":lenet",
":mobilenet_v1", ":mobilenet_v1",
":nasnet",
":overfeat", ":overfeat",
":pix2pix",
":resnet_v1", ":resnet_v1",
":resnet_v2", ":resnet_v2",
":vgg", ":vgg",
...@@ -206,6 +215,7 @@ py_library( ...@@ -206,6 +215,7 @@ py_library(
name = "alexnet", name = "alexnet",
srcs = ["nets/alexnet.py"], srcs = ["nets/alexnet.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["//tensorflow"],
) )
py_test( py_test(
...@@ -227,6 +237,45 @@ py_library( ...@@ -227,6 +237,45 @@ py_library(
], ],
) )
py_library(
name = "cyclegan",
srcs = ["nets/cyclegan.py"],
deps = [
# "//numpy",
"//tensorflow",
],
)
py_test(
name = "cyclegan_test",
srcs = ["nets/cyclegan_test.py"],
shard_count = 3,
srcs_version = "PY2AND3",
deps = [
":cyclegan",
"//tensorflow",
],
)
py_library(
name = "dcgan",
srcs = ["nets/dcgan.py"],
deps = [
"//tensorflow",
],
)
py_test(
name = "dcgan_test",
srcs = ["nets/dcgan_test.py"],
shard_count = 3,
srcs_version = "PY2AND3",
deps = [
":dcgan",
"//tensorflow",
],
)
py_library( py_library(
name = "inception", name = "inception",
srcs = ["nets/inception.py"], srcs = ["nets/inception.py"],
...@@ -244,6 +293,7 @@ py_library( ...@@ -244,6 +293,7 @@ py_library(
name = "inception_utils", name = "inception_utils",
srcs = ["nets/inception_utils.py"], srcs = ["nets/inception_utils.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["//tensorflow"],
) )
py_library( py_library(
...@@ -252,6 +302,7 @@ py_library( ...@@ -252,6 +302,7 @@ py_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":inception_utils", ":inception_utils",
"//tensorflow",
], ],
) )
...@@ -261,6 +312,7 @@ py_library( ...@@ -261,6 +312,7 @@ py_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":inception_utils", ":inception_utils",
"//tensorflow",
], ],
) )
...@@ -270,6 +322,7 @@ py_library( ...@@ -270,6 +322,7 @@ py_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":inception_utils", ":inception_utils",
"//tensorflow",
], ],
) )
...@@ -279,6 +332,7 @@ py_library( ...@@ -279,6 +332,7 @@ py_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":inception_utils", ":inception_utils",
"//tensorflow",
], ],
) )
...@@ -286,6 +340,7 @@ py_library( ...@@ -286,6 +340,7 @@ py_library(
name = "inception_resnet_v2", name = "inception_resnet_v2",
srcs = ["nets/inception_resnet_v2.py"], srcs = ["nets/inception_resnet_v2.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["//tensorflow"],
) )
py_test( py_test(
...@@ -296,6 +351,7 @@ py_test( ...@@ -296,6 +351,7 @@ py_test(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":inception", ":inception",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -308,6 +364,7 @@ py_test( ...@@ -308,6 +364,7 @@ py_test(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":inception", ":inception",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -320,6 +377,7 @@ py_test( ...@@ -320,6 +377,7 @@ py_test(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":inception", ":inception",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -360,6 +418,7 @@ py_library( ...@@ -360,6 +418,7 @@ py_library(
name = "mobilenet_v1", name = "mobilenet_v1",
srcs = ["nets/mobilenet_v1.py"], srcs = ["nets/mobilenet_v1.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["//tensorflow"],
) )
py_test( py_test(
...@@ -370,6 +429,49 @@ py_test( ...@@ -370,6 +429,49 @@ py_test(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":mobilenet_v1", ":mobilenet_v1",
# "//numpy",
"//tensorflow",
],
)
py_library(
name = "nasnet_utils",
srcs = ["nets/nasnet/nasnet_utils.py"],
srcs_version = "PY2AND3",
deps = [
"//tensorflow",
],
)
py_library(
name = "nasnet",
srcs = ["nets/nasnet/nasnet.py"],
srcs_version = "PY2AND3",
deps = [
":nasnet_utils",
"//tensorflow",
],
)
py_test(
name = "nasnet_utils_test",
size = "medium",
srcs = ["nets/nasnet/nasnet_utils_test.py"],
srcs_version = "PY2AND3",
deps = [
":nasnet_utils",
"//tensorflow",
],
)
py_test(
name = "nasnet_test",
size = "large",
srcs = ["nets/nasnet/nasnet_test.py"],
shard_count = 10,
srcs_version = "PY2AND3",
deps = [
":nasnet",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -378,6 +480,7 @@ py_library( ...@@ -378,6 +480,7 @@ py_library(
name = "overfeat", name = "overfeat",
srcs = ["nets/overfeat.py"], srcs = ["nets/overfeat.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["//tensorflow"],
) )
py_test( py_test(
...@@ -391,10 +494,28 @@ py_test( ...@@ -391,10 +494,28 @@ py_test(
], ],
) )
py_library(
name = "pix2pix",
srcs = ["nets/pix2pix.py"],
srcs_version = "PY2AND3",
deps = ["//tensorflow"],
)
py_test(
name = "pix2pix_test",
srcs = ["nets/pix2pix_test.py"],
srcs_version = "PY2AND3",
deps = [
":pix2pix",
"//tensorflow",
],
)
py_library( py_library(
name = "resnet_utils", name = "resnet_utils",
srcs = ["nets/resnet_utils.py"], srcs = ["nets/resnet_utils.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["//tensorflow"],
) )
py_library( py_library(
...@@ -403,6 +524,7 @@ py_library( ...@@ -403,6 +524,7 @@ py_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":resnet_utils", ":resnet_utils",
"//tensorflow",
], ],
) )
...@@ -413,7 +535,9 @@ py_test( ...@@ -413,7 +535,9 @@ py_test(
shard_count = 2, shard_count = 2,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":resnet_utils",
":resnet_v1", ":resnet_v1",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -424,6 +548,7 @@ py_library( ...@@ -424,6 +548,7 @@ py_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":resnet_utils", ":resnet_utils",
"//tensorflow",
], ],
) )
...@@ -434,7 +559,9 @@ py_test( ...@@ -434,7 +559,9 @@ py_test(
shard_count = 2, shard_count = 2,
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":resnet_utils",
":resnet_v2", ":resnet_v2",
# "//numpy",
"//tensorflow", "//tensorflow",
], ],
) )
...@@ -443,6 +570,7 @@ py_library( ...@@ -443,6 +570,7 @@ py_library(
name = "vgg", name = "vgg",
srcs = ["nets/vgg.py"], srcs = ["nets/vgg.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["//tensorflow"],
) )
py_test( py_test(
...@@ -494,7 +622,6 @@ py_binary( ...@@ -494,7 +622,6 @@ py_binary(
srcs = ["eval_image_classifier.py"], srcs = ["eval_image_classifier.py"],
deps = [ deps = [
":dataset_factory", ":dataset_factory",
":model_deploy",
":nets_factory", ":nets_factory",
":preprocessing_factory", ":preprocessing_factory",
"//tensorflow", "//tensorflow",
...@@ -508,6 +635,7 @@ py_binary( ...@@ -508,6 +635,7 @@ py_binary(
":dataset_factory", ":dataset_factory",
":nets_factory", ":nets_factory",
"//tensorflow", "//tensorflow",
"//tensorflow/python:platform",
], ],
) )
...@@ -521,7 +649,7 @@ py_test( ...@@ -521,7 +649,7 @@ py_test(
], ],
deps = [ deps = [
":export_inference_graph", ":export_inference_graph",
":nets_factory",
"//tensorflow", "//tensorflow",
"//tensorflow/python:platform",
], ],
) )
...@@ -259,12 +259,16 @@ Model | TF-Slim File | Checkpoint | Top-1 Accuracy| Top-5 Accuracy | ...@@ -259,12 +259,16 @@ Model | TF-Slim File | Checkpoint | Top-1 Accuracy| Top-5 Accuracy |
[MobileNet_v1_1.0_224](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_1.0_224_2017_06_14.tar.gz](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|70.7|89.5| [MobileNet_v1_1.0_224](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_1.0_224_2017_06_14.tar.gz](http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz)|70.7|89.5|
[MobileNet_v1_0.50_160](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.50_160_2017_06_14.tar.gz](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|59.9|82.5| [MobileNet_v1_0.50_160](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.50_160_2017_06_14.tar.gz](http://download.tensorflow.org/models/mobilenet_v1_0.50_160_2017_06_14.tar.gz)|59.9|82.5|
[MobileNet_v1_0.25_128](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.25_128_2017_06_14.tar.gz](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|41.3|66.2| [MobileNet_v1_0.25_128](https://arxiv.org/pdf/1704.04861.pdf)|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py)|[mobilenet_v1_0.25_128_2017_06_14.tar.gz](http://download.tensorflow.org/models/mobilenet_v1_0.25_128_2017_06_14.tar.gz)|41.3|66.2|
[NASNet-A_Mobile_224](https://arxiv.org/abs/1707.07012)#|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py)|[nasnet-a_mobile_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_mobile_04_10_2017.tar.gz)|74.0|91.6|
[NASNet-A_Large_331](https://arxiv.org/abs/1707.07012)#|[Code](https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py)|[nasnet-a_large_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_large_04_10_2017.tar.gz)|82.7|96.2|
^ ResNet V2 models use Inception pre-processing and input image size of 299 (use ^ ResNet V2 models use Inception pre-processing and input image size of 299 (use
`--preprocessing_name inception --eval_image_size 299` when using `--preprocessing_name inception --eval_image_size 299` when using
`eval_image_classifier.py`). Performance numbers for ResNet V2 models are `eval_image_classifier.py`). Performance numbers for ResNet V2 models are
reported on the ImageNet validation set. reported on the ImageNet validation set.
(#) More information and details about the NASNet architectures are available at this [README](nets/nasnet/README.md)
All 16 MobileNet Models reported in the [MobileNet Paper](https://arxiv.org/abs/1704.04861) can be found [here](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet_v1.md). All 16 MobileNet Models reported in the [MobileNet Paper](https://arxiv.org/abs/1704.04861) can be found [here](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet_v1.md).
(\*): Results quoted from the [paper](https://arxiv.org/abs/1603.05027). (\*): Results quoted from the [paper](https://arxiv.org/abs/1603.05027).
...@@ -393,7 +397,8 @@ $ python eval_image_classifier.py \ ...@@ -393,7 +397,8 @@ $ python eval_image_classifier.py \
--model_name=inception_v3 --model_name=inception_v3
``` ```
See the [evaluation module example](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim#evaluation-loop) for an example of how to evaluate a model at multiple checkpoints during or after the training. See the [evaluation module example](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim#evaluation-loop)
for an example of how to evaluate a model at multiple checkpoints during or after the training.
# Exporting the Inference Graph # Exporting the Inference Graph
<a id='Export'></a> <a id='Export'></a>
......
...@@ -45,11 +45,11 @@ ...@@ -45,11 +45,11 @@
# downloading the raw images. # downloading the raw images.
# #
# usage: # usage:
# ./download_and_preprocess_imagenet.sh [data-dir] # ./download_and_convert_imagenet.sh [data-dir]
set -e set -e
if [ -z "$1" ]; then if [ -z "$1" ]; then
echo "usage download_and_preprocess_imagenet.sh [data dir]" echo "usage download_and_convert_imagenet.sh [data dir]"
exit exit
fi fi
......
...@@ -103,6 +103,8 @@ import collections ...@@ -103,6 +103,8 @@ import collections
import tensorflow as tf import tensorflow as tf
from tensorflow.python.eager import context
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -342,7 +344,13 @@ def deploy(config, ...@@ -342,7 +344,13 @@ def deploy(config,
Returns: Returns:
A `DeployedModel` namedtuple. A `DeployedModel` namedtuple.
Raises:
RuntimeError: If eager execution is enabled.
""" """
if context.in_eager_mode():
raise RuntimeError(
'slim.deploy is not supported when eager execution is enabled.')
# Gather initial summaries. # Gather initial summaries.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
......
...@@ -158,7 +158,7 @@ def LogisticClassifier(inputs, labels, scope=None, reuse=None): ...@@ -158,7 +158,7 @@ def LogisticClassifier(inputs, labels, scope=None, reuse=None):
def BatchNormClassifier(inputs, labels, scope=None, reuse=None): def BatchNormClassifier(inputs, labels, scope=None, reuse=None):
with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels], with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels],
reuse=reuse): reuse=reuse):
inputs = slim.batch_norm(inputs, decay=0.1) inputs = slim.batch_norm(inputs, decay=0.1, fused=True)
predictions = slim.fully_connected(inputs, 1, predictions = slim.fully_connected(inputs, 1,
activation_fn=tf.sigmoid, activation_fn=tf.sigmoid,
scope='fully_connected') scope='fully_connected')
...@@ -476,6 +476,11 @@ class DeployTest(tf.test.TestCase): ...@@ -476,6 +476,11 @@ class DeployTest(tf.test.TestCase):
j = int(2 * self._labels[i] + np.random.randint(0, 2)) j = int(2 * self._labels[i] + np.random.randint(0, 2))
self._inputs[i, j] = 1 self._inputs[i, j] = 1
def _addBesselsCorrection(self, sample_size, expected_var):
correction_factor = sample_size / (sample_size - 1)
expected_var *= correction_factor
return expected_var
def testLocalTrainOp(self): def testLocalTrainOp(self):
g = tf.Graph() g = tf.Graph()
with g.as_default(): with g.as_default():
...@@ -519,9 +524,11 @@ class DeployTest(tf.test.TestCase): ...@@ -519,9 +524,11 @@ class DeployTest(tf.test.TestCase):
final_mean, final_variance = sess.run([moving_mean, final_mean, final_variance = sess.run([moving_mean,
moving_variance]) moving_variance])
self.assertAllClose(final_mean, [0.125, 0.25, 0.375, 0.25]) expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
self.assertAllClose(final_variance, [0.109375, 0.1875, expected_var = np.array([0.109375, 0.1875, 0.234375, 0.1875])
0.234375, 0.1875]) expected_var = self._addBesselsCorrection(16, expected_var)
self.assertAllClose(final_mean, expected_mean)
self.assertAllClose(final_variance, expected_var)
def testNoSummariesOnGPU(self): def testNoSummariesOnGPU(self):
with tf.Graph().as_default(): with tf.Graph().as_default():
......
...@@ -57,7 +57,8 @@ def alexnet_v2(inputs, ...@@ -57,7 +57,8 @@ def alexnet_v2(inputs,
is_training=True, is_training=True,
dropout_keep_prob=0.5, dropout_keep_prob=0.5,
spatial_squeeze=True, spatial_squeeze=True,
scope='alexnet_v2'): scope='alexnet_v2',
global_pool=False):
"""AlexNet version 2. """AlexNet version 2.
Described in: http://arxiv.org/pdf/1404.5997v2.pdf Described in: http://arxiv.org/pdf/1404.5997v2.pdf
...@@ -66,26 +67,34 @@ def alexnet_v2(inputs, ...@@ -66,26 +67,34 @@ def alexnet_v2(inputs,
layers-imagenet-1gpu.cfg layers-imagenet-1gpu.cfg
Note: All the fully_connected layers have been transformed to conv2d layers. Note: All the fully_connected layers have been transformed to conv2d layers.
To use in classification mode, resize input to 224x224. To use in fully To use in classification mode, resize input to 224x224 or set
convolutional mode, set spatial_squeeze to false. global_pool=True. To use in fully convolutional mode, set
spatial_squeeze to false.
The LRN layers have been removed and change the initializers from The LRN layers have been removed and change the initializers from
random_normal_initializer to xavier_initializer. random_normal_initializer to xavier_initializer.
Args: Args:
inputs: a tensor of size [batch_size, height, width, channels]. inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes. num_classes: the number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer are returned instead.
is_training: whether or not the model is being trained. is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout dropout_keep_prob: the probability that activations are kept in the dropout
layers during training. layers during training.
spatial_squeeze: whether or not should squeeze the spatial dimensions of the spatial_squeeze: whether or not should squeeze the spatial dimensions of the
outputs. Useful to remove unnecessary dimensions for classification. logits. Useful to remove unnecessary dimensions for classification.
scope: Optional scope for the variables. scope: Optional scope for the variables.
global_pool: Optional boolean flag. If True, the input to the classification
layer is avgpooled to size 1x1, for any input size. (This is not part
of the original AlexNet.)
Returns: Returns:
the last op containing the log predictions and end_points dict. net: the output of the logits layer (if num_classes is a non-zero integer),
or the non-dropped-out input to the logits layer (if num_classes is 0
or None).
end_points: a dict of tensors with intermediate activations.
""" """
with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
end_points_collection = sc.name + '_end_points' end_points_collection = sc.original_name_scope + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d. # Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=[end_points_collection]): outputs_collections=[end_points_collection]):
...@@ -108,6 +117,13 @@ def alexnet_v2(inputs, ...@@ -108,6 +117,13 @@ def alexnet_v2(inputs,
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6') scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7') net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
# Convert end_points_collection into a end_point dict.
end_points = slim.utils.convert_collection_to_dict(
end_points_collection)
if global_pool:
net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
end_points['global_pool'] = net
if num_classes:
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout7') scope='dropout7')
net = slim.conv2d(net, num_classes, [1, 1], net = slim.conv2d(net, num_classes, [1, 1],
...@@ -115,9 +131,6 @@ def alexnet_v2(inputs, ...@@ -115,9 +131,6 @@ def alexnet_v2(inputs,
normalizer_fn=None, normalizer_fn=None,
biases_initializer=tf.zeros_initializer(), biases_initializer=tf.zeros_initializer(),
scope='fc8') scope='fc8')
# Convert end_points_collection into a end_point dict.
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
if spatial_squeeze: if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='fc8/squeezed') net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net end_points[sc.name + '/fc8'] = net
......
...@@ -48,6 +48,18 @@ class AlexnetV2Test(tf.test.TestCase): ...@@ -48,6 +48,18 @@ class AlexnetV2Test(tf.test.TestCase):
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 4, 7, num_classes]) [batch_size, 4, 7, num_classes])
def testGlobalPool(self):
batch_size = 1
height, width = 300, 400
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False,
global_pool=True)
self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 1, 1, num_classes])
def testEndPoints(self): def testEndPoints(self):
batch_size = 5 batch_size = 5
height, width = 224, 224 height, width = 224, 224
...@@ -69,6 +81,29 @@ class AlexnetV2Test(tf.test.TestCase): ...@@ -69,6 +81,29 @@ class AlexnetV2Test(tf.test.TestCase):
] ]
self.assertSetEqual(set(end_points.keys()), set(expected_names)) self.assertSetEqual(set(end_points.keys()), set(expected_names))
def testNoClasses(self):
batch_size = 5
height, width = 224, 224
num_classes = None
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
net, end_points = alexnet.alexnet_v2(inputs, num_classes)
expected_names = ['alexnet_v2/conv1',
'alexnet_v2/pool1',
'alexnet_v2/conv2',
'alexnet_v2/pool2',
'alexnet_v2/conv3',
'alexnet_v2/conv4',
'alexnet_v2/conv5',
'alexnet_v2/pool5',
'alexnet_v2/fc6',
'alexnet_v2/fc7'
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
self.assertTrue(net.op.name.startswith('alexnet_v2/fc7'))
self.assertListEqual(net.get_shape().as_list(),
[batch_size, 1, 1, 4096])
def testModelVariables(self): def testModelVariables(self):
batch_size = 5 batch_size = 5
height, width = 224, 224 height, width = 224, 224
......
...@@ -42,7 +42,9 @@ def cifarnet(images, num_classes=10, is_training=False, ...@@ -42,7 +42,9 @@ def cifarnet(images, num_classes=10, is_training=False,
Args: Args:
images: A batch of `Tensors` of size [batch_size, height, width, channels]. images: A batch of `Tensors` of size [batch_size, height, width, channels].
num_classes: the number of classes in the dataset. num_classes: the number of classes in the dataset. If 0 or None, the logits
layer is omitted and the input features to the logits layer are returned
instead.
is_training: specifies whether or not we're currently training the model. is_training: specifies whether or not we're currently training the model.
This variable will determine the behaviour of the dropout layer. This variable will determine the behaviour of the dropout layer.
dropout_keep_prob: the percentage of activation values that are retained. dropout_keep_prob: the percentage of activation values that are retained.
...@@ -50,14 +52,15 @@ def cifarnet(images, num_classes=10, is_training=False, ...@@ -50,14 +52,15 @@ def cifarnet(images, num_classes=10, is_training=False,
scope: Optional variable_scope. scope: Optional variable_scope.
Returns: Returns:
logits: the pre-softmax activations, a tensor of size net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
[batch_size, `num_classes`] is a non-zero integer, or the input to the logits layer if num_classes
is 0 or None.
end_points: a dictionary from components of the network to the corresponding end_points: a dictionary from components of the network to the corresponding
activation. activation.
""" """
end_points = {} end_points = {}
with tf.variable_scope(scope, 'CifarNet', [images, num_classes]): with tf.variable_scope(scope, 'CifarNet', [images]):
net = slim.conv2d(images, 64, [5, 5], scope='conv1') net = slim.conv2d(images, 64, [5, 5], scope='conv1')
end_points['conv1'] = net end_points['conv1'] = net
net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
...@@ -76,6 +79,8 @@ def cifarnet(images, num_classes=10, is_training=False, ...@@ -76,6 +79,8 @@ def cifarnet(images, num_classes=10, is_training=False,
scope='dropout3') scope='dropout3')
net = slim.fully_connected(net, 192, scope='fc4') net = slim.fully_connected(net, 192, scope='fc4')
end_points['fc4'] = net end_points['fc4'] = net
if not num_classes:
return net, end_points
logits = slim.fully_connected(net, num_classes, logits = slim.fully_connected(net, num_classes,
biases_initializer=tf.zeros_initializer(), biases_initializer=tf.zeros_initializer(),
weights_initializer=trunc_normal(1/192.0), weights_initializer=trunc_normal(1/192.0),
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines the CycleGAN generator and discriminator networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import google3
import numpy as np
import tensorflow as tf
layers = tf.contrib.layers
def cyclegan_arg_scope(instance_norm_center=True,
                       instance_norm_scale=True,
                       instance_norm_epsilon=0.001,
                       weights_init_stddev=0.02,
                       weight_decay=0.0):
  """Returns a default argument scope for all generators and discriminators.

  Every `layers.conv2d` created under this scope is instance-normalized and
  has its kernel initialized from a zero-mean normal distribution, with an
  optional L2 weight-decay regularizer.

  Args:
    instance_norm_center: Whether instance normalization applies centering.
    instance_norm_scale: Whether instance normalization applies scaling.
    instance_norm_epsilon: Small float added to the variance in the instance
      normalization to avoid dividing by zero.
    weights_init_stddev: Standard deviation of the random values to initialize
      the convolution kernels with.
    weight_decay: Magnitude of weight decay applied to all convolution kernel
      variables of the generator.

  Returns:
    An arg-scope.
  """
  norm_params = dict(
      center=instance_norm_center,
      scale=instance_norm_scale,
      epsilon=instance_norm_epsilon)
  # Only attach a regularizer when a strictly positive decay is requested.
  regularizer = (layers.l2_regularizer(weight_decay)
                 if weight_decay and weight_decay > 0.0 else None)
  kernel_init = tf.random_normal_initializer(0, weights_init_stddev)
  with tf.contrib.framework.arg_scope(
      [layers.conv2d],
      normalizer_fn=layers.instance_norm,
      normalizer_params=norm_params,
      weights_initializer=kernel_init,
      weights_regularizer=regularizer) as sc:
    return sc
def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose'):
  """Upsamples the given inputs.

  Args:
    net: A Tensor of size [batch_size, height, width, filters].
    num_outputs: The number of output filters.
    stride: A list of 2 scalars or a 1x2 Tensor indicating the scale,
      relative to the inputs, of the output dimensions. For example, if kernel
      size is [2, 3], then the output height and width will be twice and three
      times the input size.
    method: The upsampling method: 'nn_upsample_conv', 'bilinear_upsample_conv',
      or 'conv2d_transpose'.

  Returns:
    A Tensor which was upsampled using the specified method.

  Raises:
    ValueError: if `method` is not recognized.
  """
  with tf.variable_scope('upconv'):
    net_shape = tf.shape(net)
    height = net_shape[1]
    width = net_shape[2]

    # Reflection pad by 1 in spatial dimensions (axes 1, 2 = h, w) to make a 3x3
    # 'valid' convolution produce an output with the same dimension as the
    # input.
    spatial_pad_1 = np.array([[0, 0], [1, 1], [1, 1], [0, 0]])

    if method == 'nn_upsample_conv':
      net = tf.image.resize_nearest_neighbor(
          net, [stride[0] * height, stride[1] * width])
      net = tf.pad(net, spatial_pad_1, 'REFLECT')
      net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
    # Bug fix: this branch was a separate `if`, so after the
    # 'nn_upsample_conv' branch ran, control fell through to the final `else`
    # and raised ValueError. It must be part of the elif chain.
    elif method == 'bilinear_upsample_conv':
      net = tf.image.resize_bilinear(
          net, [stride[0] * height, stride[1] * width])
      net = tf.pad(net, spatial_pad_1, 'REFLECT')
      net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
    elif method == 'conv2d_transpose':
      net = layers.conv2d_transpose(
          net, num_outputs, kernel_size=[3, 3], stride=stride, padding='same')
    else:
      # Bug fix: ValueError was given ('...%s...', method) as two arguments,
      # which never interpolates the method name into the message.
      raise ValueError('Unknown method: [%s]' % method)

  return net
def _dynamic_or_static_shape(tensor):
  """Returns the constant shape of `tensor` if known, else its dynamic shape.

  Prefers the compile-time constant value of `tf.shape(tensor)` when the
  shape is fully defined, falling back to the dynamic shape op otherwise.
  """
  dynamic_shape = tf.shape(tensor)
  constant_shape = tf.contrib.util.constant_value(dynamic_shape)
  if constant_shape is None:
    return dynamic_shape
  return constant_shape
def cyclegan_generator_resnet(images,
                              arg_scope_fn=cyclegan_arg_scope,
                              num_resnet_blocks=6,
                              num_filters=64,
                              upsample_fn=cyclegan_upsample,
                              kernel_size=3,
                              num_outputs=3,
                              tanh_linear_slope=0.0,
                              is_training=False):
  """Defines the cyclegan resnet network architecture.

  As closely as possible following
  https://github.com/junyanz/CycleGAN/blob/master/models/architectures.lua#L232

  FYI: This network requires input height and width to be divisible by 4 in
  order to generate an output with shape equal to input shape. Assertions
  will catch this if input dimensions are known at graph construction time,
  but there's no protection if unknown at graph construction time (you'll
  see an error).

  Args:
    images: Input image tensor of shape [batch_size, h, w, 3].
    arg_scope_fn: Function to create the global arg_scope for the network.
    num_resnet_blocks: Number of ResNet blocks in the middle of the generator.
    num_filters: Number of filters of the first hidden layer.
    upsample_fn: Upsampling function for the decoder part of the generator.
    kernel_size: Size w or list/tuple [h, w] of the filter kernels for all
      inner layers.
    num_outputs: Number of output layers. Defaults to 3 for RGB.
    tanh_linear_slope: Slope of the linear function to add to the tanh over
      the logits.
    is_training: Whether the network is created in training mode or inference
      only mode. Not actually needed, just for compliance with other generator
      network functions.

  Returns:
    A `Tensor` representing the model output and a dictionary of model end
    points.

  Raises:
    ValueError: If the input height or width is known at graph construction
      time and not a multiple of 4.
  """
  # Neither dropout nor batch norm -> dont need is_training
  del is_training

  end_points = {}

  input_size = images.shape.as_list()
  height, width = input_size[1], input_size[2]
  # Dimensions may be None for placeholders; only validate the known ones.
  # Two stride-2 encoder convs + two 2x upsamples require multiples of 4.
  if height and height % 4 != 0:
    raise ValueError('The input height must be a multiple of 4.')
  if width and width % 4 != 0:
    raise ValueError('The input width must be a multiple of 4.')

  if not isinstance(kernel_size, (list, tuple)):
    kernel_size = [kernel_size, kernel_size]
  kernel_height = kernel_size[0]
  kernel_width = kernel_size[1]
  # Explicit (possibly asymmetric, for even kernels) reflection padding so a
  # 'VALID' convolution with this kernel preserves the spatial size.
  pad_top = (kernel_height - 1) // 2
  pad_bottom = kernel_height // 2
  pad_left = (kernel_width - 1) // 2
  pad_right = kernel_width // 2
  paddings = np.array(
      [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
      dtype=np.int32)
  # Reflection padding of 3 for the 7x7 input and output convolutions.
  spatial_pad_3 = np.array([[0, 0], [3, 3], [3, 3], [0, 0]])

  with tf.contrib.framework.arg_scope(arg_scope_fn()):

    ###########
    # Encoder #
    ###########
    with tf.variable_scope('input'):
      # 7x7 input stage
      net = tf.pad(images, spatial_pad_3, 'REFLECT')
      net = layers.conv2d(net, num_filters, kernel_size=[7, 7],
                          padding='VALID')
      end_points['encoder_0'] = net

    with tf.variable_scope('encoder'):
      # Two stride-2 downsampling convs: each halves h/w and doubles filters.
      with tf.contrib.framework.arg_scope(
          [layers.conv2d],
          kernel_size=kernel_size,
          stride=2,
          activation_fn=tf.nn.relu,
          padding='VALID'):

        net = tf.pad(net, paddings, 'REFLECT')
        net = layers.conv2d(net, num_filters * 2)
        end_points['encoder_1'] = net
        net = tf.pad(net, paddings, 'REFLECT')
        net = layers.conv2d(net, num_filters * 4)
        end_points['encoder_2'] = net

    ###################
    # Residual Blocks #
    ###################
    with tf.variable_scope('residual_blocks'):
      with tf.contrib.framework.arg_scope(
          [layers.conv2d],
          kernel_size=kernel_size,
          stride=1,
          activation_fn=tf.nn.relu,
          padding='VALID'):
        # NOTE(review): `xrange` is Python 2 only — presumably imported from
        # six at the top of the file; verify.
        for block_id in xrange(num_resnet_blocks):
          with tf.variable_scope('block_{}'.format(block_id)):
            # conv-relu-conv with a skip connection; second conv is linear so
            # the residual is added before any activation.
            res_net = tf.pad(net, paddings, 'REFLECT')
            res_net = layers.conv2d(res_net, num_filters * 4)
            res_net = tf.pad(res_net, paddings, 'REFLECT')
            res_net = layers.conv2d(res_net, num_filters * 4,
                                    activation_fn=None)
            net += res_net

            end_points['resnet_block_%d' % block_id] = net

    ###########
    # Decoder #
    ###########
    with tf.variable_scope('decoder'):
      # Mirror of the encoder: two 2x upsamples, halving filters each time.
      with tf.contrib.framework.arg_scope(
          [layers.conv2d],
          kernel_size=kernel_size,
          stride=1,
          activation_fn=tf.nn.relu):

        with tf.variable_scope('decoder1'):
          net = upsample_fn(net, num_outputs=num_filters * 2, stride=[2, 2])
        end_points['decoder1'] = net

        with tf.variable_scope('decoder2'):
          net = upsample_fn(net, num_outputs=num_filters, stride=[2, 2])
        end_points['decoder2'] = net

    with tf.variable_scope('output'):
      # 7x7 output stage, no normalization/activation on the raw logits.
      net = tf.pad(net, spatial_pad_3, 'REFLECT')
      logits = layers.conv2d(
          net,
          num_outputs, [7, 7],
          activation_fn=None,
          normalizer_fn=None,
          padding='valid')
      # Restore the (possibly dynamic) input shape exactly.
      logits = tf.reshape(logits, _dynamic_or_static_shape(images))

      end_points['logits'] = logits
      # tanh output with an optional small linear term added on top.
      end_points['predictions'] = tf.tanh(logits) + logits * tanh_linear_slope

  return end_points['predictions'], end_points
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.contrib.slim.nets.cyclegan."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets import cyclegan
# TODO(joelshor): Add a test to check generator endpoints.
class CycleganTest(tf.test.TestCase):
  """Tests for the CycleGAN resnet generator."""

  def test_generator_inference(self):
    """Check one inference step."""
    img_batch = tf.zeros([2, 32, 32, 3])
    model_output, _ = cyclegan.cyclegan_generator_resnet(img_batch)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(model_output)

  def _test_generator_graph_helper(self, shape):
    """Check that generator can take small and non-square inputs."""
    output_imgs, _ = cyclegan.cyclegan_generator_resnet(tf.ones(shape))
    self.assertAllEqual(shape, output_imgs.shape.as_list())

  def test_generator_graph_small(self):
    self._test_generator_graph_helper([4, 32, 32, 3])

  def test_generator_graph_medium(self):
    self._test_generator_graph_helper([3, 128, 128, 3])

  def test_generator_graph_nonsquare(self):
    self._test_generator_graph_helper([2, 80, 400, 3])

  def test_generator_unknown_batch_dim(self):
    """Check that generator can take unknown batch dimension inputs."""
    img = tf.placeholder(tf.float32, shape=[None, 32, None, 3])
    output_imgs, _ = cyclegan.cyclegan_generator_resnet(img)
    self.assertAllEqual([None, 32, None, 3], output_imgs.shape.as_list())

  def _input_and_output_same_shape_helper(self, kernel_size):
    img_batch = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
    output_img_batch, _ = cyclegan.cyclegan_generator_resnet(
        img_batch, kernel_size=kernel_size)
    self.assertAllEqual(img_batch.shape.as_list(),
                        output_img_batch.shape.as_list())

  # BUG FIX: these four methods previously lacked the `test_` prefix, so the
  # unittest runner never discovered or executed them.
  def test_input_and_output_same_shape_kernel3(self):
    self._input_and_output_same_shape_helper(3)

  def test_input_and_output_same_shape_kernel4(self):
    self._input_and_output_same_shape_helper(4)

  def test_input_and_output_same_shape_kernel5(self):
    self._input_and_output_same_shape_helper(5)

  def test_input_and_output_same_shape_kernel6(self):
    self._input_and_output_same_shape_helper(6)

  def _error_if_height_not_multiple_of_four_helper(self, height):
    self.assertRaisesRegexp(
        ValueError,
        'The input height must be a multiple of 4.',
        cyclegan.cyclegan_generator_resnet,
        tf.placeholder(tf.float32, shape=[None, height, 32, 3]))

  def test_error_if_height_not_multiple_of_four_height29(self):
    self._error_if_height_not_multiple_of_four_helper(29)

  def test_error_if_height_not_multiple_of_four_height30(self):
    self._error_if_height_not_multiple_of_four_helper(30)

  def test_error_if_height_not_multiple_of_four_height31(self):
    self._error_if_height_not_multiple_of_four_helper(31)

  def _error_if_width_not_multiple_of_four_helper(self, width):
    self.assertRaisesRegexp(
        ValueError,
        'The input width must be a multiple of 4.',
        cyclegan.cyclegan_generator_resnet,
        tf.placeholder(tf.float32, shape=[None, 32, width, 3]))

  def test_error_if_width_not_multiple_of_four_width29(self):
    self._error_if_width_not_multiple_of_four_helper(29)

  def test_error_if_width_not_multiple_of_four_width30(self):
    self._error_if_width_not_multiple_of_four_helper(30)

  def test_error_if_width_not_multiple_of_four_width31(self):
    self._error_if_width_not_multiple_of_four_helper(31)
# Standard TensorFlow test entry point: runs all tf.test.TestCase tests in
# this module when it is executed directly.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DCGAN generator and discriminator from https://arxiv.org/abs/1511.06434."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from math import log

import tensorflow as tf

# BUG FIX: removed `import google3` — a Google-internal module that does not
# exist in the open-source release, so importing this file raised ImportError.

slim = tf.contrib.slim
def _validate_image_inputs(inputs):
  """Validates image inputs for the DCGAN discriminator.

  Args:
    inputs: A tensor expected to be [batch_size, height, width, channels]
      with statically known, equal, power-of-2 spatial dimensions.

  Raises:
    ValueError: If the rank is not 4, the spatial dimensions are undefined at
      graph construction time, the image is not square, or the width is not a
      power of 2.
  """
  inputs.get_shape().assert_has_rank(4)
  inputs.get_shape()[1:3].assert_is_fully_defined()
  if inputs.get_shape()[1] != inputs.get_shape()[2]:
    raise ValueError('Input tensor does not have equal width and height: ',
                     inputs.get_shape()[1:3])
  width = inputs.get_shape().as_list()[1]
  # BUG FIX: the original checked `log(width, 2) != int(log(width, 2))`,
  # which is fragile — math.log(x, 2) is computed as log(x)/log(2) and is not
  # exact for all powers of 2. The integer bit trick is exact.
  if width <= 0 or width & (width - 1) != 0:
    raise ValueError('Input tensor `width` is not a power of 2: ', width)
# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
# setups need the gradient of gradient FusedBatchNormGrad.
def discriminator(inputs,
                  depth=64,
                  is_training=True,
                  reuse=None,
                  scope='Discriminator',
                  fused_batch_norm=False):
  """Discriminator network for DCGAN.

  Construct discriminator network from inputs to the final endpoint.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels]. Must be
      floating point.
    depth: Number of channels in first convolution layer.
    is_training: Whether the network is for training or not.
    reuse: Whether or not the network variables should be reused. `scope`
      must be given to be reused.
    scope: Optional variable_scope.
    fused_batch_norm: If `True`, use a faster, fused implementation of
      batch norm.

  Returns:
    logits: The pre-softmax activations, a tensor of size [batch_size, 1]
    end_points: a dictionary from components of the network to their
      activation.

  Raises:
    ValueError: If the input image shape is not 4-dimensional, if the spatial
      dimensions aren't defined at graph construction time, if the spatial
      dimensions aren't square, or if the spatial dimensions aren't a power
      of two.
  """
  normalizer_fn = slim.batch_norm
  # Batch-norm settings shared by every normalized layer below.
  normalizer_fn_args = {
      'is_training': is_training,
      'zero_debias_moving_mean': True,
      'fused': fused_batch_norm,
  }

  # Requires square, power-of-2, statically known spatial dimensions.
  _validate_image_inputs(inputs)
  inp_shape = inputs.get_shape().as_list()[1]

  end_points = {}
  with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
    with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
      with slim.arg_scope([slim.conv2d],
                          stride=2,
                          kernel_size=4,
                          activation_fn=tf.nn.leaky_relu):
        net = inputs
        # Each stride-2 conv halves the spatial size; log2(width) layers
        # reduce the input to a 1x1 feature map.
        # NOTE(review): `xrange` is Python 2 only — presumably imported from
        # six at the top of the file; verify.
        for i in xrange(int(log(inp_shape, 2))):
          # Reuses the name `scope` as the per-layer scope string.
          scope = 'conv%i' % (i + 1)
          current_depth = depth * 2**i
          # First layer is left unnormalized; later layers use batch norm.
          normalizer_fn_ = None if i == 0 else normalizer_fn
          net = slim.conv2d(
              net, current_depth, normalizer_fn=normalizer_fn_, scope=scope)
          end_points[scope] = net

        # Collapse the remaining 1x1 feature map to one logit per example.
        logits = slim.conv2d(net, 1, kernel_size=1, stride=1, padding='VALID',
                             normalizer_fn=None, activation_fn=None)
        logits = tf.reshape(logits, [-1, 1])
        end_points['logits'] = logits

        return logits, end_points
# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
# setups need the gradient of gradient FusedBatchNormGrad.
def generator(inputs,
              depth=64,
              final_size=32,
              num_outputs=3,
              is_training=True,
              reuse=None,
              scope='Generator',
              fused_batch_norm=False):
  """Generator network for DCGAN.

  Construct generator network from inputs to the final endpoint.

  Args:
    inputs: A tensor with any size N. [batch_size, N]
    depth: Number of channels in last deconvolution layer.
    final_size: The shape of the final output.
    num_outputs: Number of output features. For images, this is the number of
      channels.
    is_training: whether is training or not.
    reuse: Whether or not the network has its variables should be reused.
      scope must be given to be reused.
    scope: Optional variable_scope.
    fused_batch_norm: If `True`, use a faster, fused implementation of
      batch norm.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, 32, 32, channels]
    end_points: a dictionary from components of the network to their
      activation.

  Raises:
    ValueError: If `inputs` is not 2-dimensional.
    ValueError: If `final_size` isn't a power of 2 or is less than 8.
  """
  normalizer_fn = slim.batch_norm
  # Batch-norm settings applied to every normalized layer below.
  normalizer_fn_args = {
      'is_training': is_training,
      'zero_debias_moving_mean': True,
      'fused': fused_batch_norm,
  }

  inputs.get_shape().assert_has_rank(2)
  if log(final_size, 2) != int(log(final_size, 2)):
    raise ValueError('`final_size` (%i) must be a power of 2.' % final_size)
  if final_size < 8:
    raise ValueError('`final_size` (%i) must be greater than 8.' % final_size)

  end_points = {}
  # The first deconv produces a 4x4 map and each later stride-2 deconv
  # doubles the size, so log2(final_size) - 1 layers reach final_size.
  num_layers = int(log(final_size, 2)) - 1
  with tf.variable_scope(scope, values=[inputs], reuse=reuse) as scope:
    with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
      with slim.arg_scope([slim.conv2d_transpose],
                          normalizer_fn=normalizer_fn,
                          stride=2,
                          kernel_size=4):
        # Reshape [batch, N] -> [batch, 1, 1, N] so deconvolutions apply.
        net = tf.expand_dims(tf.expand_dims(inputs, 1), 1)

        # First upscaling is different because it takes the input vector.
        current_depth = depth * 2 ** (num_layers - 1)
        # Reuses the name `scope` as the per-layer scope string.
        scope = 'deconv1'
        # stride=1 + VALID padding expands the 1x1 input to 4x4 (kernel
        # size).
        net = slim.conv2d_transpose(
            net, current_depth, stride=1, padding='VALID', scope=scope)
        end_points[scope] = net

        # NOTE(review): `xrange` is Python 2 only — presumably imported from
        # six at the top of the file; verify.
        for i in xrange(2, num_layers):
          scope = 'deconv%i' % (i)
          # Channel count halves as spatial size doubles.
          current_depth = depth * 2 ** (num_layers - i)
          net = slim.conv2d_transpose(net, current_depth, scope=scope)
          end_points[scope] = net

        # Last layer has different normalizer and activation.
        scope = 'deconv%i' % (num_layers)
        net = slim.conv2d_transpose(
            net, depth, normalizer_fn=None, activation_fn=None, scope=scope)
        end_points[scope] = net

        # Convert to proper channels.
        scope = 'logits'
        logits = slim.conv2d(
            net,
            num_outputs,
            normalizer_fn=None,
            activation_fn=None,
            kernel_size=1,
            stride=1,
            padding='VALID',
            scope=scope)
        end_points[scope] = logits

        # Sanity-check the statically known output shape.
        logits.get_shape().assert_has_rank(4)
        logits.get_shape().assert_is_compatible_with(
            [None, final_size, final_size, num_outputs])

        return logits, end_points
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dcgan."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets import dcgan
class DCGANTest(tf.test.TestCase):
  """Tests graph construction and execution of the DCGAN nets."""

  def test_generator_run(self):
    # Smoke test: one forward pass of the generator should run cleanly.
    tf.set_random_seed(1234)
    noise = tf.random_normal([100, 64])
    image, _ = dcgan.generator(noise)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      image.eval()

  def test_generator_graph(self):
    tf.set_random_seed(1234)
    # Check graph construction for a number of image size/depths and batch
    # sizes.
    for i, batch_size in zip(xrange(3, 7), xrange(3, 8)):
      tf.reset_default_graph()
      final_size = 2 ** i
      noise = tf.random_normal([batch_size, 64])
      image, end_points = dcgan.generator(
          noise,
          depth=32,
          final_size=final_size)

      self.assertAllEqual([batch_size, final_size, final_size, 3],
                          image.shape.as_list())
      # Expect one endpoint per deconv layer plus the final logits.
      expected_names = ['deconv%i' % j for j in xrange(1, i)] + ['logits']
      self.assertSetEqual(set(expected_names), set(end_points.keys()))

      # Check layer depths: channels halve as spatial size doubles.
      for j in range(1, i):
        layer = end_points['deconv%i' % j]
        self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1])

  def test_generator_invalid_input(self):
    # Generator requires rank-2 input and a power-of-2 final_size >= 8.
    wrong_dim_input = tf.zeros([5, 32, 32])
    with self.assertRaises(ValueError):
      dcgan.generator(wrong_dim_input)

    correct_input = tf.zeros([3, 2])
    with self.assertRaisesRegexp(ValueError, 'must be a power of 2'):
      dcgan.generator(correct_input, final_size=30)

    with self.assertRaisesRegexp(ValueError, 'must be greater than 8'):
      dcgan.generator(correct_input, final_size=4)

  def test_discriminator_run(self):
    # Smoke test: one forward pass of the discriminator should run cleanly.
    image = tf.random_uniform([5, 32, 32, 3], -1, 1)
    output, _ = dcgan.discriminator(image)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output.eval()

  def test_discriminator_graph(self):
    # Check graph construction for a number of image size/depths and batch
    # sizes.
    for i, batch_size in zip(xrange(1, 6), xrange(3, 8)):
      tf.reset_default_graph()
      img_w = 2 ** i
      image = tf.random_uniform([batch_size, img_w, img_w, 3], -1, 1)
      output, end_points = dcgan.discriminator(
          image,
          depth=32)

      self.assertAllEqual([batch_size, 1], output.get_shape().as_list())
      # One endpoint per stride-2 conv layer plus the final logits.
      expected_names = ['conv%i' % j for j in xrange(1, i+1)] + ['logits']
      self.assertSetEqual(set(expected_names), set(end_points.keys()))

      # Check layer depths: channels double at each conv layer.
      for j in range(1, i+1):
        layer = end_points['conv%i' % j]
        self.assertEqual(32 * 2**(j-1), layer.get_shape().as_list()[-1])

  def test_discriminator_invalid_input(self):
    # Discriminator requires rank-4, square, power-of-2, statically known
    # spatial dimensions.
    wrong_dim_img = tf.zeros([5, 32, 32])
    with self.assertRaises(ValueError):
      dcgan.discriminator(wrong_dim_img)

    spatially_undefined_shape = tf.placeholder(tf.float32, [5, 32, None, 3])
    with self.assertRaises(ValueError):
      dcgan.discriminator(spatially_undefined_shape)

    not_square = tf.zeros([5, 32, 16, 3])
    with self.assertRaisesRegexp(ValueError, 'not have equal width and height'):
      dcgan.discriminator(not_square)

    not_power_2 = tf.zeros([5, 30, 30, 3])
    with self.assertRaisesRegexp(ValueError, 'not a power of 2'):
      dcgan.discriminator(not_power_2)
# Standard TensorFlow test entry point: runs all tf.test.TestCase tests in
# this module when it is executed directly.
if __name__ == '__main__':
  tf.test.main()
...@@ -45,7 +45,12 @@ def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): ...@@ -45,7 +45,12 @@ def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2])
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1') activation_fn=None, scope='Conv2d_1x1')
net += scale * up scaled_up = up * scale
if activation_fn == tf.nn.relu6:
# Use clip_by_value to simulate bandpass activation.
scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
net += scaled_up
if activation_fn: if activation_fn:
net = activation_fn(net) net = activation_fn(net)
return net return net
...@@ -65,7 +70,13 @@ def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): ...@@ -65,7 +70,13 @@ def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1') activation_fn=None, scope='Conv2d_1x1')
net += scale * up
scaled_up = up * scale
if activation_fn == tf.nn.relu6:
# Use clip_by_value to simulate bandpass activation.
scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
net += scaled_up
if activation_fn: if activation_fn:
net = activation_fn(net) net = activation_fn(net)
return net return net
...@@ -85,7 +96,13 @@ def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): ...@@ -85,7 +96,13 @@ def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1') activation_fn=None, scope='Conv2d_1x1')
net += scale * up
scaled_up = up * scale
if activation_fn == tf.nn.relu6:
# Use clip_by_value to simulate bandpass activation.
scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
net += scaled_up
if activation_fn: if activation_fn:
net = activation_fn(net) net = activation_fn(net)
return net return net
...@@ -95,7 +112,8 @@ def inception_resnet_v2_base(inputs, ...@@ -95,7 +112,8 @@ def inception_resnet_v2_base(inputs,
final_endpoint='Conv2d_7b_1x1', final_endpoint='Conv2d_7b_1x1',
output_stride=16, output_stride=16,
align_feature_maps=False, align_feature_maps=False,
scope=None): scope=None,
activation_fn=tf.nn.relu):
"""Inception model from http://arxiv.org/abs/1602.07261. """Inception model from http://arxiv.org/abs/1602.07261.
Constructs an Inception Resnet v2 network from inputs to the given final Constructs an Inception Resnet v2 network from inputs to the given final
...@@ -113,6 +131,7 @@ def inception_resnet_v2_base(inputs, ...@@ -113,6 +131,7 @@ def inception_resnet_v2_base(inputs,
align_feature_maps: When true, changes all the VALID paddings in the network align_feature_maps: When true, changes all the VALID paddings in the network
to SAME padding so that the feature maps are aligned. to SAME padding so that the feature maps are aligned.
scope: Optional variable_scope. scope: Optional variable_scope.
activation_fn: Activation function for block scopes.
Returns: Returns:
tensor_out: output tensor corresponding to the final_endpoint. tensor_out: output tensor corresponding to the final_endpoint.
...@@ -191,7 +210,8 @@ def inception_resnet_v2_base(inputs, ...@@ -191,7 +210,8 @@ def inception_resnet_v2_base(inputs,
if add_and_check_final('Mixed_5b', net): return net, end_points if add_and_check_final('Mixed_5b', net): return net, end_points
# TODO(alemi): Register intermediate endpoints # TODO(alemi): Register intermediate endpoints
net = slim.repeat(net, 10, block35, scale=0.17) net = slim.repeat(net, 10, block35, scale=0.17,
activation_fn=activation_fn)
# 17 x 17 x 1088 if output_stride == 8, # 17 x 17 x 1088 if output_stride == 8,
# 33 x 33 x 1088 if output_stride == 16 # 33 x 33 x 1088 if output_stride == 16
...@@ -220,7 +240,8 @@ def inception_resnet_v2_base(inputs, ...@@ -220,7 +240,8 @@ def inception_resnet_v2_base(inputs,
# TODO(alemi): register intermediate endpoints # TODO(alemi): register intermediate endpoints
with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1):
net = slim.repeat(net, 20, block17, scale=0.10) net = slim.repeat(net, 20, block17, scale=0.10,
activation_fn=activation_fn)
if add_and_check_final('PreAuxLogits', net): return net, end_points if add_and_check_final('PreAuxLogits', net): return net, end_points
if output_stride == 8: if output_stride == 8:
...@@ -257,7 +278,7 @@ def inception_resnet_v2_base(inputs, ...@@ -257,7 +278,7 @@ def inception_resnet_v2_base(inputs,
if add_and_check_final('Mixed_7a', net): return net, end_points if add_and_check_final('Mixed_7a', net): return net, end_points
# TODO(alemi): register intermediate endpoints # TODO(alemi): register intermediate endpoints
net = slim.repeat(net, 9, block8, scale=0.20) net = slim.repeat(net, 9, block8, scale=0.20, activation_fn=activation_fn)
net = block8(net, activation_fn=None) net = block8(net, activation_fn=None)
# 8 x 8 x 1536 # 8 x 8 x 1536
...@@ -271,33 +292,42 @@ def inception_resnet_v2(inputs, num_classes=1001, is_training=True, ...@@ -271,33 +292,42 @@ def inception_resnet_v2(inputs, num_classes=1001, is_training=True,
dropout_keep_prob=0.8, dropout_keep_prob=0.8,
reuse=None, reuse=None,
scope='InceptionResnetV2', scope='InceptionResnetV2',
create_aux_logits=True): create_aux_logits=True,
activation_fn=tf.nn.relu):
"""Creates the Inception Resnet V2 model. """Creates the Inception Resnet V2 model.
Args: Args:
inputs: a 4-D tensor of size [batch_size, height, width, 3]. inputs: a 4-D tensor of size [batch_size, height, width, 3].
num_classes: number of predicted classes. Dimension batch_size may be undefined. If create_aux_logits is false,
also height and width may be undefined.
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not. is_training: whether is training or not.
dropout_keep_prob: float, the fraction to keep before final layer. dropout_keep_prob: float, the fraction to keep before final layer.
reuse: whether or not the network and its variables should be reused. To be reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given. able to reuse 'scope' must be given.
scope: Optional variable_scope. scope: Optional variable_scope.
create_aux_logits: Whether to include the auxilliary logits. create_aux_logits: Whether to include the auxilliary logits.
activation_fn: Activation function for conv2d.
Returns: Returns:
logits: the logits outputs of the model. net: the output of the logits layer (if num_classes is a non-zero integer),
or the non-dropped-out input to the logits layer (if num_classes is 0 or
None).
end_points: the set of end_points from the inception model. end_points: the set of end_points from the inception model.
""" """
end_points = {} end_points = {}
with tf.variable_scope(scope, 'InceptionResnetV2', [inputs, num_classes], with tf.variable_scope(scope, 'InceptionResnetV2', [inputs],
reuse=reuse) as scope: reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout], with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training): is_training=is_training):
net, end_points = inception_resnet_v2_base(inputs, scope=scope) net, end_points = inception_resnet_v2_base(inputs, scope=scope,
activation_fn=activation_fn)
if create_aux_logits: if create_aux_logits and num_classes:
with tf.variable_scope('AuxLogits'): with tf.variable_scope('AuxLogits'):
aux = end_points['PreAuxLogits'] aux = end_points['PreAuxLogits']
aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID',
...@@ -311,13 +341,20 @@ def inception_resnet_v2(inputs, num_classes=1001, is_training=True, ...@@ -311,13 +341,20 @@ def inception_resnet_v2(inputs, num_classes=1001, is_training=True,
end_points['AuxLogits'] = aux end_points['AuxLogits'] = aux
with tf.variable_scope('Logits'): with tf.variable_scope('Logits'):
net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
# can be set to False to disable pooling here (as in resnet_*()).
kernel_size = net.get_shape()[1:3]
if kernel_size.is_fully_defined():
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a_8x8') scope='AvgPool_1a_8x8')
else:
net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
end_points['global_pool'] = net
if not num_classes:
return net, end_points
net = slim.flatten(net) net = slim.flatten(net)
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='Dropout') scope='Dropout')
end_points['PreLogitsFlatten'] = net end_points['PreLogitsFlatten'] = net
logits = slim.fully_connected(net, num_classes, activation_fn=None, logits = slim.fully_connected(net, num_classes, activation_fn=None,
scope='Logits') scope='Logits')
...@@ -330,13 +367,15 @@ inception_resnet_v2.default_image_size = 299 ...@@ -330,13 +367,15 @@ inception_resnet_v2.default_image_size = 299
def inception_resnet_v2_arg_scope(weight_decay=0.00004, def inception_resnet_v2_arg_scope(weight_decay=0.00004,
batch_norm_decay=0.9997, batch_norm_decay=0.9997,
batch_norm_epsilon=0.001): batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu):
"""Returns the scope with the default parameters for inception_resnet_v2. """Returns the scope with the default parameters for inception_resnet_v2.
Args: Args:
weight_decay: the weight decay for weights variables. weight_decay: the weight decay for weights variables.
batch_norm_decay: decay for the moving average of batch_norm momentums. batch_norm_decay: decay for the moving average of batch_norm momentums.
batch_norm_epsilon: small float added to variance to avoid dividing by zero. batch_norm_epsilon: small float added to variance to avoid dividing by zero.
activation_fn: Activation function for conv2d.
Returns: Returns:
a arg_scope with the parameters needed for inception_resnet_v2. a arg_scope with the parameters needed for inception_resnet_v2.
...@@ -349,9 +388,10 @@ def inception_resnet_v2_arg_scope(weight_decay=0.00004, ...@@ -349,9 +388,10 @@ def inception_resnet_v2_arg_scope(weight_decay=0.00004,
batch_norm_params = { batch_norm_params = {
'decay': batch_norm_decay, 'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon, 'epsilon': batch_norm_epsilon,
'fused': None, # Use fused batch norm if possible.
} }
# Set activation_fn and parameters for batch_norm. # Set activation_fn and parameters for batch_norm.
with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, with slim.arg_scope([slim.conv2d], activation_fn=activation_fn,
normalizer_fn=slim.batch_norm, normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params) as scope: normalizer_params=batch_norm_params) as scope:
return scope return scope
...@@ -54,6 +54,19 @@ class InceptionTest(tf.test.TestCase): ...@@ -54,6 +54,19 @@ class InceptionTest(tf.test.TestCase):
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
def testBuildNoClasses(self):
batch_size = 5
height, width = 299, 299
num_classes = None
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
net, endpoints = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue('AuxLogits' not in endpoints)
self.assertTrue('Logits' not in endpoints)
self.assertTrue(
net.op.name.startswith('InceptionResnetV2/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1536])
def testBuildEndPoints(self): def testBuildEndPoints(self):
batch_size = 5 batch_size = 5
height, width = 299, 299 height, width = 299, 299
...@@ -213,6 +226,39 @@ class InceptionTest(tf.test.TestCase): ...@@ -213,6 +226,39 @@ class InceptionTest(tf.test.TestCase):
self.assertListEqual(pre_pool.get_shape().as_list(), self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 3, 3, 1536]) [batch_size, 3, 3, 1536])
def testGlobalPool(self):
batch_size = 2
height, width = 400, 600
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Conv2d_7b_1x1']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 11, 17, 1536])
def testGlobalPoolUnknownImageShape(self):
batch_size = 2
height, width = 400, 600
num_classes = 1000
with self.test_session() as sess:
inputs = tf.placeholder(tf.float32, (batch_size, None, None, 3))
logits, end_points = inception.inception_resnet_v2(
inputs, num_classes, create_aux_logits=False)
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Conv2d_7b_1x1']
images = tf.random_uniform((batch_size, height, width, 3))
sess.run(tf.global_variables_initializer())
logits_out, pre_pool_out = sess.run([logits, pre_pool],
{inputs: images.eval()})
self.assertTupleEqual(logits_out.shape, (batch_size, num_classes))
self.assertTupleEqual(pre_pool_out.shape, (batch_size, 11, 17, 1536))
def testUnknownBatchSize(self): def testUnknownBatchSize(self):
batch_size = 1 batch_size = 1
height, width = 299, 299 height, width = 299, 299
......
...@@ -32,7 +32,8 @@ slim = tf.contrib.slim ...@@ -32,7 +32,8 @@ slim = tf.contrib.slim
def inception_arg_scope(weight_decay=0.00004, def inception_arg_scope(weight_decay=0.00004,
use_batch_norm=True, use_batch_norm=True,
batch_norm_decay=0.9997, batch_norm_decay=0.9997,
batch_norm_epsilon=0.001): batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu):
"""Defines the default arg scope for inception models. """Defines the default arg scope for inception models.
Args: Args:
...@@ -41,6 +42,7 @@ def inception_arg_scope(weight_decay=0.00004, ...@@ -41,6 +42,7 @@ def inception_arg_scope(weight_decay=0.00004,
batch_norm_decay: Decay for batch norm moving average. batch_norm_decay: Decay for batch norm moving average.
batch_norm_epsilon: Small float added to variance to avoid dividing by zero batch_norm_epsilon: Small float added to variance to avoid dividing by zero
in batch norm. in batch norm.
activation_fn: Activation function for conv2d.
Returns: Returns:
An `arg_scope` to use for the inception models. An `arg_scope` to use for the inception models.
...@@ -52,6 +54,8 @@ def inception_arg_scope(weight_decay=0.00004, ...@@ -52,6 +54,8 @@ def inception_arg_scope(weight_decay=0.00004,
'epsilon': batch_norm_epsilon, 'epsilon': batch_norm_epsilon,
# collection containing update_ops. # collection containing update_ops.
'updates_collections': tf.GraphKeys.UPDATE_OPS, 'updates_collections': tf.GraphKeys.UPDATE_OPS,
# use fused batch norm if possible.
'fused': None,
} }
if use_batch_norm: if use_batch_norm:
normalizer_fn = slim.batch_norm normalizer_fn = slim.batch_norm
...@@ -65,7 +69,7 @@ def inception_arg_scope(weight_decay=0.00004, ...@@ -65,7 +69,7 @@ def inception_arg_scope(weight_decay=0.00004,
with slim.arg_scope( with slim.arg_scope(
[slim.conv2d], [slim.conv2d],
weights_initializer=slim.variance_scaling_initializer(), weights_initializer=slim.variance_scaling_initializer(),
activation_fn=tf.nn.relu, activation_fn=activation_fn,
normalizer_fn=normalizer_fn, normalizer_fn=normalizer_fn,
normalizer_params=normalizer_params) as sc: normalizer_params=normalizer_params) as sc:
return sc return sc
...@@ -261,7 +261,8 @@ def inception_v1(inputs, ...@@ -261,7 +261,8 @@ def inception_v1(inputs,
prediction_fn=slim.softmax, prediction_fn=slim.softmax,
spatial_squeeze=True, spatial_squeeze=True,
reuse=None, reuse=None,
scope='InceptionV1'): scope='InceptionV1',
global_pool=False):
"""Defines the Inception V1 architecture. """Defines the Inception V1 architecture.
This architecture is defined in: This architecture is defined in:
...@@ -275,7 +276,9 @@ def inception_v1(inputs, ...@@ -275,7 +276,9 @@ def inception_v1(inputs,
Args: Args:
inputs: a tensor of size [batch_size, height, width, channels]. inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes. num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not. is_training: whether is training or not.
dropout_keep_prob: the percentage of activation values that are retained. dropout_keep_prob: the percentage of activation values that are retained.
prediction_fn: a function to get predictions out of logits. prediction_fn: a function to get predictions out of logits.
...@@ -284,23 +287,35 @@ def inception_v1(inputs, ...@@ -284,23 +287,35 @@ def inception_v1(inputs,
reuse: whether or not the network and its variables should be reused. To be reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given. able to reuse 'scope' must be given.
scope: Optional variable_scope. scope: Optional variable_scope.
global_pool: Optional boolean flag to control the avgpooling before the
logits layer. If false or unset, pooling is done with a fixed window
that reduces default-sized inputs to 1x1, while larger inputs lead to
larger outputs. If true, any input size is pooled down to 1x1.
Returns: Returns:
logits: the pre-softmax activations, a tensor of size net: a Tensor with the logits (pre-softmax activations) if num_classes
[batch_size, num_classes] is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding end_points: a dictionary from components of the network to the corresponding
activation. activation.
""" """
# Final pooling and prediction # Final pooling and prediction
with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes], with tf.variable_scope(scope, 'InceptionV1', [inputs], reuse=reuse) as scope:
reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout], with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training): is_training=is_training):
net, end_points = inception_v1_base(inputs, scope=scope) net, end_points = inception_v1_base(inputs, scope=scope)
with tf.variable_scope('Logits'): with tf.variable_scope('Logits'):
if global_pool:
# Global average pooling.
net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
end_points['global_pool'] = net
else:
# Pooling with a fixed kernel size.
net = slim.avg_pool2d(net, [7, 7], stride=1, scope='AvgPool_0a_7x7') net = slim.avg_pool2d(net, [7, 7], stride=1, scope='AvgPool_0a_7x7')
net = slim.dropout(net, end_points['AvgPool_0a_7x7'] = net
dropout_keep_prob, scope='Dropout_0b') if not num_classes:
return net, end_points
net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, scope='Conv2d_0c_1x1') normalizer_fn=None, scope='Conv2d_0c_1x1')
if spatial_squeeze: if spatial_squeeze:
......
...@@ -35,13 +35,26 @@ class InceptionV1Test(tf.test.TestCase): ...@@ -35,13 +35,26 @@ class InceptionV1Test(tf.test.TestCase):
inputs = tf.random_uniform((batch_size, height, width, 3)) inputs = tf.random_uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v1(inputs, num_classes) logits, end_points = inception.inception_v1(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) self.assertTrue(logits.op.name.startswith(
'InceptionV1/Logits/SpatialSqueeze'))
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
self.assertTrue('Predictions' in end_points) self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(), self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 224, 224
num_classes = None
inputs = tf.random_uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v1(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV1/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildBaseNetwork(self): def testBuildBaseNetwork(self):
batch_size = 5 batch_size = 5
height, width = 224, 224 height, width = 224, 224
...@@ -144,6 +157,25 @@ class InceptionV1Test(tf.test.TestCase): ...@@ -144,6 +157,25 @@ class InceptionV1Test(tf.test.TestCase):
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024]) self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
def testGlobalPoolUnknownImageShape(self):
tf.reset_default_graph()
batch_size = 2
height, width = 300, 400
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v1(inputs, num_classes,
global_pool=True)
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_5c']
feed_dict = {inputs: input_np}
tf.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 10, 13, 1024])
def testUnknowBatchSize(self): def testUnknowBatchSize(self):
batch_size = 1 batch_size = 1
height, width = 224, 224 height, width = 224, 224
......
...@@ -458,7 +458,8 @@ def inception_v2(inputs, ...@@ -458,7 +458,8 @@ def inception_v2(inputs,
prediction_fn=slim.softmax, prediction_fn=slim.softmax,
spatial_squeeze=True, spatial_squeeze=True,
reuse=None, reuse=None,
scope='InceptionV2'): scope='InceptionV2',
global_pool=False):
"""Inception v2 model for classification. """Inception v2 model for classification.
Constructs an Inception v2 network for classification as described in Constructs an Inception v2 network for classification as described in
...@@ -468,7 +469,9 @@ def inception_v2(inputs, ...@@ -468,7 +469,9 @@ def inception_v2(inputs,
Args: Args:
inputs: a tensor of shape [batch_size, height, width, channels]. inputs: a tensor of shape [batch_size, height, width, channels].
num_classes: number of predicted classes. num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not. is_training: whether is training or not.
dropout_keep_prob: the percentage of activation values that are retained. dropout_keep_prob: the percentage of activation values that are retained.
min_depth: Minimum depth value (number of channels) for all convolution ops. min_depth: Minimum depth value (number of channels) for all convolution ops.
...@@ -484,10 +487,15 @@ def inception_v2(inputs, ...@@ -484,10 +487,15 @@ def inception_v2(inputs,
reuse: whether or not the network and its variables should be reused. To be reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given. able to reuse 'scope' must be given.
scope: Optional variable_scope. scope: Optional variable_scope.
global_pool: Optional boolean flag to control the avgpooling before the
logits layer. If false or unset, pooling is done with a fixed window
that reduces default-sized inputs to 1x1, while larger inputs lead to
larger outputs. If true, any input size is pooled down to 1x1.
Returns: Returns:
logits: the pre-softmax activations, a tensor of size net: a Tensor with the logits (pre-softmax activations) if num_classes
[batch_size, num_classes] is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding end_points: a dictionary from components of the network to the corresponding
activation. activation.
...@@ -499,17 +507,25 @@ def inception_v2(inputs, ...@@ -499,17 +507,25 @@ def inception_v2(inputs,
raise ValueError('depth_multiplier is not greater than zero.') raise ValueError('depth_multiplier is not greater than zero.')
# Final pooling and prediction # Final pooling and prediction
with tf.variable_scope(scope, 'InceptionV2', [inputs, num_classes], with tf.variable_scope(scope, 'InceptionV2', [inputs], reuse=reuse) as scope:
reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout], with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training): is_training=is_training):
net, end_points = inception_v2_base( net, end_points = inception_v2_base(
inputs, scope=scope, min_depth=min_depth, inputs, scope=scope, min_depth=min_depth,
depth_multiplier=depth_multiplier) depth_multiplier=depth_multiplier)
with tf.variable_scope('Logits'): with tf.variable_scope('Logits'):
if global_pool:
# Global average pooling.
net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
end_points['global_pool'] = net
else:
# Pooling with a fixed kernel size.
kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7]) kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
net = slim.avg_pool2d(net, kernel_size, padding='VALID', net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a_{}x{}'.format(*kernel_size)) scope='AvgPool_1a_{}x{}'.format(*kernel_size))
end_points['AvgPool_1a'] = net
if not num_classes:
return net, end_points
# 1 x 1 x 1024 # 1 x 1 x 1024
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
......
...@@ -35,13 +35,26 @@ class InceptionV2Test(tf.test.TestCase): ...@@ -35,13 +35,26 @@ class InceptionV2Test(tf.test.TestCase):
inputs = tf.random_uniform((batch_size, height, width, 3)) inputs = tf.random_uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v2(inputs, num_classes) logits, end_points = inception.inception_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) self.assertTrue(logits.op.name.startswith(
'InceptionV2/Logits/SpatialSqueeze'))
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
self.assertTrue('Predictions' in end_points) self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(), self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 224, 224
num_classes = None
inputs = tf.random_uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v2(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV2/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildBaseNetwork(self): def testBuildBaseNetwork(self):
batch_size = 5 batch_size = 5
height, width = 224, 224 height, width = 224, 224
...@@ -258,6 +271,25 @@ class InceptionV2Test(tf.test.TestCase): ...@@ -258,6 +271,25 @@ class InceptionV2Test(tf.test.TestCase):
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024]) self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
def testGlobalPoolUnknownImageShape(self):
tf.reset_default_graph()
batch_size = 2
height, width = 300, 400
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v2(inputs, num_classes,
global_pool=True)
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_5c']
feed_dict = {inputs: input_np}
tf.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 10, 13, 1024])
def testUnknowBatchSize(self): def testUnknowBatchSize(self):
batch_size = 1 batch_size = 1
height, width = 224, 224 height, width = 224, 224
......
...@@ -426,7 +426,8 @@ def inception_v3(inputs, ...@@ -426,7 +426,8 @@ def inception_v3(inputs,
spatial_squeeze=True, spatial_squeeze=True,
reuse=None, reuse=None,
create_aux_logits=True, create_aux_logits=True,
scope='InceptionV3'): scope='InceptionV3',
global_pool=False):
"""Inception model from http://arxiv.org/abs/1512.00567. """Inception model from http://arxiv.org/abs/1512.00567.
"Rethinking the Inception Architecture for Computer Vision" "Rethinking the Inception Architecture for Computer Vision"
...@@ -443,7 +444,9 @@ def inception_v3(inputs, ...@@ -443,7 +444,9 @@ def inception_v3(inputs,
Args: Args:
inputs: a tensor of size [batch_size, height, width, channels]. inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes. num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not. is_training: whether is training or not.
dropout_keep_prob: the percentage of activation values that are retained. dropout_keep_prob: the percentage of activation values that are retained.
min_depth: Minimum depth value (number of channels) for all convolution ops. min_depth: Minimum depth value (number of channels) for all convolution ops.
...@@ -460,10 +463,15 @@ def inception_v3(inputs, ...@@ -460,10 +463,15 @@ def inception_v3(inputs,
able to reuse 'scope' must be given. able to reuse 'scope' must be given.
create_aux_logits: Whether to create the auxiliary logits. create_aux_logits: Whether to create the auxiliary logits.
scope: Optional variable_scope. scope: Optional variable_scope.
global_pool: Optional boolean flag to control the avgpooling before the
logits layer. If false or unset, pooling is done with a fixed window
that reduces default-sized inputs to 1x1, while larger inputs lead to
larger outputs. If true, any input size is pooled down to 1x1.
Returns: Returns:
logits: the pre-softmax activations, a tensor of size net: a Tensor with the logits (pre-softmax activations) if num_classes
[batch_size, num_classes] is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding end_points: a dictionary from components of the network to the corresponding
activation. activation.
...@@ -474,8 +482,7 @@ def inception_v3(inputs, ...@@ -474,8 +482,7 @@ def inception_v3(inputs,
raise ValueError('depth_multiplier is not greater than zero.') raise ValueError('depth_multiplier is not greater than zero.')
depth = lambda d: max(int(d * depth_multiplier), min_depth) depth = lambda d: max(int(d * depth_multiplier), min_depth)
with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], with tf.variable_scope(scope, 'InceptionV3', [inputs], reuse=reuse) as scope:
reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout], with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training): is_training=is_training):
net, end_points = inception_v3_base( net, end_points = inception_v3_base(
...@@ -483,7 +490,7 @@ def inception_v3(inputs, ...@@ -483,7 +490,7 @@ def inception_v3(inputs,
depth_multiplier=depth_multiplier) depth_multiplier=depth_multiplier)
# Auxiliary Head logits # Auxiliary Head logits
if create_aux_logits: if create_aux_logits and num_classes:
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'): stride=1, padding='SAME'):
aux_logits = end_points['Mixed_6e'] aux_logits = end_points['Mixed_6e']
...@@ -511,9 +518,18 @@ def inception_v3(inputs, ...@@ -511,9 +518,18 @@ def inception_v3(inputs,
# Final pooling and prediction # Final pooling and prediction
with tf.variable_scope('Logits'): with tf.variable_scope('Logits'):
if global_pool:
# Global average pooling.
net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='GlobalPool')
end_points['global_pool'] = net
else:
# Pooling with a fixed kernel size.
kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8]) kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
net = slim.avg_pool2d(net, kernel_size, padding='VALID', net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a_{}x{}'.format(*kernel_size)) scope='AvgPool_1a_{}x{}'.format(*kernel_size))
end_points['AvgPool_1a'] = net
if not num_classes:
return net, end_points
# 1 x 1 x 2048 # 1 x 1 x 2048
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
end_points['PreLogits'] = net end_points['PreLogits'] = net
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment