"tests/vscode:/vscode.git/clone" did not exist on "b8cf84a3f902550937255c5b28b39827ba52beb6"
Unverified Commit 57014e4c authored by gunan's avatar gunan Committed by GitHub
Browse files

Merge pull request #2627 from sguada/nasnet

Bring tensorflow/models slim up to date.
parents c46caa56 cbb62479
...@@ -35,13 +35,26 @@ class InceptionV3Test(tf.test.TestCase): ...@@ -35,13 +35,26 @@ class InceptionV3Test(tf.test.TestCase):
inputs = tf.random_uniform((batch_size, height, width, 3)) inputs = tf.random_uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v3(inputs, num_classes) logits, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits')) self.assertTrue(logits.op.name.startswith(
'InceptionV3/Logits/SpatialSqueeze'))
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
self.assertTrue('Predictions' in end_points) self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(), self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 299, 299
num_classes = None
inputs = tf.random_uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV3/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 2048])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildBaseNetwork(self): def testBuildBaseNetwork(self):
batch_size = 5 batch_size = 5
height, width = 299, 299 height, width = 299, 299
...@@ -225,6 +238,24 @@ class InceptionV3Test(tf.test.TestCase): ...@@ -225,6 +238,24 @@ class InceptionV3Test(tf.test.TestCase):
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048]) self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048])
def testGlobalPoolUnknownImageShape(self):
tf.reset_default_graph()
batch_size = 2
height, width = 400, 600
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v3(inputs, num_classes,
global_pool=True)
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7c']
feed_dict = {inputs: input_np}
tf.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 11, 17, 2048])
def testUnknowBatchSize(self): def testUnknowBatchSize(self):
batch_size = 1 batch_size = 1
height, width = 299, 299 height, width = 299, 299
......
...@@ -263,7 +263,9 @@ def inception_v4(inputs, num_classes=1001, is_training=True, ...@@ -263,7 +263,9 @@ def inception_v4(inputs, num_classes=1001, is_training=True,
Args: Args:
inputs: a 4-D tensor of size [batch_size, height, width, 3]. inputs: a 4-D tensor of size [batch_size, height, width, 3].
num_classes: number of predicted classes. num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not. is_training: whether is training or not.
dropout_keep_prob: float, the fraction to keep before final layer. dropout_keep_prob: float, the fraction to keep before final layer.
reuse: whether or not the network and its variables should be reused. To be reuse: whether or not the network and its variables should be reused. To be
...@@ -272,7 +274,9 @@ def inception_v4(inputs, num_classes=1001, is_training=True, ...@@ -272,7 +274,9 @@ def inception_v4(inputs, num_classes=1001, is_training=True,
create_aux_logits: Whether to include the auxiliary logits. create_aux_logits: Whether to include the auxiliary logits.
Returns: Returns:
logits: the logits outputs of the model. net: a Tensor with the logits (pre-softmax activations) if num_classes
is a non-zero integer, or the non-dropped input to the logits layer
if num_classes is 0 or None.
end_points: the set of end_points from the inception model. end_points: the set of end_points from the inception model.
""" """
end_points = {} end_points = {}
...@@ -284,7 +288,7 @@ def inception_v4(inputs, num_classes=1001, is_training=True, ...@@ -284,7 +288,7 @@ def inception_v4(inputs, num_classes=1001, is_training=True,
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'): stride=1, padding='SAME'):
# Auxiliary Head logits # Auxiliary Head logits
if create_aux_logits: if create_aux_logits and num_classes:
with tf.variable_scope('AuxLogits'): with tf.variable_scope('AuxLogits'):
# 17 x 17 x 1024 # 17 x 17 x 1024
aux_logits = end_points['Mixed_6h'] aux_logits = end_points['Mixed_6h']
...@@ -303,10 +307,20 @@ def inception_v4(inputs, num_classes=1001, is_training=True, ...@@ -303,10 +307,20 @@ def inception_v4(inputs, num_classes=1001, is_training=True,
end_points['AuxLogits'] = aux_logits end_points['AuxLogits'] = aux_logits
# Final pooling and prediction # Final pooling and prediction
# TODO(sguada,arnoegw): Consider adding a parameter global_pool which
# can be set to False to disable pooling here (as in resnet_*()).
with tf.variable_scope('Logits'): with tf.variable_scope('Logits'):
# 8 x 8 x 1536 # 8 x 8 x 1536
net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', kernel_size = net.get_shape()[1:3]
scope='AvgPool_1a') if kernel_size.is_fully_defined():
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a')
else:
net = tf.reduce_mean(net, [1, 2], keep_dims=True,
name='global_pool')
end_points['global_pool'] = net
if not num_classes:
return net, end_points
# 1 x 1 x 1536 # 1 x 1 x 1536
net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
net = slim.flatten(net, scope='PreLogitsFlatten') net = slim.flatten(net, scope='PreLogitsFlatten')
......
...@@ -43,6 +43,17 @@ class InceptionTest(tf.test.TestCase): ...@@ -43,6 +43,17 @@ class InceptionTest(tf.test.TestCase):
self.assertListEqual(predictions.get_shape().as_list(), self.assertListEqual(predictions.get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 299, 299
num_classes = None
inputs = tf.random_uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v4(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV4/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1536])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildWithoutAuxLogits(self): def testBuildWithoutAuxLogits(self):
batch_size = 5 batch_size = 5
height, width = 299, 299 height, width = 299, 299
...@@ -90,6 +101,7 @@ class InceptionTest(tf.test.TestCase): ...@@ -90,6 +101,7 @@ class InceptionTest(tf.test.TestCase):
'Mixed_7d': [batch_size, 8, 8, 1536], 'Mixed_7d': [batch_size, 8, 8, 1536],
# Logits and predictions # Logits and predictions
'AuxLogits': [batch_size, num_classes], 'AuxLogits': [batch_size, num_classes],
'global_pool': [batch_size, 1, 1, 1536],
'PreLogitsFlatten': [batch_size, 1536], 'PreLogitsFlatten': [batch_size, 1536],
'Logits': [batch_size, num_classes], 'Logits': [batch_size, num_classes],
'Predictions': [batch_size, num_classes]} 'Predictions': [batch_size, num_classes]}
...@@ -164,6 +176,38 @@ class InceptionTest(tf.test.TestCase): ...@@ -164,6 +176,38 @@ class InceptionTest(tf.test.TestCase):
self.assertListEqual(pre_pool.get_shape().as_list(), self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 3, 3, 1536]) [batch_size, 3, 3, 1536])
def testGlobalPool(self):
batch_size = 2
height, width = 400, 600
num_classes = 1000
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v4(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7d']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 11, 17, 1536])
def testGlobalPoolUnknownImageShape(self):
batch_size = 2
height, width = 400, 600
num_classes = 1000
with self.test_session() as sess:
inputs = tf.placeholder(tf.float32, (batch_size, None, None, 3))
logits, end_points = inception.inception_v4(
inputs, num_classes, create_aux_logits=False)
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7d']
images = tf.random_uniform((batch_size, height, width, 3))
sess.run(tf.global_variables_initializer())
logits_out, pre_pool_out = sess.run([logits, pre_pool],
{inputs: images.eval()})
self.assertTupleEqual(logits_out.shape, (batch_size, num_classes))
self.assertTupleEqual(pre_pool_out.shape, (batch_size, 11, 17, 1536))
def testUnknownBatchSize(self): def testUnknownBatchSize(self):
batch_size = 1 batch_size = 1
height, width = 299, 299 height, width = 299, 299
......
...@@ -40,7 +40,9 @@ def lenet(images, num_classes=10, is_training=False, ...@@ -40,7 +40,9 @@ def lenet(images, num_classes=10, is_training=False,
Args: Args:
images: A batch of `Tensors` of size [batch_size, height, width, channels]. images: A batch of `Tensors` of size [batch_size, height, width, channels].
num_classes: the number of classes in the dataset. num_classes: the number of classes in the dataset. If 0 or None, the logits
layer is omitted and the input features to the logits layer are returned
instead.
is_training: specifies whether or not we're currently training the model. is_training: specifies whether or not we're currently training the model.
This variable will determine the behaviour of the dropout layer. This variable will determine the behaviour of the dropout layer.
dropout_keep_prob: the percentage of activation values that are retained. dropout_keep_prob: the percentage of activation values that are retained.
...@@ -48,28 +50,30 @@ def lenet(images, num_classes=10, is_training=False, ...@@ -48,28 +50,30 @@ def lenet(images, num_classes=10, is_training=False,
scope: Optional variable_scope. scope: Optional variable_scope.
Returns: Returns:
logits: the pre-softmax activations, a tensor of size net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
[batch_size, `num_classes`] is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding end_points: a dictionary from components of the network to the corresponding
activation. activation.
""" """
end_points = {} end_points = {}
with tf.variable_scope(scope, 'LeNet', [images, num_classes]): with tf.variable_scope(scope, 'LeNet', [images]):
net = slim.conv2d(images, 32, [5, 5], scope='conv1') net = end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1')
net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') net = end_points['pool1'] = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
net = slim.conv2d(net, 64, [5, 5], scope='conv2') net = end_points['conv2'] = slim.conv2d(net, 64, [5, 5], scope='conv2')
net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') net = end_points['pool2'] = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
net = slim.flatten(net) net = slim.flatten(net)
end_points['Flatten'] = net end_points['Flatten'] = net
net = slim.fully_connected(net, 1024, scope='fc3') net = end_points['fc3'] = slim.fully_connected(net, 1024, scope='fc3')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, if not num_classes:
scope='dropout3') return net, end_points
logits = slim.fully_connected(net, num_classes, activation_fn=None, net = end_points['dropout3'] = slim.dropout(
scope='fc4') net, dropout_keep_prob, is_training=is_training, scope='dropout3')
logits = end_points['Logits'] = slim.fully_connected(
net, num_classes, activation_fn=None, scope='fc4')
end_points['Logits'] = logits
end_points['Predictions'] = prediction_fn(logits, scope='Predictions') end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
return logits, end_points return logits, end_points
......
...@@ -154,7 +154,7 @@ def mobilenet_v1_base(inputs, ...@@ -154,7 +154,7 @@ def mobilenet_v1_base(inputs,
inputs: a tensor of shape [batch_size, height, width, channels]. inputs: a tensor of shape [batch_size, height, width, channels].
final_endpoint: specifies the endpoint to construct the network up to. It final_endpoint: specifies the endpoint to construct the network up to. It
can be one of ['Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise', can be one of ['Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise',
'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5_pointwise', 'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5_pointwise',
'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise', 'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise',
'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise', 'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise',
'Conv2d_12_pointwise', 'Conv2d_13_pointwise']. 'Conv2d_12_pointwise', 'Conv2d_13_pointwise'].
...@@ -276,12 +276,15 @@ def mobilenet_v1(inputs, ...@@ -276,12 +276,15 @@ def mobilenet_v1(inputs,
prediction_fn=tf.contrib.layers.softmax, prediction_fn=tf.contrib.layers.softmax,
spatial_squeeze=True, spatial_squeeze=True,
reuse=None, reuse=None,
scope='MobilenetV1'): scope='MobilenetV1',
global_pool=False):
"""Mobilenet v1 model for classification. """Mobilenet v1 model for classification.
Args: Args:
inputs: a tensor of shape [batch_size, height, width, channels]. inputs: a tensor of shape [batch_size, height, width, channels].
num_classes: number of predicted classes. num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
dropout_keep_prob: the percentage of activation values that are retained. dropout_keep_prob: the percentage of activation values that are retained.
is_training: whether is training or not. is_training: whether is training or not.
min_depth: Minimum depth value (number of channels) for all convolution ops. min_depth: Minimum depth value (number of channels) for all convolution ops.
...@@ -298,10 +301,15 @@ def mobilenet_v1(inputs, ...@@ -298,10 +301,15 @@ def mobilenet_v1(inputs,
reuse: whether or not the network and its variables should be reused. To be reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given. able to reuse 'scope' must be given.
scope: Optional variable_scope. scope: Optional variable_scope.
global_pool: Optional boolean flag to control the avgpooling before the
logits layer. If false or unset, pooling is done with a fixed window
that reduces default-sized inputs to 1x1, while larger inputs lead to
larger outputs. If true, any input size is pooled down to 1x1.
Returns: Returns:
logits: the pre-softmax activations, a tensor of size net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
[batch_size, num_classes] is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding end_points: a dictionary from components of the network to the corresponding
activation. activation.
...@@ -313,8 +321,7 @@ def mobilenet_v1(inputs, ...@@ -313,8 +321,7 @@ def mobilenet_v1(inputs,
raise ValueError('Invalid input tensor rank, expected 4, was: %d' % raise ValueError('Invalid input tensor rank, expected 4, was: %d' %
len(input_shape)) len(input_shape))
with tf.variable_scope(scope, 'MobilenetV1', [inputs, num_classes], with tf.variable_scope(scope, 'MobilenetV1', [inputs], reuse=reuse) as scope:
reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout], with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training): is_training=is_training):
net, end_points = mobilenet_v1_base(inputs, scope=scope, net, end_points = mobilenet_v1_base(inputs, scope=scope,
...@@ -322,10 +329,18 @@ def mobilenet_v1(inputs, ...@@ -322,10 +329,18 @@ def mobilenet_v1(inputs,
depth_multiplier=depth_multiplier, depth_multiplier=depth_multiplier,
conv_defs=conv_defs) conv_defs=conv_defs)
with tf.variable_scope('Logits'): with tf.variable_scope('Logits'):
kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7]) if global_pool:
net = slim.avg_pool2d(net, kernel_size, padding='VALID', # Global average pooling.
scope='AvgPool_1a') net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
end_points['AvgPool_1a'] = net end_points['global_pool'] = net
else:
# Pooling with a fixed kernel size.
kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a')
end_points['AvgPool_1a'] = net
if not num_classes:
return net, end_points
# 1 x 1 x 1024 # 1 x 1 x 1024
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
......
...@@ -35,13 +35,26 @@ class MobilenetV1Test(tf.test.TestCase): ...@@ -35,13 +35,26 @@ class MobilenetV1Test(tf.test.TestCase):
inputs = tf.random_uniform((batch_size, height, width, 3)) inputs = tf.random_uniform((batch_size, height, width, 3))
logits, end_points = mobilenet_v1.mobilenet_v1(inputs, num_classes) logits, end_points = mobilenet_v1.mobilenet_v1(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('MobilenetV1/Logits')) self.assertTrue(logits.op.name.startswith(
'MobilenetV1/Logits/SpatialSqueeze'))
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
self.assertTrue('Predictions' in end_points) self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(), self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes]) [batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 224, 224
num_classes = None
inputs = tf.random_uniform((batch_size, height, width, 3))
net, end_points = mobilenet_v1.mobilenet_v1(inputs, num_classes)
self.assertTrue(net.op.name.startswith('MobilenetV1/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildBaseNetwork(self): def testBuildBaseNetwork(self):
batch_size = 5 batch_size = 5
height, width = 224, 224 height, width = 224, 224
...@@ -383,6 +396,25 @@ class MobilenetV1Test(tf.test.TestCase): ...@@ -383,6 +396,25 @@ class MobilenetV1Test(tf.test.TestCase):
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024]) self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
def testGlobalPoolUnknownImageShape(self):
tf.reset_default_graph()
batch_size = 2
height, width = 300, 400
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = mobilenet_v1.mobilenet_v1(inputs, num_classes,
global_pool=True)
self.assertTrue(logits.op.name.startswith('MobilenetV1/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Conv2d_13_pointwise']
feed_dict = {inputs: input_np}
tf.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 10, 13, 1024])
def testUnknowBatchSize(self): def testUnknowBatchSize(self):
batch_size = 1 batch_size = 1
height, width = 224, 224 height, width = 224, 224
......
# TensorFlow-Slim NASNet-A Implementation/Checkpoints
This directory contains the code for the NASNet-A model from the paper
[Learning Transferable Architectures for Scalable Image Recognition](https://arxiv.org/abs/1707.07012) by Zoph et al.
In nasnet.py there are three different configurations of NASNet-A that are implemented. One of the models is the NASNet-A built for CIFAR-10 and the
other two are variants of NASNet-A trained on ImageNet, which are listed below.
# Pre-Trained Models
Two NASNet-A checkpoints are available that have been trained on the
[ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/)
image classification dataset. Accuracies were computed by evaluating using a single image crop.
Model Checkpoint | Million MACs | Million Parameters | Top-1 Accuracy| Top-5 Accuracy |
:----:|:------------:|:----------:|:-------:|:-------:|
[NASNet-A_Mobile_224](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_mobile_04_10_2017.tar.gz)|564|5.3|74.0|91.6|
[NASNet-A_Large_331](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_large_04_10_2017.tar.gz)|23800|88.9|82.7|96.2|
Here is an example of how to download the NASNet-A_Mobile_224 checkpoint. The way to download the NASNet-A_Large_331 is the same.
```shell
CHECKPOINT_DIR=/tmp/checkpoints
mkdir ${CHECKPOINT_DIR}
cd ${CHECKPOINT_DIR}
wget https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_mobile_04_10_2017.tar.gz
tar -xvf nasnet-a_mobile_04_10_2017.tar.gz
rm nasnet-a_mobile_04_10_2017.tar.gz
```
More information on integrating NASNet Models into your project can be found at the [TF-Slim Image Classification Library](https://github.com/tensorflow/models/blob/master/research/slim/README.md).
To get started running models on-device go to [TensorFlow Mobile](https://www.tensorflow.org/mobile/).
### Sample Commands for using NASNet-A Mobile and Large Checkpoints for Inference
-------
Run eval with the NASNet-A mobile ImageNet model
```shell
DATASET_DIR=/tmp/imagenet
EVAL_DIR=/tmp/tfmodel/eval
CHECKPOINT_DIR=/tmp/checkpoints/model.ckpt
python tensorflow_models/research/slim/eval_image_classifier \
--checkpoint_path=${CHECKPOINT_DIR} \
--eval_dir=${EVAL_DIR} \
--dataset_dir=${DATASET_DIR} \
--dataset_name=imagenet \
--dataset_split_name=validation \
--model_name=nasnet_mobile \
--eval_image_size=224 \
--moving_average_decay=0.9999 \
```
Run eval with the NASNet-A large ImageNet model
```shell
DATASET_DIR=/tmp/imagenet
EVAL_DIR=/tmp/tfmodel/eval
CHECKPOINT_DIR=/tmp/checkpoints/model.ckpt
python tensorflow_models/research/slim/eval_image_classifier \
--checkpoint_path=${CHECKPOINT_DIR} \
--eval_dir=${EVAL_DIR} \
--dataset_dir=${DATASET_DIR} \
--dataset_name=imagenet \
--dataset_split_name=validation \
--model_name=nasnet_large \
--eval_image_size=331 \
--moving_average_decay=0.9999 \
```
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for the NASNet classification networks.
Paper: https://arxiv.org/abs/1707.07012
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets.nasnet import nasnet_utils
arg_scope = tf.contrib.framework.arg_scope
slim = tf.contrib.slim
# Notes for training NASNet Cifar Model
# -------------------------------------
# batch_size: 32
# learning rate: 0.025
# cosine (single period) learning rate decay
# auxiliary head loss weighting: 0.4
# clip global norm of all gradients by 5
def _cifar_config(is_training=True):
  """Returns the HParams used to train/eval the NASNet-A Cifar model."""
  # Drop path regularization is only active while training.
  keep_prob = 0.6 if is_training else 1.0
  return tf.contrib.training.HParams(
      stem_multiplier=3.0,
      drop_path_keep_prob=keep_prob,
      num_cells=18,
      use_aux_head=1,
      num_conv_filters=32,
      dense_dropout_keep_prob=1.0,
      filter_scaling_rate=2.0,
      num_reduction_layers=2,
      data_format='NHWC',
      skip_reduction_layer_input=0,
      # 600 epochs with a batch size of 32
      # This is used for the drop path probabilities since it needs to increase
      # the drop out probability over the course of training.
      total_training_steps=937500,
  )
# Notes for training large NASNet model on ImageNet
# -------------------------------------
# batch size (per replica): 16
# learning rate: 0.015 * 100
# learning rate decay factor: 0.97
# num epochs per decay: 2.4
# sync sgd with 100 replicas
# auxiliary head loss weighting: 0.4
# label smoothing: 0.1
# clip global norm of all gradients by 10
def _large_imagenet_config(is_training=True):
  """Returns the HParams used for the large NASNet-A ImageNet model."""
  # Drop path regularization is only active while training.
  keep_prob = 0.7 if is_training else 1.0
  return tf.contrib.training.HParams(
      stem_multiplier=3.0,
      dense_dropout_keep_prob=0.5,
      num_cells=18,
      filter_scaling_rate=2.0,
      num_conv_filters=168,
      drop_path_keep_prob=keep_prob,
      use_aux_head=1,
      num_reduction_layers=2,
      data_format='NHWC',
      skip_reduction_layer_input=1,
      total_training_steps=250000,
  )
# Notes for training the mobile NASNet ImageNet model
# -------------------------------------
# batch size (per replica): 32
# learning rate: 0.04 * 50
# learning rate scaling factor: 0.97
# num epochs per decay: 2.4
# sync sgd with 50 replicas
# auxiliary head weighting: 0.4
# label smoothing: 0.1
# clip global norm of all gradients by 10
def _mobile_imagenet_config():
  """Returns the HParams used for the mobile NASNet-A ImageNet model."""
  # No drop path (keep_prob=1.0) for the mobile-sized model.
  return tf.contrib.training.HParams(
      stem_multiplier=1.0,
      dense_dropout_keep_prob=0.5,
      num_cells=12,
      filter_scaling_rate=2.0,
      drop_path_keep_prob=1.0,
      num_conv_filters=44,
      use_aux_head=1,
      num_reduction_layers=2,
      data_format='NHWC',
      skip_reduction_layer_input=0,
      total_training_steps=250000,
  )
def _nasnet_base_arg_scope(weight_decay, batch_norm_decay, batch_norm_epsilon):
  """Builds the arg_scope shared by all NASNet-A model variants.

  The three public *_arg_scope functions below differ only in their default
  hyperparameter values; the scope structure is identical, so it is defined
  once here instead of being triplicated.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` configured with the given hyperparameters.
  """
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': batch_norm_decay,
      # epsilon to prevent 0s in variance.
      'epsilon': batch_norm_epsilon,
      'scale': True,
      'fused': True,
  }
  weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
  weights_initializer = tf.contrib.layers.variance_scaling_initializer(
      mode='FAN_OUT')
  with arg_scope([slim.fully_connected, slim.conv2d, slim.separable_conv2d],
                 weights_regularizer=weights_regularizer,
                 weights_initializer=weights_initializer):
    with arg_scope([slim.fully_connected],
                   activation_fn=None, scope='FC'):
      with arg_scope([slim.conv2d, slim.separable_conv2d],
                     activation_fn=None, biases_initializer=None):
        with arg_scope([slim.batch_norm], **batch_norm_params) as sc:
          return sc


def nasnet_cifar_arg_scope(weight_decay=5e-4,
                           batch_norm_decay=0.9,
                           batch_norm_epsilon=1e-5):
  """Defines the default arg scope for the NASNet-A Cifar model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Cifar Model.
  """
  return _nasnet_base_arg_scope(weight_decay, batch_norm_decay,
                                batch_norm_epsilon)


def nasnet_mobile_arg_scope(weight_decay=4e-5,
                            batch_norm_decay=0.9997,
                            batch_norm_epsilon=1e-3):
  """Defines the default arg scope for the NASNet-A Mobile ImageNet model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Mobile Model.
  """
  return _nasnet_base_arg_scope(weight_decay, batch_norm_decay,
                                batch_norm_epsilon)


def nasnet_large_arg_scope(weight_decay=5e-5,
                           batch_norm_decay=0.9997,
                           batch_norm_epsilon=1e-3):
  """Defines the default arg scope for the NASNet-A Large ImageNet model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Large Model.
  """
  return _nasnet_base_arg_scope(weight_decay, batch_norm_decay,
                                batch_norm_epsilon)
def _build_aux_head(net, end_points, num_classes, hparams, scope):
  """Builds the auxiliary classifier head and records it in end_points."""
  with tf.variable_scope(scope):
    aux = tf.identity(net)
    with tf.variable_scope('aux_logits'):
      aux = slim.avg_pool2d(aux, [5, 5], stride=3, padding='VALID')
      aux = slim.conv2d(aux, 128, [1, 1], scope='proj')
      aux = slim.batch_norm(aux, scope='aux_bn0')
      aux = tf.nn.relu(aux)
      # Spatial dims of the current feature map; the following VALID conv
      # uses them as its kernel so the output collapses to 1x1 spatially.
      if hparams.data_format == 'NHWC':
        spatial_shape = aux.shape[1:3]
      else:
        spatial_shape = aux.shape[2:4]
      aux = slim.conv2d(aux, 768, spatial_shape, padding='VALID')
      aux = slim.batch_norm(aux, scope='aux_bn1')
      aux = tf.nn.relu(aux)
      aux = tf.contrib.layers.flatten(aux)
      aux = slim.fully_connected(aux, num_classes)
      end_points['AuxLogits'] = aux
def _imagenet_stem(inputs, hparams, stem_cell):
  """Stem used for models trained on ImageNet: conv + two reduction cells."""
  num_stem_cells = 2

  # 149 x 149 x 32
  stem_filters = int(32 * hparams.stem_multiplier)
  net = slim.conv2d(inputs, stem_filters, [3, 3], stride=2,
                    scope='conv0', padding='VALID')
  net = slim.batch_norm(net, scope='conv0_bn')

  # Run the reduction cells, doubling the filter scaling after each one.
  cell_outputs = [None, net]
  scaling = 1.0 / (hparams.filter_scaling_rate ** num_stem_cells)
  for idx in range(num_stem_cells):
    net = stem_cell(net,
                    scope='cell_stem_{}'.format(idx),
                    filter_scaling=scaling,
                    stride=2,
                    prev_layer=cell_outputs[-2],
                    cell_num=idx)
    cell_outputs.append(net)
    scaling *= hparams.filter_scaling_rate
  return net, cell_outputs
def _cifar_stem(inputs, hparams):
  """Stem used for models trained on Cifar."""
  stem_filters = int(hparams.num_conv_filters * hparams.stem_multiplier)
  stem = slim.conv2d(inputs, stem_filters, 3, scope='l1_stem_3x3')
  stem = slim.batch_norm(stem, scope='l1_stem_bn')
  # Second list element mirrors _imagenet_stem's (prev_layer, layer) history.
  return stem, [None, stem]
def build_nasnet_cifar(
    images, num_classes, is_training=True):
  """Build NASNet model for the Cifar Dataset.

  Args:
    images: Input batch (NHWC); transposed to NCHW when the config asks for
      that data format.
    num_classes: Number of output classes, or None to stop at the pooled
      pre-logits features (see _build_nasnet_base).
    is_training: Whether to build the training graph (passed to the config
      and to the dropout/drop_path/batch_norm arg_scope).

  Returns:
    (logits, end_points) from _build_nasnet_base.
  """
  hparams = _cifar_config(is_training=is_training)

  if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
    tf.logging.info('A GPU is available on the machine, consider using NCHW '
                    'data format for increased speed on GPU.')

  if hparams.data_format == 'NCHW':
    images = tf.transpose(images, [0, 3, 1, 2])

  # Calculate the total number of cells in the network
  # Add 2 for the reduction cells
  total_num_cells = hparams.num_cells + 2

  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
      total_num_cells, hparams.total_training_steps)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
      total_num_cells, hparams.total_training_steps)
  # One arg_scope drives all training-mode flags; the inner one shares the
  # configured data_format with every layout-sensitive op.
  with arg_scope([slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
                 is_training=is_training):
    with arg_scope([slim.avg_pool2d,
                    slim.max_pool2d,
                    slim.conv2d,
                    slim.batch_norm,
                    slim.separable_conv2d,
                    nasnet_utils.factorized_reduction,
                    nasnet_utils.global_avg_pool,
                    nasnet_utils.get_channel_index,
                    nasnet_utils.get_channel_dim],
                   data_format=hparams.data_format):
      return _build_nasnet_base(images,
                                normal_cell=normal_cell,
                                reduction_cell=reduction_cell,
                                num_classes=num_classes,
                                hparams=hparams,
                                is_training=is_training,
                                stem_type='cifar')
build_nasnet_cifar.default_image_size = 32
def build_nasnet_mobile(images, num_classes,
                        is_training=True, is_batchnorm_training=True,
                        final_endpoint=None):
  """Build NASNet Mobile model for the ImageNet Dataset.

  Args:
    images: Input batch (NHWC); transposed to NCHW when the config asks for
      that data format.
    num_classes: Number of output classes, or None to stop at the pooled
      pre-logits features (see _build_nasnet_base).
    is_training: Controls dropout and drop_path.
    is_batchnorm_training: Controls batch-norm training mode independently
      of the rest of the network.
    final_endpoint: Optional endpoint name at which to stop building early.

  Returns:
    (logits, end_points) from _build_nasnet_base.
  """
  hparams = _mobile_imagenet_config()

  if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
    tf.logging.info('A GPU is available on the machine, consider using NCHW '
                    'data format for increased speed on GPU.')

  if hparams.data_format == 'NCHW':
    images = tf.transpose(images, [0, 3, 1, 2])

  # Calculate the total number of cells in the network
  # Add 2 for the reduction cells
  total_num_cells = hparams.num_cells + 2
  # If ImageNet, then add an additional two for the stem cells
  total_num_cells += 2

  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
      total_num_cells, hparams.total_training_steps)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
      total_num_cells, hparams.total_training_steps)
  with arg_scope([slim.dropout, nasnet_utils.drop_path],
                 is_training=is_training):
    with arg_scope([slim.batch_norm], is_training=is_batchnorm_training):
      with arg_scope([slim.avg_pool2d,
                      slim.max_pool2d,
                      slim.conv2d,
                      slim.batch_norm,
                      slim.separable_conv2d,
                      nasnet_utils.factorized_reduction,
                      nasnet_utils.global_avg_pool,
                      nasnet_utils.get_channel_index,
                      nasnet_utils.get_channel_dim],
                     data_format=hparams.data_format):
        return _build_nasnet_base(images,
                                  normal_cell=normal_cell,
                                  reduction_cell=reduction_cell,
                                  num_classes=num_classes,
                                  hparams=hparams,
                                  is_training=is_training,
                                  stem_type='imagenet',
                                  final_endpoint=final_endpoint)
build_nasnet_mobile.default_image_size = 224
def build_nasnet_large(images, num_classes,
                       is_training=True, is_batchnorm_training=True,
                       final_endpoint=None):
  """Build NASNet Large model for the ImageNet Dataset.

  Args:
    images: Input batch (NHWC); transposed to NCHW when the config asks for
      that data format.
    num_classes: Number of output classes, or None to stop at the pooled
      pre-logits features (see _build_nasnet_base).
    is_training: Controls dropout and drop_path (also passed to the config).
    is_batchnorm_training: Controls batch-norm training mode independently
      of the rest of the network.
    final_endpoint: Optional endpoint name at which to stop building early.

  Returns:
    (logits, end_points) from _build_nasnet_base.
  """
  hparams = _large_imagenet_config(is_training=is_training)

  if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
    tf.logging.info('A GPU is available on the machine, consider using NCHW '
                    'data format for increased speed on GPU.')

  if hparams.data_format == 'NCHW':
    images = tf.transpose(images, [0, 3, 1, 2])

  # Calculate the total number of cells in the network
  # Add 2 for the reduction cells
  total_num_cells = hparams.num_cells + 2
  # If ImageNet, then add an additional two for the stem cells
  total_num_cells += 2

  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
      total_num_cells, hparams.total_training_steps)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob,
      total_num_cells, hparams.total_training_steps)
  with arg_scope([slim.dropout, nasnet_utils.drop_path],
                 is_training=is_training):
    with arg_scope([slim.batch_norm], is_training=is_batchnorm_training):
      with arg_scope([slim.avg_pool2d,
                      slim.max_pool2d,
                      slim.conv2d,
                      slim.batch_norm,
                      slim.separable_conv2d,
                      nasnet_utils.factorized_reduction,
                      nasnet_utils.global_avg_pool,
                      nasnet_utils.get_channel_index,
                      nasnet_utils.get_channel_dim],
                     data_format=hparams.data_format):
        return _build_nasnet_base(images,
                                  normal_cell=normal_cell,
                                  reduction_cell=reduction_cell,
                                  num_classes=num_classes,
                                  hparams=hparams,
                                  is_training=is_training,
                                  stem_type='imagenet',
                                  final_endpoint=final_endpoint)
build_nasnet_large.default_image_size = 331
def _build_nasnet_base(images,
                       normal_cell,
                       reduction_cell,
                       num_classes,
                       hparams,
                       is_training,
                       stem_type,
                       final_endpoint=None):
  """Constructs a NASNet image model.

  Args:
    images: Input tensor, already in hparams.data_format layout.
    normal_cell: Callable normal-cell object (nasnet_utils.NasNetANormalCell).
    reduction_cell: Callable reduction-cell object; also used as the stem
      cell for ImageNet.
    num_classes: Number of classes, or None to return pooled features.
    hparams: Hyperparameter object read for cell counts, scaling, dropout.
    is_training: Whether the auxiliary head should be built.
    stem_type: Either 'imagenet' or 'cifar'; selects the stem.
    final_endpoint: If set, construction stops as soon as this endpoint name
      is recorded, returning (net, end_points) at that point.

  Returns:
    (logits, end_points), unless num_classes is None or final_endpoint
    triggers an early return (then the first element is a features tensor).
  """
  end_points = {}

  def add_and_check_endpoint(endpoint_name, net):
    # Record the tensor and report whether building should stop here.
    end_points[endpoint_name] = net
    return final_endpoint and (endpoint_name == final_endpoint)

  # Find where to place the reduction cells or stride normal cells
  reduction_indices = nasnet_utils.calc_reduction_layers(
      hparams.num_cells, hparams.num_reduction_layers)

  stem_cell = reduction_cell

  if stem_type == 'imagenet':
    stem = lambda: _imagenet_stem(images, hparams, stem_cell)
  elif stem_type == 'cifar':
    stem = lambda: _cifar_stem(images, hparams)
  else:
    raise ValueError('Unknown stem_type: ', stem_type)
  net, cell_outputs = stem()
  if add_and_check_endpoint('Stem', net): return net, end_points

  # Setup for building in the auxiliary head.
  aux_head_cell_idxes = []
  if len(reduction_indices) >= 2:
    # Attach the aux head one cell before the second reduction.
    aux_head_cell_idxes.append(reduction_indices[1] - 1)

  # Run the cells
  filter_scaling = 1.0
  # true_cell_num accounts for the stem cells
  true_cell_num = 2 if stem_type == 'imagenet' else 0
  for cell_num in range(hparams.num_cells):
    stride = 1
    if hparams.skip_reduction_layer_input:
      # Capture the skip input before a reduction cell may append to
      # cell_outputs below.
      prev_layer = cell_outputs[-2]
    if cell_num in reduction_indices:
      filter_scaling *= hparams.filter_scaling_rate
      net = reduction_cell(
          net,
          scope='reduction_cell_{}'.format(reduction_indices.index(cell_num)),
          filter_scaling=filter_scaling,
          stride=2,
          prev_layer=cell_outputs[-2],
          cell_num=true_cell_num)
      if add_and_check_endpoint(
          'Reduction_Cell_{}'.format(reduction_indices.index(cell_num)), net):
        return net, end_points
      true_cell_num += 1
      cell_outputs.append(net)
    if not hparams.skip_reduction_layer_input:
      prev_layer = cell_outputs[-2]
    net = normal_cell(
        net,
        scope='cell_{}'.format(cell_num),
        filter_scaling=filter_scaling,
        stride=stride,
        prev_layer=prev_layer,
        cell_num=true_cell_num)
    if add_and_check_endpoint('Cell_{}'.format(cell_num), net):
      return net, end_points
    true_cell_num += 1
    if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and
        num_classes and is_training):
      aux_net = tf.nn.relu(net)
      _build_aux_head(aux_net, end_points, num_classes, hparams,
                      scope='aux_{}'.format(cell_num))
    cell_outputs.append(net)

  # Final softmax layer
  with tf.variable_scope('final_layer'):
    net = tf.nn.relu(net)
    net = nasnet_utils.global_avg_pool(net)
    if add_and_check_endpoint('global_pool', net) or num_classes is None:
      return net, end_points
    net = slim.dropout(net, hparams.dense_dropout_keep_prob, scope='dropout')
    logits = slim.fully_connected(net, num_classes)

    if add_and_check_endpoint('Logits', logits):
      # NOTE(review): returns `net` (pooled features), not `logits`, when
      # stopping at this endpoint; callers get logits via end_points.
      # Presumably intentional — confirm. Same for 'Predictions' below.
      return net, end_points

    predictions = tf.nn.softmax(logits, name='predictions')
    if add_and_check_endpoint('Predictions', predictions):
      return net, end_points
  return logits, end_points
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nasnet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets.nasnet import nasnet
slim = tf.contrib.slim
class NASNetTest(tf.test.TestCase):
  """Graph-construction tests for the three NASNet model builders.

  Each test builds a model, then checks static shapes, endpoint names, or
  variable placement; only the evaluation/unknown-batch tests actually run
  a session.
  """

  def testBuildLogitsCifarModel(self):
    """Cifar builder produces logits, aux logits and predictions."""
    batch_size = 5
    height, width = 32, 32
    num_classes = 10
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
      logits, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildLogitsMobileModel(self):
    """Mobile builder produces logits, aux logits and predictions."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
      logits, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildLogitsLargeModel(self):
    """Large builder produces logits, aux logits and predictions."""
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
      logits, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildPreLogitsCifarModel(self):
    """num_classes=None stops the Cifar model at pooled features."""
    batch_size = 5
    height, width = 32, 32
    num_classes = None
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
      net, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
    self.assertFalse('AuxLogits' in end_points)
    self.assertFalse('Predictions' in end_points)
    self.assertTrue(net.op.name.startswith('final_layer/Mean'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 768])

  def testBuildPreLogitsMobileModel(self):
    """num_classes=None stops the mobile model at pooled features."""
    batch_size = 5
    height, width = 224, 224
    num_classes = None
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
      net, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
    self.assertFalse('AuxLogits' in end_points)
    self.assertFalse('Predictions' in end_points)
    self.assertTrue(net.op.name.startswith('final_layer/Mean'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 1056])

  def testBuildPreLogitsLargeModel(self):
    """num_classes=None stops the large model at pooled features."""
    batch_size = 5
    height, width = 331, 331
    num_classes = None
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
      net, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    self.assertFalse('AuxLogits' in end_points)
    self.assertFalse('Predictions' in end_points)
    self.assertTrue(net.op.name.startswith('final_layer/Mean'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 4032])

  def testAllEndPointsShapesCifarModel(self):
    """Every Cifar endpoint has the expected static shape."""
    batch_size = 5
    height, width = 32, 32
    num_classes = 10
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
      _, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
    endpoints_shapes = {'Stem': [batch_size, 32, 32, 96],
                        'Cell_0': [batch_size, 32, 32, 192],
                        'Cell_1': [batch_size, 32, 32, 192],
                        'Cell_2': [batch_size, 32, 32, 192],
                        'Cell_3': [batch_size, 32, 32, 192],
                        'Cell_4': [batch_size, 32, 32, 192],
                        'Cell_5': [batch_size, 32, 32, 192],
                        'Cell_6': [batch_size, 16, 16, 384],
                        'Cell_7': [batch_size, 16, 16, 384],
                        'Cell_8': [batch_size, 16, 16, 384],
                        'Cell_9': [batch_size, 16, 16, 384],
                        'Cell_10': [batch_size, 16, 16, 384],
                        'Cell_11': [batch_size, 16, 16, 384],
                        'Cell_12': [batch_size, 8, 8, 768],
                        'Cell_13': [batch_size, 8, 8, 768],
                        'Cell_14': [batch_size, 8, 8, 768],
                        'Cell_15': [batch_size, 8, 8, 768],
                        'Cell_16': [batch_size, 8, 8, 768],
                        'Cell_17': [batch_size, 8, 8, 768],
                        'Reduction_Cell_0': [batch_size, 16, 16, 256],
                        'Reduction_Cell_1': [batch_size, 8, 8, 512],
                        'global_pool': [batch_size, 768],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      tf.logging.info('Endpoint name: {}'.format(endpoint_name))
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testAllEndPointsShapesMobileModel(self):
    """Every mobile endpoint has the expected static shape."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
      _, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
    endpoints_shapes = {'Stem': [batch_size, 28, 28, 88],
                        'Cell_0': [batch_size, 28, 28, 264],
                        'Cell_1': [batch_size, 28, 28, 264],
                        'Cell_2': [batch_size, 28, 28, 264],
                        'Cell_3': [batch_size, 28, 28, 264],
                        'Cell_4': [batch_size, 14, 14, 528],
                        'Cell_5': [batch_size, 14, 14, 528],
                        'Cell_6': [batch_size, 14, 14, 528],
                        'Cell_7': [batch_size, 14, 14, 528],
                        'Cell_8': [batch_size, 7, 7, 1056],
                        'Cell_9': [batch_size, 7, 7, 1056],
                        'Cell_10': [batch_size, 7, 7, 1056],
                        'Cell_11': [batch_size, 7, 7, 1056],
                        'Reduction_Cell_0': [batch_size, 14, 14, 352],
                        'Reduction_Cell_1': [batch_size, 7, 7, 704],
                        'global_pool': [batch_size, 1056],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      tf.logging.info('Endpoint name: {}'.format(endpoint_name))
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testAllEndPointsShapesLargeModel(self):
    """Every large-model endpoint has the expected static shape."""
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
      _, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    endpoints_shapes = {'Stem': [batch_size, 42, 42, 336],
                        'Cell_0': [batch_size, 42, 42, 1008],
                        'Cell_1': [batch_size, 42, 42, 1008],
                        'Cell_2': [batch_size, 42, 42, 1008],
                        'Cell_3': [batch_size, 42, 42, 1008],
                        'Cell_4': [batch_size, 42, 42, 1008],
                        'Cell_5': [batch_size, 42, 42, 1008],
                        'Cell_6': [batch_size, 21, 21, 2016],
                        'Cell_7': [batch_size, 21, 21, 2016],
                        'Cell_8': [batch_size, 21, 21, 2016],
                        'Cell_9': [batch_size, 21, 21, 2016],
                        'Cell_10': [batch_size, 21, 21, 2016],
                        'Cell_11': [batch_size, 21, 21, 2016],
                        'Cell_12': [batch_size, 11, 11, 4032],
                        'Cell_13': [batch_size, 11, 11, 4032],
                        'Cell_14': [batch_size, 11, 11, 4032],
                        'Cell_15': [batch_size, 11, 11, 4032],
                        'Cell_16': [batch_size, 11, 11, 4032],
                        'Cell_17': [batch_size, 11, 11, 4032],
                        'Reduction_Cell_0': [batch_size, 21, 21, 1344],
                        'Reduction_Cell_1': [batch_size, 11, 11, 2688],
                        'global_pool': [batch_size, 4032],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      tf.logging.info('Endpoint name: {}'.format(endpoint_name))
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testVariablesSetDeviceMobileModel(self):
    """Variables honor an enclosing tf.device placement."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    # Force all Variables to reside on the device.
    with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        nasnet.build_nasnet_mobile(inputs, num_classes)
    with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        nasnet.build_nasnet_mobile(inputs, num_classes)
    for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'):
      self.assertDeviceEqual(v.device, '/cpu:0')
    for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'):
      self.assertDeviceEqual(v.device, '/gpu:0')

  def testUnknownBatchSizeMobileModel(self):
    """A None batch dimension still yields [None, num_classes] logits."""
    batch_size = 1
    height, width = 224, 224
    num_classes = 1000
    with self.test_session() as sess:
      inputs = tf.placeholder(tf.float32, (None, height, width, 3))
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        logits, _ = nasnet.build_nasnet_mobile(inputs, num_classes)
      self.assertListEqual(logits.get_shape().as_list(),
                           [None, num_classes])
      images = tf.random_uniform((batch_size, height, width, 3))
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      # assertEqual (not the deprecated assertEquals alias).
      self.assertEqual(output.shape, (batch_size, num_classes))

  def testEvaluationMobileModel(self):
    """Inference-mode graph runs and emits one prediction per example."""
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    with self.test_session() as sess:
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        logits, _ = nasnet.build_nasnet_mobile(eval_inputs,
                                               num_classes,
                                               is_training=False)
      predictions = tf.argmax(logits, 1)
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEqual(output.shape, (batch_size,))
# Script entry point: run the test cases above via the TF test runner.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A custom module for some common operations used by NASNet.
Functions exposed in this file:
- calc_reduction_layers
- get_channel_index
- get_channel_dim
- global_avg_pool
- factorized_reduction
- drop_path
Classes exposed in this file:
- NasNetABaseCell
- NasNetANormalCell
- NasNetAReductionCell
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import google3
import tensorflow as tf
# Short aliases for the contrib namespaces used throughout this module.
arg_scope = tf.contrib.framework.arg_scope
slim = tf.contrib.slim

DATA_FORMAT_NCHW = 'NCHW'
DATA_FORMAT_NHWC = 'NHWC'
# Sentinel default for data_format arguments: callers must provide a real
# value (directly or via arg_scope); the asserts below enforce this.
INVALID = 'null'
def calc_reduction_layers(num_cells, num_reduction_layers):
  """Figure out what layers should have reductions.

  The reductions are spread evenly: reduction k (1-based) lands at cell
  index floor(k / (num_reduction_layers + 1) * num_cells).
  """
  return [
      int(float(pool_num) / (num_reduction_layers + 1) * num_cells)
      for pool_num in range(1, num_reduction_layers + 1)
  ]
@tf.contrib.framework.add_arg_scope
def get_channel_index(data_format=INVALID):
  """Returns the axis that holds channels for the given data format."""
  assert data_format != INVALID
  return 3 if data_format == 'NHWC' else 1
@tf.contrib.framework.add_arg_scope
def get_channel_dim(shape, data_format=INVALID):
  """Returns the channel count of a rank-4 shape for the given data format."""
  assert data_format != INVALID
  assert len(shape) == 4
  if data_format == 'NHWC':
    return int(shape[3])
  if data_format == 'NCHW':
    return int(shape[1])
  raise ValueError('Not a valid data_format', data_format)
@tf.contrib.framework.add_arg_scope
def global_avg_pool(x, data_format=INVALID):
  """Average pool away the height and width spatial dimensions of x."""
  assert data_format != INVALID
  assert data_format in ['NHWC', 'NCHW']
  assert x.shape.ndims == 4
  spatial_axes = [1, 2] if data_format == 'NHWC' else [2, 3]
  return tf.reduce_mean(x, spatial_axes)
@tf.contrib.framework.add_arg_scope
def factorized_reduction(net, output_filters, stride, data_format=INVALID):
  """Reduces the shape of net without information loss due to striding."""
  assert output_filters % 2 == 0, (
      'Need even number of filters when using this factorized reduction.')
  assert data_format != INVALID
  if stride == 1:
    # No spatial reduction needed; just project to the target filter count.
    net = slim.conv2d(net, output_filters, 1, scope='path_conv')
    net = slim.batch_norm(net, scope='path_bn')
    return net
  if data_format == 'NHWC':
    stride_spec = [1, stride, stride, 1]
  else:
    stride_spec = [1, 1, stride, stride]

  # Skip path 1: subsample with a strided 1x1 avg pool (identity values),
  # then project to half the output filters.
  path1 = tf.nn.avg_pool(
      net, [1, 1, 1, 1], stride_spec, 'VALID', data_format=data_format)
  path1 = slim.conv2d(path1, int(output_filters / 2), 1, scope='path1_conv')

  # Skip path 2
  # First pad with 0's on the right and bottom, then shift the filter to
  # include those 0's that were added. This samples the pixels path 1
  # skipped, so between them the two paths cover the input.
  if data_format == 'NHWC':
    pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]]
    path2 = tf.pad(net, pad_arr)[:, 1:, 1:, :]
    concat_axis = 3
  else:
    pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]]
    path2 = tf.pad(net, pad_arr)[:, :, 1:, 1:]
    concat_axis = 1

  path2 = tf.nn.avg_pool(
      path2, [1, 1, 1, 1], stride_spec, 'VALID', data_format=data_format)
  path2 = slim.conv2d(path2, int(output_filters / 2), 1, scope='path2_conv')

  # Concat and apply BN
  final_path = tf.concat(values=[path1, path2], axis=concat_axis)
  final_path = slim.batch_norm(final_path, scope='final_path_bn')
  return final_path
@tf.contrib.framework.add_arg_scope
def drop_path(net, keep_prob, is_training=True):
  """Drops out a whole example hiddenstate with the specified probability."""
  if not is_training:
    return net
  # One Bernoulli draw per example: floor(keep_prob + U[0,1)) is 1 with
  # probability keep_prob, 0 otherwise.
  batch_size = tf.shape(net)[0]
  noise_shape = [batch_size, 1, 1, 1]
  random_tensor = keep_prob + tf.random_uniform(noise_shape, dtype=tf.float32)
  binary_tensor = tf.floor(random_tensor)
  # Rescale survivors so the expected value is unchanged.
  return tf.div(net, keep_prob) * binary_tensor
def _operation_to_filter_shape(operation):
splitted_operation = operation.split('x')
filter_shape = int(splitted_operation[0][-1])
assert filter_shape == int(
splitted_operation[1][0]), 'Rectangular filters not supported.'
return filter_shape
def _operation_to_num_layers(operation):
splitted_operation = operation.split('_')
if 'x' in splitted_operation[-1]:
return 1
return int(splitted_operation[-1])
def _operation_to_info(operation):
  """Takes in operation name and returns meta information.

  An example would be 'separable_3x3_4' -> (4, 3).

  Args:
    operation: String that corresponds to convolution operation.

  Returns:
    Tuple of (num layers, filter shape).
  """
  num_layers = _operation_to_num_layers(operation)
  filter_shape = _operation_to_filter_shape(operation)
  return num_layers, filter_shape
def _stacked_separable_conv(net, stride, operation, filter_size):
  """Parses an operation name and applies the matching relu/sep-conv/bn stack.

  Only the first separable conv in the stack carries the requested stride;
  the rest run at stride 1, so spatial size is reduced at most once.
  """
  num_layers, kernel_size = _operation_to_info(operation)
  for layer_num in range(num_layers - 1):
    net = tf.nn.relu(net)
    net = slim.separable_conv2d(
        net,
        filter_size,
        kernel_size,
        depth_multiplier=1,
        scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1),
        stride=stride)
    net = slim.batch_norm(
        net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1))
    stride = 1
  # Final layer of the stack (scope index num_layers).
  net = tf.nn.relu(net)
  net = slim.separable_conv2d(
      net,
      filter_size,
      kernel_size,
      depth_multiplier=1,
      scope='separable_{0}x{0}_{1}'.format(kernel_size, num_layers),
      stride=stride)
  net = slim.batch_norm(
      net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, num_layers))
  return net
def _operation_to_pooling_type(operation):
"""Takes in the operation string and returns the pooling type."""
splitted_operation = operation.split('_')
return splitted_operation[0]
def _operation_to_pooling_shape(operation):
"""Takes in the operation string and returns the pooling kernel shape."""
splitted_operation = operation.split('_')
shape = splitted_operation[-1]
assert 'x' in shape
filter_height, filter_width = shape.split('x')
assert filter_height == filter_width
return int(filter_height)
def _operation_to_pooling_info(operation):
  """Parses the pooling operation string to return its type and shape."""
  return (_operation_to_pooling_type(operation),
          _operation_to_pooling_shape(operation))
def _pooling(net, stride, operation):
  """Parses operation and performs the correct pooling operation on net."""
  pooling_type, pooling_shape = _operation_to_pooling_info(operation)
  if pooling_type == 'avg':
    return slim.avg_pool2d(net, pooling_shape, stride=stride, padding='SAME')
  if pooling_type == 'max':
    return slim.max_pool2d(net, pooling_shape, stride=stride, padding='SAME')
  raise NotImplementedError('Unimplemented pooling type: ', pooling_type)
class NasNetABaseCell(object):
"""NASNet Cell class that is used as a 'layer' in image architectures.
Args:
num_conv_filters: The number of filters for each convolution operation.
operations: List of operations that are performed in the NASNet Cell in
order.
used_hiddenstates: Binary array that signals if the hiddenstate was used
within the cell. This is used to determine what outputs of the cell
should be concatenated together.
hiddenstate_indices: Determines what hiddenstates should be combined
together with the specified operations to create the NASNet cell.
"""
def __init__(self, num_conv_filters, operations, used_hiddenstates,
hiddenstate_indices, drop_path_keep_prob, total_num_cells,
total_training_steps):
self._num_conv_filters = num_conv_filters
self._operations = operations
self._used_hiddenstates = used_hiddenstates
self._hiddenstate_indices = hiddenstate_indices
self._drop_path_keep_prob = drop_path_keep_prob
self._total_num_cells = total_num_cells
self._total_training_steps = total_training_steps
def _reduce_prev_layer(self, prev_layer, curr_layer):
"""Matches dimension of prev_layer to the curr_layer."""
# Set the prev layer to the current layer if it is none
if prev_layer is None:
return curr_layer
curr_num_filters = self._filter_size
prev_num_filters = get_channel_dim(prev_layer.shape)
curr_filter_shape = int(curr_layer.shape[2])
prev_filter_shape = int(prev_layer.shape[2])
if curr_filter_shape != prev_filter_shape:
prev_layer = tf.nn.relu(prev_layer)
prev_layer = factorized_reduction(
prev_layer, curr_num_filters, stride=2)
elif curr_num_filters != prev_num_filters:
prev_layer = tf.nn.relu(prev_layer)
prev_layer = slim.conv2d(
prev_layer, curr_num_filters, 1, scope='prev_1x1')
prev_layer = slim.batch_norm(prev_layer, scope='prev_bn')
return prev_layer
def _cell_base(self, net, prev_layer):
"""Runs the beginning of the conv cell before the predicted ops are run."""
num_filters = self._filter_size
# Check to be sure prev layer stuff is setup correctly
prev_layer = self._reduce_prev_layer(prev_layer, net)
net = tf.nn.relu(net)
net = slim.conv2d(net, num_filters, 1, scope='1x1')
net = slim.batch_norm(net, scope='beginning_bn')
split_axis = get_channel_index()
net = tf.split(
axis=split_axis, num_or_size_splits=1, value=net)
for split in net:
assert int(split.shape[split_axis] == int(self._num_conv_filters *
self._filter_scaling))
net.append(prev_layer)
return net
def __call__(self, net, scope=None, filter_scaling=1, stride=1,
prev_layer=None, cell_num=-1):
"""Runs the conv cell."""
self._cell_num = cell_num
self._filter_scaling = filter_scaling
self._filter_size = int(self._num_conv_filters * filter_scaling)
i = 0
with tf.variable_scope(scope):
net = self._cell_base(net, prev_layer)
for iteration in range(5):
with tf.variable_scope('comb_iter_{}'.format(iteration)):
left_hiddenstate_idx, right_hiddenstate_idx = (
self._hiddenstate_indices[i],
self._hiddenstate_indices[i + 1])
original_input_left = left_hiddenstate_idx < 2
original_input_right = right_hiddenstate_idx < 2
h1 = net[left_hiddenstate_idx]
h2 = net[right_hiddenstate_idx]
operation_left = self._operations[i]
operation_right = self._operations[i+1]
i += 2
# Apply conv operations
with tf.variable_scope('left'):
h1 = self._apply_conv_operation(h1, operation_left,
stride, original_input_left)
with tf.variable_scope('right'):
h2 = self._apply_conv_operation(h2, operation_right,
stride, original_input_right)
# Combine hidden states using 'add'.
with tf.variable_scope('combine'):
h = h1 + h2
# Add hiddenstate to the list of hiddenstates we can choose from
net.append(h)
with tf.variable_scope('cell_output'):
net = self._combine_unused_states(net)
return net
def _apply_conv_operation(self, net, operation,
stride, is_from_original_input):
"""Applies the predicted conv operation to net."""
# Dont stride if this is not one of the original hiddenstates
if stride > 1 and not is_from_original_input:
stride = 1
input_filters = get_channel_dim(net.shape)
filter_size = self._filter_size
if 'separable' in operation:
net = _stacked_separable_conv(net, stride, operation, filter_size)
elif operation in ['none']:
# Check if a stride is needed, then use a strided 1x1 here
if stride > 1 or (input_filters != filter_size):
net = tf.nn.relu(net)
net = slim.conv2d(net, filter_size, 1, stride=stride, scope='1x1')
net = slim.batch_norm(net, scope='bn_1')
elif 'pool' in operation:
net = _pooling(net, stride, operation)
if input_filters != filter_size:
net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1')
net = slim.batch_norm(net, scope='bn_1')
else:
raise ValueError('Unimplemented operation', operation)
if operation != 'none':
net = self._apply_drop_path(net)
return net
def _combine_unused_states(self, net):
"""Concatenate the unused hidden states of the cell."""
used_hiddenstates = self._used_hiddenstates
final_height = int(net[-1].shape[2])
final_num_filters = get_channel_dim(net[-1].shape)
assert len(used_hiddenstates) == len(net)
for idx, used_h in enumerate(used_hiddenstates):
curr_height = int(net[idx].shape[2])
curr_num_filters = get_channel_dim(net[idx].shape)
# Determine if a reduction should be applied to make the number of
# filters match.
should_reduce = final_num_filters != curr_num_filters
should_reduce = (final_height != curr_height) or should_reduce
should_reduce = should_reduce and not used_h
if should_reduce:
stride = 2 if final_height != curr_height else 1
with tf.variable_scope('reduction_{}'.format(idx)):
net[idx] = factorized_reduction(
net[idx], final_num_filters, stride)
states_to_combine = (
[h for h, is_used in zip(net, used_hiddenstates) if not is_used])
# Return the concat of all the states
concat_axis = get_channel_index()
net = tf.concat(values=states_to_combine, axis=concat_axis)
return net
def _apply_drop_path(self, net):
  """Apply drop_path regularization to net.

  Scales the keep probability by both cell depth (deeper cells drop more)
  and training progress (dropping ramps up linearly over training), then
  applies the module-level `drop_path` op. No-op when
  `self._drop_path_keep_prob` is 1.0.
  """
  drop_path_keep_prob = self._drop_path_keep_prob
  if drop_path_keep_prob < 1.0:
    # Scale keep prob by layer number
    assert self._cell_num != -1
    # The added 2 is for the reduction cells
    num_cells = self._total_num_cells
    layer_ratio = (self._cell_num + 1)/float(num_cells)
    # Summaries are pinned to CPU to keep them off the accelerator.
    with tf.device('/cpu:0'):
      tf.summary.scalar('layer_ratio', layer_ratio)
    drop_path_keep_prob = 1 - layer_ratio * (1 - drop_path_keep_prob)
    # Decrease the keep probability over time
    current_step = tf.cast(tf.contrib.framework.get_or_create_global_step(),
                           tf.float32)
    drop_path_burn_in_steps = self._total_training_steps
    current_ratio = (
        current_step / drop_path_burn_in_steps)
    # Cap the ramp at 1 so keep_prob stops shrinking after burn-in.
    current_ratio = tf.minimum(1.0, current_ratio)
    with tf.device('/cpu:0'):
      tf.summary.scalar('current_ratio', current_ratio)
    drop_path_keep_prob = (
        1 - current_ratio * (1 - drop_path_keep_prob))
    with tf.device('/cpu:0'):
      tf.summary.scalar('drop_path_keep_prob', drop_path_keep_prob)
    net = drop_path(net, drop_path_keep_prob)
  return net
class NasNetANormalCell(NasNetABaseCell):
  """NASNetA Normal Cell."""

  def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells,
               total_training_steps):
    # Operations are consumed two at a time, one pair per combination in the
    # searched Normal Cell architecture.
    operations = [
        'separable_5x5_2', 'separable_3x3_2',
        'separable_5x5_2', 'separable_3x3_2',
        'avg_pool_3x3', 'none',
        'avg_pool_3x3', 'avg_pool_3x3',
        'separable_3x3_2', 'none',
    ]
    used_hiddenstates = [1, 0, 0, 0, 0, 0, 0]
    hiddenstate_indices = [0, 1, 1, 1, 0, 1, 1, 1, 0, 0]
    super(NasNetANormalCell, self).__init__(
        num_conv_filters, operations, used_hiddenstates,
        hiddenstate_indices, drop_path_keep_prob, total_num_cells,
        total_training_steps)
class NasNetAReductionCell(NasNetABaseCell):
  """NASNetA Reduction Cell."""

  def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells,
               total_training_steps):
    # Operations are consumed two at a time, one pair per combination in the
    # searched Reduction Cell architecture.
    operations = [
        'separable_5x5_2', 'separable_7x7_2',
        'max_pool_3x3', 'separable_7x7_2',
        'avg_pool_3x3', 'separable_5x5_2',
        'none', 'avg_pool_3x3',
        'separable_3x3_2', 'max_pool_3x3',
    ]
    used_hiddenstates = [1, 1, 1, 0, 0, 0, 0]
    hiddenstate_indices = [0, 1, 0, 1, 0, 1, 3, 2, 2, 0]
    super(NasNetAReductionCell, self).__init__(
        num_conv_filters, operations, used_hiddenstates,
        hiddenstate_indices, drop_path_keep_prob, total_num_cells,
        total_training_steps)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.nasnet.nasnet_utils."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets.nasnet import nasnet_utils
class NasnetUtilsTest(tf.test.TestCase):
  """Unit tests for the helper functions in nets.nasnet.nasnet_utils."""

  def testCalcReductionLayers(self):
    # Two evenly spaced reductions over 18 cells land after cells 6 and 12.
    num_cells = 18
    num_reduction_layers = 2
    reduction_layers = nasnet_utils.calc_reduction_layers(
        num_cells, num_reduction_layers)
    self.assertEqual(len(reduction_layers), 2)
    self.assertEqual(reduction_layers[0], 6)
    self.assertEqual(reduction_layers[1], 12)

  def testGetChannelIndex(self):
    data_formats = ['NHWC', 'NCHW']
    for data_format in data_formats:
      index = nasnet_utils.get_channel_index(data_format)
      # Channels are the last axis in NHWC, the second in NCHW.
      correct_index = 3 if data_format == 'NHWC' else 1
      self.assertEqual(index, correct_index)

  def testGetChannelDim(self):
    data_formats = ['NHWC', 'NCHW']
    shape = [10, 20, 30, 40]
    for data_format in data_formats:
      dim = nasnet_utils.get_channel_dim(shape, data_format)
      correct_dim = shape[3] if data_format == 'NHWC' else shape[1]
      self.assertEqual(dim, correct_dim)

  def testGlobalAvgPool(self):
    # Pooling collapses the spatial axes, leaving [batch, channels].
    data_formats = ['NHWC', 'NCHW']
    inputs = tf.placeholder(tf.float32, (5, 10, 20, 10))
    for data_format in data_formats:
      output = nasnet_utils.global_avg_pool(
          inputs, data_format)
      self.assertEqual(output.shape, [5, 10])
# Run the test cases above when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -30,6 +30,7 @@ from nets import overfeat ...@@ -30,6 +30,7 @@ from nets import overfeat
from nets import resnet_v1 from nets import resnet_v1
from nets import resnet_v2 from nets import resnet_v2
from nets import vgg from nets import vgg
from nets.nasnet import nasnet
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -57,6 +58,9 @@ networks_map = {'alexnet_v2': alexnet.alexnet_v2, ...@@ -57,6 +58,9 @@ networks_map = {'alexnet_v2': alexnet.alexnet_v2,
'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_075, 'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_075,
'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_050, 'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_050,
'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_025, 'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_025,
'nasnet_cifar': nasnet.build_nasnet_cifar,
'nasnet_mobile': nasnet.build_nasnet_mobile,
'nasnet_large': nasnet.build_nasnet_large,
} }
arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope,
...@@ -84,6 +88,9 @@ arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, ...@@ -84,6 +88,9 @@ arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope,
'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_arg_scope, 'mobilenet_v1_075': mobilenet_v1.mobilenet_v1_arg_scope,
'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_arg_scope, 'mobilenet_v1_050': mobilenet_v1.mobilenet_v1_arg_scope,
'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_arg_scope, 'mobilenet_v1_025': mobilenet_v1.mobilenet_v1_arg_scope,
'nasnet_cifar': nasnet.nasnet_cifar_arg_scope,
'nasnet_mobile': nasnet.nasnet_mobile_arg_scope,
'nasnet_large': nasnet.nasnet_large_arg_scope,
} }
...@@ -92,7 +99,8 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): ...@@ -92,7 +99,8 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
Args: Args:
name: The name of the network. name: The name of the network.
num_classes: The number of classes to use for classification. num_classes: The number of classes to use for classification. If 0 or None,
the logits layer is omitted and its input features are returned instead.
weight_decay: The l2 coefficient for the model weights. weight_decay: The l2 coefficient for the model weights.
is_training: `True` if the model is being used for training and `False` is_training: `True` if the model is being used for training and `False`
otherwise. otherwise.
...@@ -100,7 +108,20 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): ...@@ -100,7 +108,20 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
Returns: Returns:
network_fn: A function that applies the model to a batch of images. It has network_fn: A function that applies the model to a batch of images. It has
the following signature: the following signature:
logits, end_points = network_fn(images) net, end_points = network_fn(images)
The `images` input is a tensor of shape [batch_size, height, width, 3]
with height = width = network_fn.default_image_size. (The permissibility
and treatment of other sizes depends on the network_fn.)
The returned `end_points` are a dictionary of intermediate activations.
The returned `net` is the topmost layer, depending on `num_classes`:
If `num_classes` was a non-zero integer, `net` is a logits tensor
of shape [batch_size, num_classes].
If `num_classes` was 0 or `None`, `net` is a tensor with the input
to the logits layer of shape [batch_size, 1, 1, num_features] or
[batch_size, num_features]. Dropout has not been applied to this
(even if the network's original classification does); it remains for
the caller to do this or not.
Raises: Raises:
ValueError: If network `name` is not recognized. ValueError: If network `name` is not recognized.
""" """
...@@ -108,10 +129,10 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): ...@@ -108,10 +129,10 @@ def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False):
raise ValueError('Name of network unknown %s' % name) raise ValueError('Name of network unknown %s' % name)
func = networks_map[name] func = networks_map[name]
@functools.wraps(func) @functools.wraps(func)
def network_fn(images): def network_fn(images, **kwargs):
arg_scope = arg_scopes_map[name](weight_decay=weight_decay) arg_scope = arg_scopes_map[name](weight_decay=weight_decay)
with slim.arg_scope(arg_scope): with slim.arg_scope(arg_scope):
return func(images, num_classes, is_training=is_training) return func(images, num_classes, is_training=is_training, **kwargs)
if hasattr(func, 'default_image_size'): if hasattr(func, 'default_image_size'):
network_fn.default_image_size = func.default_image_size network_fn.default_image_size = func.default_image_size
......
...@@ -52,7 +52,8 @@ def overfeat(inputs, ...@@ -52,7 +52,8 @@ def overfeat(inputs,
is_training=True, is_training=True,
dropout_keep_prob=0.5, dropout_keep_prob=0.5,
spatial_squeeze=True, spatial_squeeze=True,
scope='overfeat'): scope='overfeat',
global_pool=False):
"""Contains the model definition for the OverFeat network. """Contains the model definition for the OverFeat network.
The definition for the network was obtained from: The definition for the network was obtained from:
...@@ -68,20 +69,26 @@ def overfeat(inputs, ...@@ -68,20 +69,26 @@ def overfeat(inputs,
Args: Args:
inputs: a tensor of size [batch_size, height, width, channels]. inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes. num_classes: number of predicted classes. If 0 or None, the logits layer is
omitted and the input features to the logits layer are returned instead.
is_training: whether or not the model is being trained. is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout dropout_keep_prob: the probability that activations are kept in the dropout
layers during training. layers during training.
spatial_squeeze: whether or not should squeeze the spatial dimensions of the spatial_squeeze: whether or not should squeeze the spatial dimensions of the
outputs. Useful to remove unnecessary dimensions for classification. outputs. Useful to remove unnecessary dimensions for classification.
scope: Optional scope for the variables. scope: Optional scope for the variables.
global_pool: Optional boolean flag. If True, the input to the classification
layer is avgpooled to size 1x1, for any input size. (This is not part
of the original OverFeat.)
Returns: Returns:
the last op containing the log predictions and end_points dict. net: the output of the logits layer (if num_classes is a non-zero integer),
or the non-dropped-out input to the logits layer (if num_classes is 0 or
None).
end_points: a dict of tensors with intermediate activations.
""" """
with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: with tf.variable_scope(scope, 'overfeat', [inputs]) as sc:
end_points_collection = sc.name + '_end_points' end_points_collection = sc.original_name_scope + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d # Collect outputs for conv2d, fully_connected and max_pool2d
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=end_points_collection): outputs_collections=end_points_collection):
...@@ -94,25 +101,31 @@ def overfeat(inputs, ...@@ -94,25 +101,31 @@ def overfeat(inputs,
net = slim.conv2d(net, 1024, [3, 3], scope='conv4') net = slim.conv2d(net, 1024, [3, 3], scope='conv4')
net = slim.conv2d(net, 1024, [3, 3], scope='conv5') net = slim.conv2d(net, 1024, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [2, 2], scope='pool5') net = slim.max_pool2d(net, [2, 2], scope='pool5')
# Use conv2d instead of fully_connected layers.
with slim.arg_scope([slim.conv2d], with slim.arg_scope([slim.conv2d],
weights_initializer=trunc_normal(0.005), weights_initializer=trunc_normal(0.005),
biases_initializer=tf.constant_initializer(0.1)): biases_initializer=tf.constant_initializer(0.1)):
# Use conv2d instead of fully_connected layers.
net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6') scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7') net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training, # Convert end_points_collection into a end_point dict.
scope='dropout7') end_points = slim.utils.convert_collection_to_dict(
net = slim.conv2d(net, num_classes, [1, 1], end_points_collection)
activation_fn=None, if global_pool:
normalizer_fn=None, net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
biases_initializer=tf.zeros_initializer(), end_points['global_pool'] = net
scope='fc8') if num_classes:
# Convert end_points_collection into a end_point dict. net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
end_points = slim.utils.convert_collection_to_dict(end_points_collection) scope='dropout7')
if spatial_squeeze: net = slim.conv2d(net, num_classes, [1, 1],
net = tf.squeeze(net, [1, 2], name='fc8/squeezed') activation_fn=None,
end_points[sc.name + '/fc8'] = net normalizer_fn=None,
biases_initializer=tf.zeros_initializer(),
scope='fc8')
if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points return net, end_points
overfeat.default_image_size = 231 overfeat.default_image_size = 231
...@@ -48,6 +48,18 @@ class OverFeatTest(tf.test.TestCase): ...@@ -48,6 +48,18 @@ class OverFeatTest(tf.test.TestCase):
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 2, 2, num_classes]) [batch_size, 2, 2, num_classes])
def testGlobalPool(self):
batch_size = 1
height, width = 281, 281
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False,
global_pool=True)
self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 1, 1, num_classes])
def testEndPoints(self): def testEndPoints(self):
batch_size = 5 batch_size = 5
height, width = 231, 231 height, width = 231, 231
...@@ -69,6 +81,27 @@ class OverFeatTest(tf.test.TestCase): ...@@ -69,6 +81,27 @@ class OverFeatTest(tf.test.TestCase):
] ]
self.assertSetEqual(set(end_points.keys()), set(expected_names)) self.assertSetEqual(set(end_points.keys()), set(expected_names))
def testNoClasses(self):
batch_size = 5
height, width = 231, 231
num_classes = None
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
net, end_points = overfeat.overfeat(inputs, num_classes)
expected_names = ['overfeat/conv1',
'overfeat/pool1',
'overfeat/conv2',
'overfeat/pool2',
'overfeat/conv3',
'overfeat/conv4',
'overfeat/conv5',
'overfeat/pool5',
'overfeat/fc6',
'overfeat/fc7'
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
self.assertTrue(net.op.name.startswith('overfeat/fc7'))
def testModelVariables(self): def testModelVariables(self):
batch_size = 5 batch_size = 5
height, width = 231, 231 height, width = 231, 231
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Implementation of the Image-to-Image Translation model.
This network represents a port of the following work:
Image-to-Image Translation with Conditional Adversarial Networks
Phillip Isola, Jun-Yan Zhu, Tinghui Zhou and Alexei A. Efros
Arxiv, 2017
https://phillipi.github.io/pix2pix/
A reference implementation written in Lua can be found at:
https://github.com/phillipi/pix2pix/blob/master/models.lua
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import functools
import tensorflow as tf
layers = tf.contrib.layers
def pix2pix_arg_scope():
  """Returns a default argument scope for isola_net.

  Returns:
    An arg scope.
  """
  # These parameters come from the online port, which don't necessarily match
  # those in the paper.
  # TODO(nsilberman): confirm these values with Philip.
  instance_norm_params = dict(center=True, scale=True, epsilon=0.00001)

  scoped_ops = [layers.conv2d, layers.conv2d_transpose]
  with tf.contrib.framework.arg_scope(
      scoped_ops,
      normalizer_fn=layers.instance_norm,
      normalizer_params=instance_norm_params,
      weights_initializer=tf.random_normal_initializer(0, 0.02)) as sc:
    return sc
def upsample(net, num_outputs, kernel_size, method='nn_upsample_conv'):
  """Upsamples the given inputs.

  Args:
    net: A `Tensor` of size [batch_size, height, width, filters].
    num_outputs: The number of output filters.
    kernel_size: A list of 2 scalars or a 1x2 `Tensor` indicating the scale,
      relative to the inputs, of the output dimensions. For example, if kernel
      size is [2, 3], then the output height and width will be twice and three
      times the input size.
    method: The upsampling method: 'nn_upsample_conv' or 'conv2d_transpose'.

  Returns:
    A `Tensor` which was upsampled using the specified method.

  Raises:
    ValueError: if `method` is not recognized.
  """
  net_shape = tf.shape(net)
  height = net_shape[1]
  width = net_shape[2]
  if method == 'nn_upsample_conv':
    # Nearest-neighbor resize followed by a plain convolution.
    net = tf.image.resize_nearest_neighbor(
        net, [kernel_size[0] * height, kernel_size[1] * width])
    net = layers.conv2d(net, num_outputs, [4, 4], activation_fn=None)
  elif method == 'conv2d_transpose':
    net = layers.conv2d_transpose(
        net, num_outputs, [4, 4], stride=kernel_size, activation_fn=None)
  else:
    # Bug fix: the original raised ValueError('Unknown method: [%s]', method),
    # which never interpolates the placeholder; format the message instead.
    raise ValueError('Unknown method: [%s]' % method)
  return net
class Block(
    collections.namedtuple('Block', ['num_filters', 'decoder_keep_prob'])):
  """Represents a single block of encoder and decoder processing.

  The Image-to-Image translation paper works a bit differently than the
  original U-Net model. In particular, each block represents a single
  operation in the encoder which is concatenated with the corresponding
  decoder representation. A dropout layer follows the concatenation and
  convolution of the concatenated features.
  """
def _default_generator_blocks():
  """Returns the default generator block definitions.

  Returns:
    A list of generator blocks.
  """
  # Encoder filter counts double up to 512; decoder dropout (keep_prob 0.5)
  # is applied only on the three outermost blocks.
  specs = [
      (64, 0.5),
      (128, 0.5),
      (256, 0.5),
      (512, 0),
      (512, 0),
      (512, 0),
      (512, 0),
  ]
  return [Block(num_filters, keep_prob) for num_filters, keep_prob in specs]
def pix2pix_generator(net,
                      num_outputs,
                      blocks=None,
                      upsample_method='nn_upsample_conv',
                      is_training=False):  # pylint: disable=unused-argument
  """Defines the network architecture.

  Builds a U-Net-style encoder/decoder: each encoder activation is
  concatenated with the matching decoder activation (skip connections),
  followed by a final 4x4 conv producing per-pixel logits.

  Args:
    net: A `Tensor` of size [batch, height, width, channels]. Note that the
      generator currently requires square inputs (e.g. height=width).
    num_outputs: The number of (per-pixel) outputs.
    blocks: A list of generator blocks or `None` to use the default generator
      definition.
    upsample_method: The method of upsampling images, one of 'nn_upsample_conv'
      or 'conv2d_transpose'
    is_training: Whether or not we're in training or testing mode.

  Returns:
    A `Tensor` representing the model output and a dictionary of model end
    points.

  Raises:
    ValueError: if the input heights do not match their widths.
  """
  end_points = {}

  blocks = blocks or _default_generator_blocks()

  input_size = net.get_shape().as_list()
  height, width = input_size[1], input_size[2]
  if height != width:
    raise ValueError('The input height must match the input width.')

  # Reuse the input shape for the final reshape, but with `num_outputs`
  # channels instead of the input channels.
  input_size[3] = num_outputs

  upsample_fn = functools.partial(upsample, method=upsample_method)
  encoder_activations = []

  ###########
  # Encoder #
  ###########
  with tf.variable_scope('encoder'):
    with tf.contrib.framework.arg_scope(
        [layers.conv2d],
        kernel_size=[4, 4],
        stride=2,
        activation_fn=tf.nn.leaky_relu):

      for block_id, block in enumerate(blocks):
        # No normalizer for the first encoder layers as per 'Image-to-Image',
        # Section 5.1.1
        if block_id == 0:
          # First layer doesn't use normalizer_fn
          net = layers.conv2d(net, block.num_filters, normalizer_fn=None)
        elif block_id < len(blocks) - 1:
          net = layers.conv2d(net, block.num_filters)
        else:
          # Last layer doesn't use activation_fn nor normalizer_fn
          net = layers.conv2d(
              net, block.num_filters, activation_fn=None, normalizer_fn=None)

        # Saved for the decoder's skip connections.
        encoder_activations.append(net)
        end_points['encoder%d' % block_id] = net

  ###########
  # Decoder #
  ###########
  reversed_blocks = list(blocks)
  reversed_blocks.reverse()

  with tf.variable_scope('decoder'):
    # Dropout is used at both train and test time as per 'Image-to-Image',
    # Section 2.1 (last paragraph).
    with tf.contrib.framework.arg_scope([layers.dropout], is_training=True):

      for block_id, block in enumerate(reversed_blocks):
        if block_id > 0:
          # Skip connection: concatenate the mirrored encoder activation
          # along the channel axis (the innermost activation is `net` itself).
          net = tf.concat([net, encoder_activations[-block_id - 1]], axis=3)

        # The Relu comes BEFORE the upsample op:
        net = tf.nn.relu(net)
        net = upsample_fn(net, block.num_filters, [2, 2])
        if block.decoder_keep_prob > 0:
          net = layers.dropout(net, keep_prob=block.decoder_keep_prob)
        end_points['decoder%d' % block_id] = net

  with tf.variable_scope('output'):
    logits = layers.conv2d(net, num_outputs, [4, 4], activation_fn=None)
    logits = tf.reshape(logits, input_size)

    end_points['logits'] = logits
    # Outputs are squashed to [-1, 1] via tanh.
    end_points['predictions'] = tf.tanh(logits)

  return logits, end_points
def pix2pix_discriminator(net, num_filters, padding=2, is_training=False):
  """Creates the Image2Image Translation Discriminator.

  A PatchGAN-style stack: stride-2 4x4 convolutions with reflection padding,
  a stride-1 top layer, and a final 1-channel stride-1 conv producing
  per-patch logits.

  Args:
    net: A `Tensor` of size [batch_size, height, width, channels] representing
      the input.
    num_filters: A list of the filters in the discriminator. The length of the
      list determines the number of layers in the discriminator.
    padding: Amount of reflection padding applied before each convolution.
    is_training: Whether or not the model is training or testing.

  Returns:
    A logits `Tensor` of size [batch_size, N, N, 1] where N is the number of
    'patches' we're attempting to discriminate and a dictionary of model end
    points.
  """
  del is_training
  end_points = {}
  num_layers = len(num_filters)

  def padded(net, scope):
    # Reflection-pad the spatial dims only; no-op when padding is 0.
    # Negative padding raises ValueError and non-int padding raises TypeError
    # inside tf.pad / tf.constant.
    if padding:
      with tf.variable_scope(scope):
        spatial_pad = tf.constant(
            [[0, 0], [padding, padding], [padding, padding], [0, 0]],
            dtype=tf.int32)
        return tf.pad(net, spatial_pad, 'REFLECT')
    else:
      return net

  with tf.contrib.framework.arg_scope(
      [layers.conv2d],
      kernel_size=[4, 4],
      stride=2,
      padding='valid',
      activation_fn=tf.nn.leaky_relu):

    # No normalization on the input layer.
    net = layers.conv2d(
        padded(net, 'conv0'), num_filters[0], normalizer_fn=None, scope='conv0')

    end_points['conv0'] = net

    for i in range(1, num_layers - 1):
      net = layers.conv2d(
          padded(net, 'conv%d' % i), num_filters[i], scope='conv%d' % i)
      end_points['conv%d' % i] = net

    # Stride 1 on the last layer.
    net = layers.conv2d(
        padded(net, 'conv%d' % (num_layers - 1)),
        num_filters[-1],
        stride=1,
        scope='conv%d' % (num_layers - 1))
    end_points['conv%d' % (num_layers - 1)] = net

    # 1-dim logits, stride 1, no activation, no normalization.
    logits = layers.conv2d(
        padded(net, 'conv%d' % num_layers),
        1,
        stride=1,
        activation_fn=None,
        normalizer_fn=None,
        scope='conv%d' % num_layers)
    end_points['logits'] = logits
    end_points['predictions'] = tf.sigmoid(logits)
  return logits, end_points
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Tests for pix2pix."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets import pix2pix
class GeneratorTest(tf.test.TestCase):
  """Tests for pix2pix.pix2pix_generator."""

  def test_nonsquare_inputs_raise_exception(self):
    # The generator requires height == width.
    batch_size = 2
    height, width = 240, 320
    num_outputs = 4

    images = tf.ones((batch_size, height, width, 3))

    with self.assertRaises(ValueError):
      with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
        pix2pix.pix2pix_generator(
            images, num_outputs, upsample_method='nn_upsample_conv')

  def _reduced_default_blocks(self):
    """Returns the default blocks, scaled down to make test run faster."""
    return [pix2pix.Block(b.num_filters // 32, b.decoder_keep_prob)
            for b in pix2pix._default_generator_blocks()]

  def test_output_size_nn_upsample_conv(self):
    # The generator must preserve spatial size and emit num_outputs channels.
    batch_size = 2
    height, width = 256, 256
    num_outputs = 4

    images = tf.ones((batch_size, height, width, 3))
    with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
      logits, _ = pix2pix.pix2pix_generator(
          images, num_outputs, blocks=self._reduced_default_blocks(),
          upsample_method='nn_upsample_conv')

    with self.test_session() as session:
      session.run(tf.global_variables_initializer())
      np_outputs = session.run(logits)
      self.assertListEqual([batch_size, height, width, num_outputs],
                           list(np_outputs.shape))

  def test_output_size_conv2d_transpose(self):
    # Same shape contract as above, with the alternate upsampling method.
    batch_size = 2
    height, width = 256, 256
    num_outputs = 4

    images = tf.ones((batch_size, height, width, 3))
    with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
      logits, _ = pix2pix.pix2pix_generator(
          images, num_outputs, blocks=self._reduced_default_blocks(),
          upsample_method='conv2d_transpose')

    with self.test_session() as session:
      session.run(tf.global_variables_initializer())
      np_outputs = session.run(logits)
      self.assertListEqual([batch_size, height, width, num_outputs],
                           list(np_outputs.shape))

  def test_block_number_dictates_number_of_layers(self):
    # One encoder end point and one decoder end point per block.
    batch_size = 2
    height, width = 256, 256
    num_outputs = 4

    images = tf.ones((batch_size, height, width, 3))
    blocks = [
        pix2pix.Block(64, 0.5),
        pix2pix.Block(128, 0),
    ]
    with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
      _, end_points = pix2pix.pix2pix_generator(
          images, num_outputs, blocks)

    num_encoder_layers = 0
    num_decoder_layers = 0
    for end_point in end_points:
      if end_point.startswith('encoder'):
        num_encoder_layers += 1
      elif end_point.startswith('decoder'):
        num_decoder_layers += 1

    self.assertEqual(num_encoder_layers, len(blocks))
    self.assertEqual(num_decoder_layers, len(blocks))
class DiscriminatorTest(tf.test.TestCase):
  """Tests for pix2pix.pix2pix_discriminator."""

  def _layer_output_size(self, input_size, kernel_size=4, stride=2, pad=2):
    # Output size of one VALID conv after reflection-padding `pad` per side.
    return (input_size + pad * 2 - kernel_size) // stride + 1

  def test_four_layers(self):
    batch_size = 2
    input_size = 256

    # Three stride-2 layers, then the stride-1 top layer and logits layer.
    output_size = self._layer_output_size(input_size)
    output_size = self._layer_output_size(output_size)
    output_size = self._layer_output_size(output_size)
    output_size = self._layer_output_size(output_size, stride=1)
    output_size = self._layer_output_size(output_size, stride=1)

    images = tf.ones((batch_size, input_size, input_size, 3))
    with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
      logits, end_points = pix2pix.pix2pix_discriminator(
          images, num_filters=[64, 128, 256, 512])
    self.assertListEqual([batch_size, output_size, output_size, 1],
                         logits.shape.as_list())
    self.assertListEqual([batch_size, output_size, output_size, 1],
                         end_points['predictions'].shape.as_list())

  def test_four_layers_no_padding(self):
    batch_size = 2
    input_size = 256

    output_size = self._layer_output_size(input_size, pad=0)
    output_size = self._layer_output_size(output_size, pad=0)
    output_size = self._layer_output_size(output_size, pad=0)
    output_size = self._layer_output_size(output_size, stride=1, pad=0)
    output_size = self._layer_output_size(output_size, stride=1, pad=0)

    images = tf.ones((batch_size, input_size, input_size, 3))
    with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
      logits, end_points = pix2pix.pix2pix_discriminator(
          images, num_filters=[64, 128, 256, 512], padding=0)
    self.assertListEqual([batch_size, output_size, output_size, 1],
                         logits.shape.as_list())
    self.assertListEqual([batch_size, output_size, output_size, 1],
                         end_points['predictions'].shape.as_list())

  def test_four_layers_wrong_padding(self):
    # Fixed method-name typo (was: test_four_layers_wrog_paddig); discovery
    # still works since the test_ prefix is unchanged.
    batch_size = 2
    input_size = 256

    images = tf.ones((batch_size, input_size, input_size, 3))
    with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
      with self.assertRaises(TypeError):
        pix2pix.pix2pix_discriminator(
            images, num_filters=[64, 128, 256, 512], padding=1.5)

  def test_four_layers_negative_padding(self):
    batch_size = 2
    input_size = 256

    images = tf.ones((batch_size, input_size, input_size, 3))
    with tf.contrib.framework.arg_scope(pix2pix.pix2pix_arg_scope()):
      with self.assertRaises(ValueError):
        pix2pix.pix2pix_discriminator(
            images, num_filters=[64, 128, 256, 512], padding=-1)
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
...@@ -228,6 +228,7 @@ def resnet_arg_scope(weight_decay=0.0001, ...@@ -228,6 +228,7 @@ def resnet_arg_scope(weight_decay=0.0001,
'epsilon': batch_norm_epsilon, 'epsilon': batch_norm_epsilon,
'scale': batch_norm_scale, 'scale': batch_norm_scale,
'updates_collections': tf.GraphKeys.UPDATE_OPS, 'updates_collections': tf.GraphKeys.UPDATE_OPS,
'fused': None, # Use fused batch norm if possible.
} }
with slim.arg_scope( with slim.arg_scope(
......
...@@ -125,7 +125,7 @@ def bottleneck(inputs, ...@@ -125,7 +125,7 @@ def bottleneck(inputs,
output = tf.nn.relu(shortcut + residual) output = tf.nn.relu(shortcut + residual)
return slim.utils.collect_named_outputs(outputs_collections, return slim.utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, sc.name,
output) output)
...@@ -166,9 +166,9 @@ def resnet_v1(inputs, ...@@ -166,9 +166,9 @@ def resnet_v1(inputs,
inputs: A tensor of size [batch, height_in, width_in, channels]. inputs: A tensor of size [batch, height_in, width_in, channels].
blocks: A list of length equal to the number of ResNet blocks. Each element blocks: A list of length equal to the number of ResNet blocks. Each element
is a resnet_utils.Block object describing the units in the block. is a resnet_utils.Block object describing the units in the block.
num_classes: Number of predicted classes for classification tasks. If None num_classes: Number of predicted classes for classification tasks.
we return the features before the logit layer. If 0 or None, we return the features before the logit layer.
is_training: whether is training or not. is_training: whether batch_norm layers are in training mode.
global_pool: If True, we perform global average pooling before computing the global_pool: If True, we perform global average pooling before computing the
logits. Set to True for image classification, False for dense prediction. logits. Set to True for image classification, False for dense prediction.
output_stride: If None, then the output will be computed at the nominal output_stride: If None, then the output will be computed at the nominal
...@@ -189,10 +189,10 @@ def resnet_v1(inputs, ...@@ -189,10 +189,10 @@ def resnet_v1(inputs,
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
If global_pool is False, then height_out and width_out are reduced by a If global_pool is False, then height_out and width_out are reduced by a
factor of output_stride compared to the respective height_in and width_in, factor of output_stride compared to the respective height_in and width_in,
else both height_out and width_out equal one. If num_classes is None, then else both height_out and width_out equal one. If num_classes is 0 or None,
net is the output of the last ResNet block, potentially after global then net is the output of the last ResNet block, potentially after global
average pooling. If num_classes is not None, net contains the pre-softmax average pooling. If num_classes is a non-zero integer, net contains the
activations. pre-softmax activations.
end_points: A dictionary from components of the network to the corresponding end_points: A dictionary from components of the network to the corresponding
activation. activation.
...@@ -200,7 +200,7 @@ def resnet_v1(inputs, ...@@ -200,7 +200,7 @@ def resnet_v1(inputs,
ValueError: If the target output_stride is not valid. ValueError: If the target output_stride is not valid.
""" """
with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
end_points_collection = sc.name + '_end_points' end_points_collection = sc.original_name_scope + '_end_points'
with slim.arg_scope([slim.conv2d, bottleneck, with slim.arg_scope([slim.conv2d, bottleneck,
resnet_utils.stack_blocks_dense], resnet_utils.stack_blocks_dense],
outputs_collections=end_points_collection): outputs_collections=end_points_collection):
...@@ -214,18 +214,21 @@ def resnet_v1(inputs, ...@@ -214,18 +214,21 @@ def resnet_v1(inputs,
net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
# Convert end_points_collection into a dictionary of end_points.
end_points = slim.utils.convert_collection_to_dict(
end_points_collection)
if global_pool: if global_pool:
# Global average pooling. # Global average pooling.
net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
if num_classes is not None: end_points['global_pool'] = net
if num_classes:
net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, scope='logits') normalizer_fn=None, scope='logits')
end_points[sc.name + '/logits'] = net
if spatial_squeeze: if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
# Convert end_points_collection into a dictionary of end_points. end_points[sc.name + '/spatial_squeeze'] = net
end_points = slim.utils.convert_collection_to_dict(
end_points_collection)
if num_classes is not None:
end_points['predictions'] = slim.softmax(net, scope='predictions') end_points['predictions'] = slim.softmax(net, scope='predictions')
return net, end_points return net, end_points
resnet_v1.default_image_size = 224 resnet_v1.default_image_size = 224
......
...@@ -285,6 +285,31 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -285,6 +285,31 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
self.assertTrue('predictions' in end_points) self.assertTrue('predictions' in end_points)
self.assertListEqual(end_points['predictions'].get_shape().as_list(), self.assertListEqual(end_points['predictions'].get_shape().as_list(),
[2, 1, 1, num_classes]) [2, 1, 1, num_classes])
self.assertTrue('global_pool' in end_points)
self.assertListEqual(end_points['global_pool'].get_shape().as_list(),
[2, 1, 1, 32])
def testEndpointNames(self):
# Like ResnetUtilsTest.testEndPointsV1(), but for the public API.
global_pool = True
num_classes = 10
inputs = create_test_input(2, 224, 224, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool,
scope='resnet')
expected = ['resnet/conv1']
for block in range(1, 5):
for unit in range(1, 4 if block < 4 else 3):
for conv in range(1, 4):
expected.append('resnet/block%d/unit_%d/bottleneck_v1/conv%d' %
(block, unit, conv))
expected.append('resnet/block%d/unit_%d/bottleneck_v1' % (block, unit))
expected.append('resnet/block%d/unit_1/bottleneck_v1/shortcut' % block)
expected.append('resnet/block%d' % block)
expected.extend(['global_pool', 'resnet/logits', 'resnet/spatial_squeeze',
'predictions'])
self.assertItemsEqual(end_points.keys(), expected)
def testClassificationShapes(self): def testClassificationShapes(self):
global_pool = True global_pool = True
......
...@@ -39,7 +39,7 @@ ResNet-101 for image classification into 1000 classes: ...@@ -39,7 +39,7 @@ ResNet-101 for image classification into 1000 classes:
ResNet-101 for semantic segmentation into 21 classes: ResNet-101 for semantic segmentation into 21 classes:
# inputs has shape [batch, 513, 513, 3] # inputs has shape [batch, 513, 513, 3]
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training)): with slim.arg_scope(resnet_v2.resnet_arg_scope()):
net, end_points = resnet_v2.resnet_v2_101(inputs, net, end_points = resnet_v2.resnet_v2_101(inputs,
21, 21,
is_training=False, is_training=False,
...@@ -104,7 +104,7 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, ...@@ -104,7 +104,7 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
output = shortcut + residual output = shortcut + residual
return slim.utils.collect_named_outputs(outputs_collections, return slim.utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, sc.name,
output) output)
...@@ -145,9 +145,9 @@ def resnet_v2(inputs, ...@@ -145,9 +145,9 @@ def resnet_v2(inputs,
inputs: A tensor of size [batch, height_in, width_in, channels]. inputs: A tensor of size [batch, height_in, width_in, channels].
blocks: A list of length equal to the number of ResNet blocks. Each element blocks: A list of length equal to the number of ResNet blocks. Each element
is a resnet_utils.Block object describing the units in the block. is a resnet_utils.Block object describing the units in the block.
num_classes: Number of predicted classes for classification tasks. If None num_classes: Number of predicted classes for classification tasks.
we return the features before the logit layer. If 0 or None, we return the features before the logit layer.
is_training: whether is training or not. is_training: whether batch_norm layers are in training mode.
global_pool: If True, we perform global average pooling before computing the global_pool: If True, we perform global average pooling before computing the
logits. Set to True for image classification, False for dense prediction. logits. Set to True for image classification, False for dense prediction.
output_stride: If None, then the output will be computed at the nominal output_stride: If None, then the output will be computed at the nominal
...@@ -170,10 +170,10 @@ def resnet_v2(inputs, ...@@ -170,10 +170,10 @@ def resnet_v2(inputs,
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
If global_pool is False, then height_out and width_out are reduced by a If global_pool is False, then height_out and width_out are reduced by a
factor of output_stride compared to the respective height_in and width_in, factor of output_stride compared to the respective height_in and width_in,
else both height_out and width_out equal one. If num_classes is None, then else both height_out and width_out equal one. If num_classes is 0 or None,
net is the output of the last ResNet block, potentially after global then net is the output of the last ResNet block, potentially after global
average pooling. If num_classes is not None, net contains the pre-softmax average pooling. If num_classes is a non-zero integer, net contains the
activations. pre-softmax activations.
end_points: A dictionary from components of the network to the corresponding end_points: A dictionary from components of the network to the corresponding
activation. activation.
...@@ -181,7 +181,7 @@ def resnet_v2(inputs, ...@@ -181,7 +181,7 @@ def resnet_v2(inputs,
ValueError: If the target output_stride is not valid. ValueError: If the target output_stride is not valid.
""" """
with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
end_points_collection = sc.name + '_end_points' end_points_collection = sc.original_name_scope + '_end_points'
with slim.arg_scope([slim.conv2d, bottleneck, with slim.arg_scope([slim.conv2d, bottleneck,
resnet_utils.stack_blocks_dense], resnet_utils.stack_blocks_dense],
outputs_collections=end_points_collection): outputs_collections=end_points_collection):
...@@ -204,18 +204,21 @@ def resnet_v2(inputs, ...@@ -204,18 +204,21 @@ def resnet_v2(inputs,
# normalization or activation functions in the residual unit output. See # normalization or activation functions in the residual unit output. See
# Appendix of [2]. # Appendix of [2].
net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
# Convert end_points_collection into a dictionary of end_points.
end_points = slim.utils.convert_collection_to_dict(
end_points_collection)
if global_pool: if global_pool:
# Global average pooling. # Global average pooling.
net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
end_points['global_pool'] = net
if num_classes is not None: if num_classes is not None:
net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, scope='logits') normalizer_fn=None, scope='logits')
end_points[sc.name + '/logits'] = net
if spatial_squeeze: if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
# Convert end_points_collection into a dictionary of end_points. end_points[sc.name + '/spatial_squeeze'] = net
end_points = slim.utils.convert_collection_to_dict(
end_points_collection)
if num_classes is not None:
end_points['predictions'] = slim.softmax(net, scope='predictions') end_points['predictions'] = slim.softmax(net, scope='predictions')
return net, end_points return net, end_points
resnet_v2.default_image_size = 224 resnet_v2.default_image_size = 224
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment