Unverified commit 57014e4c authored by gunan, committed by GitHub

Merge pull request #2627 from sguada/nasnet

Bring tensorflow/models slim up to date.
parents c46caa56 cbb62479
@@ -285,6 +285,31 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
self.assertTrue('predictions' in end_points)
self.assertListEqual(end_points['predictions'].get_shape().as_list(),
[2, 1, 1, num_classes])
+    self.assertTrue('global_pool' in end_points)
+    self.assertListEqual(end_points['global_pool'].get_shape().as_list(),
+                         [2, 1, 1, 32])
+
+  def testEndpointNames(self):
+    # Like ResnetUtilsTest.testEndPointsV2(), but for the public API.
+    global_pool = True
+    num_classes = 10
+    inputs = create_test_input(2, 224, 224, 3)
+    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
+      _, end_points = self._resnet_small(inputs, num_classes,
+                                         global_pool=global_pool,
+                                         scope='resnet')
+    expected = ['resnet/conv1']
+    for block in range(1, 5):
+      for unit in range(1, 4 if block < 4 else 3):
+        for conv in range(1, 4):
+          expected.append('resnet/block%d/unit_%d/bottleneck_v2/conv%d' %
+                          (block, unit, conv))
+        expected.append('resnet/block%d/unit_%d/bottleneck_v2' % (block, unit))
+      expected.append('resnet/block%d/unit_1/bottleneck_v2/shortcut' % block)
+      expected.append('resnet/block%d' % block)
+    expected.extend(['global_pool', 'resnet/logits', 'resnet/spatial_squeeze',
+                     'predictions'])
+    self.assertItemsEqual(end_points.keys(), expected)
def testClassificationShapes(self):
global_pool = True
......
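The resnet test above enumerates the exact endpoint names the public API now exposes, including the new 'global_pool' entry. As a hedged sketch of what that buys a caller, here is how the endpoint could be read through one of the published builders (the import path and the 2048-deep final block of resnet_v2_50 are assumptions, not part of this diff):

```python
import tensorflow as tf
from nets import resnet_v2  # path assumed from a slim checkout

slim = tf.contrib.slim

inputs = tf.random_uniform((2, 224, 224, 3))
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
  # The pre-logits global average pool is now surfaced as its own endpoint.
  net, end_points = resnet_v2.resnet_v2_50(inputs, num_classes=10,
                                           global_pool=True)

print(end_points['global_pool'].get_shape().as_list())  # [2, 1, 1, 2048]
```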
@@ -69,7 +69,8 @@ def vgg_a(inputs,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='vgg_a',
-          fc_conv_padding='VALID'):
+          fc_conv_padding='VALID',
+          global_pool=False):
"""Oxford Net VGG 11-Layers version A Example.
Note: All the fully_connected layers have been transformed to conv2d layers.
@@ -77,7 +78,8 @@ def vgg_a(inputs,
Args:
inputs: a tensor of size [batch_size, height, width, channels].
-    num_classes: number of predicted classes.
+    num_classes: number of predicted classes. If 0 or None, the logits layer is
+      omitted and the input features to the logits layer are returned instead.
is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout
layers during training.
@@ -90,12 +92,17 @@ def vgg_a(inputs,
get a prediction map downsampled by a factor of 32 as an output.
Otherwise, the output prediction map will be (input / 32) - 6 in case of
'VALID' padding.
+    global_pool: Optional boolean flag. If True, the input to the classification
+      layer is avgpooled to size 1x1, for any input size. (This is not part
+      of the original VGG architecture.)
Returns:
-    the last op containing the log predictions and end_points dict.
+    net: the output of the logits layer (if num_classes is a non-zero integer),
+      or the input to the logits layer (if num_classes is 0 or None).
+    end_points: a dict of tensors with intermediate activations.
"""
with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
-    end_points_collection = sc.name + '_end_points'
+    end_points_collection = sc.original_name_scope + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.max_pool2d],
outputs_collections=end_points_collection):
@@ -109,21 +116,26 @@ def vgg_a(inputs,
net = slim.max_pool2d(net, [2, 2], scope='pool4')
net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [2, 2], scope='pool5')
# Use conv2d instead of fully_connected layers.
net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
-      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
-                         scope='dropout7')
-      net = slim.conv2d(net, num_classes, [1, 1],
-                        activation_fn=None,
-                        normalizer_fn=None,
-                        scope='fc8')
      # Convert end_points_collection into an end_point dict.
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
-      if spatial_squeeze:
-        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
+      if global_pool:
+        net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
+        end_points['global_pool'] = net
+      if num_classes:
+        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
+                           scope='dropout7')
+        net = slim.conv2d(net, num_classes, [1, 1],
+                          activation_fn=None,
+                          normalizer_fn=None,
+                          scope='fc8')
+        if spatial_squeeze:
+          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points
vgg_a.default_image_size = 224
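A note on the `sc.name` → `sc.original_name_scope` change above (mirrored in vgg_16 and vgg_19 below): `sc.name` is the variable-scope name and repeats verbatim every time the scope is re-entered, so building the network twice would pour both sets of endpoints into one shared collection. `original_name_scope` records the name scope minted when the scope object was created, which TensorFlow 1.x uniquifies per entry. A minimal standalone illustration, assuming TF 1.x scoping semantics:

```python
import tensorflow as tf

with tf.variable_scope('vgg_a') as sc:
  print(sc.name)                  # 'vgg_a'
  print(sc.original_name_scope)   # 'vgg_a/'

# Re-entering the same variable scope, e.g. to build the net a second time:
with tf.variable_scope('vgg_a', reuse=True) as sc2:
  print(sc2.name)                 # 'vgg_a'    -- identical, would collide
  print(sc2.original_name_scope)  # 'vgg_a_1/' -- unique per construction
```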
@@ -135,7 +147,8 @@ def vgg_16(inputs,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='vgg_16',
-           fc_conv_padding='VALID'):
+           fc_conv_padding='VALID',
+           global_pool=False):
"""Oxford Net VGG 16-Layers version D Example.
Note: All the fully_connected layers have been transformed to conv2d layers.
@@ -143,7 +156,8 @@ def vgg_16(inputs,
Args:
inputs: a tensor of size [batch_size, height, width, channels].
-    num_classes: number of predicted classes.
+    num_classes: number of predicted classes. If 0 or None, the logits layer is
+      omitted and the input features to the logits layer are returned instead.
is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout
layers during training.
@@ -156,12 +170,17 @@ def vgg_16(inputs,
get a prediction map downsampled by a factor of 32 as an output.
Otherwise, the output prediction map will be (input / 32) - 6 in case of
'VALID' padding.
+    global_pool: Optional boolean flag. If True, the input to the classification
+      layer is avgpooled to size 1x1, for any input size. (This is not part
+      of the original VGG architecture.)
Returns:
-    the last op containing the log predictions and end_points dict.
+    net: the output of the logits layer (if num_classes is a non-zero integer),
+      or the input to the logits layer (if num_classes is 0 or None).
+    end_points: a dict of tensors with intermediate activations.
"""
with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
-    end_points_collection = sc.name + '_end_points'
+    end_points_collection = sc.original_name_scope + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=end_points_collection):
@@ -175,21 +194,26 @@ def vgg_16(inputs,
net = slim.max_pool2d(net, [2, 2], scope='pool4')
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [2, 2], scope='pool5')
# Use conv2d instead of fully_connected layers.
net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
-      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
-                         scope='dropout7')
-      net = slim.conv2d(net, num_classes, [1, 1],
-                        activation_fn=None,
-                        normalizer_fn=None,
-                        scope='fc8')
      # Convert end_points_collection into an end_point dict.
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
-      if spatial_squeeze:
-        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
+      if global_pool:
+        net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
+        end_points['global_pool'] = net
+      if num_classes:
+        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
+                           scope='dropout7')
+        net = slim.conv2d(net, num_classes, [1, 1],
+                          activation_fn=None,
+                          normalizer_fn=None,
+                          scope='fc8')
+        if spatial_squeeze and num_classes is not None:
+          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points
vgg_16.default_image_size = 224
@@ -201,7 +225,8 @@ def vgg_19(inputs,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='vgg_19',
-           fc_conv_padding='VALID'):
+           fc_conv_padding='VALID',
+           global_pool=False):
"""Oxford Net VGG 19-Layers version E Example.
Note: All the fully_connected layers have been transformed to conv2d layers.
@@ -209,7 +234,8 @@ def vgg_19(inputs,
Args:
inputs: a tensor of size [batch_size, height, width, channels].
-    num_classes: number of predicted classes.
+    num_classes: number of predicted classes. If 0 or None, the logits layer is
+      omitted and the input features to the logits layer are returned instead.
is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout
layers during training.
@@ -222,13 +248,18 @@ def vgg_19(inputs,
get a prediction map downsampled by a factor of 32 as an output.
Otherwise, the output prediction map will be (input / 32) - 6 in case of
'VALID' padding.
+    global_pool: Optional boolean flag. If True, the input to the classification
+      layer is avgpooled to size 1x1, for any input size. (This is not part
+      of the original VGG architecture.)
Returns:
-    the last op containing the log predictions and end_points dict.
+    net: the output of the logits layer (if num_classes is a non-zero integer),
+      or the non-dropped-out input to the logits layer (if num_classes is 0 or
+      None).
+    end_points: a dict of tensors with intermediate activations.
"""
with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
-    end_points_collection = sc.name + '_end_points'
+    end_points_collection = sc.original_name_scope + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=end_points_collection):
@@ -242,21 +273,26 @@ def vgg_19(inputs,
net = slim.max_pool2d(net, [2, 2], scope='pool4')
net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [2, 2], scope='pool5')
# Use conv2d instead of fully_connected layers.
net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
-      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
-                         scope='dropout7')
-      net = slim.conv2d(net, num_classes, [1, 1],
-                        activation_fn=None,
-                        normalizer_fn=None,
-                        scope='fc8')
      # Convert end_points_collection into an end_point dict.
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
-      if spatial_squeeze:
-        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
+      if global_pool:
+        net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
+        end_points['global_pool'] = net
+      if num_classes:
+        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
+                           scope='dropout7')
+        net = slim.conv2d(net, num_classes, [1, 1],
+                          activation_fn=None,
+                          normalizer_fn=None,
+                          scope='fc8')
+        if spatial_squeeze:
+          net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points
vgg_19.default_image_size = 224
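Taken together, the two new behaviors let all three VGG builders double as fully convolutional feature extractors. A minimal usage sketch, assuming a slim checkout with `nets.vgg` on the path (the import path and printed shapes are assumptions, not asserted by this diff):

```python
import tensorflow as tf
from nets import vgg  # path assumed from a slim checkout

slim = tf.contrib.slim

images = tf.random_uniform((4, 224, 224, 3))
with slim.arg_scope(vgg.vgg_arg_scope()):
  # num_classes=None skips dropout7/fc8 and returns the fc7 features;
  # global_pool=True average-pools them to 1x1 for any input size.
  net, end_points = vgg.vgg_16(images, num_classes=None, is_training=False,
                               global_pool=True)

print(net.get_shape().as_list())    # [4, 1, 1, 4096]
print('global_pool' in end_points)  # True
```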
......
@@ -48,6 +48,18 @@ class VGGATest(tf.test.TestCase):
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 2, 2, num_classes])
+
+  def testGlobalPool(self):
+    batch_size = 1
+    height, width = 256, 256
+    num_classes = 1000
+    with self.test_session():
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False,
+                            global_pool=True)
+      self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd')
+      self.assertListEqual(logits.get_shape().as_list(),
+                           [batch_size, 1, 1, num_classes])
def testEndPoints(self):
batch_size = 5
height, width = 224, 224
@@ -74,6 +86,32 @@ class VGGATest(tf.test.TestCase):
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
+
+  def testNoClasses(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = None
+    with self.test_session():
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      net, end_points = vgg.vgg_a(inputs, num_classes)
+      expected_names = ['vgg_a/conv1/conv1_1',
+                        'vgg_a/pool1',
+                        'vgg_a/conv2/conv2_1',
+                        'vgg_a/pool2',
+                        'vgg_a/conv3/conv3_1',
+                        'vgg_a/conv3/conv3_2',
+                        'vgg_a/pool3',
+                        'vgg_a/conv4/conv4_1',
+                        'vgg_a/conv4/conv4_2',
+                        'vgg_a/pool4',
+                        'vgg_a/conv5/conv5_1',
+                        'vgg_a/conv5/conv5_2',
+                        'vgg_a/pool5',
+                        'vgg_a/fc6',
+                        'vgg_a/fc7',
+                       ]
+      self.assertSetEqual(set(end_points.keys()), set(expected_names))
+      self.assertTrue(net.op.name.startswith('vgg_a/fc7'))
def testModelVariables(self):
batch_size = 5
height, width = 224, 224
@@ -177,6 +215,18 @@ class VGG16Test(tf.test.TestCase):
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 2, 2, num_classes])
+
+  def testGlobalPool(self):
+    batch_size = 1
+    height, width = 256, 256
+    num_classes = 1000
+    with self.test_session():
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False,
+                             global_pool=True)
+      self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
+      self.assertListEqual(logits.get_shape().as_list(),
+                           [batch_size, 1, 1, num_classes])
def testEndPoints(self):
batch_size = 5
height, width = 224, 224
@@ -208,6 +258,37 @@ class VGG16Test(tf.test.TestCase):
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
+
+  def testNoClasses(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = None
+    with self.test_session():
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      net, end_points = vgg.vgg_16(inputs, num_classes)
+      expected_names = ['vgg_16/conv1/conv1_1',
+                        'vgg_16/conv1/conv1_2',
+                        'vgg_16/pool1',
+                        'vgg_16/conv2/conv2_1',
+                        'vgg_16/conv2/conv2_2',
+                        'vgg_16/pool2',
+                        'vgg_16/conv3/conv3_1',
+                        'vgg_16/conv3/conv3_2',
+                        'vgg_16/conv3/conv3_3',
+                        'vgg_16/pool3',
+                        'vgg_16/conv4/conv4_1',
+                        'vgg_16/conv4/conv4_2',
+                        'vgg_16/conv4/conv4_3',
+                        'vgg_16/pool4',
+                        'vgg_16/conv5/conv5_1',
+                        'vgg_16/conv5/conv5_2',
+                        'vgg_16/conv5/conv5_3',
+                        'vgg_16/pool5',
+                        'vgg_16/fc6',
+                        'vgg_16/fc7',
+                       ]
+      self.assertSetEqual(set(end_points.keys()), set(expected_names))
+      self.assertTrue(net.op.name.startswith('vgg_16/fc7'))
def testModelVariables(self):
batch_size = 5
height, width = 224, 224
@@ -321,6 +402,18 @@ class VGG19Test(tf.test.TestCase):
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 2, 2, num_classes])
+
+  def testGlobalPool(self):
+    batch_size = 1
+    height, width = 256, 256
+    num_classes = 1000
+    with self.test_session():
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False,
+                             global_pool=True)
+      self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd')
+      self.assertListEqual(logits.get_shape().as_list(),
+                           [batch_size, 1, 1, num_classes])
def testEndPoints(self):
batch_size = 5
height, width = 224, 224
@@ -356,6 +449,41 @@ class VGG19Test(tf.test.TestCase):
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
+
+  def testNoClasses(self):
+    batch_size = 5
+    height, width = 224, 224
+    num_classes = None
+    with self.test_session():
+      inputs = tf.random_uniform((batch_size, height, width, 3))
+      net, end_points = vgg.vgg_19(inputs, num_classes)
+      expected_names = [
+          'vgg_19/conv1/conv1_1',
+          'vgg_19/conv1/conv1_2',
+          'vgg_19/pool1',
+          'vgg_19/conv2/conv2_1',
+          'vgg_19/conv2/conv2_2',
+          'vgg_19/pool2',
+          'vgg_19/conv3/conv3_1',
+          'vgg_19/conv3/conv3_2',
+          'vgg_19/conv3/conv3_3',
+          'vgg_19/conv3/conv3_4',
+          'vgg_19/pool3',
+          'vgg_19/conv4/conv4_1',
+          'vgg_19/conv4/conv4_2',
+          'vgg_19/conv4/conv4_3',
+          'vgg_19/conv4/conv4_4',
+          'vgg_19/pool4',
+          'vgg_19/conv5/conv5_1',
+          'vgg_19/conv5/conv5_2',
+          'vgg_19/conv5/conv5_3',
+          'vgg_19/conv5/conv5_4',
+          'vgg_19/pool5',
+          'vgg_19/fc6',
+          'vgg_19/fc7',
+      ]
+      self.assertSetEqual(set(end_points.keys()), set(expected_names))
+      self.assertTrue(net.op.name.startswith('vgg_19/fc7'))
def testModelVariables(self):
batch_size = 5
height, width = 224, 224
......
@@ -30,7 +30,8 @@ slim = tf.contrib.slim
def preprocess_for_train(image,
output_height,
output_width,
-                         padding=_PADDING):
+                         padding=_PADDING,
+                         add_image_summaries=True):
"""Preprocesses the given image for training.
Note that the actual resizing scale is sampled from
@@ -41,11 +42,13 @@ def preprocess_for_train(image,
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
    padding: The amount of padding before and after each dimension of the image.
+    add_image_summaries: Enable image summaries.
Returns:
A preprocessed image.
"""
-  tf.summary.image('image', tf.expand_dims(image, 0))
+  if add_image_summaries:
+    tf.summary.image('image', tf.expand_dims(image, 0))
# Transform the image to floats.
image = tf.to_float(image)
@@ -58,7 +61,8 @@ def preprocess_for_train(image,
# Randomly flip the image horizontally.
distorted_image = tf.image.random_flip_left_right(distorted_image)
-  tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0))
+  if add_image_summaries:
+    tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0))
# Because these operations are not commutative, consider randomizing
  # the order of their operation.
@@ -70,18 +74,21 @@ def preprocess_for_train(image,
return tf.image.per_image_standardization(distorted_image)
-def preprocess_for_eval(image, output_height, output_width):
+def preprocess_for_eval(image, output_height, output_width,
+                        add_image_summaries=True):
"""Preprocesses the given image for evaluation.
Args:
image: A `Tensor` representing an image of arbitrary size.
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
+    add_image_summaries: Enable image summaries.
Returns:
A preprocessed image.
"""
-  tf.summary.image('image', tf.expand_dims(image, 0))
+  if add_image_summaries:
+    tf.summary.image('image', tf.expand_dims(image, 0))
# Transform the image to floats.
image = tf.to_float(image)
@@ -89,13 +96,15 @@ def preprocess_for_eval(image, output_height, output_width):
resized_image = tf.image.resize_image_with_crop_or_pad(image,
output_width,
output_height)
-  tf.summary.image('resized_image', tf.expand_dims(resized_image, 0))
+  if add_image_summaries:
+    tf.summary.image('resized_image', tf.expand_dims(resized_image, 0))
# Subtract off the mean and divide by the variance of the pixels.
return tf.image.per_image_standardization(resized_image)
-def preprocess_image(image, output_height, output_width, is_training=False):
+def preprocess_image(image, output_height, output_width, is_training=False,
+                     add_image_summaries=True):
"""Preprocesses the given image.
Args:
@@ -104,11 +113,16 @@ def preprocess_image(image, output_height, output_width, is_training=False):
output_width: The width of the image after preprocessing.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
+    add_image_summaries: Enable image summaries.
Returns:
A preprocessed image.
"""
if is_training:
-    return preprocess_for_train(image, output_height, output_width)
+    return preprocess_for_train(
+        image, output_height, output_width,
+        add_image_summaries=add_image_summaries)
else:
-    return preprocess_for_eval(image, output_height, output_width)
+    return preprocess_for_eval(
+        image, output_height, output_width,
+        add_image_summaries=add_image_summaries)
@@ -54,6 +54,8 @@ def get_preprocessing(name, is_training=False):
'inception_resnet_v2': inception_preprocessing,
'lenet': lenet_preprocessing,
'mobilenet_v1': inception_preprocessing,
+      'nasnet_mobile': inception_preprocessing,
+      'nasnet_large': inception_preprocessing,
'resnet_v1_50': vgg_preprocessing,
'resnet_v1_101': vgg_preprocessing,
'resnet_v1_152': vgg_preprocessing,
......
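The two new map entries route the NASNet model names to the same Inception-style preprocessing used by the other Inception-family nets. A hedged usage sketch (module path assumed; 224 is illustrative, not mandated by this diff):

```python
import tensorflow as tf
from preprocessing import preprocessing_factory  # module path assumed

raw_image = tf.random_uniform((300, 300, 3))
preprocess_fn = preprocessing_factory.get_preprocessing('nasnet_mobile',
                                                        is_training=False)
# The returned callable takes (image, output_height, output_width).
image = preprocess_fn(raw_image, 224, 224)
```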
#!/bin/bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This script prepares the various different versions of MobileNet models for
# use in a mobile application. If you don't specify your own trained checkpoint
# file, it will download pretrained checkpoints for ImageNet. You'll also need
......