Commit ba5c96c2 authored by Sergio Guadarrama's avatar Sergio Guadarrama Committed by GitHub

Merge pull request #366 from nathansilberman/master

Full code refactor and added all networks
parents bc0a0a86 65fad62d
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions for the preactivation form of Residual Networks.
Residual networks (ResNets) were originally proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
The full preactivation 'v2' ResNet variant implemented in this module was
introduced by:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks. arXiv:1603.05027
The key difference of the full preactivation 'v2' variant compared to the
'v1' variant in [1] is the use of batch normalization before every weight layer.
Another difference is that 'v2' ResNets do not include an activation function in
the main pathway. Also see [2; Fig. 4e].
Typical use:
from tensorflow.contrib.slim.nets import resnet_v2
ResNet-101 for image classification into 1000 classes:
# inputs has shape [batch, 224, 224, 3]
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False)
ResNet-101 for semantic segmentation into 21 classes:
# inputs has shape [batch, 513, 513, 3]
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
net, end_points = resnet_v2.resnet_v2_101(inputs,
21,
is_training=False,
global_pool=False,
output_stride=16)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets import resnet_utils
slim = tf.contrib.slim
resnet_arg_scope = resnet_utils.resnet_arg_scope
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
outputs_collections=None, scope=None):
"""Bottleneck residual unit variant with BN before convolutions.
This is the full preactivation residual unit variant proposed in [2]. See
Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
variant which has an extra bottleneck layer.
When putting together two consecutive ResNet blocks that use this unit, one
should use stride = 2 in the last unit of the first block.
Args:
inputs: A tensor of size [batch, height, width, channels].
depth: The depth of the ResNet unit output.
depth_bottleneck: The depth of the bottleneck layers.
stride: The ResNet unit's stride. Determines the amount of downsampling of
the unit's output compared to its input.
rate: An integer, rate for atrous convolution.
outputs_collections: Collection to add the ResNet unit output.
scope: Optional variable_scope.
Returns:
The ResNet unit's output.
"""
with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
if depth == depth_in:
shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
else:
shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
normalizer_fn=None, activation_fn=None,
scope='shortcut')
residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
scope='conv1')
residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
rate=rate, scope='conv2')
residual = slim.conv2d(residual, depth, [1, 1], stride=1,
normalizer_fn=None, activation_fn=None,
scope='conv3')
output = shortcut + residual
return slim.utils.collect_named_outputs(outputs_collections,
sc.original_name_scope,
output)
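# --- Editor's sketch, not part of the original module ---
# A minimal, hedged example of calling the preactivation bottleneck unit
# directly, assuming the TF 1.x / tf.contrib.slim environment this file
# targets and the no-argument resnet_arg_scope() usage seen in the tests.
# With depth == depth_in and stride == 1 the shortcut branch is the
# identity, so the output keeps the input shape.
def _bottleneck_example():
  inputs = tf.placeholder(tf.float32, [1, 56, 56, 256])
  with slim.arg_scope(resnet_arg_scope()):
    # 1x1 down to 64 channels, 3x3 at 64, then 1x1 back up to 256.
    net = bottleneck(inputs, depth=256, depth_bottleneck=64, stride=1)
  return net  # shape: [1, 56, 56, 256]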
def resnet_v2(inputs,
blocks,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
include_root_block=True,
reuse=None,
scope=None):
"""Generator for v2 (preactivation) ResNet models.
This function generates a family of ResNet v2 models. See the resnet_v2_*()
methods for specific model instantiations, obtained by selecting different
block instantiations that produce ResNets of various depths.
Training for image classification on Imagenet is usually done with [224, 224]
inputs, resulting in [7, 7] feature maps at the output of the last ResNet
block for the ResNets defined in [1] that have nominal stride equal to 32.
However, for dense prediction tasks we advise that one uses inputs with
spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
this case the feature maps at the ResNet output will have spatial shape
[(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
and corners exactly aligned with the input image corners, which greatly
facilitates alignment of the features to the image. Using as input [225, 225]
images results in [8, 8] feature maps at the output of the last ResNet block.
For dense prediction tasks, the ResNet needs to run in fully-convolutional
(FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
have nominal stride equal to 32 and a good choice in FCN mode is to use
output_stride=16 in order to increase the density of the computed features at
small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
Args:
inputs: A tensor of size [batch, height_in, width_in, channels].
blocks: A list of length equal to the number of ResNet blocks. Each element
is a resnet_utils.Block object describing the units in the block.
num_classes: Number of predicted classes for classification tasks. If None
we return the features before the logit layer.
is_training: whether the batch_norm layers are in training mode.
global_pool: If True, we perform global average pooling before computing the
logits. Set to True for image classification, False for dense prediction.
output_stride: If None, then the output will be computed at the nominal
network stride. If output_stride is not None, it specifies the requested
ratio of input to output spatial resolution.
include_root_block: If True, include the initial convolution followed by
max-pooling; if False, exclude it. If excluded, `inputs` should be the
result of an activation-less convolution.
reuse: whether or not the network and its variables should be reused. To be
able to reuse, 'scope' must be given.
scope: Optional variable_scope.
Returns:
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
If global_pool is False, then height_out and width_out are reduced by a
factor of output_stride compared to the respective height_in and width_in,
else both height_out and width_out equal one. If num_classes is None, then
net is the output of the last ResNet block, potentially after global
average pooling. If num_classes is not None, net contains the pre-softmax
activations.
end_points: A dictionary from components of the network to the corresponding
activation.
Raises:
ValueError: If the target output_stride is not valid.
"""
with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
end_points_collection = sc.name + '_end_points'
with slim.arg_scope([slim.conv2d, bottleneck,
resnet_utils.stack_blocks_dense],
outputs_collections=end_points_collection):
with slim.arg_scope([slim.batch_norm], is_training=is_training):
net = inputs
if include_root_block:
if output_stride is not None:
if output_stride % 4 != 0:
raise ValueError('The output_stride needs to be a multiple of 4.')
output_stride //= 4  # Integer division; plain '/' yields a float under future division.
# We do not include batch normalization or activation functions in
# conv1 because the first ResNet unit will perform these. Cf.
# Appendix of [2].
with slim.arg_scope([slim.conv2d],
activation_fn=None, normalizer_fn=None):
net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
# This is needed because the pre-activation variant does not have batch
# normalization or activation functions in the residual unit output. See
# Appendix of [2].
net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
if global_pool:
# Global average pooling.
net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
if num_classes is not None:
net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, scope='logits')
# Convert end_points_collection into a dictionary of end_points.
end_points = dict(tf.get_collection(end_points_collection))
if num_classes is not None:
end_points['predictions'] = slim.softmax(net, scope='predictions')
return net, end_points
resnet_v2.default_image_size = 224
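# --- Editor's note, illustrative only ---
# Worked example of the output-size rule quoted in the resnet_v2()
# docstring: for inputs of spatial size (multiple of output_stride) + 1,
# the output has size (size - 1) / output_stride + 1.
def _expected_output_size(size, output_stride):
  return (size - 1) // output_stride + 1
# _expected_output_size(321, 16) == 21  (the dense-prediction example)
# _expected_output_size(225, 32) == 8   (the [8, 8] feature maps above)
# 224 / 32 == 7, hence the [7, 7] maps for Imagenet classification.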
def resnet_v2_50(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
reuse=None,
scope='resnet_v2_50'):
"""ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
blocks = [
resnet_utils.Block(
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block(
'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
resnet_utils.Block(
'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
resnet_utils.Block(
'block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
global_pool=global_pool, output_stride=output_stride,
include_root_block=True, reuse=reuse, scope=scope)
def resnet_v2_101(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
reuse=None,
scope='resnet_v2_101'):
"""ResNet-101 model of [1]. See resnet_v2() for arg and return description."""
blocks = [
resnet_utils.Block(
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block(
'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
resnet_utils.Block(
'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
resnet_utils.Block(
'block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
global_pool=global_pool, output_stride=output_stride,
include_root_block=True, reuse=reuse, scope=scope)
def resnet_v2_152(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
reuse=None,
scope='resnet_v2_152'):
"""ResNet-152 model of [1]. See resnet_v2() for arg and return description."""
blocks = [
resnet_utils.Block(
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block(
'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
resnet_utils.Block(
'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
resnet_utils.Block(
'block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
global_pool=global_pool, output_stride=output_stride,
include_root_block=True, reuse=reuse, scope=scope)
def resnet_v2_200(inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
reuse=None,
scope='resnet_v2_200'):
"""ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
blocks = [
resnet_utils.Block(
'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
resnet_utils.Block(
'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
resnet_utils.Block(
'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
resnet_utils.Block(
'block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
global_pool=global_pool, output_stride=output_stride,
include_root_block=True, reuse=reuse, scope=scope)
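# --- Editor's note, illustrative only ---
# Sanity check on the model names: each bottleneck unit contains three
# conv layers, and the root conv1 plus the logits layer add two more, so
# with the unit counts per block defined above:
#   resnet_v2_50:  3 * (3 + 4 + 6 + 3)   + 2 = 50
#   resnet_v2_101: 3 * (3 + 4 + 23 + 3)  + 2 = 101
#   resnet_v2_152: 3 * (3 + 8 + 36 + 3)  + 2 = 152
#   resnet_v2_200: 3 * (3 + 24 + 36 + 3) + 2 = 200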
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.resnet_v2."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from nets import resnet_utils
from nets import resnet_v2
slim = tf.contrib.slim
def create_test_input(batch_size, height, width, channels):
"""Create test input tensor.
Args:
batch_size: The number of images per batch or `None` if unknown.
height: The height of each image or `None` if unknown.
width: The width of each image or `None` if unknown.
channels: The number of channels per image or `None` if unknown.
Returns:
Either a placeholder `Tensor` of dimension
[batch_size, height, width, channels] if any of the inputs are `None` or a
constant `Tensor` with the mesh grid values along the spatial dimensions.
"""
if None in [batch_size, height, width, channels]:
return tf.placeholder(tf.float32, (batch_size, height, width, channels))
else:
return tf.to_float(
np.tile(
np.reshape(
np.reshape(np.arange(height), [height, 1]) +
np.reshape(np.arange(width), [1, width]),
[1, height, width, 1]),
[batch_size, 1, 1, channels]))
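# --- Editor's note, illustrative only ---
# For height = width = 2 the mesh grid above evaluates to
#   np.reshape(np.arange(2), [2, 1]) + np.reshape(np.arange(2), [1, 2])
#   == [[0, 1],
#       [1, 2]]
# i.e. each spatial position holds row_index + col_index, tiled across the
# batch and channel dimensions, which makes the subsampling expectations
# in the tests below easy to verify by hand.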
class ResnetUtilsTest(tf.test.TestCase):
def testSubsampleThreeByThree(self):
x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])
x = resnet_utils.subsample(x, 2)
expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])
with self.test_session():
self.assertAllClose(x.eval(), expected.eval())
def testSubsampleFourByFour(self):
x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
x = resnet_utils.subsample(x, 2)
expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])
with self.test_session():
self.assertAllClose(x.eval(), expected.eval())
def testConv2DSameEven(self):
n, n2 = 4, 2
# Input image.
x = create_test_input(1, n, n, 1)
# Convolution kernel.
w = create_test_input(1, 3, 3, 1)
w = tf.reshape(w, [3, 3, 1, 1])
tf.get_variable('Conv/weights', initializer=w)
tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
tf.get_variable_scope().reuse_variables()
y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
y1_expected = tf.to_float([[14, 28, 43, 26],
[28, 48, 66, 37],
[43, 66, 84, 46],
[26, 37, 46, 22]])
y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
y2 = resnet_utils.subsample(y1, 2)
y2_expected = tf.to_float([[14, 43],
[43, 84]])
y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
y3_expected = y2_expected
y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
y4_expected = tf.to_float([[48, 37],
[37, 22]])
y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])
with self.test_session() as sess:
sess.run(tf.initialize_all_variables())
self.assertAllClose(y1.eval(), y1_expected.eval())
self.assertAllClose(y2.eval(), y2_expected.eval())
self.assertAllClose(y3.eval(), y3_expected.eval())
self.assertAllClose(y4.eval(), y4_expected.eval())
def testConv2DSameOdd(self):
n, n2 = 5, 3
# Input image.
x = create_test_input(1, n, n, 1)
# Convolution kernel.
w = create_test_input(1, 3, 3, 1)
w = tf.reshape(w, [3, 3, 1, 1])
tf.get_variable('Conv/weights', initializer=w)
tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
tf.get_variable_scope().reuse_variables()
y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
y1_expected = tf.to_float([[14, 28, 43, 58, 34],
[28, 48, 66, 84, 46],
[43, 66, 84, 102, 55],
[58, 84, 102, 120, 64],
[34, 46, 55, 64, 30]])
y1_expected = tf.reshape(y1_expected, [1, n, n, 1])
y2 = resnet_utils.subsample(y1, 2)
y2_expected = tf.to_float([[14, 43, 34],
[43, 84, 55],
[34, 55, 30]])
y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])
y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
y3_expected = y2_expected
y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
y4_expected = y2_expected
with self.test_session() as sess:
sess.run(tf.initialize_all_variables())
self.assertAllClose(y1.eval(), y1_expected.eval())
self.assertAllClose(y2.eval(), y2_expected.eval())
self.assertAllClose(y3.eval(), y3_expected.eval())
self.assertAllClose(y4.eval(), y4_expected.eval())
def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
"""A plain ResNet without extra layers before or after the ResNet blocks."""
with tf.variable_scope(scope, values=[inputs]):
with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
end_points = dict(tf.get_collection('end_points'))
return net, end_points
def testEndPointsV2(self):
"""Test the end points of a tiny v2 bottleneck network."""
bottleneck = resnet_v2.bottleneck
blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]
inputs = create_test_input(2, 32, 16, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
expected = [
'tiny/block1/unit_1/bottleneck_v2/shortcut',
'tiny/block1/unit_1/bottleneck_v2/conv1',
'tiny/block1/unit_1/bottleneck_v2/conv2',
'tiny/block1/unit_1/bottleneck_v2/conv3',
'tiny/block1/unit_2/bottleneck_v2/conv1',
'tiny/block1/unit_2/bottleneck_v2/conv2',
'tiny/block1/unit_2/bottleneck_v2/conv3',
'tiny/block2/unit_1/bottleneck_v2/shortcut',
'tiny/block2/unit_1/bottleneck_v2/conv1',
'tiny/block2/unit_1/bottleneck_v2/conv2',
'tiny/block2/unit_1/bottleneck_v2/conv3',
'tiny/block2/unit_2/bottleneck_v2/conv1',
'tiny/block2/unit_2/bottleneck_v2/conv2',
'tiny/block2/unit_2/bottleneck_v2/conv3']
self.assertItemsEqual(expected, end_points)
def _stack_blocks_nondense(self, net, blocks):
"""A simplified ResNet Block stacker without output stride control."""
for block in blocks:
with tf.variable_scope(block.scope, 'block', [net]):
for i, unit in enumerate(block.args):
depth, depth_bottleneck, stride = unit
with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
net = block.unit_fn(net,
depth=depth,
depth_bottleneck=depth_bottleneck,
stride=stride,
rate=1)
return net
def _atrousValues(self, bottleneck):
"""Verify the values of dense feature extraction by atrous convolution.
Make sure that dense feature extraction by stack_blocks_dense() followed by
subsampling gives identical results to feature extraction at the nominal
network output stride using the simple self._stack_blocks_nondense() above.
Args:
bottleneck: The bottleneck function.
"""
blocks = [
resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
]
nominal_stride = 8
# Test both odd and even input dimensions.
height = 30
width = 31
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
with slim.arg_scope([slim.batch_norm], is_training=False):
for output_stride in [1, 2, 4, 8, None]:
with tf.Graph().as_default():
with self.test_session() as sess:
tf.set_random_seed(0)
inputs = create_test_input(1, height, width, 3)
# Dense feature extraction followed by subsampling.
output = resnet_utils.stack_blocks_dense(inputs,
blocks,
output_stride)
if output_stride is None:
factor = 1
else:
factor = nominal_stride // output_stride
output = resnet_utils.subsample(output, factor)
# Make the two networks use the same weights.
tf.get_variable_scope().reuse_variables()
# Feature extraction at the nominal network rate.
expected = self._stack_blocks_nondense(inputs, blocks)
sess.run(tf.initialize_all_variables())
output, expected = sess.run([output, expected])
self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)
def testAtrousValuesBottleneck(self):
self._atrousValues(resnet_v2.bottleneck)
class ResnetCompleteNetworkTest(tf.test.TestCase):
"""Tests with complete small ResNet v2 networks."""
def _resnet_small(self,
inputs,
num_classes=None,
is_training=True,
global_pool=True,
output_stride=None,
include_root_block=True,
reuse=None,
scope='resnet_v2_small'):
"""A shallow and thin ResNet v2 for faster tests."""
bottleneck = resnet_v2.bottleneck
blocks = [
resnet_utils.Block(
'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
resnet_utils.Block(
'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
resnet_utils.Block(
'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
resnet_utils.Block(
'block4', bottleneck, [(32, 8, 1)] * 2)]
return resnet_v2.resnet_v2(inputs, blocks, num_classes,
is_training=is_training,
global_pool=global_pool,
output_stride=output_stride,
include_root_block=include_root_block,
reuse=reuse,
scope=scope)
def testClassificationEndPoints(self):
global_pool = True
num_classes = 10
inputs = create_test_input(2, 224, 224, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
logits, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool,
scope='resnet')
self.assertTrue(logits.op.name.startswith('resnet/logits'))
self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
self.assertTrue('predictions' in end_points)
self.assertListEqual(end_points['predictions'].get_shape().as_list(),
[2, 1, 1, num_classes])
def testClassificationShapes(self):
global_pool = True
num_classes = 10
inputs = create_test_input(2, 224, 224, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool,
scope='resnet')
endpoint_to_shape = {
'resnet/block1': [2, 28, 28, 4],
'resnet/block2': [2, 14, 14, 8],
'resnet/block3': [2, 7, 7, 16],
'resnet/block4': [2, 7, 7, 32]}
for endpoint in endpoint_to_shape:
shape = endpoint_to_shape[endpoint]
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
def testFullyConvolutionalEndpointShapes(self):
global_pool = False
num_classes = 10
inputs = create_test_input(2, 321, 321, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool,
scope='resnet')
endpoint_to_shape = {
'resnet/block1': [2, 41, 41, 4],
'resnet/block2': [2, 21, 21, 8],
'resnet/block3': [2, 11, 11, 16],
'resnet/block4': [2, 11, 11, 32]}
for endpoint in endpoint_to_shape:
shape = endpoint_to_shape[endpoint]
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
def testRootlessFullyConvolutionalEndpointShapes(self):
global_pool = False
num_classes = 10
inputs = create_test_input(2, 128, 128, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool,
include_root_block=False,
scope='resnet')
endpoint_to_shape = {
'resnet/block1': [2, 64, 64, 4],
'resnet/block2': [2, 32, 32, 8],
'resnet/block3': [2, 16, 16, 16],
'resnet/block4': [2, 16, 16, 32]}
for endpoint in endpoint_to_shape:
shape = endpoint_to_shape[endpoint]
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
def testAtrousFullyConvolutionalEndpointShapes(self):
global_pool = False
num_classes = 10
output_stride = 8
inputs = create_test_input(2, 321, 321, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_small(inputs,
num_classes,
global_pool=global_pool,
output_stride=output_stride,
scope='resnet')
endpoint_to_shape = {
'resnet/block1': [2, 41, 41, 4],
'resnet/block2': [2, 41, 41, 8],
'resnet/block3': [2, 41, 41, 16],
'resnet/block4': [2, 41, 41, 32]}
for endpoint in endpoint_to_shape:
shape = endpoint_to_shape[endpoint]
self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)
def testAtrousFullyConvolutionalValues(self):
"""Verify dense feature extraction with atrous convolution."""
nominal_stride = 32
for output_stride in [4, 8, 16, 32, None]:
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
with tf.Graph().as_default():
with self.test_session() as sess:
tf.set_random_seed(0)
inputs = create_test_input(2, 81, 81, 3)
# Dense feature extraction followed by subsampling.
output, _ = self._resnet_small(inputs, None,
is_training=False,
global_pool=False,
output_stride=output_stride)
if output_stride is None:
factor = 1
else:
factor = nominal_stride // output_stride
output = resnet_utils.subsample(output, factor)
# Make the two networks use the same weights.
tf.get_variable_scope().reuse_variables()
# Feature extraction at the nominal network rate.
expected, _ = self._resnet_small(inputs, None,
is_training=False,
global_pool=False)
sess.run(tf.initialize_all_variables())
self.assertAllClose(output.eval(), expected.eval(),
atol=1e-4, rtol=1e-4)
def testUnknownBatchSize(self):
batch = 2
height, width = 65, 65
global_pool = True
num_classes = 10
inputs = create_test_input(None, height, width, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
logits, _ = self._resnet_small(inputs, num_classes,
global_pool=global_pool,
scope='resnet')
self.assertTrue(logits.op.name.startswith('resnet/logits'))
self.assertListEqual(logits.get_shape().as_list(),
[None, 1, 1, num_classes])
images = create_test_input(batch, height, width, 3)
with self.test_session() as sess:
sess.run(tf.initialize_all_variables())
output = sess.run(logits, {inputs: images.eval()})
self.assertEqual(output.shape, (batch, 1, 1, num_classes))
def testFullyConvolutionalUnknownHeightWidth(self):
batch = 2
height, width = 65, 65
global_pool = False
inputs = create_test_input(batch, None, None, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
output, _ = self._resnet_small(inputs, None,
global_pool=global_pool)
self.assertListEqual(output.get_shape().as_list(),
[batch, None, None, 32])
images = create_test_input(batch, height, width, 3)
with self.test_session() as sess:
sess.run(tf.initialize_all_variables())
output = sess.run(output, {inputs: images.eval()})
self.assertEqual(output.shape, (batch, 3, 3, 32))
def testAtrousFullyConvolutionalUnknownHeightWidth(self):
batch = 2
height, width = 65, 65
global_pool = False
output_stride = 8
inputs = create_test_input(batch, None, None, 3)
with slim.arg_scope(resnet_utils.resnet_arg_scope()):
output, _ = self._resnet_small(inputs,
None,
global_pool=global_pool,
output_stride=output_stride)
self.assertListEqual(output.get_shape().as_list(),
[batch, None, None, 32])
images = create_test_input(batch, height, width, 3)
with self.test_session() as sess:
sess.run(tf.initialize_all_variables())
output = sess.run(output, {inputs: images.eval()})
self.assertEqual(output.shape, (batch, 9, 9, 32))
if __name__ == '__main__':
tf.test.main()
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains model definitions for versions of the Oxford VGG network.
These model definitions were introduced in the following technical report:
Very Deep Convolutional Networks For Large-Scale Image Recognition
Karen Simonyan and Andrew Zisserman
arXiv technical report, 2015
PDF: http://arxiv.org/pdf/1409.1556.pdf
ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
CC-BY-4.0
More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/
Usage:
with slim.arg_scope(vgg.vgg_arg_scope()):
outputs, end_points = vgg.vgg_a(inputs)
with slim.arg_scope(vgg.vgg_arg_scope()):
outputs, end_points = vgg.vgg_16(inputs)
@@vgg_a
@@vgg_16
@@vgg_19
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
slim = tf.contrib.slim
def vgg_arg_scope(weight_decay=0.0005):
"""Defines the VGG arg scope.
Args:
weight_decay: The l2 regularization coefficient.
Returns:
An arg_scope.
"""
with slim.arg_scope([slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
weights_regularizer=slim.l2_regularizer(weight_decay),
biases_initializer=tf.zeros_initializer):
with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
return arg_sc
def vgg_a(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='vgg_a'):
"""Oxford Net VGG 11-Layers version A Example.
Note: All the fully_connected layers have been transformed to conv2d layers.
To use in classification mode, resize input to 224x224.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes.
is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout
layers during training.
spatial_squeeze: whether or not to squeeze the spatial dimensions of the
outputs. Useful to remove unnecessary dimensions for classification.
scope: Optional scope for the variables.
Returns:
the last op containing the log predictions and end_points dict.
"""
with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
end_points_collection = sc.name + '_end_points'
# Collect outputs for conv2d and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.max_pool2d],
outputs_collections=end_points_collection):
net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
net = slim.max_pool2d(net, [2, 2], scope='pool1')
net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
net = slim.max_pool2d(net, [2, 2], scope='pool2')
net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
net = slim.max_pool2d(net, [2, 2], scope='pool3')
net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
net = slim.max_pool2d(net, [2, 2], scope='pool4')
net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [2, 2], scope='pool5')
# Use conv2d instead of fully_connected layers.
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout7')
net = slim.conv2d(net, num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
scope='fc8')
# Convert end_points_collection into an end_points dict.
end_points = dict(tf.get_collection(end_points_collection))
if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points
vgg_a.default_image_size = 224
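# --- Editor's sketch, not part of the original module ---
# Hedged example of the classification mode described in the vgg_a
# docstring, assuming the TF 1.x / tf.contrib.slim environment this file
# targets: 224x224 inputs yield squeezed [batch, num_classes] logits.
def _vgg_a_example():
  images = tf.placeholder(tf.float32, [None, 224, 224, 3])
  with slim.arg_scope(vgg_arg_scope()):
    logits, end_points = vgg_a(images, num_classes=1000, is_training=False)
  return logits  # shape: [None, 1000]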
def vgg_16(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='vgg_16'):
"""Oxford Net VGG 16-Layers version D Example.
Note: All the fully_connected layers have been transformed to conv2d layers.
To use in classification mode, resize input to 224x224.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes.
is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout
layers during training.
spatial_squeeze: whether or not to squeeze the spatial dimensions of the
outputs. Useful to remove unnecessary dimensions for classification.
scope: Optional scope for the variables.
Returns:
the last op containing the log predictions and end_points dict.
"""
with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
end_points_collection = sc.name + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=end_points_collection):
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
net = slim.max_pool2d(net, [2, 2], scope='pool1')
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
net = slim.max_pool2d(net, [2, 2], scope='pool2')
net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
net = slim.max_pool2d(net, [2, 2], scope='pool3')
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
net = slim.max_pool2d(net, [2, 2], scope='pool4')
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [2, 2], scope='pool5')
# Use conv2d instead of fully_connected layers.
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout7')
net = slim.conv2d(net, num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
scope='fc8')
# Convert end_points_collection into an end_points dict.
end_points = dict(tf.get_collection(end_points_collection))
if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points
vgg_16.default_image_size = 224
def vgg_19(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='vgg_19'):
"""Oxford Net VGG 19-Layers version E Example.
Note: All the fully_connected layers have been transformed to conv2d layers.
To use in classification mode, resize input to 224x224.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes.
is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout
layers during training.
spatial_squeeze: whether or not to squeeze the spatial dimensions of the
outputs. Useful to remove unnecessary dimensions for classification.
scope: Optional scope for the variables.
Returns:
the last op containing the log predictions and end_points dict.
"""
with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
end_points_collection = sc.name + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=end_points_collection):
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
net = slim.max_pool2d(net, [2, 2], scope='pool1')
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
net = slim.max_pool2d(net, [2, 2], scope='pool2')
net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
net = slim.max_pool2d(net, [2, 2], scope='pool3')
net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
net = slim.max_pool2d(net, [2, 2], scope='pool4')
net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [2, 2], scope='pool5')
# Use conv2d instead of fully_connected layers.
net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout7')
net = slim.conv2d(net, num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
scope='fc8')
# Convert end_points_collection into an end_points dict.
end_points = dict(tf.get_collection(end_points_collection))
if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points
vgg_19.default_image_size = 224
# Alias
vgg_d = vgg_16
vgg_e = vgg_19
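# --- Editor's note, illustrative only ---
# Why the tests below see 2x2 logits for 256x256 inputs in
# fully-convolutional mode: five 2x2/stride-2 max-pools reduce 256 -> 8,
# and the 7x7 VALID conv in fc6 leaves 8 - 7 + 1 = 2 spatial positions.
# For 224 inputs, 224 / 32 = 7 and 7 - 7 + 1 = 1, which is why
# spatial_squeeze can drop dimensions 1 and 2 in classification mode.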
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.vgg."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets import vgg
slim = tf.contrib.slim
class VGGATest(tf.test.TestCase):
def testBuild(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_a(inputs, num_classes)
self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testFullyConvolutional(self):
batch_size = 1
height, width = 256, 256
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False)
self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 2, 2, num_classes])
def testEndPoints(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
_, end_points = vgg.vgg_a(inputs, num_classes)
expected_names = ['vgg_a/conv1/conv1_1',
'vgg_a/pool1',
'vgg_a/conv2/conv2_1',
'vgg_a/pool2',
'vgg_a/conv3/conv3_1',
'vgg_a/conv3/conv3_2',
'vgg_a/pool3',
'vgg_a/conv4/conv4_1',
'vgg_a/conv4/conv4_2',
'vgg_a/pool4',
'vgg_a/conv5/conv5_1',
'vgg_a/conv5/conv5_2',
'vgg_a/pool5',
'vgg_a/fc6',
'vgg_a/fc7',
'vgg_a/fc8'
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
def testModelVariables(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
vgg.vgg_a(inputs, num_classes)
expected_names = ['vgg_a/conv1/conv1_1/weights',
'vgg_a/conv1/conv1_1/biases',
'vgg_a/conv2/conv2_1/weights',
'vgg_a/conv2/conv2_1/biases',
'vgg_a/conv3/conv3_1/weights',
'vgg_a/conv3/conv3_1/biases',
'vgg_a/conv3/conv3_2/weights',
'vgg_a/conv3/conv3_2/biases',
'vgg_a/conv4/conv4_1/weights',
'vgg_a/conv4/conv4_1/biases',
'vgg_a/conv4/conv4_2/weights',
'vgg_a/conv4/conv4_2/biases',
'vgg_a/conv5/conv5_1/weights',
'vgg_a/conv5/conv5_1/biases',
'vgg_a/conv5/conv5_2/weights',
'vgg_a/conv5/conv5_2/biases',
'vgg_a/fc6/weights',
'vgg_a/fc6/biases',
'vgg_a/fc7/weights',
'vgg_a/fc7/biases',
'vgg_a/fc8/weights',
'vgg_a/fc8/biases',
]
model_variables = [v.op.name for v in slim.get_model_variables()]
self.assertSetEqual(set(model_variables), set(expected_names))
def testEvaluation(self):
batch_size = 2
height, width = 224, 224
num_classes = 1000
with self.test_session():
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_a(eval_inputs, is_training=False)
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
predictions = tf.argmax(logits, 1)
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
def testTrainEvalWithReuse(self):
train_batch_size = 2
eval_batch_size = 1
train_height, train_width = 224, 224
eval_height, eval_width = 256, 256
num_classes = 1000
with self.test_session():
train_inputs = tf.random_uniform(
(train_batch_size, train_height, train_width, 3))
logits, _ = vgg.vgg_a(train_inputs)
self.assertListEqual(logits.get_shape().as_list(),
[train_batch_size, num_classes])
tf.get_variable_scope().reuse_variables()
eval_inputs = tf.random_uniform(
(eval_batch_size, eval_height, eval_width, 3))
logits, _ = vgg.vgg_a(eval_inputs, is_training=False,
spatial_squeeze=False)
self.assertListEqual(logits.get_shape().as_list(),
[eval_batch_size, 2, 2, num_classes])
logits = tf.reduce_mean(logits, [1, 2])
predictions = tf.argmax(logits, 1)
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
def testForward(self):
batch_size = 1
height, width = 224, 224
with self.test_session() as sess:
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_a(inputs)
sess.run(tf.initialize_all_variables())
output = sess.run(logits)
self.assertTrue(output.any())
class VGG16Test(tf.test.TestCase):
def testBuild(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_16(inputs, num_classes)
self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testFullyConvolutional(self):
batch_size = 1
height, width = 256, 256
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 2, 2, num_classes])
def testEndPoints(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
_, end_points = vgg.vgg_16(inputs, num_classes)
expected_names = ['vgg_16/conv1/conv1_1',
'vgg_16/conv1/conv1_2',
'vgg_16/pool1',
'vgg_16/conv2/conv2_1',
'vgg_16/conv2/conv2_2',
'vgg_16/pool2',
'vgg_16/conv3/conv3_1',
'vgg_16/conv3/conv3_2',
'vgg_16/conv3/conv3_3',
'vgg_16/pool3',
'vgg_16/conv4/conv4_1',
'vgg_16/conv4/conv4_2',
'vgg_16/conv4/conv4_3',
'vgg_16/pool4',
'vgg_16/conv5/conv5_1',
'vgg_16/conv5/conv5_2',
'vgg_16/conv5/conv5_3',
'vgg_16/pool5',
'vgg_16/fc6',
'vgg_16/fc7',
'vgg_16/fc8'
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
def testModelVariables(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
vgg.vgg_16(inputs, num_classes)
expected_names = ['vgg_16/conv1/conv1_1/weights',
'vgg_16/conv1/conv1_1/biases',
'vgg_16/conv1/conv1_2/weights',
'vgg_16/conv1/conv1_2/biases',
'vgg_16/conv2/conv2_1/weights',
'vgg_16/conv2/conv2_1/biases',
'vgg_16/conv2/conv2_2/weights',
'vgg_16/conv2/conv2_2/biases',
'vgg_16/conv3/conv3_1/weights',
'vgg_16/conv3/conv3_1/biases',
'vgg_16/conv3/conv3_2/weights',
'vgg_16/conv3/conv3_2/biases',
'vgg_16/conv3/conv3_3/weights',
'vgg_16/conv3/conv3_3/biases',
'vgg_16/conv4/conv4_1/weights',
'vgg_16/conv4/conv4_1/biases',
'vgg_16/conv4/conv4_2/weights',
'vgg_16/conv4/conv4_2/biases',
'vgg_16/conv4/conv4_3/weights',
'vgg_16/conv4/conv4_3/biases',
'vgg_16/conv5/conv5_1/weights',
'vgg_16/conv5/conv5_1/biases',
'vgg_16/conv5/conv5_2/weights',
'vgg_16/conv5/conv5_2/biases',
'vgg_16/conv5/conv5_3/weights',
'vgg_16/conv5/conv5_3/biases',
'vgg_16/fc6/weights',
'vgg_16/fc6/biases',
'vgg_16/fc7/weights',
'vgg_16/fc7/biases',
'vgg_16/fc8/weights',
'vgg_16/fc8/biases',
]
model_variables = [v.op.name for v in slim.get_model_variables()]
self.assertSetEqual(set(model_variables), set(expected_names))
def testEvaluation(self):
batch_size = 2
height, width = 224, 224
num_classes = 1000
with self.test_session():
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
predictions = tf.argmax(logits, 1)
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
def testTrainEvalWithReuse(self):
train_batch_size = 2
eval_batch_size = 1
train_height, train_width = 224, 224
eval_height, eval_width = 256, 256
num_classes = 1000
with self.test_session():
train_inputs = tf.random_uniform(
(train_batch_size, train_height, train_width, 3))
logits, _ = vgg.vgg_16(train_inputs)
self.assertListEqual(logits.get_shape().as_list(),
[train_batch_size, num_classes])
tf.get_variable_scope().reuse_variables()
eval_inputs = tf.random_uniform(
(eval_batch_size, eval_height, eval_width, 3))
logits, _ = vgg.vgg_16(eval_inputs, is_training=False,
spatial_squeeze=False)
self.assertListEqual(logits.get_shape().as_list(),
[eval_batch_size, 2, 2, num_classes])
logits = tf.reduce_mean(logits, [1, 2])
predictions = tf.argmax(logits, 1)
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
def testForward(self):
batch_size = 1
height, width = 224, 224
with self.test_session() as sess:
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_16(inputs)
sess.run(tf.initialize_all_variables())
output = sess.run(logits)
self.assertTrue(output.any())
class VGG19Test(tf.test.TestCase):
def testBuild(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_19(inputs, num_classes)
self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testFullyConvolutional(self):
batch_size = 1
height, width = 256, 256
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False)
self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 2, 2, num_classes])
def testEndPoints(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
_, end_points = vgg.vgg_19(inputs, num_classes)
expected_names = [
'vgg_19/conv1/conv1_1',
'vgg_19/conv1/conv1_2',
'vgg_19/pool1',
'vgg_19/conv2/conv2_1',
'vgg_19/conv2/conv2_2',
'vgg_19/pool2',
'vgg_19/conv3/conv3_1',
'vgg_19/conv3/conv3_2',
'vgg_19/conv3/conv3_3',
'vgg_19/conv3/conv3_4',
'vgg_19/pool3',
'vgg_19/conv4/conv4_1',
'vgg_19/conv4/conv4_2',
'vgg_19/conv4/conv4_3',
'vgg_19/conv4/conv4_4',
'vgg_19/pool4',
'vgg_19/conv5/conv5_1',
'vgg_19/conv5/conv5_2',
'vgg_19/conv5/conv5_3',
'vgg_19/conv5/conv5_4',
'vgg_19/pool5',
'vgg_19/fc6',
'vgg_19/fc7',
'vgg_19/fc8'
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
def testModelVariables(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
vgg.vgg_19(inputs, num_classes)
expected_names = [
'vgg_19/conv1/conv1_1/weights',
'vgg_19/conv1/conv1_1/biases',
'vgg_19/conv1/conv1_2/weights',
'vgg_19/conv1/conv1_2/biases',
'vgg_19/conv2/conv2_1/weights',
'vgg_19/conv2/conv2_1/biases',
'vgg_19/conv2/conv2_2/weights',
'vgg_19/conv2/conv2_2/biases',
'vgg_19/conv3/conv3_1/weights',
'vgg_19/conv3/conv3_1/biases',
'vgg_19/conv3/conv3_2/weights',
'vgg_19/conv3/conv3_2/biases',
'vgg_19/conv3/conv3_3/weights',
'vgg_19/conv3/conv3_3/biases',
'vgg_19/conv3/conv3_4/weights',
'vgg_19/conv3/conv3_4/biases',
'vgg_19/conv4/conv4_1/weights',
'vgg_19/conv4/conv4_1/biases',
'vgg_19/conv4/conv4_2/weights',
'vgg_19/conv4/conv4_2/biases',
'vgg_19/conv4/conv4_3/weights',
'vgg_19/conv4/conv4_3/biases',
'vgg_19/conv4/conv4_4/weights',
'vgg_19/conv4/conv4_4/biases',
'vgg_19/conv5/conv5_1/weights',
'vgg_19/conv5/conv5_1/biases',
'vgg_19/conv5/conv5_2/weights',
'vgg_19/conv5/conv5_2/biases',
'vgg_19/conv5/conv5_3/weights',
'vgg_19/conv5/conv5_3/biases',
'vgg_19/conv5/conv5_4/weights',
'vgg_19/conv5/conv5_4/biases',
'vgg_19/fc6/weights',
'vgg_19/fc6/biases',
'vgg_19/fc7/weights',
'vgg_19/fc7/biases',
'vgg_19/fc8/weights',
'vgg_19/fc8/biases',
]
model_variables = [v.op.name for v in slim.get_model_variables()]
self.assertSetEqual(set(model_variables), set(expected_names))
def testEvaluation(self):
batch_size = 2
height, width = 224, 224
num_classes = 1000
with self.test_session():
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_19(eval_inputs, is_training=False)
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
predictions = tf.argmax(logits, 1)
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
def testTrainEvalWithReuse(self):
train_batch_size = 2
eval_batch_size = 1
train_height, train_width = 224, 224
eval_height, eval_width = 256, 256
num_classes = 1000
with self.test_session():
train_inputs = tf.random_uniform(
(train_batch_size, train_height, train_width, 3))
logits, _ = vgg.vgg_19(train_inputs)
self.assertListEqual(logits.get_shape().as_list(),
[train_batch_size, num_classes])
tf.get_variable_scope().reuse_variables()
eval_inputs = tf.random_uniform(
(eval_batch_size, eval_height, eval_width, 3))
logits, _ = vgg.vgg_19(eval_inputs, is_training=False,
spatial_squeeze=False)
self.assertListEqual(logits.get_shape().as_list(),
[eval_batch_size, 2, 2, num_classes])
logits = tf.reduce_mean(logits, [1, 2])
predictions = tf.argmax(logits, 1)
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
def testForward(self):
batch_size = 1
height, width = 224, 224
with self.test_session() as sess:
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = vgg.vgg_19(inputs)
sess.run(tf.initialize_all_variables())
output = sess.run(logits)
self.assertTrue(output.any())
if __name__ == '__main__':
tf.test.main()
@@ -12,20 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Provides utilities to preprocess images.
-The preprocessing steps for VGG were introduced in the following technical
-report:
-  Very Deep Convolutional Networks For Large-Scale Image Recognition
-  Karen Simonyan and Andrew Zisserman
-  arXiv technical report, 2015
-  PDF: http://arxiv.org/pdf/1409.1556.pdf
-  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
-  CC-BY-4.0
-More information can be obtained from the VGG website:
-www.robots.ox.ac.uk/~vgg/research/very_deep/
+"""Provides utilities to preprocess images in CIFAR-10.
 """
from __future__ import absolute_import
@@ -34,7 +22,7 @@ from __future__ import print_function
 import tensorflow as tf
-_PADDING = 2
+_PADDING = 4
slim = tf.contrib.slim
@@ -57,21 +45,27 @@ def preprocess_for_train(image,
   Returns:
     A preprocessed image.
   """
-  padded_image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
+  tf.image_summary('image', tf.expand_dims(image, 0))
+  # Transform the image to floats.
+  image = tf.to_float(image)
+  if padding > 0:
+    image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
   # Randomly crop a [height, width] section of the image.
-  distorted_image = tf.random_crop(padded_image,
+  distorted_image = tf.random_crop(image,
                                    [output_height, output_width, 3])
   # Randomly flip the image horizontally.
   distorted_image = tf.image.random_flip_left_right(distorted_image)
+  tf.image_summary('distorted_image', tf.expand_dims(distorted_image, 0))
   # Because these operations are not commutative, consider randomizing
   # the order of their operation.
   distorted_image = tf.image.random_brightness(distorted_image,
                                                max_delta=63)
   distorted_image = tf.image.random_contrast(distorted_image,
                                              lower=0.2, upper=1.8)
   # Subtract off the mean and divide by the variance of the pixels.
   return tf.image.per_image_whitening(distorted_image)
@@ -87,9 +81,15 @@ def preprocess_for_eval(image, output_height, output_width):
   Returns:
     A preprocessed image.
   """
+  tf.image_summary('image', tf.expand_dims(image, 0))
+  # Transform the image to floats.
+  image = tf.to_float(image)
   # Resize and crop if needed.
   resized_image = tf.image.resize_image_with_crop_or_pad(image,
                                                          output_width,
                                                          output_height)
+  tf.image_summary('resized_image', tf.expand_dims(resized_image, 0))
   # Subtract off the mean and divide by the variance of the pixels.
   return tf.image.per_image_whitening(resized_image)
@@ -20,10 +20,10 @@ from __future__ import print_function
 import tensorflow as tf
-from slim.models import cifar10_preprocessing
-from slim.models import inception_preprocessing
-from slim.models import lenet_preprocessing
-from slim.models import vgg_preprocessing
+from preprocessing import cifarnet_preprocessing
+from preprocessing import inception_preprocessing
+from preprocessing import lenet_preprocessing
+from preprocessing import vgg_preprocessing
slim = tf.contrib.slim
@@ -45,11 +45,12 @@ def get_preprocessing(name, is_training=False):
     ValueError: If Preprocessing `name` is not recognized.
   """
   preprocessing_fn_map = {
-      'cifar10': cifar10_preprocessing,
+      'cifarnet': cifarnet_preprocessing,
       'inception': inception_preprocessing,
       'inception_v1': inception_preprocessing,
       'inception_v2': inception_preprocessing,
       'inception_v3': inception_preprocessing,
+      'inception_resnet_v2': inception_preprocessing,
       'lenet': lenet_preprocessing,
       'resnet_v1_50': vgg_preprocessing,
       'resnet_v1_101': vgg_preprocessing,
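# --- Editor's sketch, not part of the diff ---
# Hedged usage of the preprocessing factory whose name map is updated
# above; get_preprocessing(name, is_training=False) is taken from the hunk
# header, while the module path (preprocessing/preprocessing_factory.py)
# and the returned callable's (image, output_height, output_width)
# signature are assumptions inferred from the imports shown here.
from preprocessing import preprocessing_factory

image_preprocessing_fn = preprocessing_factory.get_preprocessing(
    'cifarnet', is_training=True)
# preprocessed = image_preprocessing_fn(raw_image, output_height, output_width)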
#!/bin/bash
#
# This script performs the following operations:
# 1. Downloads the Flowers dataset
# 2. Fine-tunes an InceptionV1 model on the Flowers training set.
# 3. Evaluates the model on the Flowers validation set.
#
# Usage:
# cd slim
# ./scripts/finetune_inception_v1_on_flowers.sh
# Where the pre-trained InceptionV1 checkpoint is saved to.
PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints
# Where the training (fine-tuned) checkpoint and logs will be saved to.
TRAIN_DIR=/tmp/flowers-models/inception_v1
# Where the dataset is saved to.
DATASET_DIR=/tmp/flowers
# Download the pre-trained checkpoint.
if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then
mkdir ${PRETRAINED_CHECKPOINT_DIR}
fi
if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt ]; then
wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz
tar -xvf inception_v1_2016_08_28.tar.gz
mv inception_v1.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt
rm inception_v1_2016_08_28.tar.gz
fi
# Download the dataset
python download_and_convert_data.py \
--dataset_name=flowers \
--dataset_dir=${DATASET_DIR}
# Fine-tune only the new layers for 3000 steps.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR} \
--dataset_name=flowers \
--dataset_split_name=train \
--dataset_dir=${DATASET_DIR} \
--model_name=inception_v1 \
--checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt \
--checkpoint_exclude_scopes=InceptionV1/Logits \
--trainable_scopes=InceptionV1/Logits \
--max_number_of_steps=3000 \
--batch_size=32 \
--learning_rate=0.01 \
--save_interval_secs=60 \
--save_summaries_secs=60 \
--log_every_n_steps=100 \
--optimizer=rmsprop \
--weight_decay=0.00004
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR} \
--eval_dir=${TRAIN_DIR} \
--dataset_name=flowers \
--dataset_split_name=validation \
--dataset_dir=${DATASET_DIR} \
--model_name=inception_v1
# Fine-tune all layers for 1000 steps.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR}/all \
--dataset_name=flowers \
--dataset_split_name=train \
--dataset_dir=${DATASET_DIR} \
--checkpoint_path=${TRAIN_DIR} \
--model_name=inception_v1 \
--max_number_of_steps=1000 \
--batch_size=32 \
--learning_rate=0.001 \
--save_interval_secs=60 \
--save_summaries_secs=60 \
--log_every_n_steps=100 \
--optimizer=rmsprop \
--weight_decay=0.00004
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR}/all \
--eval_dir=${TRAIN_DIR}/all \
--dataset_name=flowers \
--dataset_split_name=validation \
--dataset_dir=${DATASET_DIR} \
--model_name=inception_v1
#!/bin/bash
#
# This script performs the following operations:
# 1. Downloads the Flowers dataset
# 2. Fine-tunes an InceptionV3 model on the Flowers training set.
# 3. Evaluates the model on the Flowers validation set.
#
# Usage:
# cd slim
# ./scripts/finetune_inception_v3_on_flowers.sh
# Where the pre-trained InceptionV3 checkpoint is saved to.
PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints
# Where the training (fine-tuned) checkpoint and logs will be saved to.
TRAIN_DIR=/tmp/flowers-models/inception_v3
# Where the dataset is saved to.
DATASET_DIR=/tmp/flowers
# Download the pre-trained checkpoint.
if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then
mkdir ${PRETRAINED_CHECKPOINT_DIR}
fi
if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt ]; then
wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz
tar -xvf inception_v3_2016_08_28.tar.gz
mv inception_v3.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt
rm inception_v3_2016_08_28.tar.gz
fi
# Download the dataset
python download_and_convert_data.py \
--dataset_name=flowers \
--dataset_dir=${DATASET_DIR}
# Fine-tune only the new layers for 1000 steps.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR} \
--dataset_name=flowers \
--dataset_split_name=train \
--dataset_dir=${DATASET_DIR} \
--model_name=inception_v3 \
--checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt \
--checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \
--trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \
--max_number_of_steps=1000 \
--batch_size=32 \
--learning_rate=0.01 \
--learning_rate_decay_type=fixed \
--save_interval_secs=60 \
--save_summaries_secs=60 \
--log_every_n_steps=100 \
--optimizer=rmsprop \
--weight_decay=0.00004
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR} \
--eval_dir=${TRAIN_DIR} \
--dataset_name=flowers \
--dataset_split_name=validation \
--dataset_dir=${DATASET_DIR} \
--model_name=inception_v3
# Fine-tune all layers for 500 steps.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR}/all \
--dataset_name=flowers \
--dataset_split_name=train \
--dataset_dir=${DATASET_DIR} \
--model_name=inception_v3 \
--checkpoint_path=${TRAIN_DIR} \
--max_number_of_steps=500 \
--batch_size=32 \
--learning_rate=0.0001 \
--learning_rate_decay_type=fixed \
--save_interval_secs=60 \
--save_summaries_secs=60 \
--log_every_n_steps=10 \
--optimizer=rmsprop \
--weight_decay=0.00004
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR}/all \
--eval_dir=${TRAIN_DIR}/all \
--dataset_name=flowers \
--dataset_split_name=validation \
--dataset_dir=${DATASET_DIR} \
--model_name=inception_v3
#!/bin/bash
#
# This script performs the following operations:
# 1. Downloads the Cifar10 dataset
# 2. Trains a CifarNet model on the Cifar10 training set.
# 3. Evaluates the model on the Cifar10 testing set.
#
# Usage:
# cd slim
# ./scripts/train_cifarnet_on_cifar10.sh
# Where the checkpoint and logs will be saved to.
TRAIN_DIR=/tmp/cifarnet-model
# Where the dataset is saved to.
DATASET_DIR=/tmp/cifar10
# Download the dataset
python download_and_convert_data.py \
--dataset_name=cifar10 \
--dataset_dir=${DATASET_DIR}
# Run training.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR} \
--dataset_name=cifar10 \
--dataset_split_name=train \
--dataset_dir=${DATASET_DIR} \
--model_name=cifarnet \
--preprocessing_name=cifarnet \
--max_number_of_steps=100000 \
--batch_size=128 \
--save_interval_secs=120 \
--save_summaries_secs=120 \
--log_every_n_steps=100 \
--optimizer=sgd \
--learning_rate=0.1 \
--learning_rate_decay_factor=0.1 \
--num_epochs_per_decay=200 \
--weight_decay=0.004
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR} \
--eval_dir=${TRAIN_DIR} \
--dataset_name=cifar10 \
--dataset_split_name=test \
--dataset_dir=${DATASET_DIR} \
--model_name=cifarnet
#!/bin/bash
#
# This script performs the following operations:
# 1. Downloads the MNIST dataset
# 2. Trains a LeNet model on the MNIST training set.
# 3. Evaluates the model on the MNIST testing set.
#
# Usage:
# cd slim
# ./scripts/train_lenet_on_mnist.sh
# Where the checkpoint and logs will be saved to.
TRAIN_DIR=/tmp/lenet-model
# Where the dataset is saved to.
DATASET_DIR=/tmp/mnist
# Download the dataset
python download_and_convert_data.py \
--dataset_name=mnist \
--dataset_dir=${DATASET_DIR}
# Run training.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR} \
--dataset_name=mnist \
--dataset_split_name=train \
......
--model_name=lenet \
--preprocessing_name=lenet \
--max_number_of_steps=20000 \
--batch_size=50 \
--learning_rate=0.01 \
--save_interval_secs=60 \
--save_summaries_secs=60 \
--log_every_n_steps=100 \
--optimizer=sgd \
--learning_rate_decay_type=fixed \
--weight_decay=0
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR} \
--eval_dir=${TRAIN_DIR} \
--dataset_name=mnist \
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TF-Slim Walkthrough\n",
"\n",
"This notebook will walk you through the basics of using TF-Slim to define, train and evaluate neural networks on various tasks. It assumes a basic knowledge of neural networks. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Table of contents\n",
"\n",
"<a href=\"#Install\">Installation and setup</a><br>\n",
"<a href='#MLP'>Creating your first neural network with TF-Slim</a><br>\n",
"<a href='#ReadingTFSlimDatasets'>Reading Data with TF-Slim</a><br>\n",
"<a href='#CNN'>Training a convolutional neural network (CNN)</a><br>\n",
"<a href='#Pretained'>Using pre-trained models</a><br>\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installation and setup\n",
"<a id='Install'></a>\n",
"\n",
"As of 8/28/16, the latest stable release of TF is r0.10, which does not contain the latest version of slim.\n",
"To obtain the latest version of TF-Slim, please install the most recent nightly build of TF\n",
"as explained [here](https://github.com/nathansilberman/models/tree/master/slim#getting-started).\n",
"\n",
"To use TF-Slim for image classification (as we do in this notebook), you also have to install the TF-Slim image models library from [here](https://github.com/tensorflow/models/tree/master/slim). Let's suppose you install this into a directory called TF_MODELS. Then you should change directory to TF_MODELS/slim **before** running this notebook, so that all the files are on the path.\n",
"\n",
"To check you've got these two steps to work, just execute the cell below. It it complains about unknown modules, restart the notebook after moving to the TF-Slim models directory.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import matplotlib\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import math\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import time\n",
"\n",
"from datasets import dataset_utils\n",
"\n",
"# Main slim library\n",
"slim = tf.contrib.slim"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating your first neural network with TF-Slim\n",
"<a id='MLP'></a>\n",
"\n",
"Below we give some code to create a simple multilayer perceptron (MLP) which can be used\n",
"for regression problems. The model has 2 hidden layers.\n",
"The output is a single node. \n",
"When this function is called, it will create various nodes, and silently add them to whichever global TF graph is currently in scope. When a node which corresponds to a layer with adjustable parameters (eg., a fully connected layer) is created, additional parameter variable nodes are silently created, and added to the graph. (We will discuss how to train the parameters later.)\n",
"\n",
"We use variable scope to put all the nodes under a common name,\n",
"so that the graph has some hierarchical structure.\n",
"This is useful when we want to visualize the TF graph in tensorboard, or if we want to query related\n",
"variables. \n",
"The fully connected layers all use the same L2 weight decay and ReLu activations, as specified by **arg_scope**. (However, the final layer overrides these defaults, and uses an identity activation function.)\n",
"\n",
"We also illustrate how to add a dropout layer after the first fully connected layer (FC1). Note that at test time, \n",
"we do not drop out nodes, but instead use the average activations; hence we need to know whether the model is being\n",
"constructed for training or testing, since the computational graph will be different in the two cases\n",
"(although the variables, storing the model parameters, will be shared, since they have the same name/scope)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def regression_model(inputs, is_training=True, scope=\"deep_regression\"):\n",
" \"\"\"Creates the regression model.\n",
" \n",
" Args:\n",
" input_node: A node that yields a `Tensor` of size [batch_size, dimensions].\n",
" is_training: Whether or not we're currently training the model.\n",
" scope: An optional variable_op scope for the model.\n",
" \n",
" Returns:\n",
" output_node: 1-D `Tensor` of shape [batch_size] of responses.\n",
" nodes: A dict of nodes representing the hidden layers.\n",
" \"\"\"\n",
" with tf.variable_scope(scope, 'deep_regression', [input_node]):\n",
" nodes = {}\n",
" # Set the default weight _regularizer and acvitation for each fully_connected layer.\n",
" with slim.arg_scope([slim.fully_connected],\n",
" activation_fn=tf.nn.relu,\n",
" weights_regularizer=slim.l2_regularizer(0.01)):\n",
" \n",
" # Creates a fully connected layer from the inputs with 10 hidden units.\n",
" fc1_node = slim.fully_connected(inputs, 10, scope='fc1')\n",
" nodes['fc1'] = fc1_node\n",
" \n",
" # Adds a dropout layer to prevent over-fitting.\n",
" dropout_node = slim.dropout(fc1_node, 0.8, is_training=is_training)\n",
" \n",
" # Adds another fully connected layer with 5 hidden units.\n",
" fc2_node = slim.fully_connected(dropout_node, 5, scope='fc2')\n",
" nodes['fc2'] = fc2_node\n",
" \n",
" # Creates a fully-connected layer with a single hidden unit. Note that the\n",
" # layer is made linear by setting activation_fn=None.\n",
" prediction_node = slim.fully_connected(fc2_node, 1, activation_fn=None, scope='prediction')\n",
" nodes['out'] = prediction_node\n",
"\n",
" return prediction_node, nodes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's create the model and examine its structure.\n",
"\n",
"We create a TF graph and call regression_model(), which adds nodes (tensors) to the graph. We then examine their shape, and print the names of all the model variables which have been implicitly created inside of each layer. We see that the names of the variables follow the scopes that we specified."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with tf.Graph().as_default():\n",
" # Dummy placeholders for arbitrary number of 1d inputs and outputs\n",
" input_node = tf.placeholder(tf.float32, shape=(None, 1))\n",
" output_node = tf.placeholder(tf.float32, shape=(None, 1))\n",
" \n",
" # Build model\n",
" prediction_node, all_nodes = regression_model(input_node)\n",
" \n",
" # Print name and shape of each tensor.\n",
" print \"Layers\"\n",
" for k, v in all_nodes.iteritems():\n",
" print 'name = {}, shape = {}'.format(v.name, v.get_shape())\n",
"\n",
" # Print name and shape of parameter nodes (values not yet initialized)\n",
" print \"\\n\"\n",
" print \"Parameters\"\n",
" for v in slim.get_model_variables():\n",
" print 'name = {}, shape = {}'.format(v.name, v.get_shape())\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's create some 1d regression data .\n",
"\n",
"We will train and test the model on some noisy observations of a nonlinear function.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def produce_batch(batch_size, noise=0.3):\n",
" xs = np.random.random(size=[batch_size, 1]) * 10\n",
" ys = np.sin(xs) + 5 + np.random.normal(size=[batch_size, 1], scale=noise)\n",
" return [xs.astype(np.float32), ys.astype(np.float32)]\n",
"\n",
"x_train, y_train = produce_batch(100)\n",
"x_test, y_test = produce_batch(100)\n",
"plt.scatter(x_train, y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's fit the model to the data\n",
"\n",
"The user has to specify the loss function and the optimizer, and slim does the rest.\n",
"In particular, the slim.learning.train function does the following:\n",
"\n",
"- For each iteration, evaluate the train_op, which updates the parameters using the optimizer applied to the current minibatch. Also, update the global_step.\n",
"- Occasionally store the model checkpoint in the specified directory. This is useful in case your machine crashes - then you can simply restart from the specified checkpoint."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Everytime we run training, we need to store the model checkpoint in a new directory,\n",
"# in case anything has changed.\n",
"import time\n",
"ts = time.time()\n",
"ckpt_dir = '/tmp/tf/regression_model/model{}'.format(ts) # Place to store the checkpoint.\n",
"print('Saving to {}'.format(ckpt_dir))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def convert_data_to_tensors(x, y):\n",
" input_tensor = tf.constant(x)\n",
" input_tensor.set_shape([None, 1])\n",
" output_tensor = tf.constant(y)\n",
" output_tensor.set_shape([None, 1])\n",
" return input_tensor, output_tensor"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"graph = tf.Graph() # new graph\n",
"with graph.as_default():\n",
" input_node, output_node = convert_data_to_tensors(x_train, y_train)\n",
"\n",
" # Make the model.\n",
" prediction_node, nodes = regression_model(input_node, is_training=True)\n",
" \n",
" # Add the loss function to the graph.\n",
" loss_node = slim.losses.sum_of_squares(prediction_node, output_node)\n",
" # The total loss is the uers's loss plus any regularization losses.\n",
" total_loss_node = slim.losses.get_total_loss()\n",
"\n",
" # Create some summaries to visualize the training process:\n",
" ## TODO: add summaries.py to 3p\n",
" #slim.summaries.add_scalar_summary(total_loss, 'Total Loss', print_summary=True)\n",
" \n",
" # Specify the optimizer and create the train op:\n",
" optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n",
" train_op_node = slim.learning.create_train_op(total_loss_node, optimizer) \n",
"\n",
" # Run the training inside a session.\n",
" final_loss = slim.learning.train(\n",
" train_op_node,\n",
" logdir=ckpt_dir,\n",
" number_of_steps=500,\n",
" save_summaries_secs=1)\n",
" \n",
"print(\"Finished training. Last batch loss:\", final_loss)\n",
"print(\"Checkpoint saved in %s\" % ckpt_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training with multiple loss functions.\n",
"\n",
"Sometimes we have multiple objectives we want to simultaneously optimize.\n",
"In slim, it is easy to add more losses, as we show below. (We do not optimize the total loss in this example,\n",
"but we show how to compute it.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"graph = tf.Graph() # Make a new graph\n",
"with graph.as_default():\n",
" input_node, output_node = convert_data_to_tensors(x_train, y_train)\n",
" prediction_node, nodes = regression_model(input_node, is_training=True)\n",
"\n",
" # Add multiple loss nodes.\n",
" sum_of_squares_loss_node = slim.losses.sum_of_squares(prediction_node, output_node)\n",
" absolute_difference_loss_node = slim.losses.absolute_difference(prediction_node, output_node)\n",
"\n",
" # The following two ways to compute the total loss are equivalent\n",
" regularization_loss_node = tf.add_n(slim.losses.get_regularization_losses())\n",
" total_loss1_node = sum_of_squares_loss_node + absolute_difference_loss_node + regularization_loss_node\n",
"\n",
" # Regularization Loss is included in the total loss by default.\n",
" # This is good for training, but not for testing.\n",
" total_loss2_node = slim.losses.get_total_loss(add_regularization_losses=True)\n",
" \n",
" init_node = tf.initialize_all_variables()\n",
" with tf.Session() as sess:\n",
" sess.run(init_node) # Will randomize the parameters.\n",
" total_loss1, total_loss2 = sess.run([total_loss1_node, total_loss2_node])\n",
" print('Total Loss1: %f' % total_loss1)\n",
" print('Total Loss2: %f' % total_loss2)\n",
"\n",
" print('Regularization Losses:')\n",
" for loss_node in slim.losses.get_regularization_losses():\n",
" print(loss_node)\n",
"\n",
" print('Loss Functions:')\n",
" for loss_node in slim.losses.get_losses():\n",
" print(loss_node)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's load the saved model and use it for prediction.\n",
"\n",
"The predictive accuracy is not very good, because we used a small model,\n",
"and only trained for 500 steps, to keep the demo fast. \n",
"Running for 5000 steps improves performance a lot."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with tf.Graph().as_default():\n",
" input_node, output_node = convert_data_to_tensors(x_test, y_test)\n",
" \n",
" # Create the model structure. (Parameters will be loaded below.)\n",
" prediction_node, nodes = regression_model(input_node, is_training=False)\n",
"\n",
" # Make a session which restores the old parameters from a checkpoint.\n",
" sv = tf.train.Supervisor(logdir=ckpt_dir)\n",
" with sv.managed_session() as sess:\n",
" inputs, predictions, true_outputs = sess.run([input_node, prediction_node, output_node])\n",
"\n",
"plt.scatter(inputs, true_outputs, c='r');\n",
"plt.scatter(inputs, predictions, c='b');\n",
"plt.title('red=true, blue=predicted')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's examine the learned parameters."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with tf.Graph().as_default():\n",
" input_node = tf.placeholder(tf.float32, shape=(None, 1))\n",
" output_node = tf.placeholder(tf.float32, shape=(None, 1))\n",
" prediction_node, nodes = regression_model(input_node, is_training=False)\n",
" \n",
" sv = tf.train.Supervisor(logdir=ckpt_dir)\n",
" with sv.managed_session() as sess:\n",
" model_variables = slim.get_model_variables()\n",
" for v in model_variables:\n",
" val = sess.run(v)\n",
" print v.name, val.shape, val\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's compute various evaluation metrics on the test set.\n",
"\n",
"In slim termiology, losses are optimized, but metrics (which may not be differentiable, e.g., precision and recall) are just measured.\n",
"As an illustration, the code below computes mean squared error and mean absolute error metrics on the test set.\n",
"\n",
"Each metric declaration creates several local variables (which must be initialized via tf.initialize_local_variables()) and returns both a value_op and an update_op. When evaluated, the value_op returns the current value of the metric. The update_op loads a new batch of data, runs the model, obtains the predictions and accumulates the metric statistics appropriately before returning the current value of the metric. We store these value nodes and update nodes in 2 dictionaries.\n",
"\n",
"After creating the metric nodes, we can pass them to slim.evaluation.evaluation, which repeatedly evaluates these nodes the specified number of times. (This allows us to compute the evaluation in a streaming fashion across minibatches, which is usefulf for large datasets.) Finally, we print the final value of each metric.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with tf.Graph().as_default():\n",
" input_node, output_node = convert_data_to_tensors(x_test, y_test)\n",
" prediction_node, nodes = regression_model(input_node, is_training=False)\n",
"\n",
" # Specify metrics to evaluate:\n",
" names_to_value_nodes, names_to_update_nodes = slim.metrics.aggregate_metric_map({\n",
" 'Mean Squared Error': slim.metrics.streaming_mean_squared_error(prediction_node, output_node),\n",
" 'Mean Absolute Error': slim.metrics.streaming_mean_absolute_error(prediction_node, output_node)\n",
" })\n",
"\n",
"\n",
" init_node = tf.group(\n",
" tf.initialize_all_variables(),\n",
" tf.initialize_local_variables())\n",
"\n",
" # Make a session which restores the old graph parameters, and then run eval.\n",
" sv = tf.train.Supervisor(logdir=ckpt_dir)\n",
" with sv.managed_session() as sess:\n",
" metric_values = slim.evaluation.evaluation(\n",
" sess,\n",
" num_evals=1, # Single pass over data\n",
" init_op=init_node,\n",
" eval_op=names_to_update_nodes.values(),\n",
" final_op=names_to_value_nodes.values())\n",
"\n",
" names_to_values = dict(zip(names_to_value_nodes.keys(), metric_values))\n",
" for key, value in names_to_values.iteritems():\n",
" print('%s: %f' % (key, value))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reading Data with TF-Slim\n",
"<a id='ReadingTFSlimDatasets'></a>\n",
"\n",
"Reading data with TF-Slim has two main components: A\n",
"[Dataset](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset.py) and a \n",
"[DatasetDataProvider](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py). The former is a descriptor of a dataset, while the latter performs the actions necessary for actually reading the data. Lets look at each one in detail:\n",
"\n",
"\n",
"## Dataset\n",
"A TF-Slim\n",
"[Dataset](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset.py)\n",
"contains descriptive information about a dataset necessary for reading it, such as the list of data files and how to decode them. It also contains metadata including class labels, the size of the train/test splits and descriptions of the tensors that the dataset provides. For example, some datasets contain images with labels. Others augment this data with bounding box annotations, etc. The Dataset object allows us to write generic code using the same API, regardless of the data content and encoding type.\n",
"\n",
"TF-Slim's Dataset works especially well when the data is stored as a (possibly sharded)\n",
"[TFRecords file](https://www.tensorflow.org/versions/r0.10/how_tos/reading_data/index.html#file-formats), where each record contains a [tf.train.Example protocol buffer](https://github.com/tensorflow/tensorflow/blob/r0.10/tensorflow/core/example/example.proto).\n",
"TF-Slim uses a consistent convention for naming the keys and values inside each Example record. \n",
"\n",
"## DatasetDataProvider\n",
"\n",
"A\n",
"[DatasetDataProvider](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py) is a class which actually reads the data from a dataset. It is highly configurable to read the data in various ways that may make a big impact on the efficiency of your training process. For example, it can be single or multi-threaded. If your data is sharded across many files, it can read each files serially, or from every file simultaneously.\n",
"\n",
"## Demo: The Flowers Dataset\n",
"\n",
"For convenience, we've include scripts to convert several common image datasets into TFRecord format and have provided\n",
"the Dataset descriptor files necessary for reading them. We demonstrate how easy it is to use these dataset via the Flowers dataset below."
]
},
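{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the descriptor concrete, the cell below sketches a minimal hand-written Dataset. It is illustrative only: the `data_sources` pattern and `num_samples` are hypothetical placeholders, and it assumes TFRecords that follow TF-Slim's key conventions ('image/encoded', 'image/format', 'image/class/label'), as the provided datasets do."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"slim = tf.contrib.slim\n",
"\n",
"# How to parse each tf.train.Example record into named tensors.\n",
"keys_to_features = {\n",
"    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),\n",
"    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),\n",
"    'image/class/label': tf.FixedLenFeature([], tf.int64),\n",
"}\n",
"items_to_handlers = {\n",
"    'image': slim.tfexample_decoder.Image(),\n",
"    'label': slim.tfexample_decoder.Tensor('image/class/label'),\n",
"}\n",
"decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)\n",
"\n",
"# The file pattern and sample count below are hypothetical.\n",
"my_dataset = slim.dataset.Dataset(\n",
"    data_sources='/tmp/mydata/mydata_train_*.tfrecord',\n",
"    reader=tf.TFRecordReader,\n",
"    decoder=decoder,\n",
"    num_samples=1000,\n",
"    items_to_descriptions={'image': 'A color image.', 'label': 'An integer label.'})"
]
},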
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download the Flowers Dataset\n",
"<a id='DownloadFlowers'></a>\n",
"\n",
"We've made available a tarball of the Flowers dataset which has already been converted to TFRecord format."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from datasets import dataset_utils\n",
"\n",
"url = \"http://download.tensorflow.org/data/flowers.tar.gz\"\n",
"flowers_data_dir = '/tmp/flowers'\n",
"\n",
"dataset_utils.download_and_uncompress_tarball(url, flowers_data_dir) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Display some of the data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from datasets import flowers\n",
"import tensorflow as tf\n",
"\n",
"slim = tf.contrib.slim\n",
"\n",
"with tf.Graph().as_default(): \n",
" dataset = flowers.get_split('train', flowers_data_dir)\n",
" data_provider = slim.dataset_data_provider.DatasetDataProvider(\n",
" dataset, common_queue_capacity=32, common_queue_min=1)\n",
" image, label = data_provider.get(['image', 'label'])\n",
" \n",
" with tf.Session() as sess: \n",
" with slim.queues.QueueRunners(sess):\n",
" for i in xrange(4):\n",
" np_image, np_label = sess.run([image, label])\n",
" height, width, _ = np_image.shape\n",
" class_name = name = dataset.labels_to_names[np_label]\n",
" \n",
" plt.figure()\n",
" plt.imshow(np_image)\n",
" plt.title('%s, %d x %d' % (name, height, width))\n",
" plt.axis('off')\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convolutional neural nets (CNNs).\n",
"<a id='CNN'></a>\n",
"\n",
"In this section, we show how to train an image classifier using a simple CNN.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define the model.\n",
"\n",
"Below we define a simple CNN. Note that the output layer is linear function - we will apply softmax transformation externally to the model, either in the loss function (for training), or in the prediction function (during testing)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def my_cnn(images, num_classes, is_training): # is_training is not used...\n",
" with slim.arg_scope([slim.max_pool2d], kernel_size=[3, 3], stride=2):\n",
" net = slim.conv2d(images, 64, [5, 5])\n",
" net = slim.max_pool2d(net)\n",
" net = slim.conv2d(net, 64, [5, 5])\n",
" net = slim.max_pool2d(net)\n",
" net = slim.flatten(net)\n",
" net = slim.fully_connected(net, 192)\n",
" net = slim.fully_connected(net, num_classes, activation_fn=None) \n",
" return net"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Apply the model to some randomly generated images."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"with tf.Graph().as_default():\n",
" # The model can handle any input size because the first layer is convolutional.\n",
" # The size of the model is determined when image_node is first passed into the my_cnn function.\n",
" # Once the variables are initialized, the size of all the weight matrices is fixed.\n",
" # Because of the fully connected layers, this means that all subsequent images must have the same\n",
" # input size as the first image.\n",
" batch_size, height, width, channels = 3, 28, 28, 3\n",
" images = tf.random_uniform([batch_size, height, width, channels], maxval=1)\n",
" \n",
" # Create the model.\n",
" num_classes = 10\n",
" logits = my_cnn(images, num_classes, is_training=True)\n",
" probabilities = tf.nn.softmax(logits)\n",
" \n",
" # Initialize all the variables (including parameters) randomly.\n",
" init_op = tf.initialize_all_variables()\n",
" \n",
" with tf.Session() as sess:\n",
" # Run the init_op, evaluate the model outputs and print the results:\n",
" sess.run(init_op)\n",
" probabilities = sess.run(probabilities)\n",
" \n",
"print('Probabilities Shape:')\n",
"print(probabilities.shape) # batch_size x num_classes \n",
"\n",
"print('\\nProbabilities:')\n",
"print(probabilities)\n",
"\n",
"print('\\nSumming across all classes (Should equal 1):')\n",
"print(np.sum(probabilities, 1)) # Each row sums to 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train the model on the Flowers dataset.\n",
"\n",
"Before starting, make sure you've run the code to <a href=\"#DownloadFlowers\">Download the Flowers</a> dataset. Now, we'll get a sense of what it looks like to use TF-Slim's training functions found in\n",
"[learning.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/learning.py). First, we'll create a function, `load_batch`, that loads batches of dataset from a dataset. Next, we'll train a model for a single step (just to demonstrate the API), and evaluate the results."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from preprocessing import inception_preprocessing\n",
"import tensorflow as tf\n",
"\n",
"slim = tf.contrib.slim\n",
"\n",
"\n",
"def load_batch(dataset, batch_size=32, height=299, width=299, is_training=False):\n",
" \"\"\"Loads a single batch of data.\n",
" \n",
" Args:\n",
" dataset: The dataset to load.\n",
" batch_size: The number of images in the batch.\n",
" height: The size of each image after preprocessing.\n",
" width: The size of each image after preprocessing.\n",
" is_training: Whether or not we're currently training or evaluating.\n",
" \"\"\"\n",
" data_provider = slim.dataset_data_provider.DatasetDataProvider(\n",
" dataset, common_queue_capacity=32,\n",
" common_queue_min=8)\n",
" image_raw, label = data_provider.get(['image', 'label'])\n",
" \n",
" # Preprocess image for usage by Inception.\n",
" image = inception_preprocessing.preprocess_image(image_raw, height, width, is_training=is_training)\n",
" \n",
" # Preprocess the image for display purposes.\n",
" image_raw = tf.expand_dims(image_raw, 0)\n",
" image_raw = tf.image.resize_images(image_raw, height, width)\n",
" image_raw = tf.squeeze(image_raw)\n",
"\n",
" # Batch it up.\n",
" images, images_raw, labels = tf.train.batch(\n",
" [image, image_raw, label],\n",
" batch_size=batch_size,\n",
" num_threads=1,\n",
" capacity=2 * batch_size)\n",
" \n",
" return images, images_raw, labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from datasets import flowers\n",
"\n",
"# This might take a few minutes.\n",
"train_dir = '/tmp/tfslim_model/'\n",
"print('Will save model to %s' % CHECKPOINT_DIR)\n",
"\n",
"with tf.Graph().as_default():\n",
" tf.logging.set_verbosity(tf.logging.INFO)\n",
"\n",
" dataset = flowers.get_split('train', flowers_data_dir)\n",
" images, _, labels = load_batch(dataset)\n",
" \n",
" # Create the model:\n",
" logits = my_cnn(images, num_classes=dataset.num_classes, is_training=True)\n",
" \n",
" # Specify the loss function:\n",
" one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)\n",
" slim.losses.softmax_cross_entropy(logits, one_hot_labels)\n",
" total_loss = slim.losses.get_total_loss()\n",
"\n",
" # Create some summaries to visualize the training process:\n",
" tf.scalar_summary('losses/Total Loss', total_loss)\n",
" \n",
" # Specify the optimizer and create the train op:\n",
" optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n",
" train_op = slim.learning.create_train_op(total_loss, optimizer)\n",
"\n",
" # Run the training:\n",
" final_loss = slim.learning.train(\n",
" train_op,\n",
" logdir=train_dir,\n",
" number_of_steps=1, # For speed, we just do 1 epoch\n",
" save_summaries_secs=1)\n",
" \n",
" print('Finished training. Final batch loss %d' % final_loss)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Evaluate some metrics.\n",
"\n",
"As we discussed above, we can compute various metrics besides the loss.\n",
"Below we show how to compute prediction accuracy of the trained model, as well as top-5 classification accuracy. (The difference between evaluation and evaluation_loop is that the latter writes the results to a log directory, so they can be viewed in tensorboard.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from datasets import flowers\n",
"\n",
"# This might take a few minutes.\n",
"with tf.Graph().as_default():\n",
" tf.logging.set_verbosity(tf.logging.DEBUG)\n",
" \n",
" dataset = flowers.get_split('train', flowers_data_dir)\n",
" images, _, labels = load_batch(dataset)\n",
" \n",
" logits = my_cnn(images, num_classes=dataset.num_classes, is_training=False)\n",
" predictions = tf.argmax(logits, 1)\n",
" \n",
" # Define the metrics:\n",
" names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({\n",
" 'eval/Accuracy': slim.metrics.streaming_accuracy(predictions, labels),\n",
" 'eval/Recall@5': slim.metrics.streaming_recall_at_k(logits, labels, 5),\n",
" })\n",
"\n",
" print('Running evaluation Loop...')\n",
" checkpoint_path = tf.train.latest_checkpoint(CHECKPOINT_DIR)\n",
" metric_values = slim.evaluation.evaluate_once(\n",
" master='',\n",
" checkpoint_path=checkpoint_path,\n",
" logdir=train_dir,\n",
" eval_op=names_to_updates.values(),\n",
" final_op=names_to_values.values())\n",
"\n",
" names_to_values = dict(zip(names_to_values.keys(), metric_values))\n",
" for name in names_to_values:\n",
" print('%s: %f' % (name, names_to_values[name]))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using pre-trained models\n",
"<a id='Pretrained'></a>\n",
"\n",
"Neural nets work best when they have many parameters, making them very flexible function approximators.\n",
"However, this means they must be trained on big datasets. Since this process is slow, we provide various pre-trained models - see the list [here](https://github.com/tensorflow/models/tree/master/slim#pre-trained-models).\n",
"\n",
"\n",
"You can either use these models as-is, or you can perform \"surgery\" on them, to modify them for some other task. For example, it is common to \"chop off\" the final pre-softmax layer, and replace it with a new set of weights corresponding to some new set of labels. You can then quickly fine tune the new model on a small new dataset. We illustrate this below, using inception-v3 as the base model.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download the Inception V1 checkpoint\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from datasets import dataset_utils\n",
"\n",
"url = \"http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz\"\n",
"checkpoints_dir = '/tmp/checkpoints'\n",
"\n",
"dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"### Apply Pre-trained model to Images.\n",
"\n",
"We have to convert each image to the size expected by the model checkpoint.\n",
"There is no easy way to determine this size from the checkpoint itself.\n",
"So we use a preprocessor to enforce this."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"import os\n",
"import tensorflow as tf\n",
"import urllib2\n",
"\n",
"from datasets import imagenet\n",
"from nets import inception\n",
"from preprocessing import inception_preprocessing\n",
"\n",
"slim = tf.contrib.slim\n",
"\n",
"batch_size = 3\n",
"\n",
"with tf.Graph().as_default():\n",
" url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'\n",
" image_string = urllib2.urlopen(url).read()\n",
" image = tf.image.decode_jpeg(image_string, channels=3)\n",
" processed_image = inception_preprocessing.preprocess_image(image, 224, 224, is_training=False)\n",
" processed_images = tf.expand_dims(processed_image, 0)\n",
" \n",
" with slim.arg_scope(inception.inception_v1_arg_scope()):\n",
" logits, _ = inception.inception_v1(processed_images, num_classes=1001, is_training=False)\n",
" probabilities = tf.nn.softmax(logits)\n",
" \n",
" init_fn = slim.assign_from_checkpoint_fn(\n",
" os.path.join(checkpoints_dir, 'inception_v1.ckpt'),\n",
" slim.get_model_variables('InceptionV1'))\n",
" \n",
" with tf.Session() as sess:\n",
" init_fn(sess)\n",
" np_image, probabilities = sess.run([image, probabilities])\n",
" probabilities = probabilities[0, 0:]\n",
" sorted_inds = [i[0] for i in sorted(enumerate(probabilities), key=lambda x:x[1])]\n",
" \n",
" plt.figure()\n",
" plt.imshow(np_image.astype(np.uint8))\n",
" plt.axis('off')\n",
" plt.show()\n",
" \n",
" sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]\n",
"\n",
" names = imagenet.create_readable_names_for_imagenet_labels()\n",
" for i in range(5):\n",
" index = sorted_inds[i]\n",
" print('Probability %0.2f%% => [%s]' % (probabilities[index], names[index]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fine-tune the model on a different set of labels.\n",
"\n",
"We will fine tune the inception model on the Flowers dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os\n",
"\n",
"from datasets import flowers\n",
"from nets import inception\n",
"from preprocessing import inception_preprocessing\n",
"\n",
"slim = tf.contrib.slim\n",
"\n",
"\n",
"def get_init_fn():\n",
" \"\"\"Returns a function run by the chief worker to warm-start the training.\"\"\"\n",
" checkpoint_exclude_scopes=[\"InceptionV1/Logits\", \"InceptionV1/AuxLogits\"]\n",
" \n",
" exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]\n",
"\n",
" variables_to_restore = []\n",
" for var in slim.get_model_variables():\n",
" excluded = False\n",
" for exclusion in exclusions:\n",
" if var.op.name.startswith(exclusion):\n",
" excluded = True\n",
" break\n",
" if not excluded:\n",
" variables_to_restore.append(var)\n",
"\n",
" return slim.assign_from_checkpoint_fn(\n",
" os.path.join(checkpoints_dir, 'inception_v1.ckpt'),\n",
" variables_to_restore)\n",
"\n",
"\n",
"train_dir = '/tmp/inception_finetuned/'\n",
"\n",
"with tf.Graph().as_default():\n",
" tf.logging.set_verbosity(tf.logging.INFO)\n",
" \n",
" dataset = flowers.get_split('train', flowers_data_dir)\n",
" images, _, labels = load_batch(dataset, height=224, width=224)\n",
" \n",
" # Create the model:\n",
" with slim.arg_scope(inception.inception_v1_arg_scope()):\n",
" logits, _ = inception.inception_v1(images, num_classes=dataset.num_classes, is_training=True)\n",
" \n",
" # Specify the loss function:\n",
" one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)\n",
" slim.losses.softmax_cross_entropy(logits, one_hot_labels)\n",
" total_loss = slim.losses.get_total_loss()\n",
"\n",
" # Create some summaries to visualize the training process:\n",
" tf.scalar_summary('losses/Total Loss', total_loss)\n",
" \n",
" # Specify the optimizer and create the train op:\n",
" optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n",
" train_op = slim.learning.create_train_op(total_loss, optimizer)\n",
" \n",
" # Run the training:\n",
" final_loss = slim.learning.train(\n",
" train_op,\n",
" logdir=train_dir,\n",
" init_fn=get_init_fn(),\n",
" number_of_steps=2)\n",
" \n",
" \n",
"print('Finished training. Last batch loss %f' % final_loss)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Apply fine tuned model to some images."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"from datasets import flowers\n",
"from nets import inception\n",
"\n",
"slim = tf.contrib.slim\n",
"\n",
"batch_size = 3\n",
"\n",
"with tf.Graph().as_default():\n",
" tf.logging.set_verbosity(tf.logging.INFO)\n",
" \n",
" dataset = flowers.get_split('train', flowers_data_dir)\n",
" images, images_raw, labels = load_batch(dataset, height=224, width=224)\n",
" \n",
" # Create the model:\n",
" with slim.arg_scope(inception.inception_v1_arg_scope()):\n",
" logits, _ = inception.inception_v1(images, num_classes=dataset.num_classes, is_training=True)\n",
"\n",
" probabilities = tf.nn.softmax(logits)\n",
" \n",
" checkpoint_path = tf.train.latest_checkpoint(train_dir)\n",
" init_fn = slim.assign_from_checkpoint_fn(\n",
" checkpoint_path,\n",
" slim.get_variables_to_restore())\n",
" \n",
" with tf.Session() as sess:\n",
" with slim.queues.QueueRunners(sess):\n",
" sess.run(tf.initialize_local_variables())\n",
" init_fn(sess)\n",
" np_probabilities, np_images_raw, np_labels = sess.run([probabilities, images_raw, labels])\n",
" \n",
" for i in xrange(batch_size): \n",
" image = np_images_raw[i, :, :, :]\n",
" true_label = np_labels[i]\n",
" predicted_label = np.argmax(np_probabilities[i, :])\n",
" predicted_name = dataset.labels_to_names[predicted_label]\n",
" true_name = dataset.labels_to_names[true_label]\n",
" \n",
" plt.figure()\n",
" plt.imshow(image.astype(np.uint8))\n",
" plt.title('Ground Truth: [%s], Prediction [%s]' % (true_name, predicted_name))\n",
" plt.axis('off')\n",
" plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
......
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generic training script that trains a given model a specified dataset."""
"""Generic training script that trains a model using a given dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
from datasets import dataset_factory
from deployment import model_deploy
from nets import nets_factory
from preprocessing import preprocessing_factory
slim = tf.contrib.slim
......
tf.app.flags.DEFINE_integer(
'The number of threads used to create the batches.')
tf.app.flags.DEFINE_integer(
'log_every_n_steps', 10,
'The frequency with which logs are printed.')
tf.app.flags.DEFINE_integer(
......
tf.app.flags.DEFINE_float(
'The decay to use for the moving average. '
'If left as None, then moving averages are not used.')
#######################
# Dataset Flags #
#######################
......
tf.app.flags.DEFINE_string(
'checkpoint_exclude_scopes', None,
'Comma-separated list of scopes of variables to exclude when restoring '
'from a checkpoint.')
tf.app.flags.DEFINE_string(
'trainable_scopes', None,
'Comma-separated list of scopes to filter the set of variables to train. '
'By default, None trains all the variables.')
tf.app.flags.DEFINE_boolean(
'ignore_missing_vars', False,
'When restoring a checkpoint, ignore missing variables.')
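# Illustrative example (values are hypothetical, mirroring the fine-tuning
# scripts above): to restore an InceptionV3 checkpoint while reinitializing
# and training only the logits layers, the two flags are combined as
#   --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits
#   --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits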
FLAGS = tf.app.flags.FLAGS
......
if not excluded:
variables_to_restore.append(var)
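# If checkpoint_path points to a directory, resolve it to the most recent
# checkpoint file inside it before restoring.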
if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
else:
checkpoint_path = FLAGS.checkpoint_path
tf.logging.info('Fine-tuning from %s' % checkpoint_path)
return slim.assign_from_checkpoint_fn(
checkpoint_path,
variables_to_restore,
ignore_missing_vars=FLAGS.ignore_missing_vars)
def _get_variables_to_train():
"""Returns a list of variables to train.
Returns:
A list of variables to be trained by the optimizer.
"""
if FLAGS.trainable_scopes is None:
return tf.trainable_variables()
else:
scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
variables_to_train = []
for scope in scopes:
variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
variables_to_train.extend(variables)
return variables_to_train
def main(_):
if not FLAGS.dataset_dir:
raise ValueError('You must supply the dataset directory with --dataset_dir')
tf.logging.set_verbosity(tf.logging.INFO)
with tf.Graph().as_default():
######################
# Config model_deploy#
......
FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
####################
# Select the network #
####################
network_fn = nets_factory.get_network_fn(
FLAGS.model_name,
num_classes=(dataset.num_classes - FLAGS.labels_offset),
weight_decay=FLAGS.weight_decay,
......
[image, label] = provider.get(['image', 'label'])
label -= FLAGS.labels_offset
train_image_size = FLAGS.train_image_size or network_fn.default_image_size
image = image_preprocessing_fn(image, train_image_size, train_image_size)
......
# Define the model #
####################
def clone_fn(batch_queue):
"""Allows data parallelism by creating multiple clones of the model_fn."""
"""Allows data parallelism by creating multiple clones of network_fn."""
images, labels = batch_queue.dequeue()
logits, end_points = network_fn(images)
#############################
# Specify the loss function #
......
label_smoothing=FLAGS.label_smoothing, weight=0.4, scope='aux_loss')
slim.losses.softmax_cross_entropy(
logits, labels, label_smoothing=FLAGS.label_smoothing, weight=1.0)
return end_points
# Gather initial summaries.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
......
clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
first_clone_scope = deploy_config.clone_scope(0)
# Gather update_ops from the first clone. These contain, for example,
# the updates for the batch_norm variables created by network_fn.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)
# Add summaries for end_points.
end_points = clones[0].outputs
for end_point in end_points:
x = end_points[end_point]
summaries.add(tf.histogram_summary('activations/' + end_point, x))
summaries.add(tf.scalar_summary('sparsity/' + end_point,
tf.nn.zero_fraction(x)))
# Add summaries for losses.
for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))
# Add summaries for variables.
for variable in slim.get_model_variables():
......
# Update ops executed locally by trainer.
update_ops.append(variable_averages.apply(moving_average_variables))
# TODO(sguada) Refactor into function that takes the clones and optimizer
# and returns a train_tensor and summary_op
# Variables to train.
variables_to_train = _get_variables_to_train()
total_loss, clones_gradients = model_deploy.optimize_clones(
clones,
optimizer,
var_list=variables_to_train)
# Add total_loss to summary.
summaries.add(tf.scalar_summary('total_loss', total_loss,
name='total_loss'))
......
# Merge all summaries together.
summary_op = tf.merge_summary(list(summaries), name='summary_op')
###########################
# Kicks off the training. #
###########################
......