Commit 83ee52cc authored by Martin Wicke

added inception model

parent 1ecaf090
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Small library that points to the ImageNet data set.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from inception.dataset import Dataset
class ImagenetData(Dataset):
"""ImageNet data set."""
def __init__(self, subset):
super(ImagenetData, self).__init__('ImageNet', subset)
def num_classes(self):
"""Returns the number of classes in the data set."""
return 1000
def num_examples_per_epoch(self):
"""Returns the number of examples in the data set."""
# Bounding box data consists of 615299 bounding boxes for 544546 images.
if self.subset == 'train':
return 1281167
if self.subset == 'validation':
return 50000
def download_message(self):
"""Instruction to download and extract the tarball from Flowers website."""
print('Failed to find any ImageNet %s files'% self.subset)
print('')
print('If you have already downloaded and processed the data, then make '
'sure to set --data_dir to point to the directory containing the '
'location of the sharded TFRecords.\n')
print('If you have not downloaded and prepared the ImageNet data in the '
'TFRecord format, you will need to do this at least once. This '
'process could take several hours depending on the speed of your '
'computer and network connection.\n')
print('Please see README.md for instructions on how to build '
'the ImageNet dataset using download_and_preprocess_imagenet.\n')
print('Note that the raw data size is 300 GB and the processed data size '
'is 150 GB. Please ensure you have at least 500 GB disk space.')
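# Example usage (a minimal sketch; data_files() is provided by the Dataset
# base class imported above):
#   dataset = ImagenetData(subset='train')
#   assert dataset.data_files()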
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A binary to evaluate Inception on the flowers data set.
Note that using the supplied pre-trained inception checkpoint, the eval should
achieve:
precision @ 1 = 0.7874 recall @ 5 = 0.9436 [50000 examples]
See the README.md for more details.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception import inception_eval
from inception.imagenet_data import ImagenetData
FLAGS = tf.app.flags.FLAGS
def main(unused_argv=None):
dataset = ImagenetData(subset=FLAGS.subset)
assert dataset.data_files()
if tf.gfile.Exists(FLAGS.eval_dir):
tf.gfile.DeleteRecursively(FLAGS.eval_dir)
tf.gfile.MakeDirs(FLAGS.eval_dir)
inception_eval.evaluate(dataset)
if __name__ == '__main__':
tf.app.run()
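# Example invocation (a sketch; the flags used here are defined in
# inception_eval.py):
#   python imagenet_eval.py --checkpoint_dir=/tmp/imagenet_train \
#       --eval_dir=/tmp/imagenet_eval --subset=validation --run_once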
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A binary to train Inception on the ImageNet data set.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception import inception_train
from inception.imagenet_data import ImagenetData
FLAGS = tf.app.flags.FLAGS
def main(_):
dataset = ImagenetData(subset=FLAGS.subset)
assert dataset.data_files()
if tf.gfile.Exists(FLAGS.train_dir):
tf.gfile.DeleteRecursively(FLAGS.train_dir)
tf.gfile.MakeDirs(FLAGS.train_dir)
inception_train.train(dataset)
if __name__ == '__main__':
tf.app.run()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A library to evaluate Inception on a single GPU.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import math
import os.path
import time
import numpy as np
import tensorflow as tf
from inception import image_processing
from inception import inception_model as inception
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('eval_dir', '/tmp/imagenet_eval',
"""Directory where to write event logs.""")
tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/imagenet_train',
"""Directory where to read model checkpoints.""")
# Flags governing the frequency of the eval.
tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5,
"""How often to run the eval.""")
tf.app.flags.DEFINE_boolean('run_once', False,
"""Whether to run eval only once.""")
# Flags governing the data used for the eval.
tf.app.flags.DEFINE_integer('num_examples', 50000,
"""Number of examples to run. Note that the eval """
"""ImageNet dataset contains 50000 examples.""")
tf.app.flags.DEFINE_string('subset', 'validation',
"""Either 'validation' or 'train'.""")
def _eval_once(saver, summary_writer, top_1_op, top_5_op, summary_op):
"""Runs Eval once.
Args:
saver: Saver.
summary_writer: Summary writer.
top_1_op: Top 1 op.
top_5_op: Top 5 op.
summary_op: Summary op.
"""
with tf.Session() as sess:
ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
if os.path.isabs(ckpt.model_checkpoint_path):
# Restores from checkpoint with absolute path.
saver.restore(sess, ckpt.model_checkpoint_path)
else:
# Restores from checkpoint with relative path.
saver.restore(sess, os.path.join(FLAGS.checkpoint_dir,
ckpt.model_checkpoint_path))
# Assuming model_checkpoint_path looks something like:
# /my-favorite-path/imagenet_train/model.ckpt-0,
# extract global_step from it.
global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
print('Successfully loaded model from %s at step=%s.' %
(ckpt.model_checkpoint_path, global_step))
else:
print('No checkpoint file found')
return
# Start the queue runners.
coord = tf.train.Coordinator()
try:
threads = []
for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
start=True))
num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
# Counts the number of correct predictions.
count_top_1 = 0.0
count_top_5 = 0.0
total_sample_count = num_iter * FLAGS.batch_size
step = 0
print('%s: starting evaluation on (%s).' % (datetime.now(), FLAGS.subset))
start_time = time.time()
while step < num_iter and not coord.should_stop():
top_1, top_5 = sess.run([top_1_op, top_5_op])
count_top_1 += np.sum(top_1)
count_top_5 += np.sum(top_5)
step += 1
if step % 20 == 0:
duration = time.time() - start_time
sec_per_batch = duration / 20.0
examples_per_sec = FLAGS.batch_size / sec_per_batch
print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f '
'sec/batch)' % (datetime.now(), step, num_iter,
examples_per_sec, sec_per_batch))
start_time = time.time()
# Compute precision @ 1.
precision_at_1 = count_top_1 / total_sample_count
recall_at_5 = count_top_5 / total_sample_count
print('%s: precision @ 1 = %.4f recall @ 5 = %.4f [%d examples]' %
(datetime.now(), precision_at_1, recall_at_5, total_sample_count))
summary = tf.Summary()
summary.ParseFromString(sess.run(summary_op))
summary.value.add(tag='Precision @ 1', simple_value=precision_at_1)
summary.value.add(tag='Recall @ 5', simple_value=recall_at_5)
summary_writer.add_summary(summary, global_step)
except Exception as e: # pylint: disable=broad-except
coord.request_stop(e)
coord.request_stop()
coord.join(threads, stop_grace_period_secs=10)
def evaluate(dataset):
"""Evaluate model on Dataset for a number of steps."""
with tf.Graph().as_default():
# Get images and labels from the dataset.
images, labels = image_processing.inputs(dataset)
# Number of classes in the Dataset label set plus 1.
# Label 0 is reserved for an (unused) background class.
num_classes = dataset.num_classes() + 1
# Build a Graph that computes the logits predictions from the
# inference model.
logits, _ = inception.inference(images, num_classes)
# Calculate predictions.
top_1_op = tf.nn.in_top_k(logits, labels, 1)
top_5_op = tf.nn.in_top_k(logits, labels, 5)
# Restore the moving average version of the learned variables for eval.
variable_averages = tf.train.ExponentialMovingAverage(
inception.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
# Build the summary operation based on the TF collection of Summaries.
summary_op = tf.merge_all_summaries()
graph_def = tf.get_default_graph().as_graph_def()
summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
graph_def=graph_def)
while True:
_eval_once(saver, summary_writer, top_1_op, top_5_op, summary_op)
if FLAGS.run_once:
break
time.sleep(FLAGS.eval_interval_secs)
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Build the Inception v3 network on ImageNet data set.
The Inception v3 architecture is described in http://arxiv.org/abs/1512.00567
Summary of available functions:
inference: Compute inference on the model inputs to make a prediction
loss: Compute the loss of the prediction with respect to the labels
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import re
import tensorflow as tf
from inception.slim import slim
FLAGS = tf.app.flags.FLAGS
# If a model is trained using multiple GPUs, prefix all Op names with tower_name
# to differentiate the operations. Note that this prefix is removed from the
# names of the summaries when visualizing a model.
TOWER_NAME = 'tower'
# Batch normalization. Constant governing the exponential moving average of
# the 'global' mean and variance for all activations.
BATCHNORM_MOVING_AVERAGE_DECAY = 0.9997
# The decay to use for the moving average.
MOVING_AVERAGE_DECAY = 0.9999
def inference(images, num_classes, for_training=False, restore_logits=True,
scope=None):
"""Build Inception v3 model architecture.
See here for reference: http://arxiv.org/abs/1512.00567
Args:
images: Images returned from inputs() or distorted_inputs().
num_classes: number of classes
for_training: If set to `True`, build the inference model for training.
Ops that operate differently during training and inference,
e.g. dropout, are configured appropriately.
restore_logits: whether or not the logits layers should be restored.
Useful for fine-tuning a model with different num_classes.
scope: optional prefix string identifying the ImageNet tower.
Returns:
Logits. 2-D float Tensor.
Auxiliary Logits. 2-D float Tensor of side-head. Used for training only.
"""
# Parameters for BatchNorm.
batch_norm_params = {
# Decay for the moving averages.
'decay': BATCHNORM_MOVING_AVERAGE_DECAY,
# epsilon to prevent 0s in variance.
'epsilon': 0.001,
}
# Set weight_decay for weights in Conv and FC layers.
with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004):
with slim.arg_scope([slim.ops.conv2d],
stddev=0.1,
activation=tf.nn.relu,
batch_norm_params=batch_norm_params):
# Force all Variables to reside on the CPU.
with slim.arg_scope([slim.variables.variable], device='/cpu:0'):
logits, endpoints = slim.inception.inception_v3(
images,
dropout_keep_prob=0.8,
num_classes=num_classes,
is_training=for_training,
restore_logits=restore_logits,
scope=scope)
# Add summaries for viewing model statistics on TensorBoard.
_activation_summaries(endpoints)
# Grab the logits associated with the side head. Employed during training.
auxiliary_logits = endpoints['aux_logits']
return logits, auxiliary_logits
def loss(logits, labels, batch_size=None):
"""Adds all losses for the model.
Note the final loss is not returned. Instead, the list of losses is collected
by slim.losses. The losses are accumulated in tower_loss() and summed to
calculate the total loss.
Args:
logits: List of logits from inference(). Each entry is a 2-D float Tensor.
labels: Labels from distorted_inputs or inputs(). 1-D tensor
of shape [batch_size]
batch_size: integer
"""
if not batch_size:
batch_size = FLAGS.batch_size
# Reshape the labels into a dense Tensor of
# shape [FLAGS.batch_size, num_classes].
sparse_labels = tf.reshape(labels, [batch_size, 1])
indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
concated = tf.concat(1, [indices, sparse_labels])
num_classes = logits[0].get_shape()[-1].value
dense_labels = tf.sparse_to_dense(concated,
[batch_size, num_classes],
1.0, 0.0)
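# For example (a sketch, not executed): with batch_size=2, num_classes=4 and
# labels = [2, 0], dense_labels would be [[0., 0., 1., 0.], [1., 0., 0., 0.]].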
# Cross entropy loss for the main softmax prediction.
slim.losses.cross_entropy_loss(logits[0],
dense_labels,
label_smoothing=0.1,
weight=1.0)
# Cross entropy loss for the auxiliary softmax head.
slim.losses.cross_entropy_loss(logits[1],
dense_labels,
label_smoothing=0.1,
weight=0.4,
scope='aux_loss')
def _activation_summary(x):
"""Helper to create summaries for activations.
Creates a summary that provides a histogram of activations.
Creates a summary that measure the sparsity of activations.
Args:
x: Tensor
"""
# Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
# session. This helps the clarity of presentation on tensorboard.
tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
tf.histogram_summary(tensor_name + '/activations', x)
tf.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
def _activation_summaries(endpoints):
with tf.name_scope('summaries'):
for act in endpoints.values():
_activation_summary(act)
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A library to train Inception using multiple GPU's with synchronous updates.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
from datetime import datetime
import os.path
import re
import time
import numpy as np
import tensorflow as tf
from inception import image_processing
from inception import inception_model as inception
from inception.slim import slim
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/tmp/imagenet_train',
"""Directory where to write event logs """
"""and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 10000000,
"""Number of batches to run.""")
tf.app.flags.DEFINE_string('subset', 'train',
"""Either 'train' or 'validation'.""")
# Flags governing the hardware employed for running TensorFlow.
tf.app.flags.DEFINE_integer('num_gpus', 1,
"""How many GPUs to use.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
# Flags governing the type of training.
tf.app.flags.DEFINE_boolean('fine_tune', False,
"""If set, randomly initialize the final layer """
"""of weights in order to train the network on a """
"""new task.""")
tf.app.flags.DEFINE_string('pretrained_model_checkpoint_path', '',
"""If specified, restore this pretrained model """
"""before beginning any training.""")
# **IMPORTANT**
# Please note that this learning rate schedule is heavily dependent on the
# hardware architecture, batch size and any changes to the model architecture
# specification. Selecting a finely tuned learning rate schedule is an
# empirical process that requires some experimentation. Please see README.md
# for more guidance and discussion.
#
# With 8 Tesla K40s and a batch size of 256, the following setup achieves
# precision@1 = 73.5% after 100 hours and 100K steps (20 epochs).
# Learning rate decay factor selected from http://arxiv.org/abs/1404.5997.
tf.app.flags.DEFINE_float('initial_learning_rate', 0.1,
"""Initial learning rate.""")
tf.app.flags.DEFINE_float('num_epochs_per_decay', 30.0,
"""Epochs after which learning rate decays.""")
tf.app.flags.DEFINE_float('learning_rate_decay_factor', 0.16,
"""Learning rate decay factor.""")
# Constants dictating the learning rate schedule.
RMSPROP_DECAY = 0.9 # Decay term for RMSProp.
RMSPROP_MOMENTUM = 0.9 # Momentum in RMSProp.
RMSPROP_EPSILON = 1.0 # Epsilon term for RMSProp.
def _tower_loss(images, labels, num_classes, scope):
"""Calculate the total loss on a single tower running the ImageNet model.
We perform 'batch splitting'. This means that we cut up a batch across
multiple GPUs. For instance, if the batch size = 32 and num_gpus = 2,
then each tower will operate on a batch of 16 images.
Args:
images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
FLAGS.image_size, 3].
labels: 1-D integer Tensor of [batch_size].
num_classes: number of classes
scope: unique prefix string identifying the ImageNet tower, e.g.
'tower_0'.
Returns:
Tensor of shape [] containing the total loss for a batch of data
"""
# When fine-tuning a model, we do not restore the logits but instead we
# randomly initialize the logits. The number of classes in the output of the
# logits layer is the number of classes in the specified Dataset.
restore_logits = not FLAGS.fine_tune
# Build inference Graph.
logits = inception.inference(images, num_classes, for_training=True,
restore_logits=restore_logits,
scope=scope)
# Build the portion of the Graph calculating the losses. Note that we will
# assemble the total_loss using a custom function below.
split_batch_size = images.get_shape().as_list()[0]
inception.loss(logits, labels, batch_size=split_batch_size)
# Assemble all of the losses for the current tower only.
losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope)
# Calculate the total loss for the current tower.
regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
total_loss = tf.add_n(losses + regularization_losses, name='total_loss')
# Compute the moving average of all individual losses and the total loss.
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
loss_averages_op = loss_averages.apply(losses + [total_loss])
# Attach a scalar summary to all individual losses and the total loss; do the
# same for the averaged version of the losses.
for l in losses + [total_loss]:
# Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
# session. This helps the clarity of presentation on TensorBoard.
loss_name = re.sub('%s_[0-9]*/' % inception.TOWER_NAME, '', l.op.name)
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
tf.scalar_summary(loss_name +' (raw)', l)
tf.scalar_summary(loss_name, loss_averages.average(l))
with tf.control_dependencies([loss_averages_op]):
total_loss = tf.identity(total_loss)
return total_loss
def _average_gradients(tower_grads):
"""Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list
is over individual gradients. The inner list is over the gradient
calculation for each tower.
Returns:
List of pairs of (gradient, variable) where the gradient has been averaged
across all towers.
"""
average_grads = []
for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
grads = []
for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g)
# Average over the 'tower' dimension.
grad = tf.concat(0, grads)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So we will just return the first tower's pointer to
# the Variable.
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
def train(dataset):
"""Train on dataset for a number of steps."""
with tf.Graph().as_default(), tf.device('/cpu:0'):
# Create a variable to count the number of train() calls. This equals the
# number of batches processed * FLAGS.num_gpus.
global_step = tf.get_variable(
'global_step', [],
initializer=tf.constant_initializer(0), trainable=False)
# Calculate the learning rate schedule.
num_batches_per_epoch = (dataset.num_examples_per_epoch() /
FLAGS.batch_size)
decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
global_step,
decay_steps,
FLAGS.learning_rate_decay_factor,
staircase=True)
# Create an optimizer that performs gradient descent.
opt = tf.train.RMSPropOptimizer(lr, RMSPROP_DECAY,
momentum=RMSPROP_MOMENTUM,
epsilon=RMSPROP_EPSILON)
# Get images and labels for ImageNet and split the batch across GPUs.
assert FLAGS.batch_size % FLAGS.num_gpus == 0, (
'Batch size must be divisible by number of GPUs')
split_batch_size = int(FLAGS.batch_size / FLAGS.num_gpus)
# Override the number of preprocessing threads to account for the increased
# number of GPU towers.
num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus
images, labels = image_processing.distorted_inputs(
dataset,
batch_size=split_batch_size,
num_preprocess_threads=num_preprocess_threads)
input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))
# Number of classes in the Dataset label set plus 1.
# Label 0 is reserved for an (unused) background class.
num_classes = dataset.num_classes() + 1
# Calculate the gradients for each model tower.
tower_grads = []
for i in xrange(FLAGS.num_gpus):
with tf.device('/gpu:%d' % i):
with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope:
# Calculate the loss for one tower of the ImageNet model. This
# function constructs the entire ImageNet model but shares the
# variables across all towers.
loss = _tower_loss(images, labels, num_classes, scope)
# Reuse variables for the next tower.
tf.get_variable_scope().reuse_variables()
# Retain the summaries from the final tower.
summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
# Retain the Batch Normalization updates operations only from the
# final tower. Ideally, we should grab the updates from all towers
# but these stats accumulate extremely fast so we can ignore the
# other stats from the other towers without significant detriment.
batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION,
scope)
# Calculate the gradients for the batch of data on this ImageNet
# tower.
grads = opt.compute_gradients(loss)
# Keep track of the gradients across all towers.
tower_grads.append(grads)
# We must calculate the mean of each gradient. Note that this is the
# synchronization point across all towers.
grads = _average_gradients(tower_grads)
# Add summaries for the input processing and global_step.
summaries.extend(input_summaries)
# Add a summary to track the learning rate.
summaries.append(tf.scalar_summary('learning_rate', lr))
# Add histograms for gradients.
for grad, var in grads:
if grad:
summaries.append(
tf.histogram_summary(var.op.name + '/gradients', grad))
# Apply the gradients to adjust the shared variables.
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Add histograms for trainable variables.
for var in tf.trainable_variables():
summaries.append(tf.histogram_summary(var.op.name, var))
# Track the moving averages of all trainable variables.
# Note that we maintain a "double-average" of the BatchNormalization
# global statistics. This is more complicated than it needs to be, but we employ
# this for backward-compatibility with our previous models.
variable_averages = tf.train.ExponentialMovingAverage(
inception.MOVING_AVERAGE_DECAY, global_step)
# Another possibility is to use tf.slim.get_variables().
variables_to_average = (tf.trainable_variables() +
tf.moving_average_variables())
variables_averages_op = variable_averages.apply(variables_to_average)
# Group all updates into a single train op.
batchnorm_updates_op = tf.group(*batchnorm_updates)
train_op = tf.group(apply_gradient_op, variables_averages_op,
batchnorm_updates_op)
# Create a saver.
saver = tf.train.Saver(tf.all_variables())
# Build the summary operation from the last tower summaries.
summary_op = tf.merge_summary(summaries)
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# Start running operations on the Graph. allow_soft_placement must be set to
# True to build towers on GPU, as some of the ops do not have GPU
# implementations.
sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=FLAGS.log_device_placement))
sess.run(init)
if FLAGS.pretrained_model_checkpoint_path:
assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)
variables_to_restore = tf.get_collection(
slim.variables.VARIABLES_TO_RESTORE)
restorer = tf.train.Saver(variables_to_restore)
restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
print('%s: Pre-trained model restored from %s' %
(datetime.now(), FLAGS.pretrained_model_checkpoint_path))
# Start the queue runners.
tf.train.start_queue_runners(sess=sess)
summary_writer = tf.train.SummaryWriter(
FLAGS.train_dir,
graph_def=sess.graph.as_graph_def(add_shapes=True))
for step in xrange(FLAGS.max_steps):
start_time = time.time()
_, loss_value = sess.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
examples_per_sec = FLAGS.batch_size / float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print(format_str % (datetime.now(), step, loss_value,
examples_per_sec, duration))
if step % 100 == 0:
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, step)
# Save the model checkpoint periodically.
if step % 5000 == 0 or (step + 1) == FLAGS.max_steps:
checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
# Description:
# Contains the operations and nets for building TensorFlow-Slim models.
package(default_visibility = ["//inception:internal"])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
py_library(
name = "scopes",
srcs = ["scopes.py"],
deps = [
"@tf//tensorflow:tensorflow_py",
],
)
py_test(
name = "scopes_test",
size = "small",
srcs = ["scopes_test.py"],
deps = [
":scopes",
],
)
py_library(
name = "variables",
srcs = ["variables.py"],
deps = [
"@tf//tensorflow:tensorflow_py",
":scopes",
],
)
py_test(
name = "variables_test",
size = "small",
srcs = ["variables_test.py"],
deps = [
":variables",
],
)
py_library(
name = "losses",
srcs = ["losses.py"],
deps = [
"@tf//tensorflow:tensorflow_py",
],
)
py_test(
name = "losses_test",
size = "small",
srcs = ["losses_test.py"],
deps = [
":losses",
],
)
py_library(
name = "ops",
srcs = ["ops.py"],
deps = [
"@tf//tensorflow:tensorflow_py",
":losses",
":scopes",
":variables",
],
)
py_test(
name = "ops_test",
size = "small",
srcs = ["ops_test.py"],
deps = [
":ops",
":variables",
],
)
py_library(
name = "inception",
srcs = ["inception_model.py"],
deps = [
"@tf//tensorflow:tensorflow_py",
":ops",
":scopes",
],
)
py_test(
name = "inception_test",
size = "medium",
srcs = ["inception_test.py"],
deps = [
":inception",
],
)
py_library(
name = "slim",
srcs = ["slim.py"],
deps = [
":inception",
":losses",
":ops",
":scopes",
":variables",
],
)
# TensorFlow-Slim
TF-Slim is a lightweight library for defining, training and evaluating models
in TensorFlow. It enables defining complex networks quickly and concisely while
keeping a model's architecture transparent and its hyperparameters explicit.
[TOC]
## Teaser
As a demonstration of the simplicity of using TF-Slim, compare the code needed
to define the entire
[VGG](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) network using TF-Slim
with the lengthy, verbose code needed to define just the first three layers (out
of 16) in native tensorflow:
```python{.good}
# VGG16 in TF-Slim.
def vgg16(inputs):
with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005):
net = slim.ops.repeat_op(2, inputs, slim.ops.conv2d, 64, [3, 3], scope='conv1')
net = slim.ops.max_pool(net, [2, 2], scope='pool1')
net = slim.ops.repeat_op(2, net, slim.ops.conv2d, 128, [3, 3], scope='conv2')
net = slim.ops.max_pool(net, [2, 2], scope='pool2')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3')
net = slim.ops.max_pool(net, [2, 2], scope='pool3')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv4')
net = slim.ops.max_pool(net, [2, 2], scope='pool4')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv5')
net = slim.ops.max_pool(net, [2, 2], scope='pool5')
net = slim.ops.flatten(net, scope='flatten5')
net = slim.ops.fc(net, 4096, scope='fc6')
net = slim.ops.dropout(net, 0.5, scope='dropout6')
net = slim.ops.fc(net, 4096, scope='fc7')
net = slim.ops.dropout(net, 0.5, scope='dropout7')
net = slim.ops.fc(net, 1000, activation=None, scope='fc8')
return net
```
```python{.bad}
# Layers 1-3 (out of 16) of VGG16 in native tensorflow.
def vgg16(inputs):
with tf.name_scope('conv1_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 3, 64], dtype=tf.float32, stddev=1e-1), name='weights')
conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope)
with tf.name_scope('conv1_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 64], dtype=tf.float32, stddev=1e-1), name='weights')
conv = tf.nn.conv2d(conv1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope)
with tf.name_scope('pool1'):
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1')
```
## Why TF-Slim?
TF-Slim offers several advantages over just the built-in tensorflow libraries:
* Allows one to define models much more compactly by eliminating
boilerplate code. This is accomplished through the use of
[argument scoping](scopes.py)
and numerous high level
[operations](ops.py).
These tools increase readability and maintainability, reduce the likelihood
of an error from copy-and-pasting hyperparameter values and simplify
hyperparameter tuning.
* Makes developing models simple by providing commonly used
[loss functions](losses.py)
* Provides a concise
[definition](inception.py)
of [Inception v3](http://arxiv.org/abs/1512.00567) network architecture
ready to be used out-of-the-box or subsumed into new models.
Additionally TF-Slim was designed with several principles in mind:
* The various modules of TF-Slim (scopes, variables, ops, losses) are
independent. This flexibility allows users to pick and choose
components of TF-Slim completely à la carte.
* TF-Slim is written using a Functional Programming style. That means it's
super-lightweight and can be used right alongside any of TensorFlow's native
operations.
* Makes re-using network architectures easy. This allows users to build new
networks on top of existing ones as well as to fine-tune pre-trained models on
new tasks.
## What are the various components of TF-Slim?
TF-Slim is composed of several parts which were designed to exist independently.
These include:
* [scopes.py](./scopes.py):
provides a new scope named `arg_scope` that allows a user to define default
arguments for specific operations within that scope.
* [variables.py](./variables.py):
provides convenience wrappers for variable creation and manipulation.
* [ops.py](./ops.py):
provides high level operations for building models using tensorflow.
* [losses.py](./losses.py):
contains commonly used loss functions.
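To get a feel for how these pieces fit together, here is a minimal sketch (not
part of the modules above; the signatures used are the ones illustrated in the
sections below) that combines an `arg_scope`, two `slim.ops` layers and the
variable tracking provided by `slim.variables`:
```python
import tensorflow as tf
from inception.slim import slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])

# scopes.py supplies arg_scope; ops.py supplies conv2d and max_pool, which
# create their weights and biases through variables.py.
with slim.arg_scope([slim.ops.conv2d], stddev=0.01, weight_decay=0.0005):
  net = slim.ops.conv2d(images, 64, [3, 3], scope='conv1')
  net = slim.ops.max_pool(net, [2, 2], scope='pool1')

# The variables created by the ops above are tracked and can be retrieved.
model_variables = slim.variables.get_variables()
```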
## Defining Models
Models can be succinctly defined using TF-Slim by combining its variables,
operations and scopes. Each of these elements are defined below.
### Variables
Creating
[`Variables`](https://www.tensorflow.org/how_tos/variables/index.html)
in native tensorflow requires either a predefined value or an initialization
mechanism (e.g. random sampling from a normal distribution). Furthermore, if a
variable needs to be created
on a specific device, such as a GPU, the specification must be
[made explicit](https://www.tensorflow.org/how_tos/using_gpu/index.html).
To reduce the code required for variable creation, TF-Slim provides a set
of thin wrapper functions in [variables.py](./variables.py)
which allow callers to easily define variables.
For example, to create a `weight` variable, initialize it using a truncated
normal distribution, regularize it with an `l2_loss` and place it on the `CPU`,
one need only declare the following:
```python
weights = variables.variable('weights',
shape=[10, 10, 3, 3],
initializer=tf.truncated_normal_initializer(stddev=0.1),
regularizer=lambda t: losses.l2_loss(t, weight=0.05),
device='/cpu:0')
```
In addition to the functionality provided by `tf.Variable`, `slim.variables`
keeps track of the variables created by `slim.ops` to define a model, which
allows one to distinguish variables that belong to the model versus other
variables.
```python
# Get all the variables defined by the model.
model_variables = slim.variables.get_variables()
# Get all the variables with the same given name, i.e. 'weights', 'biases'.
weights = slim.variables.get_variables_by_name('weights')
biases = slim.variables.get_variables_by_name('biases')
# Get all the variables in VARIABLES_TO_RESTORE collection.
variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
```
### Operations (Layers)
While the set of TensorFlow operations is quite extensive, builders of
neural networks typically think of models in terms of "layers". A layer,
such as a Convolutional Layer, a Fully Connected Layer or a BatchNorm Layer
is more abstract than a single TensorFlow operation and typically involves
many such operations. For example, a Convolutional Layer in a neural network
is built using several steps:
1. Creating the weight variables
2. Creating the bias variables
3. Convolving the weights with the input from the previous layer
4. Adding the biases to the result of the convolution.
In python code this can be rather laborious:
```python
input = ...
with tf.name_scope('conv1_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 128], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(input, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope)
```
To alleviate the need to duplicate this code repeatedly, TF-Slim provides a
number of convenient operations defined at the (more abstract) level of
neural network layers. For example, compare the code above to an invocation
of the TF-Slim code:
```python
input = ...
net = slim.ops.conv2d(input, 128, [3, 3], scope='conv1_1')
```
TF-Slim provides numerous operations used in building neural networks which
roughly correspond to such layers. These include:
Layer | TF-Slim Op
------- | --------
Convolutional Layer | [ops.conv2d](ops.py)
Fully Connected Layer | [ops.fc](ops.py)
BatchNorm layer | [ops.batch_norm](ops.py)
Max Pooling Layer | [ops.max_pool](ops.py)
Avg Pooling Layer | [ops.avg_pool](ops.py)
Dropout Layer | [ops.dropout](ops.py)
[ops.py](./ops.py)
also includes operations that are not really "layers" per se, but are
often used to manipulate hidden unit representations during inference:
Operation | TF-Slim Op
------- | --------
Flatten | [ops.flatten](ops.py)
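As a rough sketch (assuming the same signatures as the examples above), several
of these ops can be chained to form a small classifier head:
```python
images = ...
net = slim.ops.conv2d(images, 32, [3, 3], scope='conv1')
net = slim.ops.max_pool(net, [2, 2], scope='pool1')
net = slim.ops.dropout(net, 0.5, scope='dropout1')
net = slim.ops.flatten(net, scope='flatten1')
net = slim.ops.fc(net, 10, activation=None, scope='logits')
```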
TF-Slim also provides a meta-operation called `repeat_op` that allows one to
repeatedly perform the same operation. Consider the following snippet from the
[VGG](https://www.robots.ox.ac.uk/~vgg/research/very_deep/) network whose layers
perform several convolutions in a row between pooling layers:
```python
net = ...
net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_1')
net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_2')
net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_3')
net = slim.ops.max_pool(net, [2, 2], scope='pool3')
```
This clear duplication of code can be removed via a standard loop:
```python
net = ...
for i in range(3):
net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_%d' % (i+1))
net = slim.ops.max_pool(net, [2, 2], scope='pool3')
```
While this does reduce the amount of duplication, it can be made even cleaner
by using the `RepeatOp`:
```python
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3')
net = slim.ops.max_pool(net, [2, 2], scope='pool3')
```
Notice that the RepeatOp not only applies the same arguments in-line, it also
is smart enough to unroll the scopes such that the scope assigned to each
subsequent call of `ops.conv2d` is appended with an underscore and iteration
number. More concretely, the scopes in the example above would be 'conv3_1',
'conv3_2' and 'conv3_3'.
### Scopes
In addition to the types of scope mechanisms in TensorFlow
([name_scope](https://www.tensorflow.org/api_docs/python/framework.html#name_scope),
[op_scope](https://www.tensorflow.org/api_docs/python/framework.html#op_scope),
[variable_scope](https://www.tensorflow.org/api_docs/python/state_ops.html#variable_scope),
[variable_op_scope](https://www.tensorflow.org/api_docs/python/state_ops.html#variable_op_scope)),
TF-Slim adds a new scoping mechanism called "argument scope" or
[arg_scope](scopes.py).
This new scope allows a user to specify one or more operations and a set of
arguments which will be passed to each of the operations defined in the
`arg_scope`. This functionality is best illustrated by example. Consider the
following code snippet:
```python
net = slim.ops.conv2d(inputs, 64, [11, 11], 4, padding='SAME', stddev=0.01, weight_decay=0.0005, scope='conv1')
net = slim.ops.conv2d(net, 128, [11, 11], padding='VALID', stddev=0.01, weight_decay=0.0005, scope='conv2')
net = slim.ops.conv2d(net, 256, [11, 11], padding='SAME', stddev=0.01, weight_decay=0.0005, scope='conv3')
```
It should be clear that these three Convolution layers share many of the same
hyperparameters. Two have the same padding, all three have the same weight_decay
and the same standard deviation for their weights. Not only do the duplicated values make
the code more difficult to read, they also place the additional burden on the writer
of needing to double-check that all of the values are identical in each step.
One solution would be to specify default values using variables:
```python
padding='SAME'
stddev=0.01
weight_decay=0.0005
net = slim.ops.conv2d(inputs, 64, [11, 11], 4, padding=padding, stddev=stddev, weight_decay=weight_decay, scope='conv1')
net = slim.ops.conv2d(net, 128, [11, 11], padding='VALID', stddev=stddev, weight_decay=weight_decay, scope='conv2')
net = slim.ops.conv2d(net, 256, [11, 11], padding=padding, stddev=stddev, weight_decay=weight_decay, scope='conv3')
```
This solution ensures that all three convolutions share the exact same variable
values but doesn't reduce the code clutter. By using an `arg_scope`, we can both
ensure that each layer uses the same values and simplify the code:
```python
with slim.arg_scope([slim.ops.conv2d], padding='SAME', stddev=0.01, weight_decay=0.0005):
net = slim.ops.conv2d(inputs, 64, [11, 11], scope='conv1')
net = slim.ops.conv2d(net, 128, [11, 11], padding='VALID', scope='conv2')
net = slim.ops.conv2d(net, 256, [11, 11], scope='conv3')
```
As the example illustrates, the use of arg_scope makes the code cleaner,
simpler and easier to maintain. Notice that while argument values are specified
in the arg_scope, they can be overwritten locally. In particular, while
the padding argument has been set to 'SAME', the second convolution overrides
it with the value of 'VALID'.
One can also nest `arg_scope`s and use multiple operations in the same scope.
For example:
```python
with arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005):
with arg_scope([slim.ops.conv2d], padding='SAME'), slim.arg_scope([slim.ops.fc], bias=1.0):
net = slim.ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
net = slim.ops.conv2d(net, 256, [5, 5], stddev=0.03, scope='conv2')
net = slim.ops.flatten(net)
net = slim.ops.fc(net, 1000, activation=None, scope='fc')
```
In this example, the first `arg_scope` applies the same `stddev` and `weight_decay`
arguments to the `conv2d` and `fc` ops in its scope. In the second `arg_scope`,
additional default arguments to `conv2d` only are specified.
In addition to `arg_scope`, TF-Slim provides several decorators that wrap the
use of tensorflow scopes. These include `@AddArgScope`, `@AddNameScope`,
`@AddVariableScope`, `@AddOpScope` and `@AddVariableOpScope`. To illustrate
their use, consider the following example.
```python
def MyNewOp(inputs):
varA = ...
varB = ...
outputs = tf.mul(varA, inputs) + varB
return outputs
```
In this example, the user has created a new op which creates two variables. To
ensure that these variables exist within a certain variable scope (to avoid
collisions with variables with the same name), in standard TF, the op must be
called within a variable scope:
```python
inputs = ...
with tf.variable_scope('layer1'):
outputs = MyNewOp(inputs)
```
As an alternative, one can use TF-Slim's decorators to decorate the function
and simplify the call:
```python
@AddVariableScope
def MyNewOp(inputs):
...
return outputs
inputs = ...
outputs = MyNewOp('layer1')
```
The `@AddVariableScope` decorator simply applies the `tf.variable_scope` scoping
to the called function taking "layer1" as its argument. This allows the code
to be written more concisely.
### Losses
The loss function defines a quantity that we want to minimize. For
classification problems, this is typically the cross entropy between the true
(one-hot) distribution and the predicted probability distribution across
classes. For regression problems, this is often the sum-of-squares differences
between the predicted and true values.
Certain models, such as multi-task
learning models, require the use of multiple loss functions simultaneously. In
other words, the loss function ultimately being minimized is the sum of various
other loss functions. For example, consider a model that predicts both
the type of scene in an image as well as the depth from the
camera of each pixel. This model's loss function would be the sum of the
classification loss and depth prediction loss.
TF-Slim provides an easy-to-use mechanism for defining and keeping track of
loss functions via the
[losses.py](./losses.py)
module. Consider the simple case where we want to train the VGG network:
```python
# Load the images and labels.
images, labels = ...
# Create the model.
predictions = ...
# Define the loss functions and get the total loss.
loss = losses.ClassificationLoss(predictions, labels)
```
In this example, we start by creating the model (using TF-Slim's VGG
implementation), and add the standard classification loss. Now, let's turn
to the case where we have a multi-task model that produces multiple outputs:
```python
# Load the images and labels.
images, scene_labels, depth_labels = ...
# Create the model.
scene_predictions, depth_predictions = CreateMultiTaskModel(images)
# Define the loss functions and get the total loss.
classification_loss = slim.losses.ClassificationLoss(scene_predictions, scene_labels)
sum_of_squares_loss = slim.losses.SumOfSquaresLoss(depth_predictions, depth_labels)
# The following two lines have the same effect:
total_loss1 = classification_loss + sum_of_squares_loss
total_loss2 = tf.add_n(tf.get_collection(slim.losses.LOSSES_COLLECTION))
```
In this example, we have two losses, which we add by calling
`losses.ClassificationLoss` and `losses.SumOfSquaresLoss`. We can obtain the
total loss by adding them together (`total_loss1`) or by summing the losses
collection (`total_loss2`). How does this work?
When you create a loss function via TF-Slim, TF-Slim adds the loss to a
special TensorFlow collection of loss functions. This enables you to either
manage the total loss manually, or allow TF-Slim to manage them for you.
What if you want to let TF-Slim manage the losses for you but have a custom loss
function?
[losses.py](./losses.py)
also has a function that adds this loss to TF-Slim's collection. For example:
```python
# Load the images and labels.
images, scene_labels, depth_labels, pose_labels = ...
# Create the model.
scene_predictions, depth_predictions, pose_predictions = CreateMultiTaskModel(images)
# Define the loss functions and get the total loss.
classification_loss = slim.losses.ClassificationLoss(scene_predictions, scene_labels)
sum_of_squares_loss = slim.losses.SumOfSquaresLoss(depth_predictions, depth_labels)
pose_loss = MyCustomLossFunction(pose_predictions, pose_labels)
tf.add_to_collection(slim.losses.LOSSES_COLLECTION, pose_loss) # Letting TF-Slim know about the additional loss.
# The following two lines have the same effect:
total_loss1 = classification_loss + sum_of_squares_loss + pose_loss
total_loss2 = losses.GetTotalLoss()
```
In this example, we can again either produce the total loss function manually
or let TF-Slim know about the additional loss and let TF-Slim handle the losses.
## Putting the Pieces Together
By combining TF-Slim Variables, Operations and scopes, we can define a network
that would normally be quite complex in just a few lines of code. For example, the entire
[VGG](https://www.robots.ox.ac.uk/~vgg/research/very_deep/) architecture can be
defined with just the following snippet:
```python
with arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005):
net = slim.ops.repeat_op(2, inputs, slim.ops.conv2d, 64, [3, 3], scope='conv1')
net = slim.ops.max_pool(net, [2, 2], scope='pool1')
net = slim.ops.repeat_op(2, net, slim.ops.conv2d, 128, [3, 3], scope='conv2')
net = slim.ops.max_pool(net, [2, 2], scope='pool2')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3')
net = slim.ops.max_pool(net, [2, 2], scope='pool3')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv4')
net = slim.ops.max_pool(net, [2, 2], scope='pool4')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv5')
net = slim.ops.max_pool(net, [2, 2], scope='pool5')
net = slim.ops.flatten(net, scope='flatten5')
net = slim.ops.fc(net, 4096, scope='fc6')
net = slim.ops.dropout(net, 0.5, scope='dropout6')
net = slim.ops.fc(net, 4096, scope='fc7')
net = slim.ops.dropout(net, 0.5, scope='dropout7')
net = slim.ops.fc(net, 1000, activation=None, scope='fc8')
return net
```
## Re-using previously defined network architectures and pre-trained models.
### Brief Recap on Restoring Variables from a Checkpoint
After a model has been trained, it can be restored using `tf.train.Saver()`
which restores `Variables` from a given checkpoint. For many cases,
`tf.train.Saver()` provides a simple mechanism to restore all or just a
few variables.
```python
# Create some variables.
v1 = tf.Variable(..., name="v1")
v2 = tf.Variable(..., name="v2")
...
# Add ops to restore all the variables.
restorer = tf.train.Saver()
# Add ops to restore some variables.
restorer = tf.train.Saver([v1, v2])
# Later, launch the model, use the saver to restore variables from disk, and
# do some work with the model.
with tf.Session() as sess:
# Restore variables from disk.
restorer.restore(sess, "/tmp/model.ckpt")
print("Model restored.")
# Do some work with the model
...
```
See [Restoring Variables](https://www.tensorflow.org/versions/r0.7/how_tos/variables/index.html#restoring-variables)
and
[Choosing which Variables to Save and Restore](https://www.tensorflow.org/versions/r0.7/how_tos/variables/index.html#choosing-which-variables-to-save-and-restore)
sections of the [Variables](https://www.tensorflow.org/versions/r0.7/how_tos/variables/index.html)
page for more details.
### Using slim.variables to Track which Variables need to be Restored
It is often desirable to fine-tune a pre-trained model on an entirely new
dataset or even a new task. In these situations, one must specify which layers
of the model should be reused (and consequently loaded from a checkpoint)
and which layers are new. Indicating which variables or layers should be
restored is a process that quickly becomes cumbersome when done manually.
To help keep track of which variables to restore, `slim.variables` provides a
`restore` argument when creating each Variable. By default, all variables are
marked as `restore=True`, which results in all variables defined by the model
being restored.
```python
# Create some variables.
v1 = slim.variables.variable(name="v1", ..., restore=False)
v2 = slim.variables.variable(name="v2", ...) # By default restore=True
...
# Get list of variables to restore (which contains only 'v2')
variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
restorer = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
# Restore variables from disk.
restorer.restore(sess, "/tmp/model.ckpt")
print("Model restored.")
# Do some work with the model
...
```
Additionally, every layer in `slim.ops` that creates slim.variables (such as
`slim.ops.conv2d`, `slim.ops.fc`, `slim.ops.batch_norm`) also has a `restore`
argument which controls whether the variables created by that layer should be
restored or not.
```python
# Create a small network.
net = slim.ops.conv2d(images, 32, [7, 7], stride=2, scope='conv1')
net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2')
net = slim.ops.conv2d(net, 128, [3, 3], scope='conv3')
net = slim.ops.max_pool(net, [3, 3], stride=2, scope='pool3')
net = slim.ops.flatten(net)
net = slim.ops.fc(net, 10, scope='logits', restore=False)
...
# VARIABLES_TO_RESTORE would contain the 'weights' and 'bias' defined by 'conv1'
# 'conv2' and 'conv3' but not the ones defined by 'logits'
variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
# Create a restorer that would restore only the needed variables.
restorer = tf.train.Saver(variables_to_restore)
# Create a saver that would save all the variables (including 'logits').
saver = tf.train.Saver()
with tf.Session() as sess:
# Restore variables from disk.
restorer.restore(sess, "/tmp/model.ckpt")
print("Model restored.")
# Do some work with the model
...
saver.save(sess, "/tmp/new_model.ckpt")
```
Note: When restoring variables from a checkpoint, the `Saver` locates the
variable names in a checkpoint file and maps them to variables in the current
graph. Above, we created a saver by passing to it a list of variables. In this
case, the names of the variables to locate in the checkpoint file were
implicitly obtained from each provided variable's `var.op.name`.
This works well when the variable names in the checkpoint file match those in
the graph. However, sometimes, we want to restore a model from a checkpoint
whose variables have different names than those in the current graph. In this case,
we must provide the `Saver` a dictionary that maps from each checkpoint variable
name to each graph variable. Consider the following example where the checkpoint
variable names are obtained via a simple function:
```python
# Assuming that 'conv1/weights' should be restored from 'vgg16/conv1/weights'
def name_in_checkpoint(var):
return 'vgg16/' + var.op.name
# Assuming that 'conv1/weights' and 'conv1/bias' should be restored from 'conv1/params1' and 'conv1/params2'
def name_in_checkpoint(var):
if "weights" in var.op.name:
return var.op.name.replace("weights", "params1")
if "bias" in var.op.name:
return var.op.name.replace("bias", "params2")
variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
variables_to_restore = {name_in_checkpoint(var):var for var in variables_to_restore}
restorer = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
# Restore variables from disk.
restorer.restore(sess, "/tmp/model.ckpt")
```
### Reusing the VGG16 network defined in TF-Slim on a different task, e.g. PASCAL-VOC.
Assuming one already has a pre-trained VGG16 model, one need only replace
the last layer `fc8` with a new layer `fc8_pascal` and use `restore=False`.
```python
def vgg16_pascal(inputs):
with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005):
net = slim.ops.repeat_op(2, inputs, slim.ops.conv2d, 64, [3, 3], scope='conv1')
net = slim.ops.max_pool(net, [2, 2], scope='pool1')
net = slim.ops.repeat_op(2, net, slim.ops.conv2d, 128, [3, 3], scope='conv2')
net = slim.ops.max_pool(net, [2, 2], scope='pool2')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3')
net = slim.ops.max_pool(net, [2, 2], scope='pool3')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv4')
net = slim.ops.max_pool(net, [2, 2], scope='pool4')
net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv5')
net = slim.ops.max_pool(net, [2, 2], scope='pool5')
net = slim.ops.flatten(net, scope='flatten5')
net = slim.ops.fc(net, 4096, scope='fc6')
net = slim.ops.dropout(net, 0.5, scope='dropout6')
net = slim.ops.fc(net, 4096, scope='fc7')
net = slim.ops.dropout(net, 0.5, scope='dropout7')
# To reuse vgg16 on PASCAL-VOC, just change the last layer.
net = slim.ops.fc(net, 21, activation=None, scope='fc8_pascal', restore=False)
return net
```
## Authors
Sergio Guadarrama and Nathan Silberman
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception-v3 expressed in TensorFlow-Slim.
Usage:
# Parameters for BatchNorm.
batch_norm_params = {
# Decay for the batch_norm moving averages.
'decay': BATCHNORM_MOVING_AVERAGE_DECAY,
# epsilon to prevent 0s in variance.
'epsilon': 0.001,
}
# Set weight_decay for weights in Conv and FC layers.
with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004):
with slim.arg_scope([slim.ops.conv2d],
stddev=0.1,
activation=tf.nn.relu,
batch_norm_params=batch_norm_params):
# Force all Variables to reside on the CPU.
with slim.arg_scope([slim.variables.variable], device='/cpu:0'):
logits, endpoints = slim.inception.inception_v3(
images,
dropout_keep_prob=0.8,
num_classes=num_classes,
is_training=for_training,
restore_logits=restore_logits,
scope=scope)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception.slim import ops
from inception.slim import scopes
def inception_v3(inputs,
dropout_keep_prob=0.8,
num_classes=1000,
is_training=True,
restore_logits=True,
scope=''):
"""Latest Inception from http://arxiv.org/abs/1512.00567.
"Rethinking the Inception Architecture for Computer Vision"
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna
Args:
inputs: a tensor of size [batch_size, height, width, channels].
dropout_keep_prob: dropout keep_prob.
num_classes: number of predicted classes.
    is_training: whether the model is in training mode.
restore_logits: whether or not the logits layers should be restored.
Useful for fine-tuning a model with different num_classes.
scope: Optional scope for op_scope.
Returns:
    a 2-tuple of (logits, end_points): the logits Tensor and a dictionary of
      activations that includes the 'logits' and 'aux_logits' Tensors.
"""
# end_points will collect relevant activations for external use, for example
# summaries or losses.
end_points = {}
with tf.op_scope([inputs], scope, 'inception_v3'):
with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout],
is_training=is_training):
with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
stride=1, padding='VALID'):
# 299 x 299 x 3
end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2,
scope='conv0')
# 149 x 149 x 32
end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3],
scope='conv1')
# 147 x 147 x 32
end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3],
padding='SAME', scope='conv2')
# 147 x 147 x 64
end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3],
stride=2, scope='pool1')
# 73 x 73 x 64
end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1],
scope='conv3')
        # 73 x 73 x 80.
end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3],
scope='conv4')
        # 71 x 71 x 192.
end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3],
stride=2, scope='pool2')
# 35 x 35 x 192.
net = end_points['pool2']
# Inception blocks
with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool],
stride=1, padding='SAME'):
# mixed: 35 x 35 x 256.
with tf.variable_scope('mixed_35x35x256a'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 64, [1, 1])
with tf.variable_scope('branch5x5'):
branch5x5 = ops.conv2d(net, 48, [1, 1])
branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
with tf.variable_scope('branch3x3dbl'):
branch3x3dbl = ops.conv2d(net, 64, [1, 1])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
net = tf.concat(3, [branch1x1, branch5x5, branch3x3dbl, branch_pool])
end_points['mixed_35x35x256a'] = net
# mixed_1: 35 x 35 x 288.
with tf.variable_scope('mixed_35x35x288a'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 64, [1, 1])
with tf.variable_scope('branch5x5'):
branch5x5 = ops.conv2d(net, 48, [1, 1])
branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
with tf.variable_scope('branch3x3dbl'):
branch3x3dbl = ops.conv2d(net, 64, [1, 1])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
net = tf.concat(3, [branch1x1, branch5x5, branch3x3dbl, branch_pool])
end_points['mixed_35x35x288a'] = net
# mixed_2: 35 x 35 x 288.
with tf.variable_scope('mixed_35x35x288b'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 64, [1, 1])
with tf.variable_scope('branch5x5'):
branch5x5 = ops.conv2d(net, 48, [1, 1])
branch5x5 = ops.conv2d(branch5x5, 64, [5, 5])
with tf.variable_scope('branch3x3dbl'):
branch3x3dbl = ops.conv2d(net, 64, [1, 1])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
net = tf.concat(3, [branch1x1, branch5x5, branch3x3dbl, branch_pool])
end_points['mixed_35x35x288b'] = net
# mixed_3: 17 x 17 x 768.
with tf.variable_scope('mixed_17x17x768a'):
with tf.variable_scope('branch3x3'):
branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2, padding='VALID')
with tf.variable_scope('branch3x3dbl'):
branch3x3dbl = ops.conv2d(net, 64, [1, 1])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3])
branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3],
stride=2, padding='VALID')
with tf.variable_scope('branch_pool'):
branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
net = tf.concat(3, [branch3x3, branch3x3dbl, branch_pool])
end_points['mixed_17x17x768a'] = net
# mixed4: 17 x 17 x 768.
with tf.variable_scope('mixed_17x17x768b'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 192, [1, 1])
with tf.variable_scope('branch7x7'):
branch7x7 = ops.conv2d(net, 128, [1, 1])
branch7x7 = ops.conv2d(branch7x7, 128, [1, 7])
branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
with tf.variable_scope('branch7x7dbl'):
branch7x7dbl = ops.conv2d(net, 128, [1, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7])
branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
end_points['mixed_17x17x768b'] = net
# mixed_5: 17 x 17 x 768.
with tf.variable_scope('mixed_17x17x768c'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 192, [1, 1])
with tf.variable_scope('branch7x7'):
branch7x7 = ops.conv2d(net, 160, [1, 1])
branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
with tf.variable_scope('branch7x7dbl'):
branch7x7dbl = ops.conv2d(net, 160, [1, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
end_points['mixed_17x17x768c'] = net
# mixed_6: 17 x 17 x 768.
with tf.variable_scope('mixed_17x17x768d'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 192, [1, 1])
with tf.variable_scope('branch7x7'):
branch7x7 = ops.conv2d(net, 160, [1, 1])
branch7x7 = ops.conv2d(branch7x7, 160, [1, 7])
branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
with tf.variable_scope('branch7x7dbl'):
branch7x7dbl = ops.conv2d(net, 160, [1, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7])
branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
end_points['mixed_17x17x768d'] = net
# mixed_7: 17 x 17 x 768.
with tf.variable_scope('mixed_17x17x768e'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 192, [1, 1])
with tf.variable_scope('branch7x7'):
branch7x7 = ops.conv2d(net, 192, [1, 1])
branch7x7 = ops.conv2d(branch7x7, 192, [1, 7])
branch7x7 = ops.conv2d(branch7x7, 192, [7, 1])
with tf.variable_scope('branch7x7dbl'):
branch7x7dbl = ops.conv2d(net, 192, [1, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1])
branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
net = tf.concat(3, [branch1x1, branch7x7, branch7x7dbl, branch_pool])
end_points['mixed_17x17x768e'] = net
# Auxiliary Head logits
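        # The auxiliary classifier below follows the paper cited in the
        # docstring: it provides an extra gradient signal during training, and
        # its output is exposed as end_points['aux_logits'].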
aux_logits = tf.identity(end_points['mixed_17x17x768e'])
with tf.variable_scope('aux_logits'):
aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3,
padding='VALID')
aux_logits = ops.conv2d(aux_logits, 128, [1, 1], scope='proj')
# Shape of feature map before the final layer.
shape = aux_logits.get_shape()
aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01,
padding='VALID')
aux_logits = ops.flatten(aux_logits)
aux_logits = ops.fc(aux_logits, num_classes, activation=None,
stddev=0.001, restore=restore_logits)
end_points['aux_logits'] = aux_logits
# mixed_8: 17 x 17 x 1280.
with tf.variable_scope('mixed_17x17x1280a'):
with tf.variable_scope('branch3x3'):
branch3x3 = ops.conv2d(net, 192, [1, 1])
branch3x3 = ops.conv2d(branch3x3, 320, [3, 3], stride=2,
padding='VALID')
with tf.variable_scope('branch7x7x3'):
branch7x7x3 = ops.conv2d(net, 192, [1, 1])
branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7])
branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1])
branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3],
stride=2, padding='VALID')
with tf.variable_scope('branch_pool'):
branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
net = tf.concat(3, [branch3x3, branch7x7x3, branch_pool])
end_points['mixed_17x17x1280a'] = net
# mixed_9: 8 x 8 x 2048.
with tf.variable_scope('mixed_8x8x2048a'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 320, [1, 1])
with tf.variable_scope('branch3x3'):
branch3x3 = ops.conv2d(net, 384, [1, 1])
branch3x3 = tf.concat(3, [ops.conv2d(branch3x3, 384, [1, 3]),
ops.conv2d(branch3x3, 384, [3, 1])])
with tf.variable_scope('branch3x3dbl'):
branch3x3dbl = ops.conv2d(net, 448, [1, 1])
branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
branch3x3dbl = tf.concat(3, [ops.conv2d(branch3x3dbl, 384, [1, 3]),
ops.conv2d(branch3x3dbl, 384, [3, 1])])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
net = tf.concat(3, [branch1x1, branch3x3, branch3x3dbl, branch_pool])
end_points['mixed_8x8x2048a'] = net
# mixed_10: 8 x 8 x 2048.
with tf.variable_scope('mixed_8x8x2048b'):
with tf.variable_scope('branch1x1'):
branch1x1 = ops.conv2d(net, 320, [1, 1])
with tf.variable_scope('branch3x3'):
branch3x3 = ops.conv2d(net, 384, [1, 1])
branch3x3 = tf.concat(3, [ops.conv2d(branch3x3, 384, [1, 3]),
ops.conv2d(branch3x3, 384, [3, 1])])
with tf.variable_scope('branch3x3dbl'):
branch3x3dbl = ops.conv2d(net, 448, [1, 1])
branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
branch3x3dbl = tf.concat(3, [ops.conv2d(branch3x3dbl, 384, [1, 3]),
ops.conv2d(branch3x3dbl, 384, [3, 1])])
with tf.variable_scope('branch_pool'):
branch_pool = ops.avg_pool(net, [3, 3])
branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
net = tf.concat(3, [branch1x1, branch3x3, branch3x3dbl, branch_pool])
end_points['mixed_8x8x2048b'] = net
# Final pooling and prediction
with tf.variable_scope('logits'):
shape = net.get_shape()
net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool')
# 1 x 1 x 2048
net = ops.dropout(net, dropout_keep_prob, scope='dropout')
net = ops.flatten(net, scope='flatten')
# 2048
logits = ops.fc(net, num_classes, activation=None, scope='logits',
restore=restore_logits)
# 1000
end_points['logits'] = logits
end_points['predictions'] = tf.nn.softmax(logits, name='predictions')
return logits, end_points
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.inception."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception.slim import inception_model as inception
class InceptionTest(tf.test.TestCase):
def testBuildLogits(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = inception.inception_v3(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testBuildEndPoints(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue('logits' in end_points)
logits = end_points['logits']
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('aux_logits' in end_points)
aux_logits = end_points['aux_logits']
self.assertListEqual(aux_logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['mixed_8x8x2048b']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 8, 8, 2048])
def testHalfSizeImages(self):
batch_size = 5
height, width = 150, 150
num_classes = 1000
with self.test_session():
inputs = tf.random_uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['mixed_8x8x2048b']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 3, 3, 2048])
  def testUnknownBatchSize(self):
batch_size = 1
height, width = 299, 299
num_classes = 1000
with self.test_session() as sess:
inputs = tf.placeholder(tf.float32, (None, height, width, 3))
logits, _ = inception.inception_v3(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('logits'))
self.assertListEqual(logits.get_shape().as_list(),
[None, num_classes])
images = tf.random_uniform((batch_size, height, width, 3))
sess.run(tf.initialize_all_variables())
output = sess.run(logits, {inputs: images.eval()})
self.assertEquals(output.shape, (batch_size, num_classes))
def testEvaluation(self):
batch_size = 2
height, width = 299, 299
num_classes = 1000
with self.test_session() as sess:
eval_inputs = tf.random_uniform((batch_size, height, width, 3))
logits, _ = inception.inception_v3(eval_inputs, num_classes,
is_training=False)
predictions = tf.argmax(logits, 1)
sess.run(tf.initialize_all_variables())
output = sess.run(predictions)
self.assertEquals(output.shape, (batch_size,))
def testTrainEvalWithReuse(self):
train_batch_size = 5
eval_batch_size = 2
height, width = 150, 150
num_classes = 1000
with self.test_session() as sess:
train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
inception.inception_v3(train_inputs, num_classes)
tf.get_variable_scope().reuse_variables()
eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
logits, _ = inception.inception_v3(eval_inputs, num_classes,
is_training=False)
predictions = tf.argmax(logits, 1)
sess.run(tf.initialize_all_variables())
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
if __name__ == '__main__':
tf.test.main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains convenience wrappers for various Neural Network TensorFlow losses.
All the losses defined here add themselves to the LOSSES_COLLECTION
collection.
  l1_loss: Define an L1 Loss, useful for regularization, e.g. lasso.
  l2_loss: Define an L2 Loss, useful for regularization, e.g. weight decay.
cross_entropy_loss: Define a cross entropy loss using
softmax_cross_entropy_with_logits. Useful for classification.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
# In order to gather all losses in a network, the user should use this
# key for get_collection, i.e:
# losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
LOSSES_COLLECTION = '_losses'
def l1_loss(tensor, weight=1.0, scope=None):
"""Define a L1Loss, useful for regularize, i.e. lasso.
Args:
tensor: tensor to regularize.
weight: scale the loss by this factor.
scope: Optional scope for op_scope.
Returns:
the L1 loss op.
"""
with tf.op_scope([tensor], scope, 'L1Loss'):
weight = tf.convert_to_tensor(weight,
dtype=tensor.dtype.base_dtype,
name='loss_weight')
loss = tf.mul(weight, tf.reduce_sum(tf.abs(tensor)), name='value')
tf.add_to_collection(LOSSES_COLLECTION, loss)
return loss
def l2_loss(tensor, weight=1.0, scope=None):
"""Define a L2Loss, useful for regularize, i.e. weight decay.
Args:
tensor: tensor to regularize.
weight: an optional weight to modulate the loss.
scope: Optional scope for op_scope.
Returns:
the L2 loss op.
"""
with tf.op_scope([tensor], scope, 'L2Loss'):
weight = tf.convert_to_tensor(weight,
dtype=tensor.dtype.base_dtype,
name='loss_weight')
loss = tf.mul(weight, tf.nn.l2_loss(tensor), name='value')
tf.add_to_collection(LOSSES_COLLECTION, loss)
return loss
def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0,
weight=1.0, scope=None):
"""Define a Cross Entropy loss using softmax_cross_entropy_with_logits.
It can scale the loss by weight factor, and smooth the labels.
Args:
logits: [batch_size, num_classes] logits outputs of the network .
one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels.
label_smoothing: if greater than 0 then smooth the labels.
weight: scale the loss by this factor.
scope: Optional scope for op_scope.
Returns:
A tensor with the softmax_cross_entropy loss.
"""
logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape())
with tf.op_scope([logits, one_hot_labels], scope, 'CrossEntropyLoss'):
num_classes = one_hot_labels.get_shape()[-1].value
one_hot_labels = tf.cast(one_hot_labels, logits.dtype)
if label_smoothing > 0:
smooth_positives = 1.0 - label_smoothing
smooth_negatives = label_smoothing / num_classes
one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives
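      # For example, with label_smoothing=0.1 and num_classes=1000 a positive
      # target becomes 0.9 + 0.0001 = 0.9001 and a negative target becomes
      # 0.0001, so each row still sums to 1.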
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
one_hot_labels,
name='xentropy')
weight = tf.convert_to_tensor(weight,
dtype=logits.dtype.base_dtype,
name='loss_weight')
loss = tf.mul(weight, tf.reduce_mean(cross_entropy), name='value')
tf.add_to_collection(LOSSES_COLLECTION, loss)
return loss
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.losses."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception.slim import losses
class LossesTest(tf.test.TestCase):
def testL1Loss(self):
with self.test_session():
shape = [5, 5, 5]
num_elem = 5 * 5 * 5
weights = tf.constant(1.0, shape=shape)
wd = 0.01
loss = losses.l1_loss(weights, wd)
self.assertEquals(loss.op.name, 'L1Loss/value')
self.assertAlmostEqual(loss.eval(), num_elem * wd, 5)
def testL2Loss(self):
with self.test_session():
shape = [5, 5, 5]
num_elem = 5 * 5 * 5
weights = tf.constant(1.0, shape=shape)
wd = 0.01
loss = losses.l2_loss(weights, wd)
self.assertEquals(loss.op.name, 'L2Loss/value')
self.assertAlmostEqual(loss.eval(), num_elem * wd / 2, 5)
class CrossEntropyLossTest(tf.test.TestCase):
def testCrossEntropyLossAllCorrect(self):
with self.test_session():
logits = tf.constant([[10.0, 0.0, 0.0],
[0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = tf.constant([[1, 0, 0],
[0, 1, 0],
[0, 0, 1]])
loss = losses.cross_entropy_loss(logits, labels)
self.assertEquals(loss.op.name, 'CrossEntropyLoss/value')
self.assertAlmostEqual(loss.eval(), 0.0, 3)
def testCrossEntropyLossAllWrong(self):
with self.test_session():
logits = tf.constant([[10.0, 0.0, 0.0],
[0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = tf.constant([[0, 0, 1],
[1, 0, 0],
[0, 1, 0]])
loss = losses.cross_entropy_loss(logits, labels)
self.assertEquals(loss.op.name, 'CrossEntropyLoss/value')
self.assertAlmostEqual(loss.eval(), 10.0, 3)
def testCrossEntropyLossAllWrongWithWeight(self):
with self.test_session():
logits = tf.constant([[10.0, 0.0, 0.0],
[0.0, 10.0, 0.0],
[0.0, 0.0, 10.0]])
labels = tf.constant([[0, 0, 1],
[1, 0, 0],
[0, 1, 0]])
loss = losses.cross_entropy_loss(logits, labels, weight=0.5)
self.assertEquals(loss.op.name, 'CrossEntropyLoss/value')
self.assertAlmostEqual(loss.eval(), 5.0, 3)
if __name__ == '__main__':
tf.test.main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains convenience wrappers for typical Neural Network TensorFlow layers.
Additionally it maintains a collection with update_ops that need to be
  updated after the ops have been computed, for example to update moving means
and moving variances of batch_norm.
Ops that have different behavior during training or eval have an is_training
parameter. Additionally Ops that contain variables.variable have a trainable
  parameter, which controls whether the op's variables are trainable or not.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.training import moving_averages
from inception.slim import losses
from inception.slim import scopes
from inception.slim import variables
# Used to keep the update ops done by batch_norm.
UPDATE_OPS_COLLECTION = '_update_ops_'
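# These update ops must be run alongside training so the moving averages stay
# current; one way, mirroring ops_test.py, is:
#   update_ops = tf.get_collection(UPDATE_OPS_COLLECTION)
#   with tf.control_dependencies(update_ops):
#     barrier = tf.no_op(name='update_barrier')
#     output = control_flow_ops.with_dependencies([barrier], output)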
@scopes.add_arg_scope
def batch_norm(inputs,
decay=0.999,
scale=False,
epsilon=0.001,
moving_vars='moving_vars',
activation=None,
is_training=True,
trainable=True,
restore=True,
scope=None):
"""Adds a Batch Normalization layer.
Args:
inputs: a tensor of size [batch_size, height, width, channels]
or [batch_size, channels].
decay: decay for the moving average.
scale: If True, multiply by gamma. If False, gamma is
not used. When the next layer is linear (also e.g. ReLU), this can be
disabled since the scaling can be done by the next layer.
epsilon: small float added to variance to avoid dividing by zero.
moving_vars: collection to store the moving_mean and moving_variance.
activation: activation function.
is_training: whether or not the model is in training mode.
    trainable: whether or not the variables should be trainable.
restore: whether or not the variables should be marked for restore.
scope: Optional scope for variable_op_scope.
Returns:
a tensor representing the output of the operation.
"""
inputs_shape = inputs.get_shape()
with tf.variable_op_scope([inputs], scope, 'BatchNorm'):
axis = range(len(inputs_shape) - 1)
params_shape = inputs_shape[-1:]
with scopes.arg_scope([variables.variable], restore=restore):
# Allocate parameters for the beta and gamma of the normalization.
beta = variables.variable('beta',
params_shape,
initializer=tf.zeros_initializer,
trainable=trainable)
if scale:
gamma = variables.variable('gamma',
params_shape,
initializer=tf.ones,
trainable=trainable)
else:
gamma = None
# Create moving_mean and moving_variance add them to moving_vars and
# GraphKeys.MOVING_AVERAGE_VARIABLES collections.
with scopes.arg_scope([variables.variable], trainable=False,
collections=[
moving_vars,
tf.GraphKeys.MOVING_AVERAGE_VARIABLES]):
moving_mean = variables.variable('moving_mean',
params_shape,
initializer=tf.zeros_initializer)
moving_variance = variables.variable('moving_variance',
params_shape,
initializer=tf.ones)
if is_training:
# Calculate the moments based on the individual batch.
mean, variance = tf.nn.moments(inputs, axis)
update_moving_mean = moving_averages.assign_moving_average(
moving_mean, mean, decay)
tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean)
update_moving_variance = moving_averages.assign_moving_average(
moving_variance, variance, decay)
tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance)
else:
# Just use the moving_mean and moving_variance.
mean = moving_mean
variance = moving_variance
# Normalize the activations.
outputs = tf.nn.batch_normalization(
inputs, mean, variance, beta, gamma, epsilon)
outputs.set_shape(inputs.get_shape())
if activation:
outputs = activation(outputs)
return outputs
@scopes.add_arg_scope
def conv2d(inputs,
num_filters_out,
kernel_size,
stride=1,
padding='SAME',
activation=tf.nn.relu,
stddev=0.01,
bias=0.0,
weight_decay=0,
batch_norm_params=None,
is_training=True,
trainable=True,
restore=True,
scope=None):
"""Adds a 2D convolution followed by an optional batch_norm layer.
conv2d creates a variable called 'weights', representing the convolutional
kernel, that is convolved with the input. If `batch_norm_params` is None, a
second variable called 'biases' is added to the result of the convolution
operation.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_filters_out: the number of output filters.
    kernel_size: a 2-D list comprising the height and width of the filters.
stride: the stride in height and width of the convolution.
padding: one of 'VALID' or 'SAME'.
activation: activation function.
    stddev: standard deviation of the truncated Gaussian weight distribution.
bias: the initial value of the biases.
weight_decay: the weight decay.
    batch_norm_params: parameters for the batch_norm. If None, it is not used.
    is_training: whether or not the model is in training mode.
    trainable: whether or not the variables should be trainable.
restore: whether or not the variables should be marked for restore.
scope: Optional scope for variable_op_scope.
Returns:
a tensor representing the output of the operation.
Raises:
ValueError: if 'kernel_size' is not a 2-D list.
"""
if len(kernel_size) != 2:
raise ValueError('kernel_size must be a 2-D list.')
with tf.variable_op_scope([inputs], scope, 'Conv'):
num_filters_in = inputs.get_shape()[-1]
weights_shape = [kernel_size[0], kernel_size[1],
num_filters_in, num_filters_out]
weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
l2_regularizer = lambda t: losses.l2_loss(t, weight_decay)
weights = variables.variable('weights',
shape=weights_shape,
initializer=weights_initializer,
regularizer=l2_regularizer,
trainable=trainable,
restore=restore)
conv = tf.nn.conv2d(inputs, weights, [1, stride, stride, 1],
padding=padding)
if batch_norm_params is not None:
with scopes.arg_scope([batch_norm], is_training=is_training,
trainable=trainable, restore=restore):
outputs = batch_norm(conv, **batch_norm_params)
else:
bias_shape = [num_filters_out,]
bias_initializer = tf.constant_initializer(bias)
biases = variables.variable('biases',
shape=bias_shape,
initializer=bias_initializer,
trainable=trainable,
restore=restore)
outputs = tf.nn.bias_add(conv, biases)
if activation:
outputs = activation(outputs)
return outputs
@scopes.add_arg_scope
def fc(inputs,
num_units_out,
activation=tf.nn.relu,
stddev=0.01,
bias=0.0,
weight_decay=0,
batch_norm_params=None,
is_training=True,
trainable=True,
restore=True,
scope=None):
"""Adds a fully connected layer followed by an optional batch_norm layer.
FC creates a variable called 'weights', representing the fully connected
  weight matrix, that is multiplied by the input. If `batch_norm_params` is None, a
second variable called 'biases' is added to the result of the initial
vector-matrix multiplication.
Args:
inputs: a [B x N] tensor where B is the batch size and N is the number of
input units in the layer.
num_units_out: the number of output units in the layer.
activation: activation function.
stddev: the standard deviation for the weights.
bias: the initial value of the biases.
weight_decay: the weight decay.
    batch_norm_params: parameters for the batch_norm. If None, it is not used.
    is_training: whether or not the model is in training mode.
    trainable: whether or not the variables should be trainable.
restore: whether or not the variables should be marked for restore.
scope: Optional scope for variable_op_scope.
Returns:
the tensor variable representing the result of the series of operations.
"""
with tf.variable_op_scope([inputs], scope, 'FC'):
num_units_in = inputs.get_shape()[1]
weights_shape = [num_units_in, num_units_out]
weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
l2_regularizer = lambda t: losses.l2_loss(t, weight_decay)
weights = variables.variable('weights',
shape=weights_shape,
initializer=weights_initializer,
regularizer=l2_regularizer,
trainable=trainable,
restore=restore)
if batch_norm_params is not None:
outputs = tf.matmul(inputs, weights)
with scopes.arg_scope([batch_norm], is_training=is_training,
trainable=trainable, restore=restore):
outputs = batch_norm(outputs, **batch_norm_params)
else:
bias_shape = [num_units_out,]
bias_initializer = tf.constant_initializer(bias)
biases = variables.variable('biases',
shape=bias_shape,
initializer=bias_initializer,
trainable=trainable,
restore=restore)
outputs = tf.nn.xw_plus_b(inputs, weights, biases)
if activation:
outputs = activation(outputs)
return outputs
def one_hot_encoding(labels, num_classes, scope=None):
"""Transform numeric labels into onehot_labels.
Args:
labels: [batch_size] target labels.
num_classes: total number of classes.
scope: Optional scope for op_scope.
Returns:
one hot encoding of the labels.
"""
with tf.op_scope([labels], scope, 'OneHotEncoding'):
batch_size = labels.get_shape()[0]
indices = tf.expand_dims(tf.range(0, batch_size), 1)
labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype)
concated = tf.concat(1, [indices, labels])
onehot_labels = tf.sparse_to_dense(
concated, tf.pack([batch_size, num_classes]), 1.0, 0.0)
onehot_labels.set_shape([batch_size, num_classes])
return onehot_labels
@scopes.add_arg_scope
def max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None):
"""Adds a Max Pooling layer.
It is assumed by the wrapper that the pooling is only done per image and not
in depth or batch.
Args:
inputs: a tensor of size [batch_size, height, width, depth].
kernel_size: the size of the pooling kernel over which the op is computed.
    stride: the stride in height and width of the pooling.
padding: the padding method, either 'VALID' or 'SAME'.
scope: Optional scope for op_scope.
Returns:
a tensor representing the results of the pooling operation.
Raises:
ValueError: if 'kernel_size' is not a 2-D list
"""
if len(kernel_size) != 2:
raise ValueError('kernel_size must be a 2-D list.')
with tf.op_scope([inputs], scope, 'MaxPool'):
return tf.nn.max_pool(inputs,
ksize=[1, kernel_size[0], kernel_size[1], 1],
strides=[1, stride, stride, 1],
padding=padding)
@scopes.add_arg_scope
def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None):
"""Adds a Avg Pooling layer.
It is assumed by the wrapper that the pooling is only done per image and not
in depth or batch.
Args:
inputs: a tensor of size [batch_size, height, width, depth].
kernel_size: the size of the pooling kernel over which the op is computed.
    stride: the stride in height and width of the pooling.
padding: the padding method, either 'VALID' or 'SAME'.
scope: Optional scope for op_scope.
Returns:
a tensor representing the results of the pooling operation.
Raises:
ValueError: if 'kernel_size' is not a 2-D list
"""
if len(kernel_size) != 2:
raise ValueError('kernel_size must be a 2-D list.')
with tf.op_scope([inputs], scope, 'AvgPool'):
return tf.nn.avg_pool(inputs,
ksize=[1, kernel_size[0], kernel_size[1], 1],
strides=[1, stride, stride, 1],
padding=padding)
@scopes.add_arg_scope
def dropout(inputs, keep_prob=0.5, is_training=True, scope=None):
"""Returns a dropout layer applied to the input.
Args:
inputs: the tensor to pass to the Dropout layer.
    keep_prob: the probability that each input unit is kept.
    is_training: whether or not the model is in training mode. If so, dropout
      is applied and values are scaled. Otherwise, inputs is returned.
scope: Optional scope for op_scope.
Returns:
a tensor representing the output of the operation.
"""
if is_training and keep_prob > 0:
with tf.op_scope([inputs], scope, 'Dropout'):
return tf.nn.dropout(inputs, keep_prob)
else:
return inputs
def flatten(inputs, scope=None):
"""Flattens the input while maintaining the batch_size.
Assumes that the first dimension represents the batch.
Args:
inputs: a tensor of size [batch_size, ...].
scope: Optional scope for op_scope.
Returns:
a flattened tensor with shape [batch_size, k].
Raises:
ValueError: if inputs.shape is wrong.
"""
if len(inputs.get_shape()) < 2:
    raise ValueError('Inputs must have at least 2 dimensions')
dims = inputs.get_shape()[1:]
k = dims.num_elements()
with tf.op_scope([inputs], scope, 'Flatten'):
return tf.reshape(inputs, [-1, k])
def repeat_op(repetitions, inputs, op, *args, **kwargs):
"""Build a sequential Tower starting from inputs by using an op repeatedly.
It creates new scopes for each operation by increasing the counter.
Example: given repeat_op(3, _, ops.conv2d, 64, [3, 3], scope='conv1')
it will repeat the given op under the following variable_scopes:
conv1/Conv
conv1/Conv_1
conv1/Conv_2
Args:
    repetitions: number of repetitions.
inputs: a tensor of size [batch_size, height, width, channels].
op: an operation.
*args: args for the op.
**kwargs: kwargs for the op.
Returns:
    a tensor resulting from applying the operation op, repetitions times.
Raises:
ValueError: if the op is unknown or wrong.
"""
scope = kwargs.pop('scope', None)
with tf.variable_op_scope([inputs], scope, 'RepeatOp'):
tower = inputs
for _ in range(repetitions):
tower = op(tower, *args, **kwargs)
return tower
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
from inception.slim import losses
from inception.slim import ops
from inception.slim import scopes
from inception.slim import variables
class ConvTest(tf.test.TestCase):
def testCreateConv(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.conv2d(images, 32, [3, 3])
self.assertEquals(output.op.name, 'Conv/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, height, width, 32])
def testCreateConvCreatesWeightsAndBiasesVars(self):
height, width = 3, 3
images = tf.random_uniform((5, height, width, 3), seed=1)
with self.test_session():
self.assertFalse(variables.get_variables('conv1/weights'))
self.assertFalse(variables.get_variables('conv1/biases'))
ops.conv2d(images, 32, [3, 3], scope='conv1')
self.assertTrue(variables.get_variables('conv1/weights'))
self.assertTrue(variables.get_variables('conv1/biases'))
def testCreateConvWithScope(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.conv2d(images, 32, [3, 3], scope='conv1')
self.assertEquals(output.op.name, 'conv1/Relu')
def testCreateConvWithoutActivation(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.conv2d(images, 32, [3, 3], activation=None)
self.assertEquals(output.op.name, 'Conv/BiasAdd')
def testCreateConvValid(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.conv2d(images, 32, [3, 3], padding='VALID')
self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 32])
def testCreateConvWithWD(self):
height, width = 3, 3
with self.test_session() as sess:
images = tf.random_uniform((5, height, width, 3), seed=1)
ops.conv2d(images, 32, [3, 3], weight_decay=0.01)
wd = tf.get_collection(losses.LOSSES_COLLECTION)[0]
self.assertEquals(wd.op.name, 'Conv/weights/Regularizer/L2Loss/value')
sess.run(tf.initialize_all_variables())
self.assertTrue(sess.run(wd) <= 0.01)
def testReuseConvWithWD(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
ops.conv2d(images, 32, [3, 3], weight_decay=0.01, scope='conv1')
self.assertEquals(len(tf.get_collection(losses.LOSSES_COLLECTION)), 1)
tf.get_variable_scope().reuse_variables()
ops.conv2d(images, 32, [3, 3], weight_decay=0.01, scope='conv1')
self.assertEquals(len(tf.get_collection(losses.LOSSES_COLLECTION)), 1)
def testConvWithBatchNorm(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
with scopes.arg_scope([ops.conv2d], batch_norm_params={}):
net = ops.conv2d(images, 32, [3, 3], scope='conv1')
net = ops.conv2d(net, 32, [3, 3], scope='conv2')
self.assertEquals(len(tf.get_collection('moving_vars')), 4)
self.assertEquals(len(variables.get_variables('conv1/BatchNorm')), 3)
self.assertEquals(len(variables.get_variables('conv2/BatchNorm')), 3)
class FCTest(tf.test.TestCase):
def testCreateFC(self):
height, width = 3, 3
with self.test_session():
inputs = tf.random_uniform((5, height * width * 3), seed=1)
output = ops.fc(inputs, 32)
self.assertEquals(output.op.name, 'FC/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, 32])
def testCreateFCWithScope(self):
height, width = 3, 3
with self.test_session():
inputs = tf.random_uniform((5, height * width * 3), seed=1)
output = ops.fc(inputs, 32, scope='fc1')
self.assertEquals(output.op.name, 'fc1/Relu')
def testCreateFcCreatesWeightsAndBiasesVars(self):
height, width = 3, 3
inputs = tf.random_uniform((5, height * width * 3), seed=1)
with self.test_session():
self.assertFalse(variables.get_variables('fc1/weights'))
self.assertFalse(variables.get_variables('fc1/biases'))
ops.fc(inputs, 32, scope='fc1')
self.assertTrue(variables.get_variables('fc1/weights'))
self.assertTrue(variables.get_variables('fc1/biases'))
def testReuseVars(self):
height, width = 3, 3
inputs = tf.random_uniform((5, height * width * 3), seed=1)
with self.test_session():
ops.fc(inputs, 32, scope='fc1')
self.assertEquals(len(variables.get_variables('fc1')), 2)
tf.get_variable_scope().reuse_variables()
ops.fc(inputs, 32, scope='fc1')
self.assertEquals(len(variables.get_variables('fc1')), 2)
def testNonReuseVars(self):
height, width = 3, 3
inputs = tf.random_uniform((5, height * width * 3), seed=1)
with self.test_session():
ops.fc(inputs, 32)
self.assertEquals(len(variables.get_variables('FC')), 2)
ops.fc(inputs, 32)
self.assertEquals(len(variables.get_variables('FC')), 4)
def testCreateFCWithoutActivation(self):
height, width = 3, 3
with self.test_session():
inputs = tf.random_uniform((5, height * width * 3), seed=1)
output = ops.fc(inputs, 32, activation=None)
self.assertEquals(output.op.name, 'FC/xw_plus_b')
def testCreateFCWithWD(self):
height, width = 3, 3
with self.test_session() as sess:
inputs = tf.random_uniform((5, height * width * 3), seed=1)
ops.fc(inputs, 32, weight_decay=0.01)
wd = tf.get_collection(losses.LOSSES_COLLECTION)[0]
self.assertEquals(wd.op.name, 'FC/weights/Regularizer/L2Loss/value')
sess.run(tf.initialize_all_variables())
self.assertTrue(sess.run(wd) <= 0.01)
def testReuseFCWithWD(self):
height, width = 3, 3
with self.test_session():
inputs = tf.random_uniform((5, height * width * 3), seed=1)
ops.fc(inputs, 32, weight_decay=0.01, scope='fc')
self.assertEquals(len(tf.get_collection(losses.LOSSES_COLLECTION)), 1)
tf.get_variable_scope().reuse_variables()
ops.fc(inputs, 32, weight_decay=0.01, scope='fc')
self.assertEquals(len(tf.get_collection(losses.LOSSES_COLLECTION)), 1)
def testFCWithBatchNorm(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height * width * 3), seed=1)
with scopes.arg_scope([ops.fc], batch_norm_params={}):
net = ops.fc(images, 32, scope='fc1')
net = ops.fc(net, 32, scope='fc2')
self.assertEquals(len(tf.get_collection('moving_vars')), 4)
self.assertEquals(len(variables.get_variables('fc1/BatchNorm')), 3)
self.assertEquals(len(variables.get_variables('fc2/BatchNorm')), 3)
class MaxPoolTest(tf.test.TestCase):
def testCreateMaxPool(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.max_pool(images, [3, 3])
self.assertEquals(output.op.name, 'MaxPool/MaxPool')
self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3])
def testCreateMaxPoolWithScope(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.max_pool(images, [3, 3], scope='pool1')
self.assertEquals(output.op.name, 'pool1/MaxPool')
def testCreateMaxPoolSAME(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.max_pool(images, [3, 3], padding='SAME')
self.assertListEqual(output.get_shape().as_list(), [5, 2, 2, 3])
def testCreateMaxPoolStrideSAME(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.max_pool(images, [3, 3], stride=1, padding='SAME')
self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3])
class AvgPoolTest(tf.test.TestCase):
def testCreateAvgPool(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.avg_pool(images, [3, 3])
self.assertEquals(output.op.name, 'AvgPool/AvgPool')
self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3])
def testCreateAvgPoolWithScope(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.avg_pool(images, [3, 3], scope='pool1')
self.assertEquals(output.op.name, 'pool1/AvgPool')
def testCreateAvgPoolSAME(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.avg_pool(images, [3, 3], padding='SAME')
self.assertListEqual(output.get_shape().as_list(), [5, 2, 2, 3])
def testCreateAvgPoolStrideSAME(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.avg_pool(images, [3, 3], stride=1, padding='SAME')
self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3])
class OneHotEncodingTest(tf.test.TestCase):
def testOneHotEncodingCreate(self):
with self.test_session():
labels = tf.constant([0, 1, 2])
output = ops.one_hot_encoding(labels, num_classes=3)
self.assertEquals(output.op.name, 'OneHotEncoding/SparseToDense')
self.assertListEqual(output.get_shape().as_list(), [3, 3])
def testOneHotEncoding(self):
with self.test_session():
labels = tf.constant([0, 1, 2])
one_hot_labels = tf.constant([[1, 0, 0],
[0, 1, 0],
[0, 0, 1]])
output = ops.one_hot_encoding(labels, num_classes=3)
self.assertAllClose(output.eval(), one_hot_labels.eval())
class DropoutTest(tf.test.TestCase):
def testCreateDropout(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.dropout(images)
self.assertEquals(output.op.name, 'Dropout/dropout/mul_1')
output.get_shape().assert_is_compatible_with(images.get_shape())
def testCreateDropoutNoTraining(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1, name='images')
output = ops.dropout(images, is_training=False)
self.assertEquals(output, images)
class FlattenTest(tf.test.TestCase):
def testFlatten4D(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1, name='images')
output = ops.flatten(images)
self.assertEquals(output.get_shape().num_elements(),
images.get_shape().num_elements())
self.assertEqual(output.get_shape()[0], images.get_shape()[0])
def testFlatten3D(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width), seed=1, name='images')
output = ops.flatten(images)
self.assertEquals(output.get_shape().num_elements(),
images.get_shape().num_elements())
self.assertEqual(output.get_shape()[0], images.get_shape()[0])
def testFlattenBatchSize(self):
height, width = 3, 3
with self.test_session() as sess:
images = tf.random_uniform((5, height, width, 3), seed=1, name='images')
inputs = tf.placeholder(tf.int32, (None, height, width, 3))
output = ops.flatten(inputs)
self.assertEquals(output.get_shape().as_list(),
[None, height * width * 3])
output = sess.run(output, {inputs: images.eval()})
self.assertEquals(output.size,
images.get_shape().num_elements())
self.assertEqual(output.shape[0], images.get_shape()[0])
class BatchNormTest(tf.test.TestCase):
def testCreateOp(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
output = ops.batch_norm(images)
self.assertTrue(output.op.name.startswith('BatchNorm/batchnorm'))
self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3])
def testCreateVariables(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
ops.batch_norm(images, scale=True)
beta = variables.get_variables_by_name('beta')[0]
gamma = variables.get_variables_by_name('gamma')[0]
self.assertEquals(beta.op.name, 'BatchNorm/beta')
self.assertEquals(gamma.op.name, 'BatchNorm/gamma')
moving_mean = tf.get_collection('moving_vars')[0]
moving_variance = tf.get_collection('moving_vars')[1]
self.assertEquals(moving_mean.op.name, 'BatchNorm/moving_mean')
self.assertEquals(moving_variance.op.name, 'BatchNorm/moving_variance')
def testMovingAverageVariables(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
ops.batch_norm(images, scale=True)
moving_mean = tf.moving_average_variables()[0]
moving_variance = tf.moving_average_variables()[1]
self.assertEquals(moving_mean.op.name, 'BatchNorm/moving_mean')
self.assertEquals(moving_variance.op.name, 'BatchNorm/moving_variance')
def testUpdateOps(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
ops.batch_norm(images)
update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
update_moving_mean = update_ops[0]
update_moving_variance = update_ops[1]
self.assertEquals(update_moving_mean.op.name,
'BatchNorm/AssignMovingAvg')
self.assertEquals(update_moving_variance.op.name,
'BatchNorm/AssignMovingAvg_1')
def testReuseVariables(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
ops.batch_norm(images, scale=True, scope='bn')
tf.get_variable_scope().reuse_variables()
ops.batch_norm(images, scale=True, scope='bn')
beta = variables.get_variables_by_name('beta')
gamma = variables.get_variables_by_name('gamma')
self.assertEquals(len(beta), 1)
self.assertEquals(len(gamma), 1)
moving_vars = tf.get_collection('moving_vars')
self.assertEquals(len(moving_vars), 2)
def testReuseUpdateOps(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
ops.batch_norm(images, scope='bn')
self.assertEquals(len(tf.get_collection(ops.UPDATE_OPS_COLLECTION)), 2)
tf.get_variable_scope().reuse_variables()
ops.batch_norm(images, scope='bn')
self.assertEquals(len(tf.get_collection(ops.UPDATE_OPS_COLLECTION)), 4)
def testCreateMovingVars(self):
height, width = 3, 3
with self.test_session():
images = tf.random_uniform((5, height, width, 3), seed=1)
_ = ops.batch_norm(images, moving_vars='moving_vars')
moving_mean = tf.get_collection('moving_vars',
'BatchNorm/moving_mean')
self.assertEquals(len(moving_mean), 1)
self.assertEquals(moving_mean[0].op.name, 'BatchNorm/moving_mean')
moving_variance = tf.get_collection('moving_vars',
'BatchNorm/moving_variance')
self.assertEquals(len(moving_variance), 1)
self.assertEquals(moving_variance[0].op.name, 'BatchNorm/moving_variance')
def testComputeMovingVars(self):
height, width = 3, 3
with self.test_session() as sess:
image_shape = (10, height, width, 3)
image_values = np.random.rand(*image_shape)
expected_mean = np.mean(image_values, axis=(0, 1, 2))
expected_var = np.var(image_values, axis=(0, 1, 2))
images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
output = ops.batch_norm(images, decay=0.1)
update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
with tf.control_dependencies(update_ops):
barrier = tf.no_op(name='gradient_barrier')
output = control_flow_ops.with_dependencies([barrier], output)
# Initialize all variables
sess.run(tf.initialize_all_variables())
moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
mean, variance = sess.run([moving_mean, moving_variance])
# After initialization moving_mean == 0 and moving_variance == 1.
self.assertAllClose(mean, [0] * 3)
self.assertAllClose(variance, [1] * 3)
for _ in range(10):
sess.run([output])
mean = moving_mean.eval()
variance = moving_variance.eval()
# After 10 updates with decay 0.1 moving_mean == expected_mean and
# moving_variance == expected_var.
self.assertAllClose(mean, expected_mean)
self.assertAllClose(variance, expected_var)
def testEvalMovingVars(self):
height, width = 3, 3
with self.test_session() as sess:
image_shape = (10, height, width, 3)
image_values = np.random.rand(*image_shape)
expected_mean = np.mean(image_values, axis=(0, 1, 2))
expected_var = np.var(image_values, axis=(0, 1, 2))
images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
output = ops.batch_norm(images, decay=0.1, is_training=False)
update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
with tf.control_dependencies(update_ops):
barrier = tf.no_op(name='gradient_barrier')
output = control_flow_ops.with_dependencies([barrier], output)
# Initialize all variables
sess.run(tf.initialize_all_variables())
moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
mean, variance = sess.run([moving_mean, moving_variance])
# After initialization moving_mean == 0 and moving_variance == 1.
self.assertAllClose(mean, [0] * 3)
self.assertAllClose(variance, [1] * 3)
      # Simulate assignment from saver restore.
init_assigns = [tf.assign(moving_mean, expected_mean),
tf.assign(moving_variance, expected_var)]
sess.run(init_assigns)
for _ in range(10):
sess.run([output], {images: np.random.rand(*image_shape)})
mean = moving_mean.eval()
variance = moving_variance.eval()
# Although we feed different images, the moving_mean and moving_variance
# shouldn't change.
self.assertAllClose(mean, expected_mean)
self.assertAllClose(variance, expected_var)
def testReuseVars(self):
height, width = 3, 3
with self.test_session() as sess:
image_shape = (10, height, width, 3)
image_values = np.random.rand(*image_shape)
expected_mean = np.mean(image_values, axis=(0, 1, 2))
expected_var = np.var(image_values, axis=(0, 1, 2))
images = tf.constant(image_values, shape=image_shape, dtype=tf.float32)
output = ops.batch_norm(images, decay=0.1, is_training=False)
update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
with tf.control_dependencies(update_ops):
barrier = tf.no_op(name='gradient_barrier')
output = control_flow_ops.with_dependencies([barrier], output)
# Initialize all variables
sess.run(tf.initialize_all_variables())
moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
moving_variance = variables.get_variables('BatchNorm/moving_variance')[0]
mean, variance = sess.run([moving_mean, moving_variance])
# After initialization moving_mean == 0 and moving_variance == 1.
self.assertAllClose(mean, [0] * 3)
self.assertAllClose(variance, [1] * 3)
      # Simulate assignment from saver restore.
init_assigns = [tf.assign(moving_mean, expected_mean),
tf.assign(moving_variance, expected_var)]
sess.run(init_assigns)
for _ in range(10):
sess.run([output], {images: np.random.rand(*image_shape)})
mean = moving_mean.eval()
variance = moving_variance.eval()
# Although we feed different images, the moving_mean and moving_variance
# shouldn't change.
self.assertAllClose(mean, expected_mean)
self.assertAllClose(variance, expected_var)
if __name__ == '__main__':
tf.test.main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the new arg_scope used for TF-Slim ops.
Allows one to define models much more compactly by eliminating boilerplate
code. This is accomplished through the use of argument scoping (arg_scope).
Example of how to use scopes.arg_scope:
with slim.arg_scope(ops.conv2d, padding='SAME',
stddev=0.01, weight_decay=0.0005):
net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
net = ops.conv2d(net, 256, [5, 5], scope='conv2')
The first call to conv2d will overwrite padding:
ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
stddev=0.01, weight_decay=0.0005, scope='conv1')
The second call to conv2d will use the predefined args:
ops.conv2d(inputs, 256, [5, 5], padding='SAME',
stddev=0.01, weight_decay=0.0005, scope='conv2')
Example of how to use scopes.add_arg_scope:
@scopes.add_arg_scope
def conv2d(*args, **kwargs):
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import functools
from tensorflow.python.framework import ops
_ARGSTACK_KEY = ("__arg_stack",)
_DECORATED_OPS = set()
def _get_arg_stack():
stack = ops.get_collection(_ARGSTACK_KEY)
if stack:
return stack[0]
else:
stack = [{}]
ops.add_to_collection(_ARGSTACK_KEY, stack)
return stack
def _current_arg_scope():
stack = _get_arg_stack()
return stack[-1]
def _add_op(op):
key_op = (op.__module__, op.__name__)
if key_op not in _DECORATED_OPS:
_DECORATED_OPS.add(key_op)
@contextlib.contextmanager
def arg_scope(list_ops, **kwargs):
"""Stores the default arguments for the given set of list_ops.
Args:
list_ops: List or tuple of operations to set argument scope for. Every op in
list_ops needs to be decorated with @add_arg_scope to work.
**kwargs: keyword=value that will define the defaults for each op in
list_ops. All the ops need to accept the given set of arguments.
Yields:
the current_scope, which is a dictionary of {op: {arg: value}}
Raises:
TypeError: if list_ops is not a list or a tuple.
ValueError: if any op in list_ops has not been decorated with @add_arg_scope.
"""
if not isinstance(list_ops, (list, tuple)):
raise TypeError("list_ops is not a list or a tuple")
try:
current_scope = _current_arg_scope().copy()
for op in list_ops:
key_op = (op.__module__, op.__name__)
if not has_arg_scope(op):
raise ValueError("%s is not decorated with @add_arg_scope", key_op)
if key_op in current_scope:
current_kwargs = current_scope[key_op].copy()
current_kwargs.update(kwargs)
current_scope[key_op] = current_kwargs
else:
current_scope[key_op] = kwargs.copy()
_get_arg_stack().append(current_scope)
yield current_scope
finally:
_get_arg_stack().pop()
def add_arg_scope(func):
"""Decorates a function with args so it can be used within an arg_scope.
Args:
func: function to decorate.
Returns:
The decorated function func_with_args().
"""
@functools.wraps(func)
def func_with_args(*args, **kwargs):
current_scope = _current_arg_scope()
current_args = kwargs
key_func = (func.__module__, func.__name__)
if key_func in current_scope:
current_args = current_scope[key_func].copy()
current_args.update(kwargs)
return func(*args, **current_args)
_add_op(func)
return func_with_args
def has_arg_scope(func):
"""Checks whether a func has been decorated with @add_arg_scope or not.
Args:
func: function to check.
Returns:
a boolean.
"""
key_op = (func.__module__, func.__name__)
return key_op in _DECORATED_OPS
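The module docstring above describes the default/override behavior in terms of ops.conv2d; as a quick, self-contained sketch (not part of the commit, using toy functions rather than real ops), the same mechanics look like this:

# Illustrative sketch: minimal use of the arg_scope machinery defined above.
from inception.slim import scopes

@scopes.add_arg_scope
def fn(*args, **kwargs):
  return args, kwargs

with scopes.arg_scope([fn], padding='SAME', stddev=0.01):
  print(fn(0))                   # ((0,), {'padding': 'SAME', 'stddev': 0.01})
  print(fn(1, padding='VALID'))  # an explicit kwarg overrides the scoped default
  with scopes.arg_scope([fn], stddev=0.05):
    # Nested scopes merge with the outer defaults for the same op.
    print(fn(2))                 # ((2,), {'padding': 'SAME', 'stddev': 0.05})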
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests slim.scopes."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception.slim import scopes
@scopes.add_arg_scope
def func1(*args, **kwargs):
return (args, kwargs)
@scopes.add_arg_scope
def func2(*args, **kwargs):
return (args, kwargs)
class ArgScopeTest(tf.test.TestCase):
def testEmptyArgScope(self):
with self.test_session():
self.assertEqual(scopes._current_arg_scope(), {})
def testSimpleArgScope(self):
func1_args = (0,)
func1_kwargs = {'a': 1, 'b': None, 'c': [1]}
with self.test_session():
with scopes.arg_scope([func1], a=1, b=None, c=[1]):
args, kwargs = func1(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
def testSimpleArgScopeWithTuple(self):
func1_args = (0,)
func1_kwargs = {'a': 1, 'b': None, 'c': [1]}
with self.test_session():
with scopes.arg_scope((func1,), a=1, b=None, c=[1]):
args, kwargs = func1(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
def testOverwriteArgScope(self):
func1_args = (0,)
func1_kwargs = {'a': 1, 'b': 2, 'c': [1]}
with scopes.arg_scope([func1], a=1, b=None, c=[1]):
args, kwargs = func1(0, b=2)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
def testNestedArgScope(self):
func1_args = (0,)
func1_kwargs = {'a': 1, 'b': None, 'c': [1]}
with scopes.arg_scope([func1], a=1, b=None, c=[1]):
args, kwargs = func1(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
func1_kwargs['b'] = 2
with scopes.arg_scope([func1], b=2):
args, kwargs = func1(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
def testSharedArgScope(self):
func1_args = (0,)
func1_kwargs = {'a': 1, 'b': None, 'c': [1]}
with scopes.arg_scope([func1, func2], a=1, b=None, c=[1]):
args, kwargs = func1(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
args, kwargs = func2(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
def testSharedArgScopeTuple(self):
func1_args = (0,)
func1_kwargs = {'a': 1, 'b': None, 'c': [1]}
with scopes.arg_scope((func1, func2), a=1, b=None, c=[1]):
args, kwargs = func1(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
args, kwargs = func2(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
def testPartiallySharedArgScope(self):
func1_args = (0,)
func1_kwargs = {'a': 1, 'b': None, 'c': [1]}
func2_args = (1,)
func2_kwargs = {'a': 1, 'b': None, 'd': [2]}
with scopes.arg_scope([func1, func2], a=1, b=None):
with scopes.arg_scope([func1], c=[1]), scopes.arg_scope([func2], d=[2]):
args, kwargs = func1(0)
self.assertTupleEqual(args, func1_args)
self.assertDictEqual(kwargs, func1_kwargs)
args, kwargs = func2(1)
self.assertTupleEqual(args, func2_args)
self.assertDictEqual(kwargs, func2_kwargs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF-Slim grouped API. Please see README.md for details and usage."""
# pylint: disable=unused-import
# Collapse tf-slim into a single namespace.
from inception.slim import inception_model as inception
from inception.slim import losses
from inception.slim import ops
from inception.slim import scopes
from inception.slim import variables
from inception.slim.scopes import arg_scope
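Since this module collapses the TF-Slim pieces into one namespace, a hedged sketch of how the grouped import is typically consumed follows; the layer sizes and scope names are illustrative, not taken from the model definition.

# Illustrative sketch: consuming the grouped namespace exported above.
import tensorflow as tf
from inception import slim

inputs = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
# Scoped defaults apply to every decorated conv2d call inside the block.
with slim.arg_scope([slim.ops.conv2d], stddev=0.01, weight_decay=0.0005):
  net = slim.ops.conv2d(inputs, 64, [3, 3], scope='conv1')
  net = slim.ops.conv2d(net, 128, [3, 3], scope='conv2')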
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains convenience wrappers for creating Variables in TensorFlow.
Usage:
weights_initializer = tf.truncated_normal_initializer(stddev=0.01)
l2_regularizer = lambda t: losses.l2_loss(t, weight=0.0005)
weights = variables.variable('weights',
shape=[100, 100],
initializer=weights_initializer,
regularizer=l2_regularizer,
device='/cpu:0')
biases = variables.variable('biases',
shape=[100],
initializer=tf.zeros_initializer,
device='/cpu:0')
# More complex example.
net = slim.ops.conv2d(input, 32, [3, 3], scope='conv1')
net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2')
with slim.arg_scope([variables.variable], restore=False):
net = slim.ops.conv2d(net, 64, [3, 3], scope='conv3')
# Get all model variables from all the layers.
model_variables = slim.variables.get_variables()
# Get all model variables from a specific layer, i.e. 'conv1'.
conv1_variables = slim.variables.get_variables('conv1')
# Get all weights from all the layers.
weights = slim.variables.get_variables_by_name('weights')
# Get all biases from all the layers.
biases = slim.variables.get_variables_by_name('biases')
# Get all variables in the VARIABLES_TO_RESTORE collection
# (i.e. only those created by 'conv1' and 'conv2')
variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
************************************************
* Initializing model variables from a checkpoint
************************************************
# Create some variables.
v1 = slim.variables.variable(name="v1", ..., restore=False)
v2 = slim.variables.variable(name="v2", ...) # By default restore=True
...
# The list of variables to restore should only contain 'v2'.
variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
restorer = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
# Restore variables from disk.
restorer.restore(sess, "/tmp/model.ckpt")
print("Model restored.")
# Do some work with the model
...
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception.slim import scopes
# Collection containing all the variables created using slim.variables
VARIABLES_COLLECTION = '_variables_'
# Collection containing all the slim.variables that are marked to_restore
VARIABLES_TO_RESTORE = '_variables_to_restore_'
def get_variable_given_name(var):
"""Gets the variable given name without the scope.
Args:
var: a variable.
Returns:
the given name of the variable without the scope.
"""
name = var.op.name
if '/' in name:
name = name.split('/')[-1]
return name
def default_collections(given_name, restore):
"""Define the set of default collections that variables should be added.
Args:
given_name: the given name of the variable.
restore: whether the variable should be added to the VARIABLES_TO_RESTORE
collection.
Returns:
a list of default collections.
"""
defaults = [tf.GraphKeys.VARIABLES, VARIABLES_COLLECTION]
defaults += [VARIABLES_COLLECTION + given_name]
if restore:
defaults += [VARIABLES_TO_RESTORE]
return defaults
def add_variable(var, restore=True):
"""Adds a variable to the default set of collections.
Args:
var: a variable.
restore: whether the variable should be added to the
VARIABLES_TO_RESTORE collection.
"""
given_name = get_variable_given_name(var)
for collection in default_collections(given_name, restore):
if var not in tf.get_collection(collection):
tf.add_to_collection(collection, var)
def get_variables(prefix=None, suffix=None):
"""Gets the list of variables, filtered by prefix and/or suffix.
Args:
prefix: an optional prefix for filtering the variables to return.
suffix: an optional suffix for filtering the variables to return.
Returns:
a list of variables with prefix and suffix.
"""
candidates = tf.get_collection(VARIABLES_COLLECTION, prefix)
if suffix is not None:
candidates = [var for var in candidates if var.op.name.endswith(suffix)]
return candidates
def get_variables_by_name(given_name, prefix=None):
"""Gets the list of variables were given that name.
Args:
given_name: name given to the variable without scope.
prefix: an optional prefix for filtering the variables to return.
Returns:
a list of variables with the given name and prefix.
"""
return tf.get_collection(VARIABLES_COLLECTION + given_name, prefix)
def get_unique_variable(name):
"""Gets the variable uniquely identified by that name.
Args:
name: a name that uniquely identifies the variable.
Returns:
a tensorflow variable.
Raises:
ValueError: if no variable uniquely identified by the name exists.
"""
candidates = tf.get_collection(tf.GraphKeys.VARIABLES, name)
if not candidates:
raise ValueError("Couldn't find variable %s" % name)
for candidate in candidates:
if candidate.op.name == name:
return candidate
raise ValueError('Variable %s does not uniquely identify a variable' % name)
@scopes.add_arg_scope
def variable(name, shape=None, dtype=tf.float32, initializer=None,
regularizer=None, trainable=True, collections=None, device='',
restore=True):
"""Gets an existing variable with these parameters or creates a new one.
It also adds itself to a group with its name.
Args:
name: the name of the new or existing variable.
shape: shape of the new or existing variable.
dtype: type of the new or existing variable (defaults to `DT_FLOAT`).
initializer: initializer for the variable if one is created.
regularizer: a (Tensor -> Tensor or None) function; the result of
applying it on a newly created variable will be added to the collection
GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
trainable: If `True` also add the variable to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
collections: A list of collection names to which the Variable will be added.
Note that the variable is always also added to the tf.GraphKeys.VARIABLES
collection.
device: Optional device to place the variable. It can be a string or a
function that is called to get the device for the variable.
restore: whether the variable should be added to the
VARIABLES_TO_RESTORE collection.
Returns:
The created or existing variable.
"""
# Instantiate the device for this variable if it is passed as a function.
if device and callable(device):
device = device()
collections = set(list(collections or []) + default_collections(name,
restore))
with tf.device(device):
return tf.get_variable(name, shape=shape, dtype=dtype,
initializer=initializer, regularizer=regularizer,
trainable=trainable, collections=collections)
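The variable() wrapper above accepts device either as a string or as a callable, and routes variables into the restore collection by default. Below is a minimal sketch (not part of the commit) of both behaviors; pick_cpu is a hypothetical helper written only for this example.

# Illustrative sketch: a callable device plus restore filtering with
# VARIABLES_TO_RESTORE, mirroring the module docstring above.
import tensorflow as tf
from inception.slim import variables

def pick_cpu():
  # A device function is called once per variable; here it always picks cpu:0.
  return '/cpu:0'

with tf.variable_scope('example'):
  w = variables.variable('weights', shape=[10, 10],
                         initializer=tf.truncated_normal_initializer(stddev=0.01),
                         device=pick_cpu)  # callable device
  b = variables.variable('biases', shape=[10],
                         initializer=tf.zeros_initializer,
                         restore=False)    # kept out of the restore set

# Only `w` ends up in VARIABLES_TO_RESTORE, so a Saver built from that
# collection restores the weights but leaves the biases at their initial value.
restorer = tf.train.Saver(tf.get_collection(variables.VARIABLES_TO_RESTORE))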
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.variables."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from inception.slim import scopes
from inception.slim import variables
class VariablesTest(tf.test.TestCase):
def testCreateVariable(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [5])
self.assertEquals(a.op.name, 'A/a')
self.assertListEqual(a.get_shape().as_list(), [5])
def testGetVariableGivenName(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [5])
with tf.variable_scope('B'):
b = variables.variable('a', [5])
self.assertEquals('a', variables.get_variable_given_name(a))
self.assertEquals('a', variables.get_variable_given_name(b))
def testGetVariableGivenNameScoped(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [5])
b = variables.variable('b', [5])
self.assertEquals([a], variables.get_variables_by_name('a'))
self.assertEquals([b], variables.get_variables_by_name('b'))
def testGetVariables(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [5])
with tf.variable_scope('B'):
b = variables.variable('a', [5])
self.assertEquals([a], variables.get_variables('A'))
self.assertEquals([b], variables.get_variables('B'))
def testGetVariablesSuffix(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [5])
with tf.variable_scope('A'):
b = variables.variable('b', [5])
self.assertEquals([a], variables.get_variables(suffix='a'))
self.assertEquals([b], variables.get_variables(suffix='b'))
def testGetVariableWithSingleVar(self):
with self.test_session():
with tf.variable_scope('parent'):
a = variables.variable('child', [5])
self.assertEquals(a, variables.get_unique_variable('parent/child'))
def testGetVariableWithDistractors(self):
with self.test_session():
with tf.variable_scope('parent'):
a = variables.variable('child', [5])
with tf.variable_scope('child'):
variables.variable('grandchild1', [7])
variables.variable('grandchild2', [9])
self.assertEquals(a, variables.get_unique_variable('parent/child'))
def testGetVariableThrowsExceptionWithNoMatch(self):
var_name = 'cant_find_me'
with self.test_session():
with self.assertRaises(ValueError):
variables.get_unique_variable(var_name)
def testGetThrowsExceptionWithChildrenButNoMatch(self):
var_name = 'parent/child'
with self.test_session():
with tf.variable_scope(var_name):
variables.variable('grandchild1', [7])
variables.variable('grandchild2', [9])
with self.assertRaises(ValueError):
variables.get_unique_variable(var_name)
def testGetVariablesToRestore(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [5])
with tf.variable_scope('B'):
b = variables.variable('b', [5])
self.assertListEqual([a, b],
tf.get_collection(variables.VARIABLES_TO_RESTORE))
def testGetVariablesToRestorePartial(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [5])
with tf.variable_scope('B'):
b = variables.variable('b', [5], restore=False)
self.assertListEqual([a, b], variables.get_variables())
self.assertListEqual([a],
tf.get_collection(variables.VARIABLES_TO_RESTORE))
def testReuseVariable(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [])
with tf.variable_scope('A', reuse=True):
b = variables.variable('a', [])
self.assertEquals(a, b)
self.assertListEqual([a], variables.get_variables())
def testVariableWithDevice(self):
with self.test_session():
with tf.variable_scope('A'):
a = variables.variable('a', [], device='cpu:0')
b = variables.variable('b', [], device='cpu:1')
self.assertDeviceEqual(a.device, 'cpu:0')
self.assertDeviceEqual(b.device, 'cpu:1')
def testVariableWithDeviceFromScope(self):
with self.test_session():
with tf.device('/cpu:0'):
a = variables.variable('a', [])
b = variables.variable('b', [], device='cpu:1')
self.assertDeviceEqual(a.device, 'cpu:0')
self.assertDeviceEqual(b.device, 'cpu:1')
def testVariableCollection(self):
with self.test_session():
a = variables.variable('a', [], collections='A')
b = variables.variable('b', [], collections='B')
self.assertEquals(a, tf.get_collection('A')[0])
self.assertEquals(b, tf.get_collection('B')[0])
def testVariableCollections(self):
with self.test_session():
a = variables.variable('a', [], collections=['A', 'C'])
b = variables.variable('b', [], collections=['B', 'C'])
self.assertEquals(a, tf.get_collection('A')[0])
self.assertEquals(b, tf.get_collection('B')[0])
def testVariableCollectionsWithArgScope(self):
with self.test_session():
with scopes.arg_scope([variables.variable], collections='A'):
a = variables.variable('a', [])
b = variables.variable('b', [])
self.assertListEqual([a, b], tf.get_collection('A'))
def testVariableCollectionsWithArgScopeNested(self):
with self.test_session():
with scopes.arg_scope([variables.variable], collections='A'):
a = variables.variable('a', [])
with scopes.arg_scope([variables.variable], collections='B'):
b = variables.variable('b', [])
self.assertEquals(a, tf.get_collection('A')[0])
self.assertEquals(b, tf.get_collection('B')[0])
def testVariableCollectionsWithArgScopeNonNested(self):
with self.test_session():
with scopes.arg_scope([variables.variable], collections='A'):
a = variables.variable('a', [])
with scopes.arg_scope([variables.variable], collections='B'):
b = variables.variable('b', [])
variables.variable('c', [])
self.assertListEqual([a], tf.get_collection('A'))
self.assertListEqual([b], tf.get_collection('B'))
def testVariableRestoreWithArgScopeNested(self):
with self.test_session():
with scopes.arg_scope([variables.variable], restore=True):
a = variables.variable('a', [])
with scopes.arg_scope([variables.variable], trainable=False,
collections=['A', 'B']):
b = variables.variable('b', [])
c = variables.variable('c', [])
self.assertListEqual([a, b, c],
tf.get_collection(variables.VARIABLES_TO_RESTORE))
self.assertListEqual([a, c], tf.trainable_variables())
self.assertListEqual([b], tf.get_collection('A'))
self.assertListEqual([b], tf.get_collection('B'))
if __name__ == '__main__':
tf.test.main()
tensorflow/third_party