Commit 68a18b70 authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

Merge pull request #1 from tensorflow/master

update to tensorflow/model master
parents bc70271a 2c4fea8d
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Code probability model used for entropy coding."""
import json
import tensorflow as tf
from entropy_coder.lib import blocks
from entropy_coder.model import entropy_coder_model
from entropy_coder.model import model_factory
# pylint: disable=not-callable
class BrnnPredictor(blocks.BlockBase):
  """Estimates the binary codes of a single layer.

  The estimate combines two inputs: the codes of the current layer, which are
  fed through a raster-scan (masked) convolution, and a state tensor coming
  from the previous layer/iteration, which is fed through a regular
  convolution.
  """

  def __init__(self, code_depth, name=None):
    """Creates the sub-blocks of the predictor.

    Args:
      code_depth: Depth (number of channels) of the codes to predict. The
        hidden layers use twice this depth.
      name: Optional block name forwarded to `blocks.BlockBase`.
    """
    super(BrnnPredictor, self).__init__(name)

    with self._BlockScope():
      lstm_depth = 2 * code_depth

      # Codes of the current layer/iteration: masked (raster scan)
      # convolution.
      self._adaptation0 = blocks.RasterScanConv2D(
          lstm_depth, [7, 7], [1, 1], 'SAME',
          strict_order=True,
          bias=blocks.Bias(0), act=tf.tanh)

      # State from the previous layer/iteration: regular convolution.
      self._adaptation1 = blocks.Conv2D(
          lstm_depth, [3, 3], [1, 1], 'SAME',
          bias=blocks.Bias(0), act=tf.tanh)

      # The stages below are created in the same order as they are chained.
      line_lstm = blocks.LineOperator(
          blocks.RasterScanConv2DLSTM(
              depth=lstm_depth,
              filter_size=[1, 3],
              hidden_filter_size=[1, 3],
              strides=[1, 1],
              padding='SAME'))
      hidden_projection = blocks.Conv2D(
          lstm_depth, [1, 1], [1, 1], 'SAME',
          bias=blocks.Bias(0), act=tf.tanh)
      code_projection = blocks.Conv2D(
          code_depth, [1, 1], [1, 1], 'SAME',
          bias=blocks.Bias(0), act=tf.tanh)
      self._predictor = blocks.CompositionOperator(
          [line_lstm, hidden_projection, code_projection])

  def _Apply(self, x, s):
    """Returns the estimated codes for the layer.

    Args:
      x: Codes of the current layer (input of the masked convolution).
      s: State from the previous iteration/layer.

    Returns:
      The output of the predictor stack applied to both adapted inputs
      concatenated along axis 3.
    """
    adapted_codes = self._adaptation0(x)
    adapted_state = self._adaptation1(s)
    features = tf.concat(values=[adapted_codes, adapted_state], axis=3)
    return self._predictor(features)
class LayerPrediction(blocks.BlockBase):
  """Binary code prediction for one layer.

  The block is stateful: every call to `_Apply` handles the next layer, using
  the state computed while handling the previous one. It can be applied at
  most `layer_count` times.
  """

  def __init__(self, layer_count, code_depth, name=None):
    """Creates one predictor and one state block per layer.

    Args:
      layer_count: Maximum number of layers this block can be applied to.
      code_depth: Depth (number of channels) of the codes of one layer.
      name: Optional block name forwarded to `blocks.BlockBase`.
    """
    super(LayerPrediction, self).__init__(name)

    self._layer_count = layer_count

    # No previous layer.
    self._layer_state = None
    self._current_layer = 0

    with self._BlockScope():
      # Layers used to do the conditional code prediction.
      # `range` is used instead of the Python 2 only `xrange`, which is not
      # defined on Python 3.
      self._brnn_predictors = [
          BrnnPredictor(code_depth) for _ in range(layer_count)]

      # Memory of the RNN is equivalent to the size of 2 layers of binary
      # codes. The same depth is used for the hidden layer of the state
      # blocks.
      hidden_depth = 2 * code_depth

      # Layers used to generate the input of the LSTM operating on the
      # iteration/depth domain.
      self._state_blocks = [
          blocks.CompositionOperator([
              blocks.Conv2D(
                  hidden_depth, [3, 3], [1, 1], 'SAME',
                  bias=blocks.Bias(0), act=tf.tanh),
              blocks.Conv2D(
                  code_depth, [3, 3], [1, 1], 'SAME',
                  bias=blocks.Bias(0), act=tf.tanh)
          ])
          for _ in range(layer_count)
      ]

      self._layer_rnn = blocks.CompositionOperator([
          blocks.Conv2DLSTM(
              depth=hidden_depth,
              filter_size=[1, 1],
              hidden_filter_size=[1, 1],
              strides=[1, 1],
              padding='SAME'),
          blocks.Conv2D(hidden_depth, [1, 1], [1, 1], 'SAME',
                        bias=blocks.Bias(0), act=tf.tanh),
          blocks.Conv2D(code_depth, [1, 1], [1, 1], 'SAME',
                        bias=blocks.Bias(0), act=tf.tanh)
      ])

  def _Apply(self, x):
    """Predicts the codes of the next layer and updates the layer state.

    Args:
      x: Codes of the layer to predict.

    Returns:
      The estimated codes for this layer.
    """
    assert self._current_layer < self._layer_count

    # Layer state is set to 0 when there is no previous iteration.
    if self._layer_state is None:
      self._layer_state = tf.zeros_like(x, dtype=tf.float32)

    # Code estimation using both:
    # - the state from the previous iteration/layer,
    # - the binary codes that are before in raster scan order.
    estimated_codes = self._brnn_predictors[self._current_layer](
        x, self._layer_state)

    # Compute the updated layer state.
    h = self._state_blocks[self._current_layer](x)
    self._layer_state = self._layer_rnn(h)
    self._current_layer += 1

    return estimated_codes
class ProgressiveModel(entropy_coder_model.EntropyCoderModel):
  """Progressive BRNN entropy coder model."""

  def __init__(self):
    super(ProgressiveModel, self).__init__()

  def Initialize(self, global_step, optimizer, config_string):
    """Stores the training objects and parses the JSON configuration.

    Args:
      global_step: Global step passed to the optimizer in `BuildGraph`.
      optimizer: Optimizer used to minimize the loss; when falsy, no training
        op is created by `BuildGraph`.
      config_string: JSON string. `BuildGraph` reads the keys 'layer_depth',
        'layer_count' and 'coded_layer_count' (the latter defaults to 0).

    Raises:
      ValueError: If `config_string` is None.
    """
    if config_string is None:
      raise ValueError('The progressive model requires a configuration.')
    config = json.loads(config_string)
    if 'coded_layer_count' not in config:
      config['coded_layer_count'] = 0

    self._config = config
    self._optimizer = optimizer
    self._global_step = global_step

  def BuildGraph(self, input_codes):
    """Build the graph corresponding to the progressive BRNN model.

    Sets `self.loss`, `self.average_code_length` and `self.train_op`.

    Args:
      input_codes: Tensor of codes; it is sliced and split along its last
        (4th) dimension into layers of `layer_depth` channels.

    Raises:
      ValueError: If the code depth is smaller than the configured prefix
        depth, is not a multiple of the layer depth, or holds more layers
        than `layer_count`.
    """
    layer_depth = self._config['layer_depth']
    layer_count = self._config['layer_count']

    code_shape = input_codes.get_shape()
    code_depth = code_shape[-1].value
    if self._config['coded_layer_count'] > 0:
      # Only keep the first `coded_layer_count` layers of the input codes.
      prefix_depth = self._config['coded_layer_count'] * layer_depth
      if code_depth < prefix_depth:
        raise ValueError('Invalid prefix depth: {} VS {}'.format(
            prefix_depth, code_depth))
      input_codes = input_codes[:, :, :, :prefix_depth]

    code_shape = input_codes.get_shape()
    code_depth = code_shape[-1].value
    if code_depth % layer_depth != 0:
      raise ValueError(
          'Code depth must be a multiple of the layer depth: {} vs {}'.format(
              code_depth, layer_depth))
    code_layer_count = code_depth // layer_depth
    if code_layer_count > layer_count:
      raise ValueError('Input codes have too many layers: {}, max={}'.format(
          code_layer_count, layer_count))

    # Block used to estimate binary codes.
    layer_prediction = LayerPrediction(layer_count, layer_depth)

    # Block used to compute code lengths.
    code_length_block = blocks.CodeLength()

    # Saturation margin applied to the predictions to avoid infinite code
    # length.
    epsilon = 0.001

    # Loop over all the layers.
    code_length = []
    code_layers = tf.split(
        value=input_codes, num_or_size_splits=code_layer_count, axis=3)
    for k, x in enumerate(code_layers):
      predicted_x = layer_prediction(x)
      predicted_x = tf.clip_by_value(
          predicted_x, -1 + epsilon, +1 - epsilon)
      code_length.append(code_length_block(
          blocks.ConvertSignCodeToZeroOneCode(x),
          blocks.ConvertSignCodeToZeroOneCode(predicted_x)))
      tf.summary.scalar('code_length_layer_{:02d}'.format(k), code_length[-1])
    code_length = tf.stack(code_length)
    self.loss = tf.reduce_mean(code_length)
    tf.summary.scalar('loss', self.loss)

    # Loop over all the remaining layers just to make sure they are
    # instantiated. Otherwise, loading model params could fail.
    # `range` replaces the Python 2 only `xrange`; the prediction output is
    # intentionally discarded.
    dummy_x = tf.zeros_like(code_layers[0])
    for _ in range(layer_count - code_layer_count):
      layer_prediction(dummy_x)

    # Average bitrate over total_line_count.
    self.average_code_length = tf.reduce_mean(code_length)

    if self._optimizer:
      optim_op = self._optimizer.minimize(self.loss,
                                          global_step=self._global_step)
      block_updates = blocks.CreateBlockUpdates()
      if block_updates:
        # Run the block updates only after the optimization step.
        with tf.get_default_graph().control_dependencies([optim_op]):
          self.train_op = tf.group(*block_updates)
      else:
        self.train_op = optim_op
    else:
      self.train_op = None

  def GetConfigStringForUnitTest(self):
    """Returns a small JSON configuration (1 channel per layer, 8 layers)."""
    s = '{\n'
    s += '"layer_depth": 1,\n'
    s += '"layer_count": 8\n'
    s += '}\n'
    return s
@model_factory.RegisterEntropyCoderModel('progressive')
def CreateProgressiveModel():
  """Factory registered under the name 'progressive'."""
  model = ProgressiveModel()
  return model
# Image Compression with Neural Networks
This is a [TensorFlow](http://www.tensorflow.org/) model for compressing and
decompressing images using an already trained Residual GRU model as described
in [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148). Please consult the paper for more details
on the architecture and compression results.
This code will allow you to perform lossy compression with a model
already trained on compression. This code does not currently contain the
Entropy Coding portions of our paper.
## Prerequisites
The only software requirement for running the encoder and decoder is having
Tensorflow installed. You will also need to [download](http://download.tensorflow.org/models/compression_residual_gru-2016-08-23.tar.gz)
and extract the model residual_gru.pb.
If you want to generate the perceptual similarity under MS-SSIM, you will also
need to [Install SciPy](https://www.scipy.org/install.html).
## Encoding
The Residual GRU network is fully convolutional, but requires the image's
height and width in pixels to be a multiple of 32. There is an image in this folder
called example.png that is 768x1024 if one is needed for testing. We also
rely on TensorFlow's built in decoding ops, which support only PNG and JPEG at
time of release.
To encode an image, simply run the following command:
`python encoder.py --input_image=/your/image/here.png
--output_codes=output_codes.npz --iteration=15
--model=/path/to/model/residual_gru.pb
`
The iteration parameter specifies the lossy-quality to target for compression.
The quality can be [0-15], where 0 corresponds to a target of 1/8 (bits per
pixel) bpp and every increment results in an additional 1/8 bpp.
| Iteration | BPP | Compression Ratio |
|---: |---: |---: |
|0 | 0.125 | 192:1|
|1 | 0.250 | 96:1|
|2 | 0.375 | 64:1|
|3 | 0.500 | 48:1|
|4 | 0.625 | 38.4:1|
|5 | 0.750 | 32:1|
|6 | 0.875 | 27.4:1|
|7 | 1.000 | 24:1|
|8 | 1.125 | 21.3:1|
|9 | 1.250 | 19.2:1|
|10 | 1.375 | 17.4:1|
|11 | 1.500 | 16:1|
|12 | 1.625 | 14.7:1|
|13 | 1.750 | 13.7:1|
|14 | 1.875 | 12.8:1|
|15 | 2.000 | 12:1|
The output_codes file contains the numpy shape and a flattened, bit-packed
array of the codes. These can be inspected in python by using numpy.load().
## Decoding
After generating codes for an image, the lossy reconstructions for that image
can be done as follows:
`python decoder.py --input_codes=codes.npz --output_directory=/tmp/decoded/
--model=residual_gru.pb`
The output_directory will contain images decoded at each quality level.
## Comparing Similarity
One of our primary metrics for comparing how similar two images are
is MS-SSIM.
To generate these metrics on your images you can run:
`python msssim.py --original_image=/path/to/your/image.png
--compared_image=/tmp/decoded/image_15.png`
## Results
CSV results containing the post-entropy bitrates and MS-SSIM over Kodak
are available for reference. Each row of the CSV represents each of the Kodak
images in their dataset number (1-24). Each column of the CSV represents each
iteration of the model (1-16).
[Post Entropy Bitrates](https://storage.googleapis.com/compression-ml/residual_gru_results/bitrate.csv)
[MS-SSIM](https://storage.googleapis.com/compression-ml/residual_gru_results/msssim.csv)
## FAQ
#### How do I train my own compression network?
We currently don't provide the code to build and train a compression
graph from scratch.
#### I get an InvalidArgumentError: Incompatible shapes.
This is usually due to the fact that our network only supports images whose
height and width are both divisible by 32 pixels. Try padding your images to
32-pixel boundaries.
## Contact Info
Model repository maintained by Nick Johnston ([nickj-google](https://github.com/nickj-google)).
......@@ -3,7 +3,7 @@
Open Sourced By: Xin Pan (xpan@google.com, github: panyx0718)
###Introduction for dp_sgd/README.md
### Introduction for [dp_sgd/README.md](dp_sgd/README.md)
Machine learning techniques based on neural networks are achieving remarkable
results in a wide variety of domains. Often, the training of models requires
......@@ -18,7 +18,7 @@ manageable cost in software complexity, training efficiency, and model quality.
paper: https://arxiv.org/abs/1607.00133
###Introduction for multiple_teachers/README.md
### Introduction for [multiple_teachers/README.md](multiple_teachers/README.md)
This repository contains code to create a setup for learning privacy-preserving
student models by transferring knowledge from an ensemble of teachers trained
......
......@@ -8,14 +8,14 @@ Open Sourced By: Xin Pan (xpan@google.com, github: panyx0718)
<Introduction>
Machine learning techniques based on neural networks are achieving remarkable
results in a wide variety of domains. Often, the training of models requires
large, representative datasets, which may be crowdsourced and contain sensitive
information. The models should not expose private information in these datasets.
Addressing this goal, we develop new algorithmic techniques for learning and a
refined analysis of privacy costs within the framework of differential privacy.
Our implementation and experiments demonstrate that we can train deep neural
networks with non-convex objectives, under a modest privacy budget, and at a
Machine learning techniques based on neural networks are achieving remarkable
results in a wide variety of domains. Often, the training of models requires
large, representative datasets, which may be crowdsourced and contain sensitive
information. The models should not expose private information in these datasets.
Addressing this goal, we develop new algorithmic techniques for learning and a
refined analysis of privacy costs within the framework of differential privacy.
Our implementation and experiments demonstrate that we can train deep neural
networks with non-convex objectives, under a modest privacy budget, and at a
manageable cost in software complexity, training efficiency, and model quality.
paper: https://arxiv.org/abs/1607.00133
......@@ -46,7 +46,7 @@ https://github.com/panyx0718/models/tree/master/slim
# Download the data to the data/ directory.
# List the codes.
ls -R differential_privacy/
$ ls -R differential_privacy/
differential_privacy/:
dp_sgd __init__.py privacy_accountant README.md
......@@ -72,16 +72,16 @@ differential_privacy/privacy_accountant/tf:
accountant.py accountant_test.py BUILD
# List the data.
ls -R data/
$ ls -R data/
./data:
mnist_test.tfrecord mnist_train.tfrecord
# Build the codes.
bazel build -c opt differential_privacy/...
$ bazel build -c opt differential_privacy/...
# Run the mnist differential privacy training codes.
bazel-bin/differential_privacy/dp_sgd/dp_mnist/dp_mnist \
$ bazel-bin/differential_privacy/dp_sgd/dp_mnist/dp_mnist \
--training_data_path=data/mnist_train.tfrecord \
--eval_data_path=data/mnist_test.tfrecord \
--save_path=/tmp/mnist_dir
......@@ -102,6 +102,6 @@ train_accuracy: 0.53
eval_accuracy: 0.53
...
ls /tmp/mnist_dir/
$ ls /tmp/mnist_dir/
checkpoint ckpt ckpt.meta results-0.json
```
......@@ -273,7 +273,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,
images, network_parameters)
cost = tf.nn.softmax_cross_entropy_with_logits(
logits, tf.one_hot(labels, 10))
logits=logits, labels=tf.one_hot(labels, 10))
# The actual cost is the average across the examples.
cost = tf.reduce_sum(cost, [0]) / batch_size
......@@ -343,7 +343,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,
# We need to maintain the initialization sequence.
for v in tf.trainable_variables():
sess.run(tf.initialize_variables([v]))
sess.run(tf.variables_initializer([v]))
sess.run(tf.global_variables_initializer())
sess.run(init_ops)
......
......@@ -27,7 +27,7 @@ def ComputeDPPrincipalProjection(data, projection_dims,
Args:
data: the input data, each row is a data vector.
projection_dims: the projection dimension.
sanitizer: the sanitizer used for acheiving privacy.
sanitizer: the sanitizer used for achieving privacy.
eps_delta: (eps, delta) pair.
sigma: if not None, use noise sigma; otherwise compute it using
eps_delta pair.
......
......@@ -233,10 +233,11 @@ def BatchClipByL2norm(t, upper_bound, name=None):
"""
assert upper_bound > 0
with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
with tf.name_scope(values=[t, upper_bound], name=name,
default_name="batch_clip_by_l2norm") as name:
saved_shape = tf.shape(t)
batch_size = tf.slice(saved_shape, [0], [1])
t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]]))
upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
tf.constant(1.0/upper_bound))
# Add a small number to avoid divide by 0
......@@ -264,9 +265,10 @@ def SoftThreshold(t, threshold_ratio, name=None):
"""
assert threshold_ratio >= 0
with tf.op_scope([t, threshold_ratio], name, "soft_thresholding") as name:
with tf.name_scope(values=[t, threshold_ratio], name=name,
default_name="soft_thresholding") as name:
saved_shape = tf.shape(t)
t2 = tf.reshape(t, tf.concat(0, [tf.slice(saved_shape, [0], [1]), -1]))
t2 = tf.reshape(t, tf.concat(axis=0, values=[tf.slice(saved_shape, [0], [1]), -1]))
t_abs = tf.abs(t2)
t_x = tf.sign(t2) * tf.nn.relu(t_abs -
(tf.reduce_mean(t_abs, [0],
......@@ -286,7 +288,8 @@ def AddGaussianNoise(t, sigma, name=None):
the noisy tensor.
"""
with tf.op_scope([t, sigma], name, "add_gaussian_noise") as name:
with tf.name_scope(values=[t, sigma], name=name,
default_name="add_gaussian_noise") as name:
noisy_t = t + tf.random_normal(tf.shape(t), stddev=sigma)
return noisy_t
......
......@@ -189,7 +189,7 @@ class MatMulPXG(object):
z_grads, = z_grads
x_expanded = tf.expand_dims(x, 2)
z_grads_expanded = tf.expand_dims(z_grads, 1)
return tf.mul(x_expanded, z_grads_expanded)
return tf.multiply(x_expanded, z_grads_expanded)
pxg_registry.Register("MatMul", MatMulPXG)
......@@ -245,7 +245,7 @@ class Conv2DPXG(object):
num_x = int(conv_x.get_shape()[0])
assert num_x == 1, num_x
assert len(conv_px) == batch_size
conv = tf.concat(0, conv_px)
conv = tf.concat(axis=0, values=conv_px)
assert int(conv.get_shape()[0]) == batch_size
return conv, w_px
......@@ -274,7 +274,7 @@ class Conv2DPXG(object):
self.colocate_gradients_with_ops,
gate_gradients=self.gate_gradients)
return tf.pack(gradients_list)
return tf.stack(gradients_list)
pxg_registry.Register("Conv2D", Conv2DPXG)
......
......@@ -216,10 +216,10 @@ def main(unused_argv):
# If we are reproducing results from paper https://arxiv.org/abs/1610.05755,
# download the required binaries with label information.
##################################################################
# Binaries for MNIST results
paper_binaries_mnist = \
["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_labels.npy?raw=true",
["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_labels.npy?raw=true",
"https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_100_indices_used_by_student.npy?raw=true"]
if FLAGS.counts_file == "mnist_250_teachers_labels.npy" \
or FLAGS.indices_file == "mnist_250_teachers_100_indices_used_by_student.npy":
......@@ -254,7 +254,7 @@ def main(unused_argv):
total_log_mgf_nm = np.array([0.0 for _ in l_list])
total_ss_nm = np.array([0.0 for _ in l_list])
noise_eps = FLAGS.noise_eps
for i in indices:
total_log_mgf_nm += np.array(
[logmgf_from_counts(counts_mat[i], noise_eps, l)
......@@ -287,7 +287,7 @@ def main(unused_argv):
if min(eps_list_nm) == eps_list_nm[-1]:
print "Warning: May not have used enough values of l"
# Data indpendent bound, as mechanism is
# Data independent bound, as mechanism is
# 2*noise_eps DP.
data_ind_log_mgf = np.array([0.0 for _ in l_list])
data_ind_log_mgf += num_examples * np.array(
......
......@@ -75,7 +75,7 @@ def _variable_with_weight_decay(name, shape, stddev, wd):
var = _variable_on_cpu(name, shape,
tf.truncated_normal_initializer(stddev=stddev))
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
return var
......@@ -84,7 +84,7 @@ def inference(images, dropout=False):
"""Build the CNN model.
Args:
images: Images returned from distorted_inputs() or inputs().
dropout: Boolean controling whether to use dropout or not
dropout: Boolean controlling whether to use dropout or not
Returns:
Logits
"""
......@@ -95,9 +95,9 @@ def inference(images, dropout=False):
# conv1
with tf.variable_scope('conv1') as scope:
kernel = _variable_with_weight_decay('weights',
kernel = _variable_with_weight_decay('weights',
shape=first_conv_shape,
stddev=1e-4,
stddev=1e-4,
wd=0.0)
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
......@@ -108,25 +108,25 @@ def inference(images, dropout=False):
# pool1
pool1 = tf.nn.max_pool(conv1,
ksize=[1, 3, 3, 1],
pool1 = tf.nn.max_pool(conv1,
ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1],
padding='SAME',
padding='SAME',
name='pool1')
# norm1
norm1 = tf.nn.lrn(pool1,
4,
bias=1.0,
alpha=0.001 / 9.0,
norm1 = tf.nn.lrn(pool1,
4,
bias=1.0,
alpha=0.001 / 9.0,
beta=0.75,
name='norm1')
# conv2
with tf.variable_scope('conv2') as scope:
kernel = _variable_with_weight_decay('weights',
kernel = _variable_with_weight_decay('weights',
shape=[5, 5, 64, 128],
stddev=1e-4,
stddev=1e-4,
wd=0.0)
conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
......@@ -137,18 +137,18 @@ def inference(images, dropout=False):
# norm2
norm2 = tf.nn.lrn(conv2,
4,
bias=1.0,
alpha=0.001 / 9.0,
norm2 = tf.nn.lrn(conv2,
4,
bias=1.0,
alpha=0.001 / 9.0,
beta=0.75,
name='norm2')
# pool2
pool2 = tf.nn.max_pool(norm2,
pool2 = tf.nn.max_pool(norm2,
ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1],
padding='SAME',
strides=[1, 2, 2, 1],
padding='SAME',
name='pool2')
# local3
......@@ -156,9 +156,9 @@ def inference(images, dropout=False):
# Move everything into depth so we can perform a single matrix multiply.
reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
dim = reshape.get_shape()[1].value
weights = _variable_with_weight_decay('weights',
weights = _variable_with_weight_decay('weights',
shape=[dim, 384],
stddev=0.04,
stddev=0.04,
wd=0.004)
biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
......@@ -167,9 +167,9 @@ def inference(images, dropout=False):
# local4
with tf.variable_scope('local4') as scope:
weights = _variable_with_weight_decay('weights',
weights = _variable_with_weight_decay('weights',
shape=[384, 192],
stddev=0.04,
stddev=0.04,
wd=0.004)
biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
......@@ -178,11 +178,11 @@ def inference(images, dropout=False):
# compute logits
with tf.variable_scope('softmax_linear') as scope:
weights = _variable_with_weight_decay('weights',
weights = _variable_with_weight_decay('weights',
[192, FLAGS.nb_labels],
stddev=1/192.0,
stddev=1/192.0,
wd=0.0)
biases = _variable_on_cpu('biases',
biases = _variable_on_cpu('biases',
[FLAGS.nb_labels],
tf.constant_initializer(0.0))
logits = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
......@@ -194,7 +194,7 @@ def inference_deeper(images, dropout=False):
"""Build a deeper CNN model.
Args:
images: Images returned from distorted_inputs() or inputs().
dropout: Boolean controling whether to use dropout or not
dropout: Boolean controlling whether to use dropout or not
Returns:
Logits
"""
......@@ -386,7 +386,7 @@ def train_op_fun(total_loss, global_step):
"""
# Variables that affect learning rate.
nb_ex_per_train_epoch = int(60000 / FLAGS.nb_teachers)
num_batches_per_epoch = nb_ex_per_train_epoch / FLAGS.batch_size
decay_steps = int(num_batches_per_epoch * FLAGS.epochs_per_decay)
......@@ -398,7 +398,7 @@ def train_op_fun(total_loss, global_step):
decay_steps,
LEARNING_RATE_DECAY_FACTOR,
staircase=True)
tf.scalar_summary('learning_rate', lr)
tf.summary.scalar('learning_rate', lr)
# Generate moving averages of all losses and associated summaries.
loss_averages_op = moving_av(total_loss)
......@@ -413,7 +413,7 @@ def train_op_fun(total_loss, global_step):
# Add histograms for trainable variables.
for var in tf.trainable_variables():
tf.histogram_summary(var.op.name, var)
tf.summary.histogram(var.op.name, var)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
......@@ -485,7 +485,7 @@ def train(images, labels, ckpt_path, dropout=False):
train_op = train_op_fun(loss, global_step)
# Create a saver.
saver = tf.train.Saver(tf.all_variables())
saver = tf.train.Saver(tf.global_variables())
print("Graph constructed and saver created")
......
......@@ -47,7 +47,7 @@ def create_dir_if_needed(dest_directory):
def maybe_download(file_urls, directory):
"""
Download a set of files in temporary local folder
:param directory: the directory where to download
:param directory: the directory where to download
:return: a tuple of filepaths corresponding to the files given as input
"""
# Create directory if doesn't exist
......@@ -73,7 +73,7 @@ def maybe_download(file_urls, directory):
result.append(filepath)
# Test if file already exists
if not gfile.Exists(filepath):
if not tf.gfile.Exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
float(count * block_size) / float(total_size) * 100.0))
......@@ -124,7 +124,7 @@ def extract_svhn(local_url):
:return:
"""
with gfile.Open(local_url, mode='r') as file_obj:
with tf.gfile.Open(local_url, mode='r') as file_obj:
# Load MATLAB matrix using scipy IO
dict = loadmat(file_obj)
......
......@@ -64,11 +64,11 @@ def train_teacher(dataset, nb_teachers, teacher_id):
else:
print("Check value of dataset flag")
return False
# Retrieve subset of data for this teacher
data, labels = input.partition_dataset(train_data,
train_labels,
nb_teachers,
data, labels = input.partition_dataset(train_data,
train_labels,
nb_teachers,
teacher_id)
print("Length of training data: " + str(len(labels)))
......
......@@ -152,7 +152,7 @@ class MomentsAccountant(object):
We further assume that at each step, the mechanism operates on a random
sample with sampling probability q = batch_size / total_examples. Then
E[exp(L X)] = E[(Pr[M(D)==x / Pr[M(D')==x])^L]
By distinguishign two cases of wether D < D' or D' < D, we have
By distinguishing two cases of whether D < D' or D' < D, we have
that
E[exp(L X)] <= max (I1, I2)
where
......@@ -361,12 +361,12 @@ class GaussianMomentsAccountant(MomentsAccountant):
exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma)
for j in range(t + 1)], dtype=tf.float64)
# x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j}
x = tf.mul(binomial, signs)
x = tf.multiply(binomial, signs)
# y[i, j] = x[i, j] * exp(exponents[j])
# = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
# Note: this computation is done by broadcasting pointwise multiplication
# between [t+1, t+1] tensor and [t+1] tensor.
y = tf.mul(x, tf.exp(exponents))
y = tf.multiply(x, tf.exp(exponents))
# z[i] = sum_j y[i, j]
# = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
z = tf.reduce_sum(y, 1)
......
# Domain Separation Networks
## Introduction
This code is the code used for the "Domain Separation Networks" paper
by Bousmalis K., Trigeorgis G., et al. which was presented at NIPS 2016. The
paper can be found here: https://arxiv.org/abs/1608.06019.
## Contact
This code was open-sourced by [Konstantinos Bousmalis](https://github.com/bousmalis) (konstantinos@google.com).
## Installation
You will need to have the following installed on your machine before trying out the DSN code.
* Tensorflow: https://www.tensorflow.org/install/
* Bazel: https://bazel.build/
## Important Note
Although we are making the code available, you are only able to use the MNIST
provider for now. We will soon provide a script to download and convert MNIST-M
as well. Check back here in a few weeks or wait for a relevant announcement from
[@bousmalis](https://twitter.com/bousmalis).
## Running the code for adapting MNIST to MNIST-M
In order to run the MNIST to MNIST-M experiments with DANNs and/or DANNs with
domain separation (DSNs) you will need to set the directory you used to download
MNIST and MNIST-M:
```
$ export DSN_DATA_DIR=/your/dir
```
Add models and models/slim to your `$PYTHONPATH`:
```
$ export PYTHONPATH=$PYTHONPATH:$PWD:$PWD/slim
```
Then you need to build the binaries with Bazel:
```
$ bazel build -c opt domain_adaptation/domain_separation/...
```
You can then train with the following command:
```
$ ./bazel-bin/domain_adaptation/domain_separation/dsn_train \
--similarity_loss=dann_loss \
--basic_tower=dann_mnist \
--source_dataset=mnist \
--target_dataset=mnist_m \
--learning_rate=0.0117249 \
--gamma_weight=0.251175 \
--weight_decay=1e-6 \
--layers_to_regularize=fc3 \
--nouse_separation \
--master="" \
--dataset_dir=${DSN_DATA_DIR} \
-v --use_logging
```
Evaluation can be invoked with the following command:
```
$ ./bazel-bin/domain_adaptation/domain_separation/dsn_eval \
-v --dataset mnist_m --split test --num_examples=9001 \
--dataset_dir=${DSN_DATA_DIR}
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment