Merge pull request #1 from tensorflow/master

update to tensorflow/model master

Merge pull request #1 from tensorflow/master
update to tensorflow/model master
68a18b70 · Toby Boyd · GitHub · bc70271a · 2c4fea8d · 68a18b70
Commit 68a18b70 authored Jun 08, 2017 by Toby Boyd Committed by GitHub Jun 08, 2017
20 changed files
--- a/compression/entropy_coder/progressive/__init__.py
+++ b/compression/entropy_coder/progressive/__init__.py
--- a/compression/entropy_coder/progressive/progressive.py
+++ b/compression/entropy_coder/progressive/progressive.py
+# Copyright 2017 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Code probability model used for entropy coding."""
+
+import json
+
+import tensorflow as tf
+
+from entropy_coder.lib import blocks
+from entropy_coder.model import entropy_coder_model
+from entropy_coder.model import model_factory
+
+# pylint: disable=not-callable
+
+
+class BrnnPredictor(blocks.BlockBase):
+  """BRNN prediction applied on one layer."""
+
+  def __init__(self, code_depth, name=None):
+    super(BrnnPredictor, self).__init__(name)
+
+    with self._BlockScope():
+      hidden_depth = 2 * code_depth
+
+      # What is coming from the previous layer/iteration
+      # is going through a regular Conv2D layer as opposed to the binary codes
+      # of the current layer/iteration which are going through a masked
+      # convolution.
+      self._adaptation0 = blocks.RasterScanConv2D(
+          hidden_depth, [7, 7], [1, 1], 'SAME',
+          strict_order=True,
+          bias=blocks.Bias(0), act=tf.tanh)
+      self._adaptation1 = blocks.Conv2D(
+          hidden_depth, [3, 3], [1, 1], 'SAME',
+          bias=blocks.Bias(0), act=tf.tanh)
+      self._predictor = blocks.CompositionOperator([
+          blocks.LineOperator(
+              blocks.RasterScanConv2DLSTM(
+                  depth=hidden_depth,
+                  filter_size=[1, 3],
+                  hidden_filter_size=[1, 3],
+                  strides=[1, 1],
+                  padding='SAME')),
+          blocks.Conv2D(hidden_depth, [1, 1], [1, 1], 'SAME',
+                        bias=blocks.Bias(0), act=tf.tanh),
+          blocks.Conv2D(code_depth, [1, 1], [1, 1], 'SAME',
+                        bias=blocks.Bias(0), act=tf.tanh)
+      ])
+
+  def _Apply(self, x, s):
+    # Code estimation using both:
+    # - the state from the previous iteration/layer,
+    # - the binary codes that are before in raster scan order.
+    h = tf.concat(values=[self._adaptation0(x), self._adaptation1(s)], axis=3)
+
+    estimated_codes = self._predictor(h)
+
+    return estimated_codes
+
+
+class LayerPrediction(blocks.BlockBase):
+  """Binary code prediction for one layer."""
+
+  def __init__(self, layer_count, code_depth, name=None):
+    super(LayerPrediction, self).__init__(name)
+
+    self._layer_count = layer_count
+
+    # No previous layer.
+    self._layer_state = None
+    self._current_layer = 0
+
+    with self._BlockScope():
+      # Layers used to do the conditional code prediction.
+      self._brnn_predictors = []
+      for _ in xrange(layer_count):
+        self._brnn_predictors.append(BrnnPredictor(code_depth))
+
+      # Layers used to generate the input of the LSTM operating on the
+      # iteration/depth domain.
+      hidden_depth = 2 * code_depth
+      self._state_blocks = []
+      for _ in xrange(layer_count):
+        self._state_blocks.append(blocks.CompositionOperator([
+            blocks.Conv2D(
+                hidden_depth, [3, 3], [1, 1], 'SAME',
+                bias=blocks.Bias(0), act=tf.tanh),
+            blocks.Conv2D(
+                code_depth, [3, 3], [1, 1], 'SAME',
+                bias=blocks.Bias(0), act=tf.tanh)
+        ]))
+
+      # Memory of the RNN is equivalent to the size of 2 layers of binary
+      # codes.
+      hidden_depth = 2 * code_depth
+      self._layer_rnn = blocks.CompositionOperator([
+          blocks.Conv2DLSTM(
+              depth=hidden_depth,
+              filter_size=[1, 1],
+              hidden_filter_size=[1, 1],
+              strides=[1, 1],
+              padding='SAME'),
+          blocks.Conv2D(hidden_depth, [1, 1], [1, 1], 'SAME',
+                        bias=blocks.Bias(0), act=tf.tanh),
+          blocks.Conv2D(code_depth, [1, 1], [1, 1], 'SAME',
+                        bias=blocks.Bias(0), act=tf.tanh)
+      ])
+
+  def _Apply(self, x):
+    assert self._current_layer < self._layer_count
+
+    # Layer state is set to 0 when there is no previous iteration.
+    if self._layer_state is None:
+      self._layer_state = tf.zeros_like(x, dtype=tf.float32)
+
+    # Code estimation using both:
+    # - the state from the previous iteration/layer,
+    # - the binary codes that are before in raster scan order.
+    estimated_codes = self._brnn_predictors[self._current_layer](
+        x, self._layer_state)
+
+    # Compute the updated layer state.
+    h = self._state_blocks[self._current_layer](x)
+    self._layer_state = self._layer_rnn(h)
+    self._current_layer += 1
+
+    return estimated_codes
+
+
+class ProgressiveModel(entropy_coder_model.EntropyCoderModel):
+  """Progressive BRNN entropy coder model."""
+
+  def __init__(self):
+    super(ProgressiveModel, self).__init__()
+
+  def Initialize(self, global_step, optimizer, config_string):
+    if config_string is None:
+      raise ValueError('The progressive model requires a configuration.')
+    config = json.loads(config_string)
+    if 'coded_layer_count' not in config:
+      config['coded_layer_count'] = 0
+
+    self._config = config
+    self._optimizer = optimizer
+    self._global_step = global_step
+
+  def BuildGraph(self, input_codes):
+    """Build the graph corresponding to the progressive BRNN model."""
+    layer_depth = self._config['layer_depth']
+    layer_count = self._config['layer_count']
+
+    code_shape = input_codes.get_shape()
+    code_depth = code_shape[-1].value
+    if self._config['coded_layer_count'] > 0:
+      prefix_depth = self._config['coded_layer_count'] * layer_depth
+      if code_depth < prefix_depth:
+        raise ValueError('Invalid prefix depth: {} VS {}'.format(
+            prefix_depth, code_depth))
+      input_codes = input_codes[:, :, :, :prefix_depth]
+
+    code_shape = input_codes.get_shape()
+    code_depth = code_shape[-1].value
+    if code_depth % layer_depth != 0:
+      raise ValueError(
+          'Code depth must be a multiple of the layer depth: {} vs {}'.format(
+              code_depth, layer_depth))
+    code_layer_count = code_depth // layer_depth
+    if code_layer_count > layer_count:
+      raise ValueError('Input codes have too many layers: {}, max={}'.format(
+          code_layer_count, layer_count))
+
+    # Block used to estimate binary codes.
+    layer_prediction = LayerPrediction(layer_count, layer_depth)
+
+    # Block used to compute code lengths.
+    code_length_block = blocks.CodeLength()
+
+    # Loop over all the layers.
+    code_length = []
+    code_layers = tf.split(
+        value=input_codes, num_or_size_splits=code_layer_count, axis=3)
+    for k in xrange(code_layer_count):
+      x = code_layers[k]
+      predicted_x = layer_prediction(x)
+      # Saturate the prediction to avoid infinite code length.
+      epsilon = 0.001
+      predicted_x = tf.clip_by_value(
+          predicted_x, -1 + epsilon, +1 - epsilon)
+      code_length.append(code_length_block(
+          blocks.ConvertSignCodeToZeroOneCode(x),
+          blocks.ConvertSignCodeToZeroOneCode(predicted_x)))
+      tf.summary.scalar('code_length_layer_{:02d}'.format(k), code_length[-1])
+    code_length = tf.stack(code_length)
+    self.loss = tf.reduce_mean(code_length)
+    tf.summary.scalar('loss', self.loss)
+
+    # Loop over all the remaining layers just to make sure they are
+    # instantiated. Otherwise, loading model params could fail.
+    dummy_x = tf.zeros_like(code_layers[0])
+    for _ in xrange(layer_count - code_layer_count):
+      dummy_predicted_x = layer_prediction(dummy_x)
+
+    # Average bitrate over total_line_count.
+    self.average_code_length = tf.reduce_mean(code_length)
+
+    if self._optimizer:
+      optim_op = self._optimizer.minimize(self.loss,
+                                          global_step=self._global_step)
+      block_updates = blocks.CreateBlockUpdates()
+      if block_updates:
+        with tf.get_default_graph().control_dependencies([optim_op]):
+          self.train_op = tf.group(*block_updates)
+      else:
+        self.train_op = optim_op
+    else:
+      self.train_op = None
+
+  def GetConfigStringForUnitTest(self):
+    s = '{\n'
+    s += '"layer_depth": 1,\n'
+    s += '"layer_count": 8\n'
+    s += '}\n'
+    return s
+
+
+@model_factory.RegisterEntropyCoderModel('progressive')
+def CreateProgressiveModel():
+  return ProgressiveModel()
--- a/compression/image_encoder/README.md
+++ b/compression/image_encoder/README.md
+# Image Compression with Neural Networks
+
+This is a [TensorFlow](http://www.tensorflow.org/) model for compressing and
+decompressing images using an already trained Residual GRU model as described
+in [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148). Please consult the paper for more details
+on the architecture and compression results.
+
+This code will allow you to perform lossy compression with a model
+already trained on compression. This code does not currently contain the
+Entropy Coding portions of our paper.
+
+
+## Prerequisites
+The only software requirement for running the encoder and decoder is having
+TensorFlow installed. You will also need to [download](http://download.tensorflow.org/models/compression_residual_gru-2016-08-23.tar.gz)
+and extract the model residual_gru.pb.
+
+If you want to generate the perceptual similarity under MS-SSIM, you will also
+need to [Install SciPy](https://www.scipy.org/install.html).
+
+## Encoding
+The Residual GRU network is fully convolutional, but requires the image's
+height and width in pixels to be multiples of 32. There is an image in this folder
+called example.png that is 768x1024 if one is needed for testing. We also
+rely on TensorFlow's built in decoding ops, which support only PNG and JPEG at
+time of release.
+
+To encode an image, simply run the following command:
+
+`python encoder.py --input_image=/your/image/here.png
+--output_codes=output_codes.npz --iteration=15
+--model=/path/to/model/residual_gru.pb
+`
+
+The iteration parameter specifies the lossy-quality to target for compression.
+The quality can be [0-15], where 0 corresponds to a target of 1/8 (bits per
+pixel) bpp and every increment results in an additional 1/8 bpp.
+
+| Iteration | BPP | Compression Ratio |
+|---: |---: |---: |
+|0 | 0.125 | 192:1|
+|1 | 0.250 | 96:1|
+|2 | 0.375 | 64:1|
+|3 | 0.500 | 48:1|
+|4 | 0.625 | 38.4:1|
+|5 | 0.750 | 32:1|
+|6 | 0.875 | 27.4:1|
+|7 | 1.000 | 24:1|
+|8 | 1.125 | 21.3:1|
+|9 | 1.250 | 19.2:1|
+|10 | 1.375 | 17.4:1|
+|11 | 1.500 | 16:1|
+|12 | 1.625 | 14.7:1|
+|13 | 1.750 | 13.7:1|
+|14 | 1.875 | 12.8:1|
+|15 | 2.000 | 12:1|
+
+The output_codes file contains the numpy shape and a flattened, bit-packed
+array of the codes. These can be inspected in python by using numpy.load().
+
+
+## Decoding
+After generating codes for an image, the lossy reconstructions for that image
+can be done as follows:
+
+`python decoder.py --input_codes=codes.npz --output_directory=/tmp/decoded/
+--model=residual_gru.pb`
+
+The output_directory will contain images decoded at each quality level.
+
+
+## Comparing Similarity
+One of our primary metrics for comparing how similar two images are
+is MS-SSIM.
+
+To generate these metrics on your images you can run:
+`python msssim.py --original_image=/path/to/your/image.png
+--compared_image=/tmp/decoded/image_15.png`
+
+
+## Results
+CSV results containing the post-entropy bitrates and MS-SSIM over Kodak
+are available for reference. Each row of the CSV represents each of the Kodak
+images in their dataset number (1-24). Each column of the CSV represents each
+iteration of the model (1-16).
+
+[Post Entropy Bitrates](https://storage.googleapis.com/compression-ml/residual_gru_results/bitrate.csv)
+
+[MS-SSIM](https://storage.googleapis.com/compression-ml/residual_gru_results/msssim.csv)
+
+
+## FAQ
+
+#### How do I train my own compression network?
+We currently don't provide the code to build and train a compression
+graph from scratch.
+
+#### I get an InvalidArgumentError: Incompatible shapes.
+This is usually due to the fact that our network only supports images whose
+height and width are both divisible by 32 pixels. Try padding your images to
+32-pixel boundaries.
+
+
+## Contact Info
+Model repository maintained by Nick Johnston ([nickj-google](https://github.com/nickj-google)).
--- a/compression/decoder.py
+++ b/compression/decoder.py
--- a/compression/encoder.py
+++ b/compression/encoder.py
--- a/compression/example.png
+++ b/compression/example.png
--- a/compression/msssim.py
+++ b/compression/msssim.py
--- a/differential_privacy/README.md
+++ b/differential_privacy/README.md
@@ -3,7 +3,7 @@
 Open Sourced By: Xin Pan (xpan@google.com, github: panyx0718)


-###Introduction for dp_sgd/README.md
+### Introduction for [dp_sgd/README.md](dp_sgd/README.md)

 Machine learning techniques based on neural networks are achieving remarkable 
 results in a wide variety of domains. Often, the training of models requires 
@@ -18,7 +18,7 @@ manageable cost in software complexity, training efficiency, and model quality.
 paper: https://arxiv.org/abs/1607.00133


-###Introduction for multiple_teachers/README.md
+### Introduction for [multiple_teachers/README.md](multiple_teachers/README.md)

 This repository contains code to create a setup for learning privacy-preserving 
 student models by transferring knowledge from an ensemble of teachers trained 

--- a/differential_privacy/dp_sgd/README.md
+++ b/differential_privacy/dp_sgd/README.md
@@ -8,14 +8,14 @@ Open Sourced By: Xin Pan (xpan@google.com, github: panyx0718)

 <Introduction>

-Machine learning techniques based on neural networks are achieving remarkable 
-results in a wide variety of domains. Often, the training of models requires 
-large, representative datasets, which may be crowdsourced and contain sensitive 
-information. The models should not expose private information in these datasets. 
-Addressing this goal, we develop new algorithmic techniques for learning and a 
-refined analysis of privacy costs within the framework of differential privacy. 
-Our implementation and experiments demonstrate that we can train deep neural 
-networks with non-convex objectives, under a modest privacy budget, and at a 
+Machine learning techniques based on neural networks are achieving remarkable
+results in a wide variety of domains. Often, the training of models requires
+large, representative datasets, which may be crowdsourced and contain sensitive
+information. The models should not expose private information in these datasets.
+Addressing this goal, we develop new algorithmic techniques for learning and a
+refined analysis of privacy costs within the framework of differential privacy.
+Our implementation and experiments demonstrate that we can train deep neural
+networks with non-convex objectives, under a modest privacy budget, and at a
 manageable cost in software complexity, training efficiency, and model quality.

 paper: https://arxiv.org/abs/1607.00133
@@ -46,7 +46,7 @@ https://github.com/panyx0718/models/tree/master/slim
 # Download the data to the data/ directory.

 # List the codes.
-ls -R differential_privacy/
+$ ls -R differential_privacy/
 differential_privacy/:
 dp_sgd  __init__.py  privacy_accountant  README.md

@@ -72,16 +72,16 @@ differential_privacy/privacy_accountant/tf:
 accountant.py  accountant_test.py  BUILD

 # List the data.
-ls -R data/
+$ ls -R data/

 ./data:
 mnist_test.tfrecord  mnist_train.tfrecord

 # Build the codes.
-bazel build -c opt differential_privacy/...
+$ bazel build -c opt differential_privacy/...

 # Run the mnist differntial privacy training codes.
-bazel-bin/differential_privacy/dp_sgd/dp_mnist/dp_mnist \
+$ bazel-bin/differential_privacy/dp_sgd/dp_mnist/dp_mnist \
    --training_data_path=data/mnist_train.tfrecord \
    --eval_data_path=data/mnist_test.tfrecord \
    --save_path=/tmp/mnist_dir
@@ -102,6 +102,6 @@ train_accuracy: 0.53
 eval_accuracy: 0.53
 ...

-ls /tmp/mnist_dir/
+$ ls /tmp/mnist_dir/
 checkpoint  ckpt  ckpt.meta  results-0.json
 ```
--- a/differential_privacy/dp_sgd/dp_mnist/dp_mnist.py
+++ b/differential_privacy/dp_sgd/dp_mnist/dp_mnist.py
@@ -273,7 +273,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,
        images, network_parameters)

    cost = tf.nn.softmax_cross_entropy_with_logits(
-        logits, tf.one_hot(labels, 10))
+        logits=logits, labels=tf.one_hot(labels, 10))

    # The actual cost is the average across the examples.
    cost = tf.reduce_sum(cost, [0]) / batch_size
@@ -343,7 +343,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,

    # We need to maintain the intialization sequence.
    for v in tf.trainable_variables():
-      sess.run(tf.initialize_variables([v]))
+      sess.run(tf.variables_initializer([v]))
    sess.run(tf.global_variables_initializer())
    sess.run(init_ops)


--- a/differential_privacy/dp_sgd/dp_optimizer/dp_pca.py
+++ b/differential_privacy/dp_sgd/dp_optimizer/dp_pca.py
@@ -27,7 +27,7 @@ def ComputeDPPrincipalProjection(data, projection_dims,
  Args:
    data: the input data, each row is a data vector.
    projection_dims: the projection dimension.
-    sanitizer: the sanitizer used for acheiving privacy.
+    sanitizer: the sanitizer used for achieving privacy.
    eps_delta: (eps, delta) pair.
    sigma: if not None, use noise sigma; otherwise compute it using
      eps_delta pair.

--- a/differential_privacy/dp_sgd/dp_optimizer/utils.py
+++ b/differential_privacy/dp_sgd/dp_optimizer/utils.py
@@ -233,10 +233,11 @@ def BatchClipByL2norm(t, upper_bound, name=None):
  """

  assert upper_bound > 0
-  with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
+  with tf.name_scope(values=[t, upper_bound], name=name,
+                     default_name="batch_clip_by_l2norm") as name:
    saved_shape = tf.shape(t)
    batch_size = tf.slice(saved_shape, [0], [1])
-    t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
+    t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]]))
    upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                              tf.constant(1.0/upper_bound))
    # Add a small number to avoid divide by 0
@@ -264,9 +265,10 @@ def SoftThreshold(t, threshold_ratio, name=None):
  """

  assert threshold_ratio >= 0
-  with tf.op_scope([t, threshold_ratio], name, "soft_thresholding") as name:
+  with tf.name_scope(values=[t, threshold_ratio], name=name,
+                     default_name="soft_thresholding") as name:
    saved_shape = tf.shape(t)
-    t2 = tf.reshape(t, tf.concat(0, [tf.slice(saved_shape, [0], [1]), -1]))
+    t2 = tf.reshape(t, tf.concat(axis=0, values=[tf.slice(saved_shape, [0], [1]), -1]))
    t_abs = tf.abs(t2)
    t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                   (tf.reduce_mean(t_abs, [0],
@@ -286,7 +288,8 @@ def AddGaussianNoise(t, sigma, name=None):
    the noisy tensor.
  """

-  with tf.op_scope([t, sigma], name, "add_gaussian_noise") as name:
+  with tf.name_scope(values=[t, sigma], name=name,
+                     default_name="add_gaussian_noise") as name:
    noisy_t = t + tf.random_normal(tf.shape(t), stddev=sigma)
  return noisy_t


--- a/differential_privacy/dp_sgd/per_example_gradients/per_example_gradients.py
+++ b/differential_privacy/dp_sgd/per_example_gradients/per_example_gradients.py
@@ -189,7 +189,7 @@ class MatMulPXG(object):
    z_grads, = z_grads
    x_expanded = tf.expand_dims(x, 2)
    z_grads_expanded = tf.expand_dims(z_grads, 1)
-    return tf.mul(x_expanded, z_grads_expanded)
+    return tf.multiply(x_expanded, z_grads_expanded)


 pxg_registry.Register("MatMul", MatMulPXG)
@@ -245,7 +245,7 @@ class Conv2DPXG(object):
      num_x = int(conv_x.get_shape()[0])
      assert num_x == 1, num_x
    assert len(conv_px) == batch_size
-    conv = tf.concat(0, conv_px)
+    conv = tf.concat(axis=0, values=conv_px)
    assert int(conv.get_shape()[0]) == batch_size
    return conv, w_px

@@ -274,7 +274,7 @@ class Conv2DPXG(object):
                                  self.colocate_gradients_with_ops,
                                  gate_gradients=self.gate_gradients)

-    return tf.pack(gradients_list)
+    return tf.stack(gradients_list)

 pxg_registry.Register("Conv2D", Conv2DPXG)


--- a/differential_privacy/multiple_teachers/analysis.py
+++ b/differential_privacy/multiple_teachers/analysis.py
@@ -216,10 +216,10 @@ def main(unused_argv):
  # If we are reproducing results from paper https://arxiv.org/abs/1610.05755,
  # download the required binaries with label information.
  ##################################################################
-  
+
  # Binaries for MNIST results
  paper_binaries_mnist = \
-    ["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_labels.npy?raw=true", 
+    ["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_labels.npy?raw=true",
    "https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_100_indices_used_by_student.npy?raw=true"]
  if FLAGS.counts_file == "mnist_250_teachers_labels.npy" \
    or FLAGS.indices_file == "mnist_250_teachers_100_indices_used_by_student.npy":
@@ -254,7 +254,7 @@ def main(unused_argv):
  total_log_mgf_nm = np.array([0.0 for _ in l_list])
  total_ss_nm = np.array([0.0 for _ in l_list])
  noise_eps = FLAGS.noise_eps
-  
+
  for i in indices:
    total_log_mgf_nm += np.array(
        [logmgf_from_counts(counts_mat[i], noise_eps, l)
@@ -287,7 +287,7 @@ def main(unused_argv):
  if min(eps_list_nm) == eps_list_nm[-1]:
    print "Warning: May not have used enough values of l"

-  # Data indpendent bound, as mechanism is
+  # Data independent bound, as mechanism is
  # 2*noise_eps DP.
  data_ind_log_mgf = np.array([0.0 for _ in l_list])
  data_ind_log_mgf += num_examples * np.array(

--- a/differential_privacy/multiple_teachers/deep_cnn.py
+++ b/differential_privacy/multiple_teachers/deep_cnn.py
@@ -75,7 +75,7 @@ def _variable_with_weight_decay(name, shape, stddev, wd):
  var = _variable_on_cpu(name, shape,
                         tf.truncated_normal_initializer(stddev=stddev))
  if wd is not None:
-    weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
+    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var

@@ -84,7 +84,7 @@ def inference(images, dropout=False):
  """Build the CNN model.
  Args:
    images: Images returned from distorted_inputs() or inputs().
-    dropout: Boolean controling whether to use dropout or not
+    dropout: Boolean controlling whether to use dropout or not
  Returns:
    Logits
  """
@@ -95,9 +95,9 @@ def inference(images, dropout=False):

  # conv1
  with tf.variable_scope('conv1') as scope:
-    kernel = _variable_with_weight_decay('weights', 
+    kernel = _variable_with_weight_decay('weights',
                                         shape=first_conv_shape,
-                                         stddev=1e-4, 
+                                         stddev=1e-4,
                                         wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
@@ -108,25 +108,25 @@ def inference(images, dropout=False):


  # pool1
-  pool1 = tf.nn.max_pool(conv1, 
-                         ksize=[1, 3, 3, 1], 
+  pool1 = tf.nn.max_pool(conv1,
+                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
-                         padding='SAME', 
+                         padding='SAME',
                         name='pool1')
-  
+
  # norm1
-  norm1 = tf.nn.lrn(pool1, 
-                    4, 
-                    bias=1.0, 
-                    alpha=0.001 / 9.0, 
+  norm1 = tf.nn.lrn(pool1,
+                    4,
+                    bias=1.0,
+                    alpha=0.001 / 9.0,
                    beta=0.75,
                    name='norm1')

  # conv2
  with tf.variable_scope('conv2') as scope:
-    kernel = _variable_with_weight_decay('weights', 
+    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 64, 128],
-                                         stddev=1e-4, 
+                                         stddev=1e-4,
                                         wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
@@ -137,18 +137,18 @@ def inference(images, dropout=False):


  # norm2
-  norm2 = tf.nn.lrn(conv2, 
-                    4, 
-                    bias=1.0, 
-                    alpha=0.001 / 9.0, 
+  norm2 = tf.nn.lrn(conv2,
+                    4,
+                    bias=1.0,
+                    alpha=0.001 / 9.0,
                    beta=0.75,
                    name='norm2')
-  
+
  # pool2
-  pool2 = tf.nn.max_pool(norm2, 
+  pool2 = tf.nn.max_pool(norm2,
                         ksize=[1, 3, 3, 1],
-                         strides=[1, 2, 2, 1], 
-                         padding='SAME', 
+                         strides=[1, 2, 2, 1],
+                         padding='SAME',
                         name='pool2')

  # local3
@@ -156,9 +156,9 @@ def inference(images, dropout=False):
    # Move everything into depth so we can perform a single matrix multiply.
    reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
    dim = reshape.get_shape()[1].value
-    weights = _variable_with_weight_decay('weights', 
+    weights = _variable_with_weight_decay('weights',
                                          shape=[dim, 384],
-                                          stddev=0.04, 
+                                          stddev=0.04,
                                          wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
@@ -167,9 +167,9 @@ def inference(images, dropout=False):

  # local4
  with tf.variable_scope('local4') as scope:
-    weights = _variable_with_weight_decay('weights', 
+    weights = _variable_with_weight_decay('weights',
                                          shape=[384, 192],
-                                          stddev=0.04, 
+                                          stddev=0.04,
                                          wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
@@ -178,11 +178,11 @@ def inference(images, dropout=False):

  # compute logits
  with tf.variable_scope('softmax_linear') as scope:
-    weights = _variable_with_weight_decay('weights', 
+    weights = _variable_with_weight_decay('weights',
                                          [192, FLAGS.nb_labels],
-                                          stddev=1/192.0, 
+                                          stddev=1/192.0,
                                          wd=0.0)
-    biases = _variable_on_cpu('biases', 
+    biases = _variable_on_cpu('biases',
                              [FLAGS.nb_labels],
                              tf.constant_initializer(0.0))
    logits = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
@@ -194,7 +194,7 @@ def inference_deeper(images, dropout=False):
  """Build a deeper CNN model.
  Args:
    images: Images returned from distorted_inputs() or inputs().
-    dropout: Boolean controling whether to use dropout or not
+    dropout: Boolean controlling whether to use dropout or not
  Returns:
    Logits
  """
@@ -386,7 +386,7 @@ def train_op_fun(total_loss, global_step):
  """
  # Variables that affect learning rate.
  nb_ex_per_train_epoch = int(60000 / FLAGS.nb_teachers)
-  
+
  num_batches_per_epoch = nb_ex_per_train_epoch / FLAGS.batch_size
  decay_steps = int(num_batches_per_epoch * FLAGS.epochs_per_decay)

@@ -398,7 +398,7 @@ def train_op_fun(total_loss, global_step):
                                  decay_steps,
                                  LEARNING_RATE_DECAY_FACTOR,
                                  staircase=True)
-  tf.scalar_summary('learning_rate', lr)
+  tf.summary.scalar('learning_rate', lr)

  # Generate moving averages of all losses and associated summaries.
  loss_averages_op = moving_av(total_loss)
@@ -413,7 +413,7 @@ def train_op_fun(total_loss, global_step):

  # Add histograms for trainable variables.
  for var in tf.trainable_variables():
-    tf.histogram_summary(var.op.name, var)
+    tf.summary.histogram(var.op.name, var)

  # Track the moving averages of all trainable variables.
  variable_averages = tf.train.ExponentialMovingAverage(
@@ -485,7 +485,7 @@ def train(images, labels, ckpt_path, dropout=False):
    train_op = train_op_fun(loss, global_step)

    # Create a saver.
-    saver = tf.train.Saver(tf.all_variables())
+    saver = tf.train.Saver(tf.global_variables())

    print("Graph constructed and saver created")


--- a/differential_privacy/multiple_teachers/input.py
+++ b/differential_privacy/multiple_teachers/input.py
@@ -47,7 +47,7 @@ def create_dir_if_needed(dest_directory):
 def maybe_download(file_urls, directory):
  """
  Download a set of files in temporary local folder
-  :param directory: the directory where to download 
+  :param directory: the directory where to download
  :return: a tuple of filepaths corresponding to the files given as input
  """
  # Create directory if doesn't exist
@@ -73,7 +73,7 @@ def maybe_download(file_urls, directory):
    result.append(filepath)

    # Test if file already exists
-    if not gfile.Exists(filepath):
+    if not tf.gfile.Exists(filepath):
      def _progress(count, block_size, total_size):
        sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
            float(count * block_size) / float(total_size) * 100.0))
@@ -124,7 +124,7 @@ def extract_svhn(local_url):
  :return:
  """

-  with gfile.Open(local_url, mode='r') as file_obj:
+  with tf.gfile.Open(local_url, mode='r') as file_obj:
    # Load MATLAB matrix using scipy IO
    dict = loadmat(file_obj)


--- a/differential_privacy/multiple_teachers/train_teachers.py
+++ b/differential_privacy/multiple_teachers/train_teachers.py
@@ -64,11 +64,11 @@ def train_teacher(dataset, nb_teachers, teacher_id):
  else:
    print("Check value of dataset flag")
    return False
-    
+
  # Retrieve subset of data for this teacher
-  data, labels = input.partition_dataset(train_data, 
-                                         train_labels, 
-                                         nb_teachers, 
+  data, labels = input.partition_dataset(train_data,
+                                         train_labels,
+                                         nb_teachers,
                                         teacher_id)

  print("Length of training data: " + str(len(labels)))

--- a/differential_privacy/privacy_accountant/tf/accountant.py
+++ b/differential_privacy/privacy_accountant/tf/accountant.py
@@ -152,7 +152,7 @@ class MomentsAccountant(object):
  We further assume that at each step, the mechanism operates on a random
  sample with sampling probability q = batch_size / total_examples. Then
    E[exp(L X)] = E[(Pr[M(D)==x / Pr[M(D')==x])^L]
-  By distinguishign two cases of wether D < D' or D' < D, we have
+  By distinguishing two cases of whether D < D' or D' < D, we have
  that
    E[exp(L X)] <= max (I1, I2)
  where
@@ -361,12 +361,12 @@ class GaussianMomentsAccountant(MomentsAccountant):
    exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma)
                             for j in range(t + 1)], dtype=tf.float64)
    # x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j}
-    x = tf.mul(binomial, signs)
+    x = tf.multiply(binomial, signs)
    # y[i, j] = x[i, j] * exp(exponents[j])
    #         = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
    # Note: this computation is done by broadcasting pointwise multiplication
    # between [t+1, t+1] tensor and [t+1] tensor.
-    y = tf.mul(x, tf.exp(exponents))
+    y = tf.multiply(x, tf.exp(exponents))
    # z[i] = sum_j y[i, j]
    #      = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
    z = tf.reduce_sum(y, 1)

--- a/domain_adaptation/README.md
+++ b/domain_adaptation/README.md
+# Domain Separation Networks
+
+
+## Introduction
+This is the code used for the "Domain Separation Networks" paper
+by Bousmalis K., Trigeorgis G., et al., which was presented at NIPS 2016. The
+paper can be found here: https://arxiv.org/abs/1608.06019.
+
+## Contact
+This code was open-sourced by [Konstantinos Bousmalis](https://github.com/bousmalis) (konstantinos@google.com).
+
+## Installation
+You will need to have the following installed on your machine before trying out the DSN code.
+
+*  Tensorflow: https://www.tensorflow.org/install/
+*  Bazel: https://bazel.build/
+
+## Important Note
+Although we are making the code available, you are only able to use the MNIST
+provider for now. We will soon provide a script to download and convert MNIST-M
+as well. Check back here in a few weeks or wait for a relevant announcement from
+[@bousmalis](https://twitter.com/bousmalis).
+
+## Running the code for adapting MNIST to MNIST-M
+In order to run the MNIST to MNIST-M experiments with DANNs and/or DANNs with
+domain separation (DSNs) you will need to set the directory you used to download
+MNIST and MNIST-M:
+
+```
+$ export DSN_DATA_DIR=/your/dir
+```
+
+Add models and models/slim to your `$PYTHONPATH`:
+
+```
+$ export PYTHONPATH=$PYTHONPATH:$PWD:$PWD/slim
+```
+
+Then you need to build the binaries with Bazel:
+
+```
+$ bazel build -c opt domain_adaptation/domain_separation/...
+```
+
+You can then train with the following command:
+
+```
+$ ./bazel-bin/domain_adaptation/domain_separation/dsn_train  \
+      --similarity_loss=dann_loss  \
+      --basic_tower=dann_mnist  \
+      --source_dataset=mnist  \
+      --target_dataset=mnist_m  \
+      --learning_rate=0.0117249  \
+      --gamma_weight=0.251175  \
+      --weight_decay=1e-6  \
+      --layers_to_regularize=fc3  \
+      --nouse_separation  \
+      --master=""  \
+      --dataset_dir=${DSN_DATA_DIR}  \
+      -v --use_logging
+```
+
+Evaluation can be invoked with the following command:
+
+```
+$ ./bazel-bin/domain_adaptation/domain_separation/dsn_eval  \
+    -v --dataset mnist_m --split test --num_examples=9001  \
+    --dataset_dir=${DSN_DATA_DIR}
+```
--- a/domain_adaptation/WORKSPACE
+++ b/domain_adaptation/WORKSPACE