Merge pull request #3862 from ilyamironov/master

Added new model (differential_privacy/pate)

Merge pull request #3862 from ilyamironov/master
Added new model (differential_privacy/pate)
8d6663c7 · Lukasz Kaiser · GitHub · abd50423 · f2b07ae8 · 8d6663c7
Unverified Commit 8d6663c7 authored Apr 03, 2018 by Lukasz Kaiser Committed by GitHub Apr 03, 2018
17 changed files
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -10,7 +10,7 @@
 /research/compression/ @nmjohn
 /research/deeplab/ @aquariusjay @yknzhu @gpapan
 /research/delf/ @andrefaraujo
-/research/differential_privacy/ @panyx0718
+/research/differential_privacy/ @panyx0718 @mironov
 /research/domain_adaptation/ @bousmalis @dmrd
 /research/gan/ @joel-shor
 /research/im2txt/ @cshallue

--- a/research/README.md
+++ b/research/README.md
@@ -28,8 +28,8 @@ installation](https://www.tensorflow.org/install).
    pre-trained Residual GRU network.
 -   [deeplab](deeplab): deep labelling for semantic image segmentation.
 -   [delf](delf): deep local features for image matching and retrieval.
-   [differential_privacy](differential_privacy): privacy-preserving student
+-   [differential_privacy](differential_privacy): differential privacy for training
-    models from multiple teachers.
+    data.
 -   [domain_adaptation](domain_adaptation): domain separation networks.
 -   [gan](gan): generative adversarial networks.
 -   [im2txt](im2txt): image-to-text neural network for image captioning.

--- a/research/differential_privacy/pate/ICLR2018/README.md
+++ b/research/differential_privacy/pate/ICLR2018/README.md
+Scripts in support of the paper "Scalable Private Learning with PATE" by Nicolas
+Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar
+Erlingsson (ICLR 2018, https://arxiv.org/abs/1802.08908).
+### Requirements
+* Python, version &ge; 2.7
+* absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
+* matplotlib
+* numpy
+* scipy
+* sympy (for smooth sensitivity analysis)  
+* write access to current directory (otherwise, output directories in download.py and *.sh scripts
+must be changed)
+## Reproducing Figures 1 and 5, and Table 2
+Before running any of the analysis scripts, create the data/ directory and download votes files by running\
+`$ python download.py`
+To generate Figures 1 and 5 run\
+`$ sh generate_figures.sh`\
+The output is written to the figures/ directory.
+For Table 2 run (may take several hours)\
+`$ sh generate_table.sh`\
+The output is written to the console.
+For data-independent bounds (for comparing with Table 2), run\
+`$ sh generate_table_data_independent.sh`\
+The output is written to the console.
+## Files in this directory
+*   generate_figures.sh --- Master script for generating Figures 1 and 5.
+*   generate_table.sh --- Master script for generating Table 2.
+*   generate_table_data_independent.sh --- Master script for computing data-independent
+    bounds.
+*   rdp_bucketized.py --- Script for producing Figures 1 (right) and 5 (right).
+*   rdp_cumulative.py --- Script for producing Figure 1 (left, middle), Figure 5
+    (left), and partition.pdf (a detailed breakdown of privacy costs per
+    source).
+*   smooth_sensitivity_table.py --- Script for generating Table 2.
+*   rdp_flow.py and plot_ls_q.py are currently not used.
+*   download.py --- Utility script for populating the data/ directory.
+All Python files take flags. Run script_name.py --help for help on flags.
--- a/research/differential_privacy/pate/ICLR2018/download.py
+++ b/research/differential_privacy/pate/ICLR2018/download.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Script to download votes files to the data/ directory.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six.moves import urllib
+import os
+import tarfile
+FILE_URI = 'https://storage.googleapis.com/pate-votes/votes.gz'
+DATA_DIR = 'data/'
+def download():
+  print('Downloading ' + FILE_URI)
+  tar_filename, _ = urllib.request.urlretrieve(FILE_URI)
+  print('Unpacking ' + tar_filename)
+  with tarfile.open(tar_filename, "r:gz") as tar:
+    tar.extractall(DATA_DIR)
+  print('Done!')
+if __name__ == '__main__':
+  if not os.path.exists(DATA_DIR):
+    print('Data directory does not exist. Creating ' + DATA_DIR)
+    os.makedirs(DATA_DIR)
+  download()
--- a/research/differential_privacy/pate/ICLR2018/generate_figures.sh
+++ b/research/differential_privacy/pate/ICLR2018/generate_figures.sh
+#!/bin/bash
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+counts_file="data/glyph_5000_teachers.npy"
+output_dir="figures/"
+executable1="python rdp_bucketized.py"
+executable2="python rdp_cumulative.py"
+mkdir -p $output_dir
+if [ ! -d "$output_dir" ]; then
+  echo "Directory $output_dir does not exist."
+  exit 1
+fi
+$executable1 \
+  --plot=small \
+  --counts_file=$counts_file \
+  --plot_file=$output_dir"noisy_thresholding_check_perf.pdf"
+$executable1 \
+  --plot=large \
+  --counts_file=$counts_file \
+  --plot_file=$output_dir"noisy_thresholding_check_perf_details.pdf"
+$executable2 \
+  --cache=False \
+  --counts_file=$counts_file \
+  --figures_dir=$output_dir
--- a/research/differential_privacy/pate/ICLR2018/generate_table.sh
+++ b/research/differential_privacy/pate/ICLR2018/generate_table.sh
+#!/bin/bash
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+echo "Reproducing Table 2. Takes a couple of hours."
+executable="python smooth_sensitivity_table.py"
+data_dir="data"
+echo
+echo "######## MNIST ########"
+echo
+$executable \
+  --counts_file=$data_dir"/mnist_250_teachers.npy" \
+  --threshold=200 \
+  --sigma1=150 \
+  --sigma2=40 \
+  --queries=640 \
+  --delta=1e-5
+echo
+echo "######## SVHN ########"
+echo
+$executable \
+  --counts_file=$data_dir"/svhn_250_teachers.npy" \
+  --threshold=300 \
+  --sigma1=200 \
+  --sigma2=40 \
+  --queries=8500 \
+  --delta=1e-6
+echo
+echo "######## Adult ########"
+echo
+$executable \
+  --counts_file=$data_dir"/adult_250_teachers.npy" \
+  --threshold=300 \
+  --sigma1=200 \
+  --sigma2=40 \
+  --queries=1500 \
+  --delta=1e-5
+echo
+echo "######## Glyph (Confident) ########"
+echo
+$executable \
+  --counts_file=$data_dir"/glyph_5000_teachers.npy" \
+  --threshold=1000 \
+  --sigma1=500 \
+  --sigma2=100 \
+  --queries=12000 \
+  --delta=1e-8
+echo
+echo "######## Glyph (Interactive, Round 1) ########"
+echo
+$executable \
+  --counts_file=$data_dir"/glyph_round1.npy" \
+  --threshold=3500 \
+  --sigma1=1500 \
+  --sigma2=100 \
+  --delta=1e-8
+echo
+echo "######## Glyph (Interactive, Round 2) ########"
+echo
+$executable \
+  --counts_file=$data_dir"/glyph_round2.npy" \
+  --baseline_file=$data_dir"/glyph_round2_student.npy" \
+  --threshold=3500 \
+  --sigma1=2000 \
+  --sigma2=200 \
+  --teachers=5000 \
+  --delta=1e-8
--- a/research/differential_privacy/pate/ICLR2018/generate_table_data_independent.sh
+++ b/research/differential_privacy/pate/ICLR2018/generate_table_data_independent.sh
+#!/bin/bash
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+echo "Table 2 with data-independent analysis."
+executable="python smooth_sensitivity_table.py"
+data_dir="data"
+echo
+echo "######## MNIST ########"
+echo
+$executable \
+  --counts_file=$data_dir"/mnist_250_teachers.npy" \
+  --threshold=200 \
+  --sigma1=150 \
+  --sigma2=40 \
+  --queries=640 \
+  --delta=1e-5 \
+  --data_independent
+echo
+echo "######## SVHN ########"
+echo
+$executable \
+  --counts_file=$data_dir"/svhn_250_teachers.npy" \
+  --threshold=300 \
+  --sigma1=200 \
+  --sigma2=40 \
+  --queries=8500 \
+  --delta=1e-6 \
+  --data_independent
+echo
+echo "######## Adult ########"
+echo
+$executable \
+  --counts_file=$data_dir"/adult_250_teachers.npy" \
+  --threshold=300 \
+  --sigma1=200 \
+  --sigma2=40 \
+  --queries=1500 \
+  --delta=1e-5 \
+  --data_independent
+echo
+echo "######## Glyph (Confident) ########"
+echo
+$executable \
+  --counts_file=$data_dir"/glyph_5000_teachers.npy" \
+  --threshold=1000 \
+  --sigma1=500 \
+  --sigma2=100 \
+  --queries=12000 \
+  --delta=1e-8 \
+  --data_independent
+echo
+echo "######## Glyph (Interactive, Round 1) ########"
+echo
+$executable \
+  --counts_file=$data_dir"/glyph_round1.npy" \
+  --threshold=3500 \
+  --sigma1=1500 \
+  --sigma2=100 \
+  --delta=1e-8 \
+  --data_independent
+echo
+echo "######## Glyph (Interactive, Round 2) ########"
+echo
+$executable \
+  --counts_file=$data_dir"/glyph_round2.npy" \
+  --baseline_file=$data_dir"/glyph_round2_student.npy" \
+  --threshold=3500 \
+  --sigma1=2000 \
+  --sigma2=200 \
+  --teachers=5000 \
+  --delta=1e-8 \
+  --order=8.5 \
+  --data_independent
--- a/research/differential_privacy/pate/ICLR2018/plot_ls_q.py
+++ b/research/differential_privacy/pate/ICLR2018/plot_ls_q.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Plots LS(q).
+A script in support of the PATE2 paper. NOT PRESENTLY USED.
+The output is written to a specified directory as a pdf file.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+import os
+import sys
+sys.path.append('..')  # Main modules reside in the parent directory.
+from absl import app
+from absl import flags
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
+import numpy as np
+import smooth_sensitivity as pate_ss
+plt.style.use('ggplot')
+FLAGS = flags.FLAGS
+flags.DEFINE_string('figures_dir', '', 'Path where the output is written to.')
+def compute_ls_q(sigma, order, num_classes):
+  def beta(q):
+    return pate_ss._compute_rdp_gnmax(sigma, math.log(q), order)
+  def bu(q):
+    return pate_ss._compute_bu_gnmax(q, sigma, order)
+  def bl(q):
+    return pate_ss._compute_bl_gnmax(q, sigma, order)
+  def delta_beta(q):
+    if q == 0 or q > .8:
+      return 0
+    beta_q = beta(q)
+    beta_bu_q = beta(bu(q))
+    beta_bl_q = beta(bl(q))
+    assert beta_bl_q <= beta_q <= beta_bu_q
+    return beta_bu_q - beta_q  # max(beta_bu_q - beta_q, beta_q - beta_bl_q)
+  logq0 = pate_ss.compute_logq0_gnmax(sigma, order)
+  logq1 = pate_ss._compute_logq1(sigma, order, num_classes)
+  print(math.exp(logq1), math.exp(logq0))
+  xs = np.linspace(0, .1, num=1000, endpoint=True)
+  ys = [delta_beta(x) for x in xs]
+  return xs, ys
+def main(argv):
+  del argv  # Unused.
+  sigma = 20
+  order = 20.
+  num_classes = 10
+  # sigma = 20
+  # order = 25.
+  # num_classes = 10
+  x_axis, ys = compute_ls_q(sigma, order, num_classes)
+  fig, ax = plt.subplots()
+  fig.set_figheight(4.5)
+  fig.set_figwidth(4.7)
+  ax.plot(x_axis, ys, alpha=.8, linewidth=5)
+  plt.xlabel('Number of queries answered', fontsize=16)
+  plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
+  ax.tick_params(labelsize=14)
+  fout_name = os.path.join(FLAGS.figures_dir, 'ls_of_q.pdf')
+  print('Saving the graph to ' + fout_name)
+  plt.show()
+  plt.close('all')
+if __name__ == '__main__':
+  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/rdp_bucketized.py
+++ b/research/differential_privacy/pate/ICLR2018/rdp_bucketized.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Illustrates how noisy thresholding check changes distribution of queries.
+A script in support of the paper "Scalable Private Learning with PATE" by
+Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar,
+Ulfar Erlingsson (https://arxiv.org/abs/1802.08908).
+The input is a file containing a numpy array of votes, one query per row, one
+class per column. Ex:
+  43, 1821, ..., 3
+  31, 16, ..., 0
+  ...
+  0, 86, ..., 438
+The output is one of two graphs depending on the setting of the plot variable.
+The output is written to a pdf file.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+import os
+import sys
+sys.path.append('..')  # Main modules reside in the parent directory.
+from absl import app
+from absl import flags
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
+import numpy as np
+import core as pate
+plt.style.use('ggplot')
+FLAGS = flags.FLAGS
+flags.DEFINE_enum('plot', 'small', ['small', 'large'], 'Selects which of'
+                  'the two plots is produced.')
+flags.DEFINE_string('counts_file', '', 'Counts file.')
+flags.DEFINE_string('plot_file', '', 'Plot file to write.')
+def compute_count_per_bin(bin_num, votes):
+  """Tabulates number of examples in each bin.
+  Args:
+    bin_num: Number of bins.
+    votes: A matrix of votes, where each row contains votes in one instance.
+  Returns:
+    Array of counts of length bin_num.
+  """
+  sums = np.sum(votes, axis=1)
+  # Check that all rows contain the same number of votes.
+  assert max(sums) == min(sums)
+  s = max(sums)
+  counts = np.zeros(bin_num)
+  n = votes.shape[0]
+  for i in xrange(n):
+    v = votes[i,]
+    bin_idx = int(math.floor(max(v) * bin_num / s))
+    assert 0 <= bin_idx < bin_num
+    counts[bin_idx] += 1
+  return counts
+def compute_privacy_cost_per_bins(bin_num, votes, sigma2, order):
+  """Outputs average privacy cost per bin.
+  Args:
+    bin_num: Number of bins.
+    votes: A matrix of votes, where each row contains votes in one instance.
+    sigma2: The scale (std) of the Gaussian noise. (Same as sigma_2 in
+            Algorithms 1 and 2.)
+    order: The Renyi order for which privacy cost is computed.
+  Returns:
+    Expected eps of RDP (ignoring delta) per example in each bin.
+  """
+  n = votes.shape[0]
+  bin_counts = np.zeros(bin_num)
+  bin_rdp = np.zeros(bin_num)  # RDP at order=order
+  for i in xrange(n):
+    v = votes[i,]
+    logq = pate.compute_logq_gaussian(v, sigma2)
+    rdp_at_order = pate.rdp_gaussian(logq, sigma2, order)
+    bin_idx = int(math.floor(max(v) * bin_num / sum(v)))
+    assert 0 <= bin_idx < bin_num
+    bin_counts[bin_idx] += 1
+    bin_rdp[bin_idx] += rdp_at_order
+    if (i + 1) % 1000 == 0:
+      print('example {}'.format(i + 1))
+      sys.stdout.flush()
+  return bin_rdp / bin_counts
+def compute_expected_answered_per_bin(bin_num, votes, threshold, sigma1):
+  """Computes expected number of answers per bin.
+  Args:
+    bin_num: Number of bins.
+    votes: A matrix of votes, where each row contains votes in one instance.
+    threshold: The threshold against which check is performed.
+    sigma1: The std of the Gaussian noise with which check is performed. (Same
+      as sigma_1 in Algorithms 1 and 2.)
+  Returns:
+    Expected number of queries answered per bin.
+  """
+  n = votes.shape[0]
+  bin_answered = np.zeros(bin_num)
+  for i in xrange(n):
+    v = votes[i,]
+    p = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
+    bin_idx = int(math.floor(max(v) * bin_num / sum(v)))
+    assert 0 <= bin_idx < bin_num
+    bin_answered[bin_idx] += p
+    if (i + 1) % 1000 == 0:
+      print('example {}'.format(i + 1))
+      sys.stdout.flush()
+  return bin_answered
+def main(argv):
+  del argv  # Unused.
+  fin_name = os.path.expanduser(FLAGS.counts_file)
+  print('Reading raw votes from ' + fin_name)
+  sys.stdout.flush()
+  votes = np.load(fin_name)
+  votes = votes[:4000,]  # truncate to 4000 samples
+  if FLAGS.plot == 'small':
+    bin_num = 5
+    m_check = compute_expected_answered_per_bin(bin_num, votes, 3500, 1500)
+  elif FLAGS.plot == 'large':
+    bin_num = 10
+    m_check = compute_expected_answered_per_bin(bin_num, votes, 3500, 1500)
+    a_check = compute_expected_answered_per_bin(bin_num, votes, 5000, 1500)
+    eps = compute_privacy_cost_per_bins(bin_num, votes, 100, 50)
+  counts = compute_count_per_bin(bin_num, votes)
+  bins = np.linspace(0, 100, num=bin_num, endpoint=False)
+  plt.close('all')
+  fig, ax = plt.subplots()
+  if FLAGS.plot == 'small':
+    fig.set_figheight(4.7)
+    fig.set_figwidth(5)
+    ax.bar(
+        bins,
+        counts,
+        20,
+        color='orangered',
+        linestyle='dashed',
+        linewidth=5,
+        edgecolor='red',
+        fill=False,
+        alpha=.5,
+        align='edge',
+        label='LNMax answers')
+    ax.bar(
+        bins,
+        m_check,
+        20,
+        color='b',
+        alpha=.5,
+        linewidth=0,
+        edgecolor='g',
+        align='edge',
+        label='Confident-GNMax\nanswers')
+  elif FLAGS.plot == 'large':
+    fig.set_figheight(4.7)
+    fig.set_figwidth(7)
+    ax.bar(
+        bins,
+        counts,
+        10,
+        linestyle='dashed',
+        linewidth=5,
+        edgecolor='red',
+        fill=False,
+        alpha=.5,
+        align='edge',
+        label='LNMax answers')
+    ax.bar(
+        bins,
+        m_check,
+        10,
+        color='g',
+        alpha=.5,
+        linewidth=0,
+        edgecolor='g',
+        align='edge',
+        label='Confident-GNMax\nanswers (moderate)')
+    ax.bar(
+        bins,
+        a_check,
+        10,
+        color='b',
+        alpha=.5,
+        align='edge',
+        label='Confident-GNMax\nanswers (aggressive)')
+    ax2 = ax.twinx()
+    bin_centers = [x + 5 for x in bins]
+    ax2.plot(bin_centers, eps, 'ko', alpha=.8)
+    ax2.set_ylim([1e-200, 1.])
+    ax2.set_yscale('log')
+    ax2.grid(False)
+    ax2.set_yticks([1e-3, 1e-50, 1e-100, 1e-150, 1e-200])
+    plt.tick_params(which='minor', right='off')
+    ax2.set_ylabel(r'Per query privacy cost $\varepsilon$', fontsize=16)
+  plt.xlim([0, 100])
+  # ax.set_yscale('log')
+  ax.set_xlabel('Percentage of teachers that agree', fontsize=16)
+  ax.set_ylabel('Number of queries answered', fontsize=16)
+  vals = ax.get_xticks()
+  ax.set_xticklabels([str(int(x)) + '%' for x in vals])
+  ax.tick_params(labelsize=14)
+  ax.legend(loc=2, prop={'size': 16})
+  # simple: 'figures/noisy_thresholding_check_perf.pdf')
+  # detailed: 'figures/noisy_thresholding_check_perf_details.pdf'
+  print('Saving the graph to ' + FLAGS.plot_file)
+  plt.savefig(os.path.expanduser(FLAGS.plot_file), bbox_inches='tight')
+  plt.show()
+if __name__ == '__main__':
+  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/rdp_cumulative.py
+++ b/research/differential_privacy/pate/ICLR2018/rdp_cumulative.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Plots three graphs illustrating cost of privacy per answered query.
+A script in support of the paper "Scalable Private Learning with PATE" by
+Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar,
+Ulfar Erlingsson (https://arxiv.org/abs/1802.08908).
+The input is a file containing a numpy array of votes, one query per row, one
+class per column. Ex:
+  43, 1821, ..., 3
+  31, 16, ..., 0
+  ...
+  0, 86, ..., 438
+The output is written to a specified directory and consists of three pdf files.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+import os
+import pickle
+import sys
+sys.path.append('..')  # Main modules reside in the parent directory.
+from absl import app
+from absl import flags
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
+import numpy as np
+import core as pate
+plt.style.use('ggplot')
+FLAGS = flags.FLAGS
+flags.DEFINE_boolean('cache', False,
+                     'Read results of privacy analysis from cache.')
+flags.DEFINE_string('counts_file', '', 'Counts file.')
+flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.')
+def run_analysis(votes, mechanism, noise_scale, params):
+  """Computes data-dependent privacy.
+  Args:
+    votes: A matrix of votes, where each row contains votes in one instance.
+    mechanism: A name of the mechanism ('lnmax', 'gnmax', or 'gnmax_conf')
+    noise_scale: A mechanism privacy parameter.
+    params: Other privacy parameters.
+  Returns:
+    Four lists: cumulative privacy cost epsilon, how privacy budget is split,
+    how many queries were answered, optimal order.
+  """
+  def compute_partition(order_opt, eps):
+    order_opt_idx = np.searchsorted(orders, order_opt)
+    if mechanism == 'gnmax_conf':
+      p = (rdp_select_cum[order_opt_idx],
+           rdp_cum[order_opt_idx] - rdp_select_cum[order_opt_idx],
+           -math.log(delta) / (order_opt - 1))
+    else:
+      p = (rdp_cum[order_opt_idx], -math.log(delta) / (order_opt - 1))
+    return [x / eps for x in p]  # Ensures that sum(x) == 1
+  # Short list of orders.
+  # orders = np.round(np.concatenate((np.arange(2, 50 + 1, 1),
+  #                   np.logspace(np.log10(50), np.log10(1000), num=20))))
+  # Long list of orders.
+  orders = np.concatenate((np.arange(2, 100 + 1, .5),
+                           np.logspace(np.log10(100), np.log10(500), num=100)))
+  delta = 1e-8
+  n = votes.shape[0]
+  eps_total = np.zeros(n)
+  partition = np.full(n, None, dtype=object)
+  order_opt = np.full(n, None, dtype=float)
+  answered = np.zeros(n)
+  rdp_cum = np.zeros(len(orders))
+  rdp_sqrd_cum = np.zeros(len(orders))
+  rdp_select_cum = np.zeros(len(orders))
+  answered_sum = 0
+  for i in xrange(n):
+    v = votes[i,]
+    if mechanism == 'lnmax':
+      logq_lnmax = pate.compute_logq_laplace(v, noise_scale)
+      rdp_query = pate.rdp_pure_eps(logq_lnmax, 2. / noise_scale, orders)
+      rdp_sqrd = rdp_query**2
+      pr_answered = 1
+    elif mechanism == 'gnmax':
+      logq_gmax = pate.compute_logq_gaussian(v, noise_scale)
+      rdp_query = pate.rdp_gaussian(logq_gmax, noise_scale, orders)
+      rdp_sqrd = rdp_query**2
+      pr_answered = 1
+    elif mechanism == 'gnmax_conf':
+      logq_step1 = pate.compute_logpr_answered(params['t'], params['sigma1'], v)
+      logq_step2 = pate.compute_logq_gaussian(v, noise_scale)
+      q_step1 = np.exp(logq_step1)
+      logq_step1_min = min(logq_step1, math.log1p(-q_step1))
+      rdp_gnmax_step1 = pate.rdp_gaussian(logq_step1_min,
+                                          2**.5 * params['sigma1'], orders)
+      rdp_gnmax_step2 = pate.rdp_gaussian(logq_step2, noise_scale, orders)
+      rdp_query = rdp_gnmax_step1 + q_step1 * rdp_gnmax_step2
+      # The expression below evaluates
+      #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
+      rdp_sqrd = (
+          rdp_gnmax_step1**2 + 2 * rdp_gnmax_step1 * q_step1 * rdp_gnmax_step2 +
+          q_step1 * rdp_gnmax_step2**2)
+      rdp_select_cum += rdp_gnmax_step1
+      pr_answered = q_step1
+    rdp_cum += rdp_query
+    rdp_sqrd_cum += rdp_sqrd
+    answered_sum += pr_answered
+    answered[i] = answered_sum
+    eps_total[i], order_opt[i] = pate.compute_eps_from_delta(
+        orders, rdp_cum, delta)
+    partition[i] = compute_partition(order_opt[i], eps_total[i])
+    if i > 0 and (i + 1) % 1000 == 0:
+      rdp_var = rdp_sqrd_cum / i - (
+          rdp_cum / i)**2  # Ignore Bessel's correction.
+      order_opt_idx = np.searchsorted(orders, order_opt[i])
+      eps_std = ((i + 1) * rdp_var[order_opt_idx])**.5  # Std of the sum.
+      print(
+          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
+          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
+              i + 1, answered_sum, eps_total[i], eps_std, order_opt[i],
+              -math.log(delta) / (order_opt[i] - 1)))
+      sys.stdout.flush()
+  return eps_total, partition, answered, order_opt
+def print_plot_small(figures_dir, eps_lap, eps_gnmax, answered_gnmax):
+  """Plots a graph of LNMax vs GNMax.
+  Args:
+    figures_dir: A name of the directory where to save the plot.
+    eps_lap: The cumulative privacy costs of the Laplace mechanism.
+    eps_gnmax: The cumulative privacy costs of the Gaussian mechanism
+    answered_gnmax: The cumulative count of queries answered.
+  """
+  xlim = 6000
+  x_axis = range(0, int(xlim), 10)
+  y_lap = np.zeros(len(x_axis))
+  y_gnmax = np.full(len(x_axis), None, dtype=float)
+  for i in xrange(len(x_axis)):
+    x = x_axis[i]
+    y_lap[i] = eps_lap[x]
+    idx = np.searchsorted(answered_gnmax, x)
+    if idx < len(eps_gnmax):
+      y_gnmax[i] = eps_gnmax[idx]
+  fig, ax = plt.subplots()
+  fig.set_figheight(4.5)
+  fig.set_figwidth(4.7)
+  ax.plot(
+      x_axis, y_lap, color='r', ls='--', label='LNMax', alpha=.5, linewidth=5)
+  ax.plot(
+      x_axis,
+      y_gnmax,
+      color='g',
+      ls='-',
+      label='Confident-GNMax',
+      alpha=.5,
+      linewidth=5)
+  plt.xticks(np.arange(0, 7000, 1000))
+  plt.xlim([0, 6000])
+  plt.ylim([0, 6.])
+  plt.xlabel('Number of queries answered', fontsize=16)
+  plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
+  plt.legend(loc=2, fontsize=13)  # loc=2 -- upper left
+  ax.tick_params(labelsize=14)
+  fout_name = os.path.join(figures_dir, 'lnmax_vs_gnmax.pdf')
+  print('Saving the graph to ' + fout_name)
+  fig.savefig(fout_name, bbox_inches='tight')
+  plt.show()
+def print_plot_large(figures_dir, eps_lap, eps_gnmax1, answered_gnmax1,
+                     eps_gnmax2, partition_gnmax2, answered_gnmax2):
+  """Plots a graph of LNMax vs GNMax with two parameters.
+  Args:
+    figures_dir: A name of the  directory where to save the plot.
+    eps_lap: The cumulative privacy costs of the Laplace mechanism.
+    eps_gnmax1: The cumulative privacy costs of the Gaussian mechanism (set 1).
+    answered_gnmax1: The cumulative count of queries answered (set 1).
+    eps_gnmax2: The cumulative privacy costs of the Gaussian mechanism (set 2).
+    partition_gnmax2: Allocation of eps for set 2.
+    answered_gnmax2: The cumulative count of queries answered (set 2).
+  """
+  xlim = 6000
+  x_axis = range(0, int(xlim), 10)
+  lenx = len(x_axis)
+  y_lap = np.zeros(lenx)
+  y_gnmax1 = np.full(lenx, np.nan, dtype=float)
+  y_gnmax2 = np.full(lenx, np.nan, dtype=float)
+  y1_gnmax2 = np.full(lenx, np.nan, dtype=float)
+  for i in xrange(lenx):
+    x = x_axis[i]
+    y_lap[i] = eps_lap[x]
+    idx1 = np.searchsorted(answered_gnmax1, x)
+    if idx1 < len(eps_gnmax1):
+      y_gnmax1[i] = eps_gnmax1[idx1]
+    idx2 = np.searchsorted(answered_gnmax2, x)
+    if idx2 < len(eps_gnmax2):
+      y_gnmax2[i] = eps_gnmax2[idx2]
+      fraction_step1, fraction_step2, _ = partition_gnmax2[idx2]
+      y1_gnmax2[i] = eps_gnmax2[idx2] * fraction_step1 / (
+          fraction_step1 + fraction_step2)
+  fig, ax = plt.subplots()
+  fig.set_figheight(4.5)
+  fig.set_figwidth(4.7)
+  ax.plot(
+      x_axis,
+      y_lap,
+      color='r',
+      ls='dashed',
+      label='LNMax',
+      alpha=.5,
+      linewidth=5)
+  ax.plot(
+      x_axis,
+      y_gnmax1,
+      color='g',
+      ls='-',
+      label='Confident-GNMax (moderate)',
+      alpha=.5,
+      linewidth=5)
+  ax.plot(
+      x_axis,
+      y_gnmax2,
+      color='b',
+      ls='-',
+      label='Confident-GNMax (aggressive)',
+      alpha=.5,
+      linewidth=5)
+  ax.fill_between(
+      x_axis, [0] * lenx,
+      y1_gnmax2.tolist(),
+      facecolor='b',
+      alpha=.3,
+      hatch='\\')
+  ax.plot(
+      x_axis,
+      y1_gnmax2,
+      color='b',
+      ls='-',
+      label='_nolegend_',
+      alpha=.5,
+      linewidth=1)
+  ax.fill_between(
+      x_axis, y1_gnmax2.tolist(), y_gnmax2.tolist(), facecolor='b', alpha=.3)
+  plt.xticks(np.arange(0, 7000, 1000))
+  plt.xlim([0, xlim])
+  plt.ylim([0, 1.])
+  plt.xlabel('Number of queries answered', fontsize=16)
+  plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
+  plt.legend(loc=2, fontsize=13)  # loc=2 -- upper left
+  ax.tick_params(labelsize=14)
+  fout_name = os.path.join(figures_dir, 'lnmax_vs_2xgnmax_large.pdf')
+  print('Saving the graph to ' + fout_name)
+  fig.savefig(fout_name, bbox_inches='tight')
+  plt.show()
+def print_plot_partition(figures_dir, eps, partition, answered, order_opt):
+  """Plots an expert version of the privacy-per-answered-query graph.
+  Args:
+    figures_dir: A name of the directory where to save the plot.
+    eps: The cumulative privacy cost.
+    partition: Allocation of the privacy cost.
+    answered: Cumulative number of queries answered.
+    order_opt: The list of optimal orders.
+  """
+  xlim = 6000
+  x = range(0, int(xlim), 10)
+  lenx = len(x)
+  y = np.full(lenx, np.nan, dtype=float)
+  y1 = np.full(lenx, np.nan, dtype=float)
+  y2 = np.full(lenx, np.nan, dtype=float)
+  y_right = np.full(lenx, np.nan, dtype=float)
+  for i in xrange(lenx):
+    idx = np.searchsorted(answered, x[i])
+    if idx < len(eps):
+      y[i] = eps[idx]
+      fraction_step1, _, fraction_delta = partition[idx]
+      y1[i] = fraction_delta * y[i]
+      y2[i] = (fraction_delta + fraction_step1) * y[i]
+      y_right[i] = order_opt[idx]
+  # plt.close('all')
+  fig, ax = plt.subplots()
+  fig.set_figheight(4.5)
+  fig.set_figwidth(4.7)
+  l1 = ax.plot(
+      x, y, color='b', ls='-', label=r'Total privacy cost', linewidth=5).pop()
+  ax.plot(x, y1, color='b', ls='-', label=r'_nolegend_', alpha=.5, linewidth=1)
+  ax.plot(x, y2, color='b', ls='-', label=r'_nolegend_', alpha=.5, linewidth=1)
+  ax.fill_between(x, [0] * lenx, y1.tolist(), facecolor='b', alpha=.1)
+  ax.fill_between(x, y1.tolist(), y2.tolist(), facecolor='b', alpha=.2)
+  ax.fill_between(x, y2.tolist(), y.tolist(), facecolor='b', alpha=.3)
+  t1 = 300
+  ax.text(x[t1], y1[t1] * .35, r'due to $\delta$', alpha=.5, fontsize=18)
+  ax.text(
+      x[t1],
+      y1[t1] + (y2[t1] - y1[t1]) * .6,
+      r'selection ($\sigma_1$)',
+      alpha=.5,
+      fontsize=18)
+  t2 = 550
+  ax.annotate(
+      r'answering ($\sigma_2$)',
+      xy=(x[t2], (y[t2] + y2[t2]) / 2),
+      xytext=(x[200], y[t2] * 1.1),
+      arrowprops=dict(facecolor='black', shrink=0.005, alpha=.5),
+      fontsize=18,
+      alpha=.5)
+  ax2 = ax.twinx()
+  l2 = ax2.plot(
+      x, y_right, 'r', ls='-', label=r'Optimal order', linewidth=5,
+      alpha=.5).pop()
+  ax2.grid(False)
+  ax2.set_ylabel(r'Optimal Renyi order', fontsize=16)
+  plt.xticks(np.arange(0, 7000, 1000))
+  plt.xlim([0, xlim])
+  ax.set_ylim([0, 1.])
+  ax2.set_ylim([0, 200.])
+  ax.set_xlabel('Number of queries answered', fontsize=16)
+  ax.set_ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
+  # Merging legends.
+  ax.legend((l1, l2), (l1.get_label(), l2.get_label()), loc=0, fontsize=13)
+  ax.tick_params(labelsize=14)
+  fout_name = os.path.join(figures_dir, 'partition.pdf')
+  print('Saving the graph to ' + fout_name)
+  fig.savefig(fout_name, bbox_inches='tight')
+  plt.show()
+def run_all_analyses(votes, lambda_laplace, gnmax_parameters, sigma2):
+  """Sequentially runs all analyses.
+  Args:
+    votes: A matrix of votes, where each row contains votes in one instance.
+    lambda_laplace: The scale of the Laplace noise (lambda).
+    gnmax_parameters: A list of parameters for GNMax.
+    sigma2: Shared parameter for the GNMax mechanisms.
+  Returns:
+    Five lists whose length is the number of queries.
+  """
+  print('=== Laplace Mechanism ===')
+  eps_lap, _, _, _ = run_analysis(votes, 'lnmax', lambda_laplace, None)
+  print()
+  # Does not go anywhere, for now
+  # print('=== Gaussian Mechanism (simple) ===')
+  # eps, _, _, _ = run_analysis(votes[:n,], 'gnmax', sigma1, None)
+  eps_gnmax = [[] for p in gnmax_parameters]
+  partition_gmax = [[] for p in gnmax_parameters]
+  answered = [[] for p in gnmax_parameters]
+  order_opt = [[] for p in gnmax_parameters]
+  for i, p in enumerate(gnmax_parameters):
+    print('=== Gaussian Mechanism (confident) {}: ==='.format(p))
+    eps_gnmax[i], partition_gmax[i], answered[i], order_opt[i] = run_analysis(
+        votes, 'gnmax_conf', sigma2, p)
+    print()
+  return eps_lap, eps_gnmax, partition_gmax, answered, order_opt
+def main(argv):
+  del argv  # Unused.
+  lambda_laplace = 50.  # corresponds to eps = 1. / lambda_laplace
+  # Paramaters of the GNMax
+  gnmax_parameters = ({
+      't': 1000,
+      'sigma1': 500
+  }, {
+      't': 3500,
+      'sigma1': 1500
+  }, {
+      't': 5000,
+      'sigma1': 1500
+  })
+  sigma2 = 100  # GNMax parameters differ only in Step 1 (selection).
+  ftemp_name = '/tmp/precomputed.pkl'
+  figures_dir = os.path.expanduser(FLAGS.figures_dir)
+  if FLAGS.cache and os.path.isfile(ftemp_name):
+    print('Reading from cache ' + ftemp_name)
+    with open(ftemp_name, 'rb') as f:
+      (eps_lap, eps_gnmax, partition_gmax, answered_gnmax,
+       orders_opt_gnmax) = pickle.load(f)
+  else:
+    fin_name = os.path.expanduser(FLAGS.counts_file)
+    print('Reading raw votes from ' + fin_name)
+    sys.stdout.flush()
+    votes = np.load(fin_name)
+    (eps_lap, eps_gnmax, partition_gmax,
+     answered_gnmax, orders_opt_gnmax) = run_all_analyses(
+         votes, lambda_laplace, gnmax_parameters, sigma2)
+    print('Writing to cache ' + ftemp_name)
+    with open(ftemp_name, 'wb') as f:
+      pickle.dump((eps_lap, eps_gnmax, partition_gmax, answered_gnmax,
+                   orders_opt_gnmax), f)
+  print_plot_small(figures_dir, eps_lap, eps_gnmax[0], answered_gnmax[0])
+  print_plot_large(figures_dir, eps_lap, eps_gnmax[1], answered_gnmax[1],
+                   eps_gnmax[2], partition_gmax[2], answered_gnmax[2])
+  print_plot_partition(figures_dir, eps_gnmax[2], partition_gmax[2],
+                       answered_gnmax[2], orders_opt_gnmax[2])
+  plt.close('all')
+if __name__ == '__main__':
+  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/rdp_flow.py
+++ b/research/differential_privacy/pate/ICLR2018/rdp_flow.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Plots two graphs illustrating cost of privacy per answered query.
+PRESENTLY NOT USED.
+A script in support of the PATE2 paper. The input is a file containing a numpy
+array of votes, one query per row, one class per column. Ex:
+  43, 1821, ..., 3
+  31, 16, ..., 0
+  ...
+  0, 86, ..., 438
+The output is written to a specified directory and consists of two pdf files.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+import os
+import sys
+sys.path.append('..')  # Main modules reside in the parent directory.
+from absl import app
+from absl import flags
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
+import numpy as np
+import core as pate
+plt.style.use('ggplot')
+FLAGS = flags.FLAGS
+flags.DEFINE_string('counts_file', '', 'Counts file.')
+flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.')
+def plot_rdp_curve_per_example(votes, sigmas):
+  orders = np.linspace(1., 100., endpoint=True, num=1000)
+  orders[0] = 1.5
+  fig, ax = plt.subplots()
+  fig.set_figheight(4.5)
+  fig.set_figwidth(4.7)
+  styles = [':', '-']
+  labels = ['ex1', 'ex2']
+  for i in xrange(votes.shape[0]):
+    print(sorted(votes[i,], reverse=True)[:10])
+    for sigma in sigmas:
+      logq = pate.compute_logq_gaussian(votes[i,], sigma)
+      rdp = pate.rdp_gaussian(logq, sigma, orders)
+      ax.plot(
+          orders,
+          rdp,
+          label=r'{} $\sigma$={}'.format(labels[i], int(sigma)),
+          linestyle=styles[i],
+          linewidth=5)
+  for sigma in sigmas:
+    ax.plot(
+        orders,
+        pate.rdp_data_independent_gaussian(sigma, orders),
+        alpha=.3,
+        label=r'Data-ind bound $\sigma$={}'.format(int(sigma)),
+        linewidth=10)
+  plt.yticks([0, .01])
+  plt.xlabel(r'Order $\lambda$', fontsize=16)
+  plt.ylabel(r'RDP value $\varepsilon$ at $\lambda$', fontsize=16)
+  ax.tick_params(labelsize=14)
+  fout_name = os.path.join(FLAGS.figures_dir, 'rdp_flow1.pdf')
+  print('Saving the graph to ' + fout_name)
+  fig.savefig(fout_name, bbox_inches='tight')
+  plt.legend(loc=0, fontsize=13)
+  plt.show()
+def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
+                      target_answered):
+  rdp_cum = np.zeros(len(orders))
+  answered = 0
+  for i, v in enumerate(votes):
+    v = sorted(v, reverse=True)
+    q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
+    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
+    rdp = pate.rdp_gaussian(logq_step2, sigma2, orders)
+    rdp_cum += q_step1 * rdp
+    answered += q_step1
+    if answered >= target_answered:
+      print('Processed {} queries to answer {}.'.format(i, target_answered))
+      return rdp_cum
+  assert False, 'Never reached {} answered queries.'.format(target_answered)
+def plot_rdp_total(votes, sigmas):
+  orders = np.linspace(1., 100., endpoint=True, num=100)
+  orders[0] = 1.5
+  fig, ax = plt.subplots()
+  fig.set_figheight(4.5)
+  fig.set_figwidth(4.7)
+  for sigma in sigmas:
+    rdp = compute_rdp_curve(votes, 5000, 1000, sigma, orders, 2000)
+    ax.plot(
+        orders,
+        rdp,
+        alpha=.8,
+        label=r'$\sigma$={}'.format(int(sigma)),
+        linewidth=5)
+  plt.xlabel(r'Order $\lambda$', fontsize=16)
+  plt.ylabel(r'RDP value $\varepsilon$ at $\lambda$', fontsize=16)
+  ax.tick_params(labelsize=14)
+  fout_name = os.path.join(FLAGS.figures_dir, 'rdp_flow2.pdf')
+  print('Saving the graph to ' + fout_name)
+  fig.savefig(fout_name, bbox_inches='tight')
+  plt.legend(loc=0, fontsize=13)
+  plt.show()
+def main(argv):
+  del argv  # Unused.
+  fin_name = os.path.expanduser(FLAGS.counts_file)
+  print('Reading raw votes from ' + fin_name)
+  sys.stdout.flush()
+  votes = np.load(fin_name)
+  votes = votes[:12000,]  # truncate to 4000 samples
+  v1 = [2550, 2200, 250]  # based on votes[2,]
+  v2 = [2600, 2200, 200]  # based on votes[381,]
+  plot_rdp_curve_per_example(np.array([v1, v2]), (100., 150.))
+  plot_rdp_total(votes, (100., 150.))
+if __name__ == '__main__':
+  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/smooth_sensitivity_table.py
+++ b/research/differential_privacy/pate/ICLR2018/smooth_sensitivity_table.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Performs privacy analysis of GNMax with smooth sensitivity.
+A script in support of the paper "Scalable Private Learning with PATE" by
+Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar,
+Ulfar Erlingsson (https://arxiv.org/abs/1802.08908).
+Several flavors of the GNMax algorithm can be analyzed.
+  - Plain GNMax (argmax w/ Gaussian noise) is assumed when arguments threshold
+    and sigma2 are missing.
+  - Confident GNMax (thresholding + argmax w/ Gaussian noise) is used when
+    threshold, sigma1, and sigma2 are given.
+  - Interactive GNMax (two- or multi-round) is triggered by specifying
+    baseline_file, which provides baseline values for votes selection in Step 1.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+import os
+import sys
+sys.path.append('..')  # Main modules reside in the parent directory.
+from absl import app
+from absl import flags
+import numpy as np
+import core as pate
+import smooth_sensitivity as pate_ss
+FLAGS = flags.FLAGS
+flags.DEFINE_string('counts_file', None, 'Counts file.')
+flags.DEFINE_string('baseline_file', None, 'File with baseline scores.')
+flags.DEFINE_boolean('data_independent', False,
+                     'Force data-independent bounds.')
+flags.DEFINE_float('threshold', None, 'Threshold for step 1 (selection).')
+flags.DEFINE_float('sigma1', None, 'Sigma for step 1 (selection).')
+flags.DEFINE_float('sigma2', None, 'Sigma for step 2 (argmax).')
+flags.DEFINE_integer('queries', None, 'Number of queries made by the student.')
+flags.DEFINE_float('delta', 1e-8, 'Target delta.')
+flags.DEFINE_float(
+    'order', None,
+    'Fixes a Renyi DP order (if unspecified, finds an optimal order from a '
+    'hardcoded list).')
+flags.DEFINE_integer(
+    'teachers', None,
+    'Number of teachers (if unspecified, derived from the counts file).')
+flags.mark_flag_as_required('counts_file')
+flags.mark_flag_as_required('sigma2')
+def _check_conditions(sigma, num_classes, orders):
+  """Symbolic-numeric verification of conditions C5 and C6.
+  The conditions on the beta function are verified by constructing the beta
+  function symbolically, and then checking that its derivative (computed
+  symbolically) is non-negative within the interval of conjectured monotonicity.
+  The last check is performed numerically.
+  """
+  print('Checking conditions C5 and C6 for all orders.')
+  sys.stdout.flush()
+  conditions_hold = True
+  for order in orders:
+    cond5, cond6 = pate_ss.check_conditions(sigma, num_classes, order)
+    conditions_hold &= cond5 and cond6
+    if not cond5:
+      print('Condition C5 does not hold for order =', order)
+    elif not cond6:
+      print('Condition C6 does not hold for order =', order)
+  if conditions_hold:
+    print('Conditions C5-C6 hold for all orders.')
+  sys.stdout.flush()
+  return conditions_hold
+def _compute_rdp(votes, baseline, threshold, sigma1, sigma2, delta, orders,
+                 data_ind):
+  """Computes the (data-dependent) RDP curve for Confident GNMax."""
+  rdp_cum = np.zeros(len(orders))
+  rdp_sqrd_cum = np.zeros(len(orders))
+  answered = 0
+  for i, v in enumerate(votes):
+    if threshold is None:
+      logq_step1 = 0  # No thresholding, always proceed to step 2.
+      rdp_step1 = np.zeros(len(orders))
+    else:
+      logq_step1 = pate.compute_logpr_answered(threshold, sigma1,
+                                               v - baseline[i,])
+      if data_ind:
+        rdp_step1 = pate.compute_rdp_data_independent_threshold(sigma1, orders)
+      else:
+        rdp_step1 = pate.compute_rdp_threshold(logq_step1, sigma1, orders)
+    if data_ind:
+      rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)
+    else:
+      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
+      rdp_step2 = pate.rdp_gaussian(logq_step2, sigma2, orders)
+    q_step1 = np.exp(logq_step1)
+    rdp = rdp_step1 + rdp_step2 * q_step1
+    # The expression below evaluates
+    #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
+    rdp_sqrd = (
+        rdp_step1**2 + 2 * rdp_step1 * q_step1 * rdp_step2 +
+        q_step1 * rdp_step2**2)
+    rdp_sqrd_cum += rdp_sqrd
+    rdp_cum += rdp
+    answered += q_step1
+    if ((i + 1) % 1000 == 0) or (i == votes.shape[0] - 1):
+      rdp_var = rdp_sqrd_cum / i - (
+          rdp_cum / i)**2  # Ignore Bessel's correction.
+      eps_total, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)
+      order_opt_idx = np.searchsorted(orders, order_opt)
+      eps_std = ((i + 1) * rdp_var[order_opt_idx])**.5  # Std of the sum.
+      print(
+          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
+          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
+              i + 1, answered, eps_total, eps_std, order_opt,
+              -math.log(delta) / (order_opt - 1)))
+      sys.stdout.flush()
+    _, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)
+  return order_opt
+def _find_optimal_smooth_sensitivity_parameters(
+    votes, baseline, num_teachers, threshold, sigma1, sigma2, delta, ind_step1,
+    ind_step2, order):
+  """Optimizes smooth sensitivity parameters by minimizing a cost function.
+  The cost function is
+        exact_eps + cost of GNSS + two stds of noise,
+  which captures that upper bound of the confidence interval of the sanitized
+  privacy budget.
+  Since optimization is done with full view of sensitive data, the results
+  cannot be released.
+  """
+  rdp_cum = 0
+  answered_cum = 0
+  ls_cum = 0
+  # Define a plausible range for the beta values.
+  betas = np.arange(.3 / order, .495 / order, .01 / order)
+  cost_delta = math.log(1 / delta) / (order - 1)
+  for i, v in enumerate(votes):
+    if threshold is None:
+      log_pr_answered = 0
+      rdp1 = 0
+      ls_step1 = np.zeros(num_teachers)
+    else:
+      log_pr_answered = pate.compute_logpr_answered(threshold, sigma1,
+                                                    v - baseline[i,])
+      if ind_step1:  # apply data-independent bound for step 1 (thresholding).
+        rdp1 = pate.compute_rdp_data_independent_threshold(sigma1, order)
+        ls_step1 = np.zeros(num_teachers)
+      else:
+        rdp1 = pate.compute_rdp_threshold(log_pr_answered, sigma1, order)
+        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
+            v - baseline[i,], num_teachers, threshold, sigma1, order)
+    pr_answered = math.exp(log_pr_answered)
+    answered_cum += pr_answered
+    if ind_step2:  # apply data-independent bound for step 2 (GNMax).
+      rdp2 = pate.rdp_data_independent_gaussian(sigma2, order)
+      ls_step2 = np.zeros(num_teachers)
+    else:
+      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
+      rdp2 = pate.rdp_gaussian(logq_step2, sigma2, order)
+      # Compute smooth sensitivity.
+      ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
+          v, num_teachers, sigma2, order)
+    rdp_cum += rdp1 + pr_answered * rdp2
+    ls_cum += ls_step1 + pr_answered * ls_step2  # Expected local sensitivity.
+    if ind_step1 and ind_step2:
+      # Data-independent bounds.
+      cost_opt, beta_opt, ss_opt, sigma_ss_opt = None, 0., 0., np.inf
+    else:
+      # Data-dependent bounds.
+      cost_opt, beta_opt, ss_opt, sigma_ss_opt = np.inf, None, None, None
+      for beta in betas:
+        ss = pate_ss.compute_discounted_max(beta, ls_cum)
+        # Solution to the minimization problem:
+        #   min_sigma {order * exp(2 * beta)/ sigma^2 + 2 * ss * sigma}
+        sigma_ss = ((order * math.exp(2 * beta)) / ss)**(1 / 3)
+        cost_ss = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
+            beta, sigma_ss, order)
+        # Cost captures exact_eps + cost of releasing SS + two stds of noise.
+        cost = rdp_cum + cost_ss + 2 * ss * sigma_ss
+        if cost < cost_opt:
+          cost_opt, beta_opt, ss_opt, sigma_ss_opt = cost, beta, ss, sigma_ss
+    if ((i + 1) % 100 == 0) or (i == votes.shape[0] - 1):
+      eps_before_ss = rdp_cum + cost_delta
+      eps_with_ss = (
+          eps_before_ss + pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
+              beta_opt, sigma_ss_opt, order))
+      print('{}: E[answered queries] = {:.1f}, RDP at {} goes from {:.3f} to '
+            '{:.3f} +/- {:.3f} (ss = {:.4}, beta = {:.4f}, sigma_ss = {:.3f})'.
+            format(i + 1, answered_cum, order, eps_before_ss, eps_with_ss,
+                   ss_opt * sigma_ss_opt, ss_opt, beta_opt, sigma_ss_opt))
+      sys.stdout.flush()
+  # Return optimal parameters for the last iteration.
+  return beta_opt, ss_opt, sigma_ss_opt
+####################
+# HELPER FUNCTIONS #
+####################
+def _load_votes(counts_file, baseline_file, queries):
+  counts_file_expanded = os.path.expanduser(counts_file)
+  print('Reading raw votes from ' + counts_file_expanded)
+  sys.stdout.flush()
+  votes = np.load(counts_file_expanded)
+  print('Shape of the votes matrix = {}'.format(votes.shape))
+  if baseline_file is not None:
+    baseline_file_expanded = os.path.expanduser(baseline_file)
+    print('Reading baseline values from ' + baseline_file_expanded)
+    sys.stdout.flush()
+    baseline = np.load(baseline_file_expanded)
+    if votes.shape != baseline.shape:
+      raise ValueError(
+          'Counts file and baseline file must have the same shape. Got {} and '
+          '{} instead.'.format(votes.shape, baseline.shape))
+  else:
+    baseline = np.zeros_like(votes)
+  if queries is not None:
+    if votes.shape[0] < queries:
+      raise ValueError('Expect {} rows, got {} in {}'.format(
+          queries, votes.shape[0], counts_file))
+    # Truncate the votes matrix to the number of queries made.
+    votes = votes[:queries,]
+    baseline = baseline[:queries,]
+  else:
+    print('Process all {} input rows. (Use --queries flag to truncate.)'.format(
+        votes.shape[0]))
+  return votes, baseline
+def _count_teachers(votes):
+  s = np.sum(votes, axis=1)
+  num_teachers = int(max(s))
+  if min(s) != num_teachers:
+    raise ValueError(
+        'Matrix of votes is malformed: the number of votes is not the same '
+        'across rows.')
+  return num_teachers
+def _is_data_ind_step1(num_teachers, threshold, sigma1, orders):
+  if threshold is None:
+    return True
+  return np.all(
+      pate.is_data_independent_always_opt_threshold(num_teachers, threshold,
+                                                    sigma1, orders))
+def _is_data_ind_step2(num_teachers, num_classes, sigma, orders):
+  return np.all(
+      pate.is_data_independent_always_opt_gaussian(num_teachers, num_classes,
+                                                   sigma, orders))
+def main(argv):
+  del argv  # Unused.
+  if (FLAGS.threshold is None) != (FLAGS.sigma1 is None):
+    raise ValueError(
+        '--threshold flag and --sigma1 flag must be present or absent '
+        'simultaneously.')
+  if FLAGS.order is None:
+    # Long list of orders.
+    orders = np.concatenate((np.arange(2, 100 + 1, .5),
+                             np.logspace(np.log10(100), np.log10(500),
+                                         num=100)))
+    # Short list of orders.
+    # orders = np.round(
+    #     np.concatenate((np.arange(2, 50 + 1, 1),
+    #                     np.logspace(np.log10(50), np.log10(1000), num=20))))
+  else:
+    orders = np.array([FLAGS.order])
+  votes, baseline = _load_votes(FLAGS.counts_file, FLAGS.baseline_file,
+                                FLAGS.queries)
+  if FLAGS.teachers is None:
+    num_teachers = _count_teachers(votes)
+  else:
+    num_teachers = FLAGS.teachers
+  num_classes = votes.shape[1]
+  order = _compute_rdp(votes, baseline, FLAGS.threshold, FLAGS.sigma1,
+                       FLAGS.sigma2, FLAGS.delta, orders,
+                       FLAGS.data_independent)
+  ind_step1 = _is_data_ind_step1(num_teachers, FLAGS.threshold, FLAGS.sigma1,
+                                 order)
+  ind_step2 = _is_data_ind_step2(num_teachers, num_classes, FLAGS.sigma2, order)
+  if FLAGS.data_independent or (ind_step1 and ind_step2):
+    print('Nothing to do here, all analyses are data-independent.')
+    return
+  if not _check_conditions(FLAGS.sigma2, num_classes, [order]):
+    return  # Quit early: sufficient conditions for correctness fail to hold.
+  beta_opt, ss_opt, sigma_ss_opt = _find_optimal_smooth_sensitivity_parameters(
+      votes, baseline, num_teachers, FLAGS.threshold, FLAGS.sigma1,
+      FLAGS.sigma2, FLAGS.delta, ind_step1, ind_step2, order)
+  print('Optimal beta = {:.4f}, E[SS_beta] = {:.4}, sigma_ss = {:.2f}'.format(
+      beta_opt, ss_opt, sigma_ss_opt))
+if __name__ == '__main__':
+  app.run(main)
--- a/research/differential_privacy/pate/README.md
+++ b/research/differential_privacy/pate/README.md
+Implementation of an RDP privacy accountant and smooth sensitivity analysis for
+the PATE framework. The underlying theory and supporting experiments appear in
+"Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya
+Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018,
+https://arxiv.org/abs/1802.08908).
+## Overview
+The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced 
+by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from
+Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The 
+framework enables model-agnostic training that provably provides [differential
+privacy](https://en.wikipedia.org/wiki/Differential_privacy) of the training 
+dataset. 
+The framework consists of _teachers_, the _student_ model, and the _aggregator_. The 
+teachers are models trained on disjoint subsets of the training datasets. The student
+model has access to an insensitive (i.e., public) unlabelled dataset, which is labelled by 
+interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies 
+outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or
+refuses to answer.
+Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_,
+which means that they are solely the function of the aggregator's parameters. Alternatively, privacy 
+analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on
+the input distribution, the final privacy guarantees can be improved relative to the data-independent
+analysis. Data-dependent privacy guarantees may, by themselves, be a function of sensitive data and 
+therefore publishing these guarantees requires its own sanitization procedure. In our case 
+sanitization of data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis.
+The common machinery used for all privacy analyses in this repository is the 
+R&eacute;nyi differential privacy, or RDP (see https://arxiv.org/abs/1702.07476). 
+This repository contains implementations of privacy accountants and smooth 
+sensitivity analysis for several data-independent and data-dependent mechanism that together
+comprise the PATE framework.
+### Requirements
+* Python, version &ge; 2.7
+* absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
+* numpy
+* scipy
+* sympy (for smooth sensitivity analysis)
+* unittest (for testing)
+### Self-testing
+To verify the installation run
+```bash
+$ python core_test.py
+$ python smooth_sensitivity_test.py
+```
+## Files in this directory
+*   core.py --- RDP privacy accountant for several vote aggregators (GNMax,
+    Threshold, Laplace).
+*   smooth_sensitivity.py --- Smooth sensitivity analysis for GNMax and
+    Threshold mechanisms.
+*   core_test.py and smooth_sensitivity_test.py --- Unit tests for the
+    files above.
+## Contact information
+You may direct your comments to mironov@google.com and PR to @ilyamironov.
--- a/research/differential_privacy/pate/core.py
+++ b/research/differential_privacy/pate/core.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Core functions for RDP analysis in PATE framework.
+This library comprises the core functions for doing differentially private
+analysis of the PATE architecture and its various Noisy Max and other
+mechanisms.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+from absl import app
+import numpy as np
+import scipy.stats
+def _logaddexp(x):
+  """Addition in the log space. Analogue of numpy.logaddexp for a list."""
+  m = max(x)
+  return m + math.log(sum(np.exp(x - m)))
+def _log1mexp(x):
+  """Numerically stable computation of log(1-exp(x))."""
+  if x < -1:
+    return math.log1p(-math.exp(x))
+  elif x < 0:
+    return math.log(-math.expm1(x))
+  elif x == 0:
+    return -np.inf
+  else:
+    raise ValueError("Argument must be non-positive.")
+def compute_eps_from_delta(orders, rdp, delta):
+  """Translates between RDP and (eps, delta)-DP.
+  Args:
+    orders: A list (or a scalar) of orders.
+    rdp: A list of RDP guarantees (of the same length as orders).
+    delta: Target delta.
+  Returns:
+    Pair of (eps, optimal_order).
+  Raises:
+    ValueError: If input is malformed.
+  """
+  if len(orders) != len(rdp):
+    raise ValueError("Input lists must have the same length.")
+  eps = np.array(rdp) - math.log(delta) / (np.array(orders) - 1)
+  idx_opt = np.argmin(eps)
+  return eps[idx_opt], orders[idx_opt]
+#####################
+# RDP FOR THE GNMAX #
+#####################
+def compute_logq_gaussian(counts, sigma):
+  """Returns an upper bound on ln Pr[outcome != argmax] for GNMax.
+  Implementation of Proposition 7.
+  Args:
+    counts: A numpy array of scores.
+    sigma: The standard deviation of the Gaussian noise in the GNMax mechanism.
+  Returns:
+    logq: Natural log of the probability that outcome is different from argmax.
+  """
+  n = len(counts)
+  variance = sigma**2
+  idx_max = np.argmax(counts)
+  counts_normalized = counts[idx_max] - counts
+  counts_rest = counts_normalized[np.arange(n) != idx_max]  # exclude one index
+  # Upper bound q via a union bound rather than a more precise calculation.
+  logq = _logaddexp(
+      scipy.stats.norm.logsf(counts_rest, scale=math.sqrt(2 * variance)))
+  # A sketch of a more accurate estimate, which is currently disabled for two
+  # reasons:
+  # 1. Numerical instability;
+  # 2. Not covered by smooth sensitivity analysis.
+  # covariance = variance * (np.ones((n - 1, n - 1)) + np.identity(n - 1))
+  # logq = np.log1p(-statsmodels.sandbox.distributions.extras.mvnormcdf(
+  #     counts_rest, np.zeros(n - 1), covariance, maxpts=1e4))
+  return min(logq, math.log(1 - (1 / n)))
+def rdp_data_independent_gaussian(sigma, orders):
+  """Computes a data-independent RDP curve for GNMax.
+  Implementation of Proposition 8.
+  Args:
+    sigma: Standard deviation of Gaussian noise.
+    orders: An array_like list of Renyi orders.
+  Returns:
+    Upper bound on RPD for all orders. A scalar if orders is a scalar.
+  Raises:
+    ValueError: If the input is malformed.
+  """
+  if sigma < 0 or np.any(orders <= 1):  # not defined for alpha=1
+    raise ValueError("Inputs are malformed.")
+  variance = sigma**2
+  if np.isscalar(orders):
+    return orders / variance
+  else:
+    return np.atleast_1d(orders) / variance
+def rdp_gaussian(logq, sigma, orders):
+  """Bounds RDP from above of GNMax given an upper bound on q (Theorem 6).
+  Args:
+    logq: Natural logarithm of the probability of a non-argmax outcome.
+    sigma: Standard deviation of Gaussian noise.
+    orders: An array_like list of Renyi orders.
+  Returns:
+    Upper bound on RPD for all orders. A scalar if orders is a scalar.
+  Raises:
+    ValueError: If the input is malformed.
+  """
+  if logq > 0 or sigma < 0 or np.any(orders <= 1):  # not defined for alpha=1
+    raise ValueError("Inputs are malformed.")
+  if np.isneginf(logq):  # If the mechanism's output is fixed, it has 0-DP.
+    if np.isscalar(orders):
+      return 0.
+    else:
+      return np.full_like(orders, 0., dtype=np.float)
+  variance = sigma**2
+  # Use two different higher orders: mu_hi1 and mu_hi2 computed according to
+  # Proposition 10.
+  mu_hi2 = math.sqrt(variance * -logq)
+  mu_hi1 = mu_hi2 + 1
+  orders_vec = np.atleast_1d(orders)
+  ret = orders_vec / variance  # baseline: data-independent bound
+  # Filter out entries where data-dependent bound does not apply.
+  mask = np.logical_and(mu_hi1 > orders_vec, mu_hi2 > 1)
+  rdp_hi1 = mu_hi1 / variance
+  rdp_hi2 = mu_hi2 / variance
+  log_a2 = (mu_hi2 - 1) * rdp_hi2
+  # Make sure q is in the increasing wrt q range and A is positive.
+  if (np.any(mask) and logq <= log_a2 - mu_hi2 *
+      (math.log(1 + 1 / (mu_hi1 - 1)) + math.log(1 + 1 / (mu_hi2 - 1))) and
+      -logq > rdp_hi2):
+    # Use log1p(x) = log(1 + x) to avoid catastrophic cancellations when x ~ 0.
+    log1q = _log1mexp(logq)  # log1q = log(1-q)
+    log_a = (orders - 1) * (
+        log1q - _log1mexp((logq + rdp_hi2) * (1 - 1 / mu_hi2)))
+    log_b = (orders - 1) * (rdp_hi1 - logq / (mu_hi1 - 1))
+    # Use logaddexp(x, y) = log(e^x + e^y) to avoid overflow for large x, y.
+    log_s = np.logaddexp(log1q + log_a, logq + log_b)
+    ret[mask] = np.minimum(ret, log_s / (orders - 1))[mask]
+  assert np.all(ret >= 0)
+  if np.isscalar(orders):
+    return np.asscalar(ret)
+  else:
+    return ret
+def is_data_independent_always_opt_gaussian(num_teachers, num_classes, sigma,
+                                            orders):
+  """Tests whether data-ind bound is always optimal for GNMax.
+  Args:
+    num_teachers: Number of teachers.
+    num_classes: Number of classes.
+    sigma: Standard deviation of the Gaussian noise.
+    orders: An array_like list of Renyi orders.
+  Returns:
+    Boolean array of length |orders| (a scalar if orders is a scalar). True if
+    the data-independent bound is always the same as the data-dependent bound.
+  """
+  unanimous = np.array([num_teachers] + [0] * (num_classes - 1))
+  logq = compute_logq_gaussian(unanimous, sigma)
+  rdp_dep = rdp_gaussian(logq, sigma, orders)
+  rdp_ind = rdp_data_independent_gaussian(sigma, orders)
+  return np.isclose(rdp_dep, rdp_ind)
+###################################
+# RDP FOR THE THRESHOLD MECHANISM #
+###################################
+def compute_logpr_answered(t, sigma, counts):
+  """Computes log of the probability that a noisy threshold is crossed.
+  Args:
+    t: The threshold.
+    sigma: The stdev of the Gaussian noise added to the threshold.
+    counts: An array of votes.
+  Returns:
+    Natural log of the probability that max is larger than a noisy threshold.
+  """
+  # Compared to the paper, max(counts) is rounded to the nearest integer. This
+  # is done to facilitate computation of smooth sensitivity for the case of
+  # the interactive mechanism, where votes are not necessarily integer.
+  return scipy.stats.norm.logsf(t - round(max(counts)), scale=sigma)
+def compute_rdp_data_independent_threshold(sigma, orders):
+  # The input to the threshold mechanism has stability 1, compared to
+  # GNMax, which has stability = 2. Hence the sqrt(2) factor below.
+  return rdp_data_independent_gaussian(2**.5 * sigma, orders)
+def compute_rdp_threshold(log_pr_answered, sigma, orders):
+  logq = min(log_pr_answered, _log1mexp(log_pr_answered))
+  # The input to the threshold mechanism has stability 1, compared to
+  # GNMax, which has stability = 2. Hence the sqrt(2) factor below.
+  return rdp_gaussian(logq, 2**.5 * sigma, orders)
+def is_data_independent_always_opt_threshold(num_teachers, threshold, sigma,
+                                             orders):
+  """Tests whether data-ind bound is always optimal for the threshold mechanism.
+  Args:
+    num_teachers: Number of teachers.
+    threshold: The cut-off threshold.
+    sigma: Standard deviation of the Gaussian noise.
+    orders: An array_like list of Renyi orders.
+  Returns:
+    Boolean array of length |orders| (a scalar if orders is a scalar). True if
+    the data-independent bound is always the same as the data-dependent bound.
+  """
+  # Since the data-dependent bound depends only on max(votes), it suffices to
+  # check whether the data-dependent bounds are better than data-independent
+  # bounds in the extreme cases when max(votes) is minimal or maximal.
+  # For both Confident GNMax and Interactive GNMax it holds that
+  #   0 <= max(votes) <= num_teachers.
+  # The upper bound is trivial in both cases.
+  # The lower bound is trivial for Confident GNMax (and a stronger one, based on
+  # the pigeonhole principle, is possible).
+  # For Interactive GNMax (Algorithm 2), the lower bound follows from the
+  # following argument. Since the votes vector is the difference between the
+  # actual teachers' votes and the student's baseline, we need to argue that
+  # max(n_j - M * p_j) >= 0.
+  # The bound holds because sum_j n_j = sum M * p_j = M. Thus,
+  # sum_j (n_j - M * p_j) = 0, and max_j (n_j - M * p_j) >= 0 as needed.
+  logq1 = compute_logpr_answered(threshold, sigma, [0])
+  logq2 = compute_logpr_answered(threshold, sigma, [num_teachers])
+  rdp_dep1 = compute_rdp_threshold(logq1, sigma, orders)
+  rdp_dep2 = compute_rdp_threshold(logq2, sigma, orders)
+  rdp_ind = compute_rdp_data_independent_threshold(sigma, orders)
+  return np.isclose(rdp_dep1, rdp_ind) and np.isclose(rdp_dep2, rdp_ind)
+#############################
+# RDP FOR THE LAPLACE NOISE #
+#############################
+def compute_logq_laplace(counts, lmbd):
+  """Computes an upper bound on log Pr[outcome != argmax] for LNMax.
+  Args:
+    counts: A list of scores.
+    lmbd: The lambda parameter of the Laplace distribution ~exp(-|x| / lambda).
+  Returns:
+    logq: Natural log of the probability that outcome is different from argmax.
+  """
+  # For noisy max, we only get an upper bound via the union bound. See Lemma 4
+  # in https://arxiv.org/abs/1610.05755.
+  #
+  # Pr[ j beats i*] = (2+gap(j,i*))/ 4 exp(gap(j,i*)
+  # proof at http://mathoverflow.net/questions/66763/
+  idx_max = np.argmax(counts)
+  counts_normalized = (counts - counts[idx_max]) / lmbd
+  counts_rest = np.array(
+      [counts_normalized[i] for i in range(len(counts)) if i != idx_max])
+  logq = _logaddexp(np.log(2 - counts_rest) + math.log(.25) + counts_rest)
+  return min(logq, math.log(1 - (1 / len(counts))))
+def rdp_pure_eps(logq, pure_eps, orders):
+  """Computes the RDP value given logq and pure privacy eps.
+  Implementation of https://arxiv.org/abs/1610.05755, Theorem 3.
+  The bound used is the min of three terms. The first term is from
+  https://arxiv.org/pdf/1605.02065.pdf.
+  The second term is based on the fact that when event has probability (1-q) for
+  q close to zero, q can only change by exp(eps), which corresponds to a
+  much smaller multiplicative change in (1-q)
+  The third term comes directly from the privacy guarantee.
+  Args:
+    logq: Natural logarithm of the probability of a non-optimal outcome.
+    pure_eps: eps parameter for DP
+    orders: array_like list of moments to compute.
+  Returns:
+    Array of upper bounds on rdp (a scalar if orders is a scalar).
+  """
+  orders_vec = np.atleast_1d(orders)
+  q = math.exp(logq)
+  log_t = np.full_like(orders_vec, np.inf)
+  if q <= 1 / (math.exp(pure_eps) + 1):
+    logt_one = math.log1p(-q) + (
+        math.log1p(-q) - _log1mexp(pure_eps + logq)) * (
+            orders_vec - 1)
+    logt_two = logq + pure_eps * (orders_vec - 1)
+    log_t = np.logaddexp(logt_one, logt_two)
+  ret = np.minimum(
+      np.minimum(0.5 * pure_eps * pure_eps * orders_vec,
+                 log_t / (orders_vec - 1)), pure_eps)
+  if np.isscalar(orders):
+    return np.asscalar(ret)
+  else:
+    return ret
+def main(argv):
+  del argv  # Unused.
+if __name__ == "__main__":
+  app.run(main)
--- a/research/differential_privacy/pate/core_test.py
+++ b/research/differential_privacy/pate/core_test.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for google3.experimental.brain.privacy.pate.pate."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import sys
+import unittest
+import numpy as np
+import core as pate
+class PateTest(unittest.TestCase):
+  def _test_rdp_gaussian_value_errors(self):
+    # Test for ValueErrors.
+    with self.assertRaises(ValueError):
+      pate.rdp_gaussian(1.0, 1.0, np.array([2, 3, 4]))
+    with self.assertRaises(ValueError):
+      pate.rdp_gaussian(np.log(0.5), -1.0, np.array([2, 3, 4]))
+    with self.assertRaises(ValueError):
+      pate.rdp_gaussian(np.log(0.5), 1.0, np.array([1, 3, 4]))
+  def _test_rdp_gaussian_as_function_of_q(self):
+    # Test for data-independent and data-dependent ranges over q.
+    # The following corresponds to orders 1.1, 2.5, 32, 250
+    # sigmas 1.5, 15, 1500, 15000.
+    # Hand calculated -log(q0)s arranged in a 'sigma major' ordering.
+    neglogq0s = [
+        2.8, 2.6, 427, None, 4.8, 4.0, 4.7, 275, 9.6, 8.8, 6.0, 4, 12, 11.2,
+        8.6, 6.4
+    ]
+    idx_neglogq0s = 0  # To iterate through neglogq0s.
+    orders = [1.1, 2.5, 32, 250]
+    sigmas = [1.5, 15, 1500, 15000]
+    for sigma in sigmas:
+      for order in orders:
+        curr_neglogq0 = neglogq0s[idx_neglogq0s]
+        idx_neglogq0s += 1
+        if curr_neglogq0 is None:  # sigma == 1.5 and order == 250:
+          continue
+        rdp_at_q0 = pate.rdp_gaussian(-curr_neglogq0, sigma, order)
+        # Data-dependent range. (Successively halve the value of q.)
+        logq_dds = (-curr_neglogq0 - np.array(
+            [0, np.log(2), np.log(4), np.log(8)]))
+        # Check that in q_dds, rdp is decreasing.
+        for idx in range(len(logq_dds) - 1):
+          self.assertGreater(
+              pate.rdp_gaussian(logq_dds[idx], sigma, order),
+              pate.rdp_gaussian(logq_dds[idx + 1], sigma, order))
+        # Data-independent range.
+        q_dids = np.exp(-curr_neglogq0) + np.array([0.1, 0.2, 0.3, 0.4])
+        # Check that in q_dids, rdp is constant.
+        for q in q_dids:
+          self.assertEqual(rdp_at_q0, pate.rdp_gaussian(
+              np.log(q), sigma, order))
+  def _test_compute_eps_from_delta_value_error(self):
+    # Test for ValueError.
+    with self.assertRaises(ValueError):
+      pate.compute_eps_from_delta([1.1, 2, 3, 4], [1, 2, 3], 0.001)
+  def _test_compute_eps_from_delta_monotonicity(self):
+    # Test for monotonicity with respect to delta.
+    orders = [1.1, 2.5, 250.0]
+    sigmas = [1e-3, 1.0, 1e5]
+    deltas = [1e-60, 1e-6, 0.1, 0.999]
+    for sigma in sigmas:
+      list_of_eps = []
+      rdps_for_gaussian = np.array(orders) / (2 * sigma**2)
+      for delta in deltas:
+        list_of_eps.append(
+            pate.compute_eps_from_delta(orders, rdps_for_gaussian, delta)[0])
+      # Check that in list_of_eps, epsilons are decreasing (as delta increases).
+      sorted_list_of_eps = list(list_of_eps)
+      sorted_list_of_eps.sort(reverse=True)
+      self.assertEqual(list_of_eps, sorted_list_of_eps)
+  def _test_compute_q0(self):
+    # Stub code to search a logq space and figure out logq0 by eyeballing
+    # results. This code does not run with the tests. Remove underscore to run.
+    sigma = 15
+    order = 250
+    logqs = np.arange(-290, -270, 1)
+    count = 0
+    for logq in logqs:
+      count += 1
+      sys.stdout.write("\t%0.5g: %0.10g" %
+                       (logq, pate.rdp_gaussian(logq, sigma, order)))
+      sys.stdout.flush()
+      if count % 5 == 0:
+        print("")
+  def test_rdp_gaussian(self):
+    self._test_rdp_gaussian_value_errors()
+    self._test_rdp_gaussian_as_function_of_q()
+  def test_compute_eps_from_delta(self):
+    self._test_compute_eps_from_delta_value_error()
+    self._test_compute_eps_from_delta_monotonicity()
+if __name__ == "__main__":
+  unittest.main()
--- a/research/differential_privacy/pate/smooth_sensitivity.py
+++ b/research/differential_privacy/pate/smooth_sensitivity.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions for smooth sensitivity analysis for PATE mechanisms.
+This library implements functionality for doing smooth sensitivity analysis
+for Gaussian Noise Max (GNMax), Threshold with Gaussian noise, and Gaussian
+Noise with Smooth Sensitivity (GNSS) mechanisms.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import math
+from absl import app
+import numpy as np
+import scipy
+import sympy as sp
+import core as pate
+################################
+# SMOOTH SENSITIVITY FOR GNMAX #
+################################
+# Global dictionary for storing cached q0 values keyed by (sigma, order).
+_logq0_cache = {}
+def _compute_logq0(sigma, order):
+  key = (sigma, order)
+  if key in _logq0_cache:
+    return _logq0_cache[key]
+  logq0 = compute_logq0_gnmax(sigma, order)
+  _logq0_cache[key] = logq0  # Update the global variable.
+  return logq0
+def _compute_logq1(sigma, order, num_classes):
+  logq0 = _compute_logq0(sigma, order)  # Most likely already cached.
+  logq1 = math.log(_compute_bl_gnmax(math.exp(logq0), sigma, num_classes))
+  assert logq1 <= logq0
+  return logq1
+def _compute_mu1_mu2_gnmax(sigma, logq):
+  # Computes mu1, mu2 according to Proposition 10.
+  mu2 = sigma * math.sqrt(-logq)
+  mu1 = mu2 + 1
+  return mu1, mu2
+def _compute_data_dep_bound_gnmax(sigma, logq, order):
+  # Applies Theorem 6 in Appendix without checking that logq satisfies necessary
+  # constraints. The pre-conditions must be assured by comparing logq against
+  # logq0 by the caller.
+  variance = sigma**2
+  mu1, mu2 = _compute_mu1_mu2_gnmax(sigma, logq)
+  eps1 = mu1 / variance
+  eps2 = mu2 / variance
+  log1q = np.log1p(-math.exp(logq))  # log1q = log(1-q)
+  log_a = (order - 1) * (
+      log1q - (np.log1p(-math.exp((logq + eps2) * (1 - 1 / mu2)))))
+  log_b = (order - 1) * (eps1 - logq / (mu1 - 1))
+  return np.logaddexp(log1q + log_a, logq + log_b) / (order - 1)
+def _compute_rdp_gnmax(sigma, logq, order):
+  logq0 = _compute_logq0(sigma, order)
+  if logq >= logq0:
+    return pate.rdp_data_independent_gaussian(sigma, order)
+  else:
+    return _compute_data_dep_bound_gnmax(sigma, logq, order)
+def compute_logq0_gnmax(sigma, order):
+  """Computes the point where we start using data-independent bounds.
+  Args:
+    sigma: std of the Gaussian noise
+    order: Renyi order lambda
+  Returns:
+    logq0: the point above which the data-ind bound overtakes data-dependent
+    bound.
+  """
+  def _check_validity_conditions(logq):
+    # Function returns true iff logq is in the range where data-dependent bound
+    # is valid. (Theorem 6 in Appendix.)
+    mu1, mu2 = _compute_mu1_mu2_gnmax(sigma, logq)
+    if mu1 < order:
+      return False
+    eps2 = mu2 / sigma**2
+    # Do computation in the log space. The condition below comes from Lemma 9
+    # from Appendix.
+    return (logq <= (mu2 - 1) * eps2 - mu2 * math.log(mu1 / (mu1 - 1) * mu2 /
+                                                      (mu2 - 1)))
+  def _compare_dep_vs_ind(logq):
+    return (_compute_data_dep_bound_gnmax(sigma, logq, order) -
+            pate.rdp_data_independent_gaussian(sigma, order))
+  # Natural upper bounds on q0.
+  logub = min(-(1 + 1. / sigma)**2, -((order - 1) / sigma)**2, -1 / sigma**2)
+  assert _check_validity_conditions(logub)
+  # If data-dependent bound is already better, we are done already.
+  if _compare_dep_vs_ind(logub) < 0:
+    return logub
+  # Identifying a reasonable lower bound to bracket logq0.
+  loglb = 2 * logub  # logub is negative, and thus loglb < logub.
+  while _compare_dep_vs_ind(loglb) > 0:
+    assert loglb > -10000, "The lower bound on q0 is way too low."
+    loglb *= 1.5
+  logq0, r = scipy.optimize.brentq(
+      _compare_dep_vs_ind, loglb, logub, full_output=True)
+  assert r.converged, "The root finding procedure failed to converge."
+  assert _check_validity_conditions(logq0)  # just in case.
+  return logq0
+def _compute_bl_gnmax(q, sigma, num_classes):
+  return ((num_classes - 1) / 2 * scipy.special.erfc(
+      1 / sigma + scipy.special.erfcinv(2 * q / (num_classes - 1))))
+def _compute_bu_gnmax(q, sigma, num_classes):
+  return min(1, (num_classes - 1) / 2 * scipy.special.erfc(
+      -1 / sigma + scipy.special.erfcinv(2 * q / (num_classes - 1))))
+def _compute_local_sens_gnmax(logq, sigma, num_classes, order):
+  """Implements Algorithm 3 (computes an upper bound on local sensitivity).
+  (See Proposition 13 for proof of correctness.)
+  """
+  logq0 = _compute_logq0(sigma, order)
+  logq1 = _compute_logq1(sigma, order, num_classes)
+  if logq1 <= logq <= logq0:
+    logq = logq1
+  beta = _compute_rdp_gnmax(sigma, logq, order)
+  beta_bu_q = _compute_rdp_gnmax(
+      sigma, math.log(_compute_bu_gnmax(math.exp(logq), sigma, num_classes)),
+      order)
+  beta_bl_q = _compute_rdp_gnmax(
+      sigma, math.log(_compute_bl_gnmax(math.exp(logq), sigma, num_classes)),
+      order)
+  return max(beta_bu_q - beta, beta - beta_bl_q)
+def compute_local_sensitivity_bounds_gnmax(votes, num_teachers, sigma, order):
+  """Computes a list of max-LS-at-distance-d for the GNMax mechanism.
+  A more efficient implementation of Algorithms 4 and 5 working in time
+  O(teachers*classes). A naive implementation is O(teachers^2*classes) or worse.
+  Args:
+    votes: A numpy array of votes.
+    num_teachers: Total number of voting teachers.
+    sigma: Standard deviation of the Guassian noise.
+    order: The Renyi order.
+  Returns:
+    A numpy array of local sensitivities at distances d, 0 <= d <= num_teachers.
+  """
+  num_classes = len(votes)  # Called m in the paper.
+  logq0 = _compute_logq0(sigma, order)
+  logq1 = _compute_logq1(sigma, order, num_classes)
+  logq = pate.compute_logq_gaussian(votes, sigma)
+  plateau = _compute_local_sens_gnmax(logq1, sigma, num_classes, order)
+  res = np.full(num_teachers, plateau)
+  if logq1 <= logq <= logq0:
+    return res
+  # Invariant: votes is sorted in the non-increasing order.
+  votes = sorted(votes, reverse=True)
+  res[0] = _compute_local_sens_gnmax(logq, sigma, num_classes, order)
+  curr_d = 0
+  go_left = logq > logq0  # Otherwise logq < logq1 and we go right.
+  # Iterate while the following is true:
+  #    1. If we are going left, logq is still larger than logq0 and we may still
+  #       increase the gap between votes[0] and votes[1].
+  #    2. If we are going right, logq is still smaller than logq1.
+  while ((go_left and logq > logq0 and votes[1] > 0) or
+         (not go_left and logq < logq1)):
+    curr_d += 1
+    if go_left:  # Try decreasing logq.
+      votes[0] += 1
+      votes[1] -= 1
+      idx = 1
+      # Restore the invariant. (Can be implemented more efficiently by keeping
+      # track of the range of indices equal to votes[1]. Does not seem to matter
+      # for the overall running time.)
+      while idx < len(votes) - 1 and votes[idx] < votes[idx + 1]:
+        votes[idx], votes[idx + 1] = votes[idx + 1], votes[idx]
+        idx += 1
+    else:  # Go right, i.e., try increasing logq.
+      votes[0] -= 1
+      votes[1] += 1  # The invariant holds since otherwise logq >= logq1.
+    logq = pate.compute_logq_gaussian(votes, sigma)
+    res[curr_d] = _compute_local_sens_gnmax(logq, sigma, num_classes, order)
+  return res
+##################################################
+# SMOOTH SENSITIVITY FOR THE THRESHOLD MECHANISM #
+##################################################
+# A global dictionary of RDPs for various threshold values. Indexed by a 4-tuple
+# (num_teachers, threshold, sigma, order).
+_rdp_thresholds = {}
+def _compute_rdp_list_threshold(num_teachers, threshold, sigma, order):
+  key = (num_teachers, threshold, sigma, order)
+  if key in _rdp_thresholds:
+    return _rdp_thresholds[key]
+  res = np.zeros(num_teachers + 1)
+  for v in range(0, num_teachers + 1):
+    logp = scipy.stats.norm.logsf(threshold - v, scale=sigma)
+    res[v] = pate.compute_rdp_threshold(logp, sigma, order)
+  _rdp_thresholds[key] = res
+  return res
+def compute_local_sensitivity_bounds_threshold(counts, num_teachers, threshold,
+                                               sigma, order):
+  """Computes a list of max-LS-at-distance-d for the threshold mechanism."""
+  def _compute_ls(v):
+    ls_step_up, ls_step_down = None, None
+    if v > 0:
+      ls_step_down = abs(rdp_list[v - 1] - rdp_list[v])
+    if v < num_teachers:
+      ls_step_up = abs(rdp_list[v + 1] - rdp_list[v])
+    return max(ls_step_down, ls_step_up)  # Rely on max(x, None) = x.
+  cur_max = int(round(max(counts)))
+  rdp_list = _compute_rdp_list_threshold(num_teachers, threshold, sigma, order)
+  ls = np.zeros(num_teachers)
+  for d in range(max(cur_max, num_teachers - cur_max)):
+    ls_up, ls_down = None, None
+    if cur_max + d <= num_teachers:
+      ls_up = _compute_ls(cur_max + d)
+    if cur_max - d >= 0:
+      ls_down = _compute_ls(cur_max - d)
+    ls[d] = max(ls_up, ls_down)
+  return ls
+#############################################
+# PROCEDURES FOR SMOOTH SENSITIVITY RELEASE #
+#############################################
+# A global dictionary of exponentially decaying arrays. Indexed by beta.
+dict_beta_discount = {}
+def compute_discounted_max(beta, a):
+  n = len(a)
+  if beta not in dict_beta_discount or (len(dict_beta_discount[beta]) < n):
+    dict_beta_discount[beta] = np.exp(-beta * np.arange(n))
+  return max(a * dict_beta_discount[beta][:n])
+def compute_smooth_sensitivity_gnmax(beta, counts, num_teachers, sigma, order):
+  """Computes smooth sensitivity of a single application of GNMax."""
+  ls = compute_local_sensitivity_bounds_gnmax(counts, sigma, order,
+                                              num_teachers)
+  return compute_discounted_max(beta, ls)
+def compute_rdp_of_smooth_sensitivity_gaussian(beta, sigma, order):
+  """Computes the RDP curve for the GNSS mechanism.
+  Implements Theorem 23 (https://arxiv.org/pdf/1802.08908.pdf).
+  """
+  if beta > 0 and not 1 < order < 1 / (2 * beta):
+    raise ValueError("Order outside the (1, 1/(2*beta)) range.")
+  return order * math.exp(2 * beta) / sigma**2 + (
+      -.5 * math.log(1 - 2 * order * beta) + beta * order) / (
+          order - 1)
+def compute_params_for_ss_release(eps, delta):
+  """Computes sigma for additive Gaussian noise scaled by smooth sensitivity.
+  Presently not used. (We proceed via RDP analysis.)
+  Compute beta, sigma for applying Lemma 2.6 (full version of Nissim et al.) via
+  Lemma 2.10.
+  """
+  # Rather than applying Lemma 2.10 directly, which would give suboptimal alpha,
+  # (see http://www.cse.psu.edu/~ads22/pubs/NRS07/NRS07-full-draft-v1.pdf),
+  # we extract a sufficient condition on alpha from its proof.
+  #
+  # Let a = rho_(delta/2)(Z_1). Then solve for alpha such that
+  # 2 alpha a + alpha^2 = eps/2.
+  a = scipy.special.ndtri(1 - delta / 2)
+  alpha = math.sqrt(a**2 + eps / 2) - a
+  beta = eps / (2 * scipy.special.chdtri(1, delta / 2))
+  return alpha, beta
+#######################################################
+# SYMBOLIC-NUMERIC VERIFICATION OF CONDITIONS C5--C6. #
+#######################################################
+def _construct_symbolic_beta(q, sigma, order):
+  mu2 = sigma * sp.sqrt(sp.log(1 / q))
+  mu1 = mu2 + 1
+  eps1 = mu1 / sigma**2
+  eps2 = mu2 / sigma**2
+  a = (1 - q) / (1 - (q * sp.exp(eps2))**(1 - 1 / mu2))
+  b = sp.exp(eps1) / q**(1 / (mu1 - 1))
+  s = (1 - q) * a**(order - 1) + q * b**(order - 1)
+  return (1 / (order - 1)) * sp.log(s)
+def _construct_symbolic_bu(q, sigma, m):
+  return (m - 1) / 2 * sp.erfc(sp.erfcinv(2 * q / (m - 1)) - 1 / sigma)
+def _is_non_decreasing(fn, q, bounds):
+  """Verifies whether the function is non-decreasing within a range.
+  Args:
+    fn: Symbolic function of a single variable.
+    q: The name of f's variable.
+    bounds: Pair of (lower_bound, upper_bound) reals.
+  Returns:
+    True iff the function is non-decreasing in the range.
+  """
+  diff_fn = sp.diff(fn, q)  # Symbolically compute the derivative.
+  diff_fn_lambdified = sp.lambdify(
+      q,
+      diff_fn,
+      modules=[
+          "numpy", {
+              "erfc": scipy.special.erfc,
+              "erfcinv": scipy.special.erfcinv
+          }
+      ])
+  r = scipy.optimize.minimize_scalar(
+      diff_fn_lambdified, bounds=bounds, method="bounded")
+  assert r.success, "Minimizer failed to converge."
+  return r.fun >= 0  # Check whether the derivative is non-negative.
+def check_conditions(sigma, m, order):
+  """Checks conditions C5 and C6 (Section B.4.2 in Appendix)."""
+  q = sp.symbols("q", positive=True, real=True)
+  beta = _construct_symbolic_beta(q, sigma, order)
+  q0 = math.exp(compute_logq0_gnmax(sigma, order))
+  cond5 = _is_non_decreasing(beta, q, (0, q0))
+  if cond5:
+    bl_q0 = _compute_bl_gnmax(q0, sigma, m)
+    bu = _construct_symbolic_bu(q, sigma, m)
+    delta_beta = beta.subs(q, bu) - beta
+    cond6 = _is_non_decreasing(delta_beta, q, (0, bl_q0))
+  else:
+    cond6 = False  # Skip the check, since Condition 5 is false already.
+  return (cond5, cond6)
+def main(argv):
+  del argv  # Unused.
+if __name__ == "__main__":
+  app.run(main)
--- a/research/differential_privacy/pate/smooth_sensitivity_test.py
+++ b/research/differential_privacy/pate/smooth_sensitivity_test.py
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for google3.experimental.brain.privacy.pate.pate_smooth_sensitivity."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import unittest
+import numpy as np
+import smooth_sensitivity as pate_ss
+class PateSmoothSensitivityTest(unittest.TestCase):
+  def test_check_conditions(self):
+    self.assertEqual(pate_ss.check_conditions(20, 10, 25.), (True, False))
+    self.assertEqual(pate_ss.check_conditions(30, 10, 25.), (True, True))
+  def _assert_all_close(self, x, y):
+    """Asserts that two numpy arrays are close."""
+    self.assertEqual(len(x), len(y))
+    self.assertTrue(np.allclose(x, y, rtol=1e-8, atol=0))
+  def test_compute_local_sensitivity_bounds_gnmax(self):
+    counts1 = np.array([10, 0, 0])
+    sigma1 = .5
+    order1 = 1.5
+    answer1 = np.array(
+        [3.13503646e-17, 1.60178280e-08, 5.90681786e-03] + [5.99981308e+00] * 7)
+    # Test for "going right" in the smooth sensitivity computation.
+    out1 = pate_ss.compute_local_sensitivity_bounds_gnmax(
+        counts1, 10, sigma1, order1)
+    self._assert_all_close(out1, answer1)
+    counts2 = np.array([1000, 500, 300, 200, 0])
+    sigma2 = 250.
+    order2 = 10.
+    # Test for "going left" in the smooth sensitivity computation.
+    out2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
+        counts2, 2000, sigma2, order2)
+    answer2 = np.array([0.] * 298 + [2.77693450548e-7, 2.10853979548e-6] +
+                       [2.73113623988e-6] * 1700)
+    self._assert_all_close(out2, answer2)
+  def test_compute_local_sensitivity_bounds_threshold(self):
+    counts1_3 = np.array([20, 10, 0])
+    num_teachers = sum(counts1_3)
+    t1 = 16  # high threshold
+    sigma = 2
+    order = 10
+    out1 = pate_ss.compute_local_sensitivity_bounds_threshold(
+        counts1_3, num_teachers, t1, sigma, order)
+    answer1 = np.array([0] * 3 + [
+        1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02,
+        9.01543247e-02, 1.16054002e-01, 1.42180452e-01, 1.42180452e-01,
+        1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02,
+        9.01543266e-02, 1.16054000e-01, 1.42180452e-01, 1.68302106e-01,
+        1.93127860e-01
+    ] + [0] * 10)
+    self._assert_all_close(out1, answer1)
+    t2 = 2  # low threshold
+    out2 = pate_ss.compute_local_sensitivity_bounds_threshold(
+        counts1_3, num_teachers, t2, sigma, order)
+    answer2 = np.array([
+        1.60212079e-01, 2.07021132e-01, 2.07021132e-01, 1.93127860e-01,
+        1.68302106e-01, 1.42180452e-01, 1.16054002e-01, 9.01543247e-02,
+        6.45775697e-02, 3.94153241e-02, 1.47826870e-02, 1.48454129e-04
+    ] + [0] * 18)
+    self._assert_all_close(out2, answer2)
+    t3 = 50  # very high threshold (larger than the number of teachers).
+    out3 = pate_ss.compute_local_sensitivity_bounds_threshold(
+        counts1_3, num_teachers, t3, sigma, order)
+    answer3 = np.array([
+        1.35750725752e-19, 1.88990500499e-17, 2.05403154065e-15,
+        1.74298153642e-13, 1.15489723995e-11, 5.97584949325e-10,
+        2.41486826748e-08, 7.62150641922e-07, 1.87846248741e-05,
+        0.000360973025976, 0.000360973025976, 2.76377015215e-50,
+        1.00904975276e-53, 2.87254164748e-57, 6.37583360761e-61,
+        1.10331620211e-64, 1.48844393335e-68, 1.56535552444e-72,
+        1.28328011060e-76, 8.20047697109e-81
+    ] + [0] * 10)
+    self._assert_all_close(out3, answer3)
+    # Fractional values.
+    counts4 = np.array([19.5, -5.1, 0])
+    t4 = 10.1
+    out4 = pate_ss.compute_local_sensitivity_bounds_threshold(
+        counts4, num_teachers, t4, sigma, order)
+    answer4 = np.array([
+        0.0620410301, 0.0875807131, 0.113451958, 0.139561671, 0.1657074530,
+        0.1908244840, 0.2070270720, 0.207027072, 0.169718100, 0.0575152142,
+        0.00678695871
+    ] + [0] * 6 + [0.000536304908, 0.0172181073, 0.041909870] + [0] * 10)
+    self._assert_all_close(out4, answer4)
+if __name__ == "__main__":
+  unittest.main()