remove all code related to differential privacy (#6045)

86906935 · Nicolas Papernot · Ilya Mironov · d32d957a · d32d957a · d32d957a
Commit 86906935 authored Jan 14, 2019 by Nicolas Papernot Committed by Ilya Mironov Jan 14, 2019
11 changed files
--- a/research/differential_privacy/pate/ICLR2018/plot_partition.py
+++ b/research/differential_privacy/pate/ICLR2018/plot_partition.py
-"""Produces two plots. One compares aggregators and their analyses. The other
-illustrates sources of privacy loss for Confident-GNMax.
-
-A script in support of the paper "Scalable Private Learning with PATE" by
-Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar,
-Ulfar Erlingsson (https://arxiv.org/abs/1802.08908).
-
-The input is a file containing a numpy array of votes, one query per row, one
-class per column. Ex:
-  43, 1821, ..., 3
-  31, 16, ..., 0
-  ...
-  0, 86, ..., 438
-The output is written to a specified directory and consists of two files.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-import pickle
-import sys
-
-sys.path.append('..')  # Main modules reside in the parent directory.
-
-from absl import app
-from absl import flags
-from collections import namedtuple
-import matplotlib
-
-matplotlib.use('TkAgg')
-import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
-import numpy as np
-import core as pate
-import smooth_sensitivity as pate_ss
-
-plt.style.use('ggplot')
-
-# Command-line flags. counts_file, threshold, sigma1 and sigma2 are marked as
-# required below; the remaining flags fall back to the defaults given here.
-FLAGS = flags.FLAGS
-flags.DEFINE_boolean('cache', False,
-                     'Read results of privacy analysis from cache.')
-flags.DEFINE_string('counts_file', None, 'Counts file.')
-flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.')
-flags.DEFINE_float('threshold', None, 'Threshold for step 1 (selection).')
-flags.DEFINE_float('sigma1', None, 'Sigma for step 1 (selection).')
-flags.DEFINE_float('sigma2', None, 'Sigma for step 2 (argmax).')
-flags.DEFINE_integer('queries', None, 'Number of queries made by the student.')
-flags.DEFINE_float('delta', 1e-8, 'Target delta.')
-
-flags.mark_flag_as_required('counts_file')
-flags.mark_flag_as_required('threshold')
-flags.mark_flag_as_required('sigma1')
-flags.mark_flag_as_required('sigma2')
-
-# Additive components of the privacy cost after a given number of queries: the
-# two mechanism steps, the smooth-sensitivity term and the delta term.
-Partition = namedtuple('Partition', ['step1', 'step2', 'ss', 'delta'])
-
-
-def analyze_gnmax_conf_data_ind(votes, threshold, sigma1, sigma2, delta):
-  """Runs the data-independent privacy analysis over a sequence of queries.
-
-  Args:
-    votes: A matrix of votes, one query per row.
-    threshold: Threshold of the selection step, or None to always answer.
-    sigma1: Noise scale of the selection step, or None to always answer.
-    sigma2: Noise scale of the argmax step.
-    delta: Target delta.
-
-  Returns:
-    A pair of arrays with one entry per query: the cumulative eps and the
-    cumulative expected number of answered queries.
-  """
-  orders = np.logspace(np.log10(1.5), np.log10(500), num=100)
-  num_queries = votes.shape[0]
-  with_selection = threshold is not None and sigma1 is not None
-
-  eps_cum = np.full(num_queries, None, dtype=float)
-  answered = np.zeros(num_queries)
-  rdp_total = np.zeros(len(orders))
-  answered_total = 0
-
-  for idx in range(num_queries):
-    row = votes[idx,]
-    q_step1 = 1.  # Without a selection step every query is answered.
-    if with_selection:
-      q_step1 = np.exp(pate.compute_logpr_answered(threshold, sigma1, row))
-      rdp_total += pate.rdp_data_independent_gaussian(sigma1, orders)
-
-    answered_total += q_step1
-    answered[idx] = answered_total
-
-    # The argmax step is weighted by the probability of answering the query.
-    rdp_total += q_step1 * pate.rdp_data_independent_gaussian(sigma2, orders)
-
-    eps, order_opt = pate.compute_eps_from_delta(orders, rdp_total, delta)
-    eps_cum[idx] = eps
-
-    queries_done = idx + 1
-    if idx > 0 and queries_done % 1000 == 0:
-      print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} '
-            'at order = {:.2f}.'.format(
-                queries_done, answered[idx], eps_cum[idx], order_opt))
-      sys.stdout.flush()
-
-  return eps_cum, answered
-
-
-def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
-  """Runs the data-dependent privacy analysis over a sequence of queries.
-
-  Args:
-    votes: A matrix of votes, one query per row and one class per column. The
-      number of teachers is taken from the sum of the first row.
-    threshold: Threshold of the selection step, or None to always answer.
-    sigma1: Noise scale of the selection step, or None to always answer.
-    sigma2: Noise scale of the argmax step.
-    delta: Target delta.
-
-  Returns:
-    A tuple (eps_partitioned, answered, ss_std_opt, order_opt) of arrays with
-    one entry per query: the Partition of the privacy cost at the optimal
-    order, the cumulative expected number of answered queries, the value of
-    ss * sigma_ss at the optimal order, and the optimal order itself.
-  """
-  # Short list of orders.
-  # orders = np.round(np.logspace(np.log10(20), np.log10(200), num=20))
-
-  # Long list of orders.
-  orders = np.concatenate((np.arange(20, 40, .2),
-                           np.arange(40, 75, .5),
-                            np.logspace(np.log10(75), np.log10(200), num=20)))
-
-  n = votes.shape[0]
-  num_classes = votes.shape[1]
-  num_teachers = int(sum(votes[0,]))
-
-  # Per-order flags. Without a selection step, step 1 is treated as
-  # data-independent at every order.
-  if threshold is not None and sigma1 is not None:
-    is_data_ind_step1 = pate.is_data_independent_always_opt_gaussian(
-        num_teachers, num_classes, sigma1, orders)
-  else:
-    is_data_ind_step1 = [True] * len(orders)
-
-  is_data_ind_step2 = pate.is_data_independent_always_opt_gaussian(
-      num_teachers, num_classes, sigma2, orders)
-
-  eps_partitioned = np.full(n, None, dtype=Partition)
-  order_opt = np.full(n, None, dtype=float)
-  ss_std_opt = np.full(n, None, dtype=float)
-  answered = np.zeros(n)
-
-  rdp_step1_total = np.zeros(len(orders))
-  rdp_step2_total = np.zeros(len(orders))
-
-  # Local-sensitivity bounds accumulated over queries, one row per order.
-  ls_total = np.zeros((len(orders), num_teachers))
-  answered_total = 0
-
-  for i in range(n):
-    v = votes[i,]
-
-    if threshold is not None and sigma1 is not None:
-      logq_step1 = pate.compute_logpr_answered(threshold, sigma1, v)
-      rdp_step1_total += pate.compute_rdp_threshold(logq_step1, sigma1, orders)
-    else:
-      logq_step1 = 0.  # always answer
-
-    pr_answered = np.exp(logq_step1)
-    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
-    # The cost of step 2 is weighted by the probability of answering.
-    rdp_step2_total += pr_answered * pate.rdp_gaussian(logq_step2, sigma2,
-                                                       orders)
-
-    answered_total += pr_answered
-
-    rdp_ss = np.zeros(len(orders))
-    ss_std = np.zeros(len(orders))
-
-    # Only the orders that survived earlier shrinking are visited; rows of
-    # ls_total beyond len(orders) are simply no longer updated.
-    for j, order in enumerate(orders):
-      if not is_data_ind_step1[j]:
-        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(v,
-            num_teachers, threshold, sigma1, order)
-      else:
-        ls_step1 = np.full(num_teachers, 0, dtype=float)
-
-      if not is_data_ind_step2[j]:
-        ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
-            v, num_teachers, sigma2, order)
-      else:
-        ls_step2 = np.full(num_teachers, 0, dtype=float)
-
-      ls_total[j,] += ls_step1 + pr_answered * ls_step2
-
-      beta_ss = .49 / order
-
-      ss = pate_ss.compute_discounted_max(beta_ss, ls_total[j,])
-      sigma_ss = ((order * math.exp(2 * beta_ss)) / ss) ** (1 / 3)
-      rdp_ss[j] = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
-          beta_ss, sigma_ss, order)
-      ss_std[j] = ss * sigma_ss
-
-    rdp_total = rdp_step1_total + rdp_step2_total + rdp_ss
-
-    answered[i] = answered_total
-    _, order_opt[i] = pate.compute_eps_from_delta(orders, rdp_total, delta)
-    order_idx = np.searchsorted(orders, order_opt[i])
-
-    # Since optimal orders are always non-increasing, shrink orders array
-    # and all cumulative arrays to speed up computation.
-    if order_idx < len(orders):
-      orders = orders[:order_idx + 1]
-      rdp_step1_total = rdp_step1_total[:order_idx + 1]
-      rdp_step2_total = rdp_step2_total[:order_idx + 1]
-
-    eps_partitioned[i] = Partition(step1=rdp_step1_total[order_idx],
-                                   step2=rdp_step2_total[order_idx],
-                                   ss=rdp_ss[order_idx],
-                                   delta=-math.log(delta) / (order_opt[i] - 1))
-    ss_std_opt[i] = ss_std[order_idx]
-    # NOTE(review): `% 1 == 0` is always true, so this prints for every query
-    # after the first. The data-independent analysis in this file reports
-    # every 1000 queries; confirm whether the same interval was intended.
-    if i > 0 and (i + 1) % 1 == 0:
-      print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
-            'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
-            'step2 = {:.3f}, ss = {:.3f}'.format(
-          i + 1,
-          answered[i],
-          sum(eps_partitioned[i]),
-          ss_std_opt[i],
-          order_opt[i],
-          eps_partitioned[i].delta,
-          eps_partitioned[i].step1,
-          eps_partitioned[i].step2,
-          eps_partitioned[i].ss))
-      sys.stdout.flush()
-
-  return eps_partitioned, answered, ss_std_opt, order_opt
-
-
-def plot_comparison(figures_dir, simple_ind, conf_ind, simple_dep, conf_dep):
-  """Plots the privacy cost of the GNMax variants under both analyses.
-
-  Args:
-    figures_dir: Directory where 'comparison.pdf' is written.
-    simple_ind: Pair (eps, answered) for Simple GNMax, data-ind analysis.
-    conf_ind: Pair (eps, answered) for Confident GNMax, data-ind analysis.
-    simple_dep: Tuple whose first two entries are (eps_partitioned, answered)
-      for Simple GNMax, data-dep analysis.
-    conf_dep: Same as simple_dep, for Confident GNMax.
-  """
-
-  def resample(grid, eps, answered):
-    # For every grid value, take eps at the first position where `answered`
-    # reaches it; grid values that are never reached stay nan.
-    curve = np.full(len(grid), None, dtype=float)
-    for pos, target in enumerate(grid):
-      hit = np.searchsorted(answered, target)
-      if hit < len(eps):
-        curve[pos] = eps[hit]
-    return curve
-
-  def resample_dep(grid, data_dep):
-    eps_partitioned, answered, _, _ = data_dep
-    totals = [sum(part) for part in eps_partitioned]  # Collapse each partition.
-    return resample(grid, totals, answered)
-
-  xlim = 10000
-  x_axis = range(0, xlim, 10)
-
-  # (values, line style, color, legend label), in drawing order.
-  curves = (
-      (resample(x_axis, *simple_ind), '--', 'r',
-       r'Simple GNMax, data-ind analysis'),
-      (resample(x_axis, *conf_ind), '--', 'b',
-       r'Confident GNMax, data-ind analysis'),
-      (resample_dep(x_axis, simple_dep), '-', 'r',
-       r'Simple GNMax, data-dep analysis'),
-      (resample_dep(x_axis, conf_dep), '-', 'b',
-       r'Confident GNMax, data-dep analysis'),
-  )
-
-  fig, ax = plt.subplots()
-  fig.set_figheight(4.5)
-  fig.set_figwidth(4.7)
-
-  for values, line_style, color, label in curves:
-    ax.plot(x_axis, values, ls=line_style, color=color, lw=3, label=label)
-
-  plt.xticks(np.arange(0, xlim + 1000, 2000))
-  plt.xlim([0, xlim])
-  plt.ylim(bottom=0)
-  plt.legend(fontsize=16)
-  ax.set_xlabel('Number of queries answered', fontsize=16)
-  ax.set_ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
-  ax.tick_params(labelsize=14)
-
-  plot_filename = os.path.join(figures_dir, 'comparison.pdf')
-  print('Saving the graph to ' + plot_filename)
-  fig.savefig(plot_filename, bbox_inches='tight')
-  plt.show()
-
-
-def plot_partition(figures_dir, gnmax_conf, print_order):
-  """Plots an expert version of the privacy-per-answered-query graph.
-
-  The total privacy cost is drawn as stacked bands (delta, step1, step2, ss)
-  with a +/- ss_std_opt band around the total, and saved as 'partition.pdf'.
-
-  Args:
-    figures_dir: A name of the directory where to save the plot.
-    gnmax_conf: A tuple (eps_partitioned, answered, ss_std_opt, order_opt):
-      the allocation of the privacy cost, the cumulative number of queries
-      answered, the width of the band drawn around the total, and the list of
-      optimal orders.
-    print_order: If true, overlays the optimal order on a secondary y-axis.
-  """
-  eps_partitioned, answered, ss_std_opt, order_opt = gnmax_conf
-
-  xlim = 10000
-  x = range(0, int(xlim), 10)
-  lenx = len(x)
-  y0 = np.full(lenx, np.nan, dtype=float)  # delta
-  y1 = np.full(lenx, np.nan, dtype=float)  # delta + step1
-  y2 = np.full(lenx, np.nan, dtype=float)  # delta + step1 + step2
-  y3 = np.full(lenx, np.nan, dtype=float)  # delta + step1 + step2 + ss
-  noise_std = np.full(lenx, np.nan, dtype=float)
-
-  y_right = np.full(lenx, np.nan, dtype=float)
-
-  for i in range(lenx):
-    # First position at which the answered count reaches x[i]; values of x
-    # that are never reached keep their nan and are not drawn.
-    idx = np.searchsorted(answered, x[i])
-    if idx < len(eps_partitioned):
-      y0[i] = eps_partitioned[idx].delta
-      y1[i] = y0[i] + eps_partitioned[idx].step1
-      y2[i] = y1[i] + eps_partitioned[idx].step2
-      y3[i] = y2[i] + eps_partitioned[idx].ss
-
-      noise_std[i] = ss_std_opt[idx]
-      y_right[i] = order_opt[idx]
-
-  fig, ax = plt.subplots()
-  fig.set_figheight(4.5)
-  fig.set_figwidth(4.7)
-  fig.patch.set_alpha(0)
-
-  ax.plot(x, y3, color='b', ls='-', label=r'Total privacy cost', linewidth=1)
-
-  for y in (y0, y1, y2):
-    ax.plot(x, y, color='b', ls='-', label=r'_nolegend_', alpha=.5, linewidth=1)
-
-  ax.fill_between(x, [0] * lenx, y0.tolist(), facecolor='b', alpha=.5)
-  ax.fill_between(x, y0.tolist(), y1.tolist(), facecolor='b', alpha=.4)
-  ax.fill_between(x, y1.tolist(), y2.tolist(), facecolor='b', alpha=.3)
-  ax.fill_between(x, y2.tolist(), y3.tolist(), facecolor='b', alpha=.2)
-
-  ax.fill_between(x, (y3 - noise_std).tolist(), (y3 + noise_std).tolist(),
-                  facecolor='r', alpha=.5)
-
-  plt.xticks(np.arange(0, xlim + 1000, 2000))
-  plt.xlim([0, xlim])
-  ax.set_ylim([0, 3.])
-
-  ax.set_xlabel('Number of queries answered', fontsize=16)
-  ax.set_ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
-
-  if print_order:
-    # The optimal order lives on its own y-axis; no legend is drawn.
-    ax2 = ax.twinx()
-    ax2.plot(
-        x, y_right, 'r', ls='-', label=r'Optimal order', linewidth=5, alpha=.5)
-    ax2.grid(False)
-    ax2.set_ylim([0, 200.])
-
-  ax.tick_params(labelsize=14)
-  plot_filename = os.path.join(figures_dir, 'partition.pdf')
-  print('Saving the graph to ' + plot_filename)
-  fig.savefig(plot_filename, bbox_inches='tight', dpi=800)
-  plt.show()
-
-
-def run_all_analyses(votes, threshold, sigma1, sigma2, delta):
-  """Runs both analyses, each without and with the selection step.
-
-  Returns:
-    A tuple (simple_ind, conf_ind, simple_dep, conf_dep).
-  """
-  results = []
-  for analyze in (analyze_gnmax_conf_data_ind, analyze_gnmax_conf_data_dep):
-    # "Simple" variant first: no threshold and no sigma1.
-    results.append(analyze(votes, None, None, sigma2, delta))
-    results.append(analyze(votes, threshold, sigma1, sigma2, delta))
-  return tuple(results)
-
-
-def run_or_load_all_analyses():
-  """Returns all analyses, reading them from the cache when allowed.
-
-  With --cache set and a cache file present, the results are unpickled from
-  ~/tmp/partition_cached.pkl. Otherwise the votes are loaded from
-  --counts_file, truncated to --queries rows when that flag is given,
-  analyzed, and the results are written to the cache file.
-
-  Returns:
-    The tuple produced by run_all_analyses (or its cached copy).
-
-  Raises:
-    ValueError: If --queries exceeds the number of rows in the counts file.
-  """
-  temp_filename = os.path.expanduser('~/tmp/partition_cached.pkl')
-
-  if FLAGS.cache and os.path.isfile(temp_filename):
-    print('Reading from cache ' + temp_filename)
-    # NOTE: unpickling runs code embedded in the file; only read caches that
-    # were written by this script.
-    with open(temp_filename, 'rb') as f:
-      all_analyses = pickle.load(f)
-  else:
-    # Make sure the cache directory exists before the long-running analysis,
-    # so that a missing ~/tmp cannot discard the results at write time.
-    cache_dir = os.path.dirname(temp_filename)
-    if not os.path.isdir(cache_dir):
-      os.makedirs(cache_dir)
-
-    fin_name = os.path.expanduser(FLAGS.counts_file)
-    print('Reading raw votes from ' + fin_name)
-    sys.stdout.flush()
-
-    votes = np.load(fin_name)
-
-    if FLAGS.queries is not None:
-      if votes.shape[0] < FLAGS.queries:
-        raise ValueError('Expect {} rows, got {} in {}'.format(
-            FLAGS.queries, votes.shape[0], fin_name))
-      # Truncate the votes matrix to the number of queries made.
-      votes = votes[:FLAGS.queries, ]
-
-    all_analyses = run_all_analyses(votes, FLAGS.threshold, FLAGS.sigma1,
-                                    FLAGS.sigma2, FLAGS.delta)
-
-    print('Writing to cache ' + temp_filename)
-    with open(temp_filename, 'wb') as f:
-      pickle.dump(all_analyses, f)
-
-  return all_analyses
-
-
-def main(argv):
-  """Computes (or loads) the analyses and renders both figures."""
-  del argv  # Unused.
-
-  analyses = run_or_load_all_analyses()
-  simple_ind, conf_ind, simple_dep, conf_dep = analyses
-
-  output_dir = os.path.expanduser(FLAGS.figures_dir)
-
-  plot_comparison(output_dir, simple_ind, conf_ind, simple_dep, conf_dep)
-  # The partition plot uses the Confident-GNMax data-dependent analysis.
-  plot_partition(output_dir, conf_dep, True)
-  plt.close('all')
-
-
-if __name__ == '__main__':
-  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/plots_for_slides.py
+++ b/research/differential_privacy/pate/ICLR2018/plots_for_slides.py
-# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Plots graphs for the slide deck.
-
-A script in support of the PATE2 paper. The input is a file containing a numpy
-array of votes, one query per row, one class per column. Ex:
-  43, 1821, ..., 3
-  31, 16, ..., 0
-  ...
-  0, 86, ..., 438
-The output graphs are visualized using the TkAgg backend.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-import sys
-
-sys.path.append('..')  # Main modules reside in the parent directory.
-
-from absl import app
-from absl import flags
-import matplotlib
-
-matplotlib.use('TkAgg')
-import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
-import numpy as np
-import core as pate
-import random
-
-plt.style.use('ggplot')
-
-# Command-line flags; only counts_file is required.
-FLAGS = flags.FLAGS
-flags.DEFINE_string('counts_file', None, 'Counts file.')
-flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.')
-flags.DEFINE_boolean('transparent', False, 'Set background to transparent.')
-
-flags.mark_flag_as_required('counts_file')
-
-
-def setup_plot():
-  """Creates a 4.7 x 4.5 figure, transparent when --transparent is set.
-
-  Returns:
-    The (figure, axes) pair from plt.subplots().
-  """
-  figure, axes = plt.subplots()
-  figure.set_figheight(4.5)
-  figure.set_figwidth(4.7)
-
-  if FLAGS.transparent:
-    figure.patch.set_alpha(0)
-
-  return figure, axes
-
-
-def plot_rdp_curve_per_example(votes, sigmas):
-  """Plots data-dependent and data-independent RDP curves against the order.
-
-  Args:
-    votes: A matrix of votes, one query per row; one data-dependent curve is
-      drawn per (row, sigma) pair.
-    sigmas: Noise scales to plot.
-  """
-  orders = np.linspace(1., 100., endpoint=True, num=1000)
-  orders[0] = 1.001  # Replace the first grid point, which would be 1.
-  _, axes = setup_plot()
-
-  for row_idx in range(votes.shape[0]):
-    row = votes[row_idx,]
-    for sigma in sigmas:
-      logq = pate.compute_logq_gaussian(row, sigma)
-      data_dep = pate.rdp_gaussian(logq, sigma, orders)
-      dep_label = r'Data-dependent bound, $\sigma$={}'.format(int(sigma))
-      axes.plot(orders, data_dep, alpha=1., label=dep_label, linewidth=5)
-
-  for sigma in sigmas:
-    data_ind = pate.rdp_data_independent_gaussian(sigma, orders)
-    ind_label = r'Data-independent bound, $\sigma$={}'.format(int(sigma))
-    axes.plot(orders, data_ind, alpha=.3, label=ind_label, linewidth=10)
-
-  plt.xlim(xmin=1, xmax=100)
-  plt.ylim(ymin=0)
-  plt.xticks([1, 20, 40, 60, 80, 100])
-  plt.yticks([0, .0025, .005, .0075, .01])
-  plt.xlabel(r'Order $\alpha$', fontsize=16)
-  plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
-  axes.tick_params(labelsize=14)
-
-  plt.legend(loc=0, fontsize=13)
-  plt.show()
-
-
-def plot_rdp_of_sigma(v, order):
-  """Plots the data-dependent RDP of one vote vector as a function of sigma.
-
-  Args:
-    v: A vector of votes for a single query.
-    order: The order at which RDP is evaluated.
-  """
-  sigmas = np.linspace(1., 1000., endpoint=True, num=1000)
-  fig, ax = setup_plot()
-
-  y = np.zeros(len(sigmas))
-
-  for i, sigma in enumerate(sigmas):
-    logq = pate.compute_logq_gaussian(v, sigma)
-    y[i] = pate.rdp_gaussian(logq, sigma, order)
-
-  ax.plot(sigmas, y, alpha=.8, linewidth=5)
-
-  plt.xlim(xmin=1, xmax=1000)
-  plt.ylim(ymin=0)
-  # Hide the y tick labels. A boolean is required here: the string 'off' is
-  # truthy and leaves the labels visible on current Matplotlib releases.
-  ax.tick_params(labelleft=False)
-  plt.xlabel(r'Noise $\sigma$', fontsize=16)
-  plt.ylabel(r'RDP at order $\alpha={}$'.format(order), fontsize=16)
-  ax.tick_params(labelsize=14)
-
-  plt.show()
-
-
-def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
-    target_answered):
-  """Accumulates RDP until the expected number of answers hits a target.
-
-  Args:
-    votes: A matrix of votes, one query per row.
-    threshold: Threshold of the selection step.
-    sigma1: Noise scale of the selection step.
-    sigma2: Noise scale of the argmax step.
-    orders: Orders at which RDP is evaluated.
-    target_answered: Expected number of answered queries at which to stop.
-
-  Returns:
-    The cumulative RDP, one value per order, at the point where the expected
-    number of answered queries first reaches target_answered.
-
-  Raises:
-    AssertionError: If the votes run out before the target is reached.
-  """
-  rdp_cum = np.zeros(len(orders))
-  answered = 0
-  for i, v in enumerate(votes):
-    v = sorted(v, reverse=True)
-    q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
-    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
-    rdp = pate.rdp_gaussian(logq_step2, sigma2, orders)
-    rdp_cum += q_step1 * rdp
-
-    answered += q_step1
-    if answered >= target_answered:
-      # i is zero-based, so i + 1 queries have been consumed at this point.
-      print('Processed {} queries to answer {}.'.format(i + 1, target_answered))
-      return rdp_cum
-
-  # Raised explicitly: an `assert False` is stripped under `python -O`, which
-  # would make the function silently return None.
-  raise AssertionError(
-      'Never reached {} answered queries.'.format(target_answered))
-
-
-def plot_rdp_total(votes, sigmas):
-  """Plots the cumulative data-dependent RDP for each sigma against the order.
-
-  The curves are accumulated until 2000 queries are expected to be answered,
-  with a selection threshold of 5000 and selection noise of 1000.
-
-  Args:
-    votes: A matrix of votes, one query per row.
-    sigmas: Noise scales of the argmax step to plot.
-  """
-  orders = np.linspace(1., 100., endpoint=True, num=100)
-  orders[0] = 1.1
-
-  _, axes = setup_plot()
-
-  target_answered = 2000
-
-  # Only the data-dependent bound is drawn; the data-independent bound
-  # (target_answered * pate.rdp_data_independent_gaussian) is not plotted here.
-  for sigma in sigmas:
-    curve = compute_rdp_curve(votes, 5000, 1000, sigma, orders, target_answered)
-    label = r'Data-dependent bound, $\sigma$={}'.format(int(sigma))
-    axes.plot(orders, curve, alpha=.8, label=label, linewidth=5)
-
-  plt.xlim(xmin=1, xmax=100)
-  plt.ylim(ymin=0)
-  plt.xticks([1, 20, 40, 60, 80, 100])
-  plt.yticks([0, .0005, .001, .0015, .002])
-
-  plt.xlabel(r'Order $\alpha$', fontsize=16)
-  plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
-  axes.tick_params(labelsize=14)
-
-  plt.legend(loc=0, fontsize=13)
-  plt.show()
-
-
-def plot_data_ind_curve():
-  """Plots the data-independent RDP bound for sigma = 1 over orders up to 10."""
-  _, axes = setup_plot()
-
-  orders = np.linspace(1., 10., endpoint=True, num=1000)
-  orders[0] = 1.01
-
-  bound = pate.rdp_data_independent_gaussian(1., orders)
-  axes.plot(orders, bound, alpha=.5, color='gray', linewidth=10)
-
-  plt.xlim(xmin=1, xmax=10)
-  plt.ylim(ymin=0)
-  plt.xticks([1, 3, 5, 7, 9])
-  axes.tick_params(labelsize=14)
-  plt.show()
-
-
-def plot_two_data_ind_curves():
-  """Plots the data-independent RDP bounds for sigma = 100 and sigma = 150."""
-  orders = np.linspace(1., 100., endpoint=True, num=1000)
-  orders[0] = 1.001
-
-  _, axes = setup_plot()
-
-  for sigma in [100, 150]:
-    bound = pate.rdp_data_independent_gaussian(sigma, orders)
-    label = r'Data-independent bound, $\sigma$={}'.format(int(sigma))
-    axes.plot(orders, bound, alpha=.3, label=label, linewidth=10)
-
-  plt.xlim(xmin=1, xmax=100)
-  plt.ylim(ymin=0)
-  plt.xticks([1, 20, 40, 60, 80, 100])
-  plt.yticks([0, .0025, .005, .0075, .01])
-  plt.xlabel(r'Order $\alpha$', fontsize=16)
-  plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
-  axes.tick_params(labelsize=14)
-
-  plt.legend(loc=0, fontsize=13)
-  plt.show()
-
-
-def scatter_plot(votes, threshold, sigma1, sigma2, order):
-  """Scatter-plots RDP against the max count for randomly selected queries.
-
-  Each query is kept with the probability of passing the selection step
-  (probability 1 when threshold or sigma1 is None).
-
-  Args:
-    votes: A matrix of votes, one query per row.
-    threshold: Threshold of the selection step, or None.
-    sigma1: Noise scale of the selection step, or None.
-    sigma2: Noise scale of the argmax step.
-    order: The order at which RDP is evaluated.
-  """
-  _, axes = setup_plot()
-  with_selection = threshold is not None and sigma1 is not None
-  max_counts = []
-  rdp_values = []
-  for v in votes:
-    if with_selection:
-      q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
-    else:
-      q_step1 = 1.
-    # One random draw per query, also when the query is always answered.
-    if not random.random() < q_step1:
-      continue
-    logq_step2 = pate.compute_logq_gaussian(v, sigma2)
-    max_counts.append(max(v))
-    rdp_values.append(pate.rdp_gaussian(logq_step2, sigma2, order))
-
-  print('Selected {} queries.'.format(len(max_counts)))
-  axes.set_yscale('log')
-  plt.xlim(xmin=0, xmax=5000)
-  plt.ylim(ymin=1e-300, ymax=1)
-  plt.yticks([1, 1e-100, 1e-200, 1e-300])
-  plt.scatter(max_counts, rdp_values, s=1, alpha=0.5)
-  plt.ylabel(r'RDP at $\alpha={}$'.format(order), fontsize=16)
-  plt.xlabel(r'max count', fontsize=16)
-  axes.tick_params(labelsize=14)
-  plt.show()
-
-
-def main(argv):
-  """Shows the slide-deck plots one after another."""
-  del argv  # Unused.
-  counts_path = os.path.expanduser(FLAGS.counts_file)
-  print('Reading raw votes from ' + counts_path)
-  sys.stdout.flush()
-
-  # The first plots need no input data.
-  plot_data_ind_curve()
-  plot_two_data_ind_curves()
-
-  example = [2550, 2200, 250]  # based on votes[2,]
-  slide_sigmas = (100., 150.)
-  plot_rdp_curve_per_example(np.array([example]), slide_sigmas)
-
-  plot_rdp_of_sigma(np.array(example), 20.)
-
-  # The counts file is only read once the plots that depend on it come up.
-  votes = np.load(counts_path)
-
-  plot_rdp_total(votes[:12000, ], slide_sigmas)
-  first_queries = votes[:6000, ]
-  scatter_plot(first_queries, None, None, 100, 20)  # w/o thresholding
-  scatter_plot(first_queries, 3500, 1500, 100, 20)  # with thresholding
-
-
-if __name__ == '__main__':
-  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/rdp_bucketized.py
+++ b/research/differential_privacy/pate/ICLR2018/rdp_bucketized.py
-# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Illustrates how noisy thresholding check changes distribution of queries.
-
-A script in support of the paper "Scalable Private Learning with PATE" by
-Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar,
-Ulfar Erlingsson (https://arxiv.org/abs/1802.08908).
-
-The input is a file containing a numpy array of votes, one query per row, one
-class per column. Ex:
-  43, 1821, ..., 3
-  31, 16, ..., 0
-  ...
-  0, 86, ..., 438
-The output is one of two graphs depending on the setting of the plot variable.
-The output is written to a pdf file.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-import sys
-
-sys.path.append('..')  # Main modules reside in the parent directory.
-
-from absl import app
-from absl import flags
-import matplotlib
-matplotlib.use('TkAgg')
-import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
-import numpy as np
-import core as pate
-
-plt.style.use('ggplot')
-
-# Command-line flags; only counts_file is required.
-FLAGS = flags.FLAGS
-# The trailing space keeps the two adjacent string literals from fusing into
-# "ofthe" in the help text.
-flags.DEFINE_enum('plot', 'small', ['small', 'large'], 'Selects which of '
-                  'the two plots is produced.')
-flags.DEFINE_string('counts_file', None, 'Counts file.')
-flags.DEFINE_string('plot_file', '', 'Plot file to write.')
-
-flags.mark_flag_as_required('counts_file')
-
-
-def compute_count_per_bin(bin_num, votes):
-  """Tabulates number of examples in each bin.
-
-  An example is binned by the share of its votes that went to the top class.
-  The last bin is closed on the right, so a unanimous example is counted there.
-
-  Args:
-    bin_num: Number of bins.
-    votes: A matrix of votes, where each row contains votes in one instance.
-
-  Returns:
-    Array of counts of length bin_num.
-  """
-  sums = np.sum(votes, axis=1)
-  # Check that all rows contain the same number of votes.
-  assert max(sums) == min(sums)
-
-  s = max(sums)
-
-  counts = np.zeros(bin_num)
-  n = votes.shape[0]
-
-  # range (not xrange) so that the script also runs under Python 3.
-  for i in range(n):
-    v = votes[i,]
-    # A unanimous row has max(v) == s and would index one past the last bin;
-    # clamp it into the last bin instead.
-    bin_idx = min(int(math.floor(max(v) * bin_num / s)), bin_num - 1)
-    assert 0 <= bin_idx < bin_num
-    counts[bin_idx] += 1
-
-  return counts
-
-
-def compute_privacy_cost_per_bins(bin_num, votes, sigma2, order):
-  """Outputs average privacy cost per bin.
-
-  Args:
-    bin_num: Number of bins.
-    votes: A matrix of votes, where each row contains votes in one instance.
-    sigma2: The scale (std) of the Gaussian noise. (Same as sigma_2 in
-            Algorithms 1 and 2.)
-    order: The Renyi order for which privacy cost is computed.
-
-  Returns:
-    Expected eps of RDP (ignoring delta) per example in each bin. Bins that
-    received no examples come out as nan (0 / 0).
-  """
-  n = votes.shape[0]
-
-  bin_counts = np.zeros(bin_num)
-  bin_rdp = np.zeros(bin_num)  # RDP at order=order
-
-  # range (not xrange) so that the script also runs under Python 3.
-  for i in range(n):
-    v = votes[i,]
-    logq = pate.compute_logq_gaussian(v, sigma2)
-    rdp_at_order = pate.rdp_gaussian(logq, sigma2, order)
-
-    # A unanimous row has max(v) == sum(v) and would index one past the last
-    # bin; clamp it into the last bin instead.
-    bin_idx = min(int(math.floor(max(v) * bin_num / sum(v))), bin_num - 1)
-    assert 0 <= bin_idx < bin_num
-    bin_counts[bin_idx] += 1
-    bin_rdp[bin_idx] += rdp_at_order
-    if (i + 1) % 1000 == 0:
-      print('example {}'.format(i + 1))
-      sys.stdout.flush()
-
-  return bin_rdp / bin_counts
-
-
-def compute_expected_answered_per_bin(bin_num, votes, threshold, sigma1):
-  """Computes expected number of answers per bin.
-
-  Args:
-    bin_num: Number of bins.
-    votes: A matrix of votes, where each row contains votes in one instance.
-    threshold: The threshold against which check is performed.
-    sigma1: The std of the Gaussian noise with which check is performed. (Same
-      as sigma_1 in Algorithms 1 and 2.)
-
-  Returns:
-    Expected number of queries answered per bin.
-  """
-  n = votes.shape[0]
-
-  bin_answered = np.zeros(bin_num)
-
-  # range (not xrange) so that the script also runs under Python 3.
-  for i in range(n):
-    v = votes[i,]
-    p = math.exp(pate.compute_logpr_answered(threshold, sigma1, v))
-    # A unanimous row has max(v) == sum(v) and would index one past the last
-    # bin; clamp it into the last bin instead.
-    bin_idx = min(int(math.floor(max(v) * bin_num / sum(v))), bin_num - 1)
-    assert 0 <= bin_idx < bin_num
-    bin_answered[bin_idx] += p
-    if (i + 1) % 1000 == 0:
-      print('example {}'.format(i + 1))
-      sys.stdout.flush()
-
-  return bin_answered
-
-
-def main(argv):
-  """Loads the votes and draws the plot selected by --plot.
-
-  The first 4000 rows of --counts_file are used. The 'small' plot compares
-  plain counts with the expected answers of one thresholding check; the
-  'large' plot adds a second check and the per-bin privacy cost on a
-  secondary log-scale axis. The figure is saved to --plot_file.
-
-  Raises:
-    ValueError: If --plot is neither 'small' nor 'large'.
-  """
-  del argv  # Unused.
-  fin_name = os.path.expanduser(FLAGS.counts_file)
-  print('Reading raw votes from ' + fin_name)
-  sys.stdout.flush()
-
-  votes = np.load(fin_name)
-  votes = votes[:4000,]  # truncate to 4000 samples
-
-  if FLAGS.plot == 'small':
-    bin_num = 5
-    m_check = compute_expected_answered_per_bin(bin_num, votes, 3500, 1500)
-  elif FLAGS.plot == 'large':
-    bin_num = 10
-    m_check = compute_expected_answered_per_bin(bin_num, votes, 3500, 1500)
-    a_check = compute_expected_answered_per_bin(bin_num, votes, 5000, 1500)
-    eps = compute_privacy_cost_per_bins(bin_num, votes, 100, 50)
-  else:
-    raise ValueError('--plot flag must be one of ["small", "large"]')
-
-  counts = compute_count_per_bin(bin_num, votes)
-  # Left edges of the bins on a 0-100 percentage axis.
-  bins = np.linspace(0, 100, num=bin_num, endpoint=False)
-
-  plt.close('all')
-  fig, ax = plt.subplots()
-  if FLAGS.plot == 'small':
-    fig.set_figheight(5)
-    fig.set_figwidth(5)
-    ax.bar(
-        bins,
-        counts,
-        20,
-        color='orangered',
-        linestyle='dotted',
-        linewidth=5,
-        edgecolor='red',
-        fill=False,
-        alpha=.5,
-        align='edge',
-        label='LNMax answers')
-    ax.bar(
-        bins,
-        m_check,
-        20,
-        color='g',
-        alpha=.5,
-        linewidth=0,
-        edgecolor='g',
-        align='edge',
-        label='Confident-GNMax\nanswers')
-  elif FLAGS.plot == 'large':
-    fig.set_figheight(4.7)
-    fig.set_figwidth(7)
-    ax.bar(
-        bins,
-        counts,
-        10,
-        linestyle='dashed',
-        linewidth=5,
-        edgecolor='red',
-        fill=False,
-        alpha=.5,
-        align='edge',
-        label='LNMax answers')
-    ax.bar(
-        bins,
-        m_check,
-        10,
-        color='g',
-        alpha=.5,
-        linewidth=0,
-        edgecolor='g',
-        align='edge',
-        label='Confident-GNMax\nanswers (moderate)')
-    ax.bar(
-        bins,
-        a_check,
-        10,
-        color='b',
-        alpha=.5,
-        align='edge',
-        label='Confident-GNMax\nanswers (aggressive)')
-    ax2 = ax.twinx()
-    bin_centers = [x + 5 for x in bins]
-    ax2.plot(bin_centers, eps, 'ko', alpha=.8)
-    ax2.set_ylim([1e-200, 1.])
-    ax2.set_yscale('log')
-    ax2.grid(False)
-    ax2.set_yticks([1e-3, 1e-50, 1e-100, 1e-150, 1e-200])
-    # Hide the minor ticks on the right. A boolean is required here: the
-    # string 'off' is truthy and keeps them on current Matplotlib releases.
-    plt.tick_params(which='minor', right=False)
-    ax2.set_ylabel(r'Per query privacy cost $\varepsilon$', fontsize=16)
-
-  plt.xlim([0, 100])
-  ax.set_ylim([0, 2500])
-  # ax.set_yscale('log')
-  ax.set_xlabel('Percentage of teachers that agree', fontsize=16)
-  ax.set_ylabel('Number of queries answered', fontsize=16)
-  vals = ax.get_xticks()
-  ax.set_xticklabels([str(int(x)) + '%' for x in vals])
-  ax.tick_params(labelsize=14, bottom=True, top=True, left=True, right=True)
-  ax.legend(loc=2, prop={'size': 16})
-
-  # simple: 'figures/noisy_thresholding_check_perf.pdf')
-  # detailed: 'figures/noisy_thresholding_check_perf_details.pdf'
-
-  print('Saving the graph to ' + FLAGS.plot_file)
-  plt.savefig(os.path.expanduser(FLAGS.plot_file), bbox_inches='tight')
-  plt.show()
-
-
-if __name__ == '__main__':
-  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/rdp_cumulative.py
+++ b/research/differential_privacy/pate/ICLR2018/rdp_cumulative.py
-# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Plots three graphs illustrating cost of privacy per answered query.
-
-A script in support of the paper "Scalable Private Learning with PATE" by
-Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar,
-Ulfar Erlingsson (https://arxiv.org/abs/1802.08908).
-
-The input is a file containing a numpy array of votes, one query per row, one
-class per column. Ex:
-  43, 1821, ..., 3
-  31, 16, ..., 0
-  ...
-  0, 86, ..., 438
-The output is written to a specified directory and consists of three pdf files.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-import pickle
-import sys
-
-sys.path.append('..')  # Main modules reside in the parent directory.
-
-from absl import app
-from absl import flags
-import matplotlib
-
-matplotlib.use('TkAgg')
-import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
-import numpy as np
-import core as pate
-
-plt.style.use('ggplot')
-
-FLAGS = flags.FLAGS
-flags.DEFINE_boolean('cache', False,
-                     'Read results of privacy analysis from cache.')
-flags.DEFINE_string('counts_file', None, 'Counts file.')
-flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.')
-
-flags.mark_flag_as_required('counts_file')
-
def run_analysis(votes, mechanism, noise_scale, params):
  """Computes data-dependent privacy.

  The per-query RDP cost is accumulated over the rows of `votes`. After every
  query the running total is handed to pate.compute_eps_from_delta (with delta
  fixed at 1e-8) to obtain the cumulative epsilon and the order it is
  reported at.

  Args:
    votes: A matrix of votes, where each row contains votes in one instance.
    mechanism: A name of the mechanism ('lnmax', 'gnmax', or 'gnmax_conf')
    noise_scale: A mechanism privacy parameter.
    params: Other privacy parameters. Only read for 'gnmax_conf', which looks
      up the keys 't' and 'sigma1'.

  Returns:
    A tuple (eps_total, partition, answered, order_opt) with one entry per
    query: the cumulative privacy cost epsilon (array), how the privacy budget
    is split (list of lists of fractions), the running sum of the probability
    that a query is answered (array), and the optimal order (array).

  Raises:
    ValueError: If `mechanism` is not one of the three supported names.
  """
  # Long list of orders.
  orders = np.concatenate((np.arange(2, 100 + 1, .5),
                           np.logspace(np.log10(100), np.log10(500), num=100)))
  delta = 1e-8

  n = votes.shape[0]
  eps_total = np.zeros(n)
  partition = [None] * n
  order_opt = np.full(n, np.nan, dtype=float)
  answered = np.zeros(n, dtype=float)

  rdp_cum = np.zeros(len(orders))
  rdp_sqrd_cum = np.zeros(len(orders))
  rdp_select_cum = np.zeros(len(orders))
  answered_sum = 0

  def compute_partition(order, eps):
    """Splits eps into the fractions contributed by each source of cost."""
    idx = np.searchsorted(orders, order)
    delta_cost = -math.log(delta) / (order - 1)
    if mechanism == 'gnmax_conf':
      parts = (rdp_select_cum[idx], rdp_cum[idx] - rdp_select_cum[idx],
               delta_cost)
    else:
      parts = (rdp_cum[idx], delta_cost)
    return [x / eps for x in parts]  # Ensures that sum(x) == 1

  for i, v in enumerate(votes):
    if mechanism == 'lnmax':
      logq_lnmax = pate.compute_logq_laplace(v, noise_scale)
      rdp_query = pate.rdp_pure_eps(logq_lnmax, 2. / noise_scale, orders)
      rdp_sqrd = rdp_query ** 2
      pr_answered = 1
    elif mechanism == 'gnmax':
      logq_gmax = pate.compute_logq_gaussian(v, noise_scale)
      rdp_query = pate.rdp_gaussian(logq_gmax, noise_scale, orders)
      rdp_sqrd = rdp_query ** 2
      pr_answered = 1
    elif mechanism == 'gnmax_conf':
      logq_step1 = pate.compute_logpr_answered(params['t'], params['sigma1'], v)
      logq_step2 = pate.compute_logq_gaussian(v, noise_scale)
      q_step1 = np.exp(logq_step1)
      logq_step1_min = min(logq_step1, math.log1p(-q_step1))
      rdp_gnmax_step1 = pate.rdp_gaussian(logq_step1_min,
                                          2 ** .5 * params['sigma1'], orders)
      rdp_gnmax_step2 = pate.rdp_gaussian(logq_step2, noise_scale, orders)
      rdp_query = rdp_gnmax_step1 + q_step1 * rdp_gnmax_step2
      # The expression below evaluates
      #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
      rdp_sqrd = (
          rdp_gnmax_step1 ** 2 + 2 * rdp_gnmax_step1 * q_step1 * rdp_gnmax_step2
          + q_step1 * rdp_gnmax_step2 ** 2)
      rdp_select_cum += rdp_gnmax_step1
      pr_answered = q_step1
    else:
      raise ValueError(
          'Mechanism must be one of ["lnmax", "gnmax", "gnmax_conf"]')

    rdp_cum += rdp_query
    rdp_sqrd_cum += rdp_sqrd
    answered_sum += pr_answered

    answered[i] = answered_sum
    eps_total[i], order_opt[i] = pate.compute_eps_from_delta(
        orders, rdp_cum, delta)
    partition[i] = compute_partition(order_opt[i], eps_total[i])

    num_queries = i + 1  # Number of per-query costs accumulated so far.
    if num_queries % 1000 == 0:
      # Both moments are averaged over all num_queries accumulated samples.
      rdp_var = rdp_sqrd_cum / num_queries - (
          rdp_cum / num_queries) ** 2  # Ignore Bessel's correction.
      order_opt_idx = np.searchsorted(orders, order_opt[i])
      eps_std = (num_queries * rdp_var[order_opt_idx]) ** .5  # Std of the sum.
      print(
          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
              num_queries, answered_sum, eps_total[i], eps_std, order_opt[i],
              -math.log(delta) / (order_opt[i] - 1)))
      sys.stdout.flush()

  return eps_total, partition, answered, order_opt
-
-
def print_plot_small(figures_dir, eps_lap, eps_gnmax, answered_gnmax):
  """Plots a graph of LNMax vs GNMax.

  The x axis runs over 0, 10, ..., 5990. LNMax is read at the query index
  itself; Confident-GNMax is read at the first index where `answered_gnmax`
  reaches that value, and stays NaN where no such index exists.

  Args:
    figures_dir: A name of the directory where to save the plot.
    eps_lap: The cumulative privacy costs of the Laplace mechanism.
    eps_gnmax: The cumulative privacy costs of the Gaussian mechanism
    answered_gnmax: The cumulative count of queries answered.
  """
  xlim = 6000
  x_axis = range(0, int(xlim), 10)
  num_points = len(x_axis)
  y_lap = np.zeros(num_points, dtype=float)
  y_gnmax = np.full(num_points, np.nan, dtype=float)

  for pos, queries in enumerate(x_axis):
    y_lap[pos] = eps_lap[queries]
    hit = np.searchsorted(answered_gnmax, queries)
    if hit < len(eps_gnmax):
      y_gnmax[pos] = eps_gnmax[hit]

  fig, ax = plt.subplots()
  fig.set_figheight(4.5)
  fig.set_figwidth(4.7)
  ax.plot(x_axis, y_lap,
          color='r', ls='--', label='LNMax', alpha=.5, linewidth=5)
  ax.plot(x_axis, y_gnmax,
          color='g', ls='-', label='Confident-GNMax', alpha=.5, linewidth=5)
  plt.xticks(np.arange(0, 7000, 1000))
  plt.xlim([0, 6000])
  plt.ylim([0, 6.])
  plt.xlabel('Number of queries answered', fontsize=16)
  plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
  plt.legend(loc=2, fontsize=13)  # loc=2 -- upper left
  ax.tick_params(labelsize=14)

  fout_name = os.path.join(figures_dir, 'lnmax_vs_gnmax.pdf')
  print('Saving the graph to ' + fout_name)
  fig.savefig(fout_name, bbox_inches='tight')
  plt.show()
-
-
def print_plot_large(figures_dir, eps_lap, eps_gnmax1, answered_gnmax1,
    eps_gnmax2, partition_gnmax2, answered_gnmax2):
  """Plots a graph of LNMax vs GNMax with two parameters.

  For the second parameter set the area under the curve is shaded in two
  bands: the lower (hatched) band is the share of epsilon attributed to
  step 1, rescaled by the step 1 + step 2 fractions of the partition.

  Args:
    figures_dir: A name of the  directory where to save the plot.
    eps_lap: The cumulative privacy costs of the Laplace mechanism.
    eps_gnmax1: The cumulative privacy costs of the Gaussian mechanism (set 1).
    answered_gnmax1: The cumulative count of queries answered (set 1).
    eps_gnmax2: The cumulative privacy costs of the Gaussian mechanism (set 2).
    partition_gnmax2: Allocation of eps for set 2.
    answered_gnmax2: The cumulative count of queries answered (set 2).
  """
  xlim = 6000
  x_axis = range(0, int(xlim), 10)
  lenx = len(x_axis)
  y_lap = np.zeros(lenx)
  y_gnmax1 = np.full(lenx, np.nan, dtype=float)
  y_gnmax2 = np.full(lenx, np.nan, dtype=float)
  y1_gnmax2 = np.full(lenx, np.nan, dtype=float)

  for pos, queries in enumerate(x_axis):
    y_lap[pos] = eps_lap[queries]

    hit1 = np.searchsorted(answered_gnmax1, queries)
    if hit1 < len(eps_gnmax1):
      y_gnmax1[pos] = eps_gnmax1[hit1]

    hit2 = np.searchsorted(answered_gnmax2, queries)
    if hit2 < len(eps_gnmax2):
      y_gnmax2[pos] = eps_gnmax2[hit2]
      fraction_step1, fraction_step2, _ = partition_gnmax2[hit2]
      y1_gnmax2[pos] = eps_gnmax2[hit2] * fraction_step1 / (
          fraction_step1 + fraction_step2)

  fig, ax = plt.subplots()
  fig.set_figheight(4.5)
  fig.set_figwidth(4.7)
  ax.plot(x_axis, y_lap,
          color='r', ls='dashed', label='LNMax', alpha=.5, linewidth=5)
  ax.plot(x_axis, y_gnmax1,
          color='g', ls='-', label='Confident-GNMax (moderate)', alpha=.5,
          linewidth=5)
  ax.plot(x_axis, y_gnmax2,
          color='b', ls='-', label='Confident-GNMax (aggressive)', alpha=.5,
          linewidth=5)
  # Lower band: cost attributed to step 1.
  ax.fill_between(x_axis, [0] * lenx, y1_gnmax2.tolist(),
                  facecolor='b', alpha=.3, hatch='\\')
  ax.plot(x_axis, y1_gnmax2,
          color='b', ls='-', label='_nolegend_', alpha=.5, linewidth=1)
  # Upper band: the remainder up to the total cost.
  ax.fill_between(
      x_axis, y1_gnmax2.tolist(), y_gnmax2.tolist(), facecolor='b', alpha=.3)
  plt.xticks(np.arange(0, 7000, 1000))
  plt.xlim([0, xlim])
  plt.ylim([0, 1.])
  plt.xlabel('Number of queries answered', fontsize=16)
  plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
  plt.legend(loc=2, fontsize=13)  # loc=2 -- upper left
  ax.tick_params(labelsize=14)

  fout_name = os.path.join(figures_dir, 'lnmax_vs_2xgnmax_large.pdf')
  print('Saving the graph to ' + fout_name)
  fig.savefig(fout_name, bbox_inches='tight')
  plt.show()
-
-
def run_all_analyses(votes, lambda_laplace, gnmax_parameters, sigma2):
  """Sequentially runs all analyses.

  Runs the Laplace analysis once, then one Confident-GNMax analysis per entry
  of `gnmax_parameters`, printing a banner before each run.

  Args:
    votes: A matrix of votes, where each row contains votes in one instance.
    lambda_laplace: The scale of the Laplace noise (lambda).
    gnmax_parameters: A list of parameters for GNMax.
    sigma2: Shared parameter for the GNMax mechanisms.

  Returns:
    A tuple (eps_lap, eps_gnmax, partition_gmax, answered, order_opt). The
    first item is the Laplace result; the other four are lists holding one
    run_analysis output per entry of `gnmax_parameters`.
  """
  print('=== Laplace Mechanism ===')
  eps_lap, _, _, _ = run_analysis(votes, 'lnmax', lambda_laplace, None)
  print()

  eps_gnmax = []
  partition_gmax = []
  answered = []
  order_opt = []
  for p in gnmax_parameters:
    print('=== Gaussian Mechanism (confident) {}: ==='.format(p))
    eps, split, num_answered, order = run_analysis(
        votes, 'gnmax_conf', sigma2, p)
    eps_gnmax.append(eps)
    partition_gmax.append(split)
    answered.append(num_answered)
    order_opt.append(order)
    print()

  return eps_lap, eps_gnmax, partition_gmax, answered, order_opt
-
-
def main(argv):
  """Runs (or reloads) the privacy analyses and draws both plots.

  Results are read from the pickle cache when --cache is set and the cache
  file exists. Otherwise they are computed from --counts_file and the cache
  file is (re)written.
  """
  del argv  # Unused.
  lambda_laplace = 50.  # corresponds to eps = 1. / lambda_laplace

  # Parameters of the GNMax
  gnmax_parameters = (
      {'t': 1000, 'sigma1': 500},
      {'t': 3500, 'sigma1': 1500},
      {'t': 5000, 'sigma1': 1500},
  )
  sigma2 = 100  # GNMax parameters differ only in Step 1 (selection).
  # NOTE(review): the cache sits at a fixed path under /tmp and is read back
  # with pickle.load; only pass --cache where that file can be trusted.
  ftemp_name = '/tmp/precomputed.pkl'

  figures_dir = os.path.expanduser(FLAGS.figures_dir)

  if FLAGS.cache and os.path.isfile(ftemp_name):
    print('Reading from cache ' + ftemp_name)
    with open(ftemp_name, 'rb') as f:
      results = pickle.load(f)
  else:
    fin_name = os.path.expanduser(FLAGS.counts_file)
    print('Reading raw votes from ' + fin_name)
    sys.stdout.flush()

    votes = np.load(fin_name)
    results = run_all_analyses(votes, lambda_laplace, gnmax_parameters, sigma2)

    print('Writing to cache ' + ftemp_name)
    with open(ftemp_name, 'wb') as f:
      pickle.dump(results, f)

  eps_lap, eps_gnmax, partition_gmax, answered_gnmax, _ = results

  print_plot_small(figures_dir, eps_lap, eps_gnmax[0], answered_gnmax[0])
  print_plot_large(figures_dir, eps_lap, eps_gnmax[1], answered_gnmax[1],
                   eps_gnmax[2], partition_gmax[2], answered_gnmax[2])
  plt.close('all')
-
-
-if __name__ == '__main__':
-  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/smooth_sensitivity_table.py
+++ b/research/differential_privacy/pate/ICLR2018/smooth_sensitivity_table.py
-# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Performs privacy analysis of GNMax with smooth sensitivity.
-
-A script in support of the paper "Scalable Private Learning with PATE" by
-Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar,
-Ulfar Erlingsson (https://arxiv.org/abs/1802.08908).
-
-Several flavors of the GNMax algorithm can be analyzed.
-  - Plain GNMax (argmax w/ Gaussian noise) is assumed when arguments threshold
-    and sigma2 are missing.
-  - Confident GNMax (thresholding + argmax w/ Gaussian noise) is used when
-    threshold, sigma1, and sigma2 are given.
-  - Interactive GNMax (two- or multi-round) is triggered by specifying
-    baseline_file, which provides baseline values for votes selection in Step 1.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-import sys
-
-sys.path.append('..')  # Main modules reside in the parent directory.
-
-from absl import app
-from absl import flags
-import numpy as np
-import core as pate
-import smooth_sensitivity as pate_ss
-
-FLAGS = flags.FLAGS
-
-flags.DEFINE_string('counts_file', None, 'Counts file.')
-flags.DEFINE_string('baseline_file', None, 'File with baseline scores.')
-flags.DEFINE_boolean('data_independent', False,
-                     'Force data-independent bounds.')
-flags.DEFINE_float('threshold', None, 'Threshold for step 1 (selection).')
-flags.DEFINE_float('sigma1', None, 'Sigma for step 1 (selection).')
-flags.DEFINE_float('sigma2', None, 'Sigma for step 2 (argmax).')
-flags.DEFINE_integer('queries', None, 'Number of queries made by the student.')
-flags.DEFINE_float('delta', 1e-8, 'Target delta.')
-flags.DEFINE_float(
-    'order', None,
-    'Fixes a Renyi DP order (if unspecified, finds an optimal order from a '
-    'hardcoded list).')
-flags.DEFINE_integer(
-    'teachers', None,
-    'Number of teachers (if unspecified, derived from the counts file).')
-
-flags.mark_flag_as_required('counts_file')
-flags.mark_flag_as_required('sigma2')
-
-
def _check_conditions(sigma, num_classes, orders):
  """Symbolic-numeric verification of conditions C5 and C6.

  The conditions on the beta function are verified by constructing the beta
  function symbolically, and then checking that its derivative (computed
  symbolically) is non-negative within the interval of conjectured monotonicity.
  The last check is performed numerically.

  Every order is checked; for each order at most one failing condition is
  reported (C5 takes precedence over C6).

  Returns:
    A truthy value iff both conditions hold for every order in `orders`.
  """
  print('Checking conditions C5 and C6 for all orders.')
  sys.stdout.flush()

  all_hold = True
  for order in orders:
    c5_ok, c6_ok = pate_ss.check_conditions(sigma, num_classes, order)
    all_hold = all_hold & (c5_ok and c6_ok)
    if not c5_ok:
      print('Condition C5 does not hold for order =', order)
      continue
    if not c6_ok:
      print('Condition C6 does not hold for order =', order)

  if all_hold:
    print('Conditions C5-C6 hold for all orders.')
  sys.stdout.flush()
  return all_hold
-
-
def _compute_rdp(votes, baseline, threshold, sigma1, sigma2, delta, orders,
                 data_ind):
  """Computes the (data-dependent) RDP curve for Confident GNMax.

  Accumulates the expected per-query RDP cost over all rows of `votes` and
  prints a progress line every 1000 queries and after the last one.

  Args:
    votes: A matrix of votes, one query per row.
    baseline: A matrix indexed like `votes`; row i is subtracted from the
      votes of query i before thresholding. Only read if `threshold` is set.
    threshold: Threshold for step 1, or None to skip thresholding (every
      query then proceeds to step 2 at no step 1 cost).
    sigma1: Sigma for step 1 (selection).
    sigma2: Sigma for step 2 (argmax).
    delta: Target delta passed to pate.compute_eps_from_delta.
    orders: An array of RDP orders.
    data_ind: If true, the data-independent bounds are used for both steps.

  Returns:
    The order reported by pate.compute_eps_from_delta for the RDP curve
    accumulated over all queries (for an empty `votes`, the all-zero curve).
  """
  rdp_cum = np.zeros(len(orders))
  rdp_sqrd_cum = np.zeros(len(orders))
  answered = 0
  num_votes = votes.shape[0]

  for i, v in enumerate(votes):
    if threshold is None:
      logq_step1 = 0  # No thresholding, always proceed to step 2.
      rdp_step1 = np.zeros(len(orders))
    else:
      logq_step1 = pate.compute_logpr_answered(threshold, sigma1,
                                               v - baseline[i,])
      if data_ind:
        rdp_step1 = pate.compute_rdp_data_independent_threshold(sigma1, orders)
      else:
        rdp_step1 = pate.compute_rdp_threshold(logq_step1, sigma1, orders)

    if data_ind:
      rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders)
    else:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      rdp_step2 = pate.rdp_gaussian(logq_step2, sigma2, orders)

    q_step1 = np.exp(logq_step1)
    rdp = rdp_step1 + rdp_step2 * q_step1
    # The expression below evaluates
    #     E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2]
    rdp_sqrd = (
        rdp_step1**2 + 2 * rdp_step1 * q_step1 * rdp_step2 +
        q_step1 * rdp_step2**2)
    rdp_sqrd_cum += rdp_sqrd

    rdp_cum += rdp
    answered += q_step1

    num_queries = i + 1  # Number of per-query costs accumulated so far.
    if num_queries % 1000 == 0 or num_queries == num_votes:
      # Average over num_queries (not i), which is the actual sample count
      # and is never zero, so a single-row input no longer divides by zero.
      rdp_var = rdp_sqrd_cum / num_queries - (
          rdp_cum / num_queries)**2  # Ignore Bessel's correction.
      eps_total, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)
      order_opt_idx = np.searchsorted(orders, order_opt)
      eps_std = (num_queries * rdp_var[order_opt_idx])**.5  # Std of the sum.
      print(
          'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) '
          'at order = {:.2f} (contribution from delta = {:.3f})'.format(
              num_queries, answered, eps_total, eps_std, order_opt,
              -math.log(delta) / (order_opt - 1)))
      sys.stdout.flush()

  # Only the order for the complete curve is returned, so it is computed once
  # after the loop instead of after every query.
  _, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta)

  return order_opt
-
-
def _find_optimal_smooth_sensitivity_parameters(
    votes, baseline, num_teachers, threshold, sigma1, sigma2, delta, ind_step1,
    ind_step2, order):
  """Optimizes smooth sensitivity parameters by minimizing a cost function.

  The cost function is
        exact_eps + cost of GNSS + two stds of noise,
  which captures that upper bound of the confidence interval of the sanitized
  privacy budget.

  Since optimization is done with full view of sensitive data, the results
  cannot be released.

  The search over beta depends only on the totals accumulated so far and its
  result is only read when progress is reported, so it runs on the reporting
  iterations (every 100th query and the last one) rather than after every
  query.

  Args:
    votes: A matrix of votes, one query per row.
    baseline: A matrix indexed like `votes`; row i is subtracted from the
      votes of query i before thresholding. Only read if `threshold` is set.
    num_teachers: Number of teachers.
    threshold: Threshold for step 1, or None to skip thresholding.
    sigma1: Sigma for step 1 (selection).
    sigma2: Sigma for step 2 (argmax).
    delta: Target delta.
    ind_step1: If true, the data-independent bound is applied to step 1.
    ind_step2: If true, the data-independent bound is applied to step 2.
    order: A single RDP order.

  Returns:
    A tuple (beta_opt, ss_opt, sigma_ss_opt) computed after the last query.
  """
  rdp_cum = 0
  answered_cum = 0
  ls_cum = 0
  num_votes = votes.shape[0]

  # Define a plausible range for the beta values.
  betas = np.arange(.3 / order, .495 / order, .01 / order)
  cost_delta = math.log(1 / delta) / (order - 1)

  for i, v in enumerate(votes):
    if threshold is None:
      log_pr_answered = 0
      rdp1 = 0
      ls_step1 = np.zeros(num_teachers)
    else:
      log_pr_answered = pate.compute_logpr_answered(threshold, sigma1,
                                                    v - baseline[i,])
      if ind_step1:  # apply data-independent bound for step 1 (thresholding).
        rdp1 = pate.compute_rdp_data_independent_threshold(sigma1, order)
        ls_step1 = np.zeros(num_teachers)
      else:
        rdp1 = pate.compute_rdp_threshold(log_pr_answered, sigma1, order)
        ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(
            v - baseline[i,], num_teachers, threshold, sigma1, order)

    pr_answered = math.exp(log_pr_answered)
    answered_cum += pr_answered

    if ind_step2:  # apply data-independent bound for step 2 (GNMax).
      rdp2 = pate.rdp_data_independent_gaussian(sigma2, order)
      ls_step2 = np.zeros(num_teachers)
    else:
      logq_step2 = pate.compute_logq_gaussian(v, sigma2)
      rdp2 = pate.rdp_gaussian(logq_step2, sigma2, order)
      # Compute smooth sensitivity.
      ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
          v, num_teachers, sigma2, order)

    rdp_cum += rdp1 + pr_answered * rdp2
    ls_cum += ls_step1 + pr_answered * ls_step2  # Expected local sensitivity.

    # The optimum is only consumed by the progress report and by the final
    # return value, and the last iteration always reports.
    if (i + 1) % 100 != 0 and i != num_votes - 1:
      continue

    if ind_step1 and ind_step2:
      # Data-independent bounds.
      cost_opt, beta_opt, ss_opt, sigma_ss_opt = None, 0., 0., np.inf
    else:
      # Data-dependent bounds.
      cost_opt, beta_opt, ss_opt, sigma_ss_opt = np.inf, None, None, None

      for beta in betas:
        ss = pate_ss.compute_discounted_max(beta, ls_cum)

        # Solution to the minimization problem:
        #   min_sigma {order * exp(2 * beta)/ sigma^2 + 2 * ss * sigma}
        sigma_ss = ((order * math.exp(2 * beta)) / ss)**(1 / 3)
        cost_ss = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
            beta, sigma_ss, order)

        # Cost captures exact_eps + cost of releasing SS + two stds of noise.
        cost = rdp_cum + cost_ss + 2 * ss * sigma_ss
        if cost < cost_opt:
          cost_opt, beta_opt, ss_opt, sigma_ss_opt = cost, beta, ss, sigma_ss

    eps_before_ss = rdp_cum + cost_delta
    eps_with_ss = (
        eps_before_ss + pate_ss.compute_rdp_of_smooth_sensitivity_gaussian(
            beta_opt, sigma_ss_opt, order))
    print('{}: E[answered queries] = {:.1f}, RDP at {} goes from {:.3f} to '
          '{:.3f} +/- {:.3f} (ss = {:.4}, beta = {:.4f}, sigma_ss = {:.3f})'.
          format(i + 1, answered_cum, order, eps_before_ss, eps_with_ss,
                 ss_opt * sigma_ss_opt, ss_opt, beta_opt, sigma_ss_opt))
    sys.stdout.flush()

  # Return optimal parameters for the last iteration.
  return beta_opt, ss_opt, sigma_ss_opt
-
-
-####################
-# HELPER FUNCTIONS #
-####################
-
-
def _load_votes(counts_file, baseline_file, queries):
  """Loads the votes matrix and a matching baseline matrix.

  Args:
    counts_file: Path to a numpy file with the votes ('~' is expanded).
    baseline_file: Path to a numpy file with baseline values, or None to use
      an all-zero baseline of the same shape as the votes.
    queries: Number of leading rows to keep, or None to keep all rows.

  Returns:
    A tuple (votes, baseline), both truncated to `queries` rows if given.

  Raises:
    ValueError: If the two files differ in shape, or if the votes matrix has
      fewer than `queries` rows.
  """
  votes_path = os.path.expanduser(counts_file)
  print('Reading raw votes from ' + votes_path)
  sys.stdout.flush()

  votes = np.load(votes_path)
  print('Shape of the votes matrix = {}'.format(votes.shape))

  if baseline_file is None:
    baseline = np.zeros_like(votes)
  else:
    baseline_path = os.path.expanduser(baseline_file)
    print('Reading baseline values from ' + baseline_path)
    sys.stdout.flush()
    baseline = np.load(baseline_path)
    if votes.shape != baseline.shape:
      raise ValueError(
          'Counts file and baseline file must have the same shape. Got {} and '
          '{} instead.'.format(votes.shape, baseline.shape))

  if queries is None:
    print('Process all {} input rows. (Use --queries flag to truncate.)'.format(
        votes.shape[0]))
    return votes, baseline

  if votes.shape[0] < queries:
    raise ValueError('Expect {} rows, got {} in {}'.format(
        queries, votes.shape[0], counts_file))
  # Truncate the votes matrix to the number of queries made.
  return votes[:queries,], baseline[:queries,]
-
-
def _count_teachers(votes):
  """Derives the number of teachers from the row sums of `votes`.

  Returns:
    The common row sum, as an int.

  Raises:
    ValueError: If the smallest row sum differs from the (truncated) largest.
  """
  row_totals = np.sum(votes, axis=1)
  largest = int(max(row_totals))
  if min(row_totals) == largest:
    return largest
  raise ValueError(
      'Matrix of votes is malformed: the number of votes is not the same '
      'across rows.')
-
-
def _is_data_ind_step1(num_teachers, threshold, sigma1, orders):
  """Tells whether step 1 is covered by the data-independent bound.

  Without a threshold there is no step 1, so the answer is True. Otherwise
  the pate check must hold for every order.
  """
  if threshold is not None:
    always_opt = pate.is_data_independent_always_opt_threshold(
        num_teachers, threshold, sigma1, orders)
    return np.all(always_opt)
  return True
-
-
def _is_data_ind_step2(num_teachers, num_classes, sigma, orders):
  """Tells whether the pate step 2 (GNMax) check holds for every order."""
  always_opt = pate.is_data_independent_always_opt_gaussian(
      num_teachers, num_classes, sigma, orders)
  return np.all(always_opt)
-
-
def main(argv):
  """Runs the RDP analysis and, if needed, the smooth sensitivity search.

  Raises:
    ValueError: If exactly one of --threshold and --sigma1 is given.
  """
  del argv  # Unused.

  threshold_given = FLAGS.threshold is not None
  sigma1_given = FLAGS.sigma1 is not None
  if threshold_given != sigma1_given:
    raise ValueError(
        '--threshold flag and --sigma1 flag must be present or absent '
        'simultaneously.')

  if FLAGS.order is not None:
    orders = np.array([FLAGS.order])
  else:
    # Long list of orders.
    fine_orders = np.arange(2, 100 + 1, .5)
    coarse_orders = np.logspace(np.log10(100), np.log10(500), num=100)
    orders = np.concatenate((fine_orders, coarse_orders))

  votes, baseline = _load_votes(FLAGS.counts_file, FLAGS.baseline_file,
                                FLAGS.queries)

  # The teacher count comes from the flag when given, else from the data.
  num_teachers = (
      _count_teachers(votes) if FLAGS.teachers is None else FLAGS.teachers)
  num_classes = votes.shape[1]

  order = _compute_rdp(votes, baseline, FLAGS.threshold, FLAGS.sigma1,
                       FLAGS.sigma2, FLAGS.delta, orders,
                       FLAGS.data_independent)

  ind_step1 = _is_data_ind_step1(num_teachers, FLAGS.threshold, FLAGS.sigma1,
                                 order)
  ind_step2 = _is_data_ind_step2(num_teachers, num_classes, FLAGS.sigma2, order)

  if FLAGS.data_independent or (ind_step1 and ind_step2):
    print('Nothing to do here, all analyses are data-independent.')
    return

  conditions_hold = _check_conditions(FLAGS.sigma2, num_classes, [order])
  if not conditions_hold:
    return  # Quit early: sufficient conditions for correctness fail to hold.

  beta_opt, ss_opt, sigma_ss_opt = _find_optimal_smooth_sensitivity_parameters(
      votes, baseline, num_teachers, FLAGS.threshold, FLAGS.sigma1,
      FLAGS.sigma2, FLAGS.delta, ind_step1, ind_step2, order)

  print('Optimal beta = {:.4f}, E[SS_beta] = {:.4}, sigma_ss = {:.2f}'.format(
      beta_opt, ss_opt, sigma_ss_opt))
-
-
-if __name__ == '__main__':
-  app.run(main)
--- a/research/differential_privacy/pate/ICLR2018/utility_queries_answered.py
+++ b/research/differential_privacy/pate/ICLR2018/utility_queries_answered.py
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl import app
-from absl import flags
-import matplotlib
-import os
-
-matplotlib.use('TkAgg')
-import matplotlib.pyplot as plt
-
-plt.style.use('ggplot')
-
-FLAGS = flags.FLAGS
-flags.DEFINE_string('plot_file', '', 'Output file name.')
-
# Hard-coded results: number of queries answered (qa_*) and the matching
# student test accuracy in percent (acc_*) for each aggregator. Each qa_*
# list is paired element-wise with the acc_* list of the same suffix.

# range() must be materialized: on Python 3 it is not a list and cannot be
# concatenated to one.
qa_lnmax = [500, 750] + list(range(1000, 12500, 500))

acc_lnmax = [43.3, 52.3, 59.8, 66.7, 68.8, 70.5, 71.6, 72.3, 72.6, 72.9, 73.4,
             73.4, 73.7, 73.9, 74.2, 74.4, 74.5, 74.7, 74.8, 75, 75.1, 75.1,
             75.4, 75.4, 75.4]

qa_gnmax = [456, 683, 908, 1353, 1818, 2260, 2702, 3153, 3602, 4055, 4511, 4964,
            5422, 5875, 6332, 6792, 7244, 7696, 8146, 8599, 9041, 9496, 9945,
            10390, 10842]

acc_gnmax = [39.6, 52.2, 59.6, 66.6, 69.6, 70.5, 71.8, 72, 72.7, 72.9, 73.3,
             73.4, 73.4, 73.8, 74, 74.2, 74.4, 74.5, 74.5, 74.7, 74.8, 75, 75.1,
             75.1, 75.4]

qa_gnmax_aggressive = [167, 258, 322, 485, 647, 800, 967, 1133, 1282, 1430,
                       1573, 1728, 1889, 2028, 2190, 2348, 2510, 2668, 2950,
                       3098, 3265, 3413, 3581, 3730]

acc_gnmax_aggressive = [17.8, 26.8, 39.3, 48, 55.7, 61, 62.8, 64.8, 65.4, 66.7,
                        66.2, 68.3, 68.3, 68.7, 69.1, 70, 70.2, 70.5, 70.9,
                        70.7, 71.3, 71.3, 71.3, 71.8]
-
-
def main(argv):
  """Plots student accuracy against the number of queries answered.

  Draws the LNMax and Confident-GNMax curves from the module-level tables,
  annotates four points with their epsilon, saves the figure to --plot_file
  and shows it.
  """
  del argv  # Unused.

  plt.close('all')
  fig, ax = plt.subplots()
  fig.set_figheight(4.7)
  fig.set_figwidth(5)
  ax.plot(qa_lnmax, acc_lnmax,
          color='r', ls='--', linewidth=5., marker='o', alpha=.5,
          label='LNMax')
  ax.plot(qa_gnmax, acc_gnmax,
          color='g', ls='-', linewidth=5., marker='o', alpha=.5,
          label='Confident-GNMax')
  plt.xticks([0, 2000, 4000, 6000])
  plt.xlim([0, 6000])
  plt.ylim([65, 76])
  ax.tick_params(labelsize=14)
  plt.xlabel('Number of queries answered', fontsize=16)
  plt.ylabel('Student test accuracy (%)', fontsize=16)
  plt.legend(loc=2, prop={'size': 16})

  # Epsilon labels: position (x, y), value and colour of each annotation.
  x = [400, 2116, 4600, 4680]
  y = [69.5, 68.5, 74, 72.5]
  annotations = [0.76, 2.89, 1.42, 5.76]
  color_annotations = ['g', 'r', 'g', 'r']
  for eps, x_pos, y_pos, colour in zip(annotations, x, y, color_annotations):
    ax.annotate(r'${\varepsilon=}$' + str(eps), (x_pos, y_pos), fontsize=16,
                color=colour)

  plot_filename = os.path.expanduser(FLAGS.plot_file)
  plt.savefig(plot_filename, bbox_inches='tight')
  plt.show()
-
-if __name__ == '__main__':
-  app.run(main)
--- a/research/differential_privacy/pate/README.md
+++ b/research/differential_privacy/pate/README.md
-Implementation of an RDP privacy accountant and smooth sensitivity analysis for
-the PATE framework. The underlying theory and supporting experiments appear in
-"Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya
-Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018,
-https://arxiv.org/abs/1802.08908).
-
-## Overview
-
-The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced 
-by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from
-Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The 
-framework enables model-agnostic training that provably provides [differential
-privacy](https://en.wikipedia.org/wiki/Differential_privacy) of the training 
-dataset. 
-
-The framework consists of _teachers_, the _student_ model, and the _aggregator_. The 
-teachers are models trained on disjoint subsets of the training datasets. The student
-model has access to an insensitive (e.g., public) unlabelled dataset, which is labelled by 
-interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies 
-outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or
-refuses to answer.
-
-Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_,
-which means that they are solely a function of the aggregator's parameters. Alternatively, privacy
-analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on
-the input distribution, the final privacy guarantees can be improved relative to the data-independent
-analysis. Data-dependent privacy guarantees may, by themselves, be a function of sensitive data and 
-therefore publishing these guarantees requires its own sanitization procedure. In our case 
-sanitization of data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis.
-
-The common machinery used for all privacy analyses in this repository is the 
-R&eacute;nyi differential privacy, or RDP (see https://arxiv.org/abs/1702.07476). 
-
-This repository contains implementations of privacy accountants and smooth
-sensitivity analysis for several data-independent and data-dependent mechanisms that together
-comprise the PATE framework.
-
-
-### Requirements
-
-* Python, version &ge; 2.7
-* absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
-* numpy
-* scipy
-* sympy (for smooth sensitivity analysis)
-* unittest (for testing)
-
-
-### Self-testing
-
-To verify the installation run
-```bash
-$ python core_test.py
-$ python smooth_sensitivity_test.py
-```
-
-
-## Files in this directory
-
-*   core.py &mdash; RDP privacy accountant for several vote aggregators (GNMax,
-    Threshold, Laplace).
-
-*   smooth_sensitivity.py &mdash; Smooth sensitivity analysis for GNMax and
-    Threshold mechanisms.
-
-*   core_test.py and smooth_sensitivity_test.py &mdash; Unit tests for the
-    files above.
-
-## Contact information
-
-You may direct your comments to mironov@google.com and PRs to @ilyamironov.
--- a/research/differential_privacy/pate/core.py
+++ b/research/differential_privacy/pate/core.py
-# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Core functions for RDP analysis in PATE framework.
-
-This library comprises the core functions for doing differentially private
-analysis of the PATE architecture and its various Noisy Max and other
-mechanisms.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-from absl import app
-import numpy as np
-import scipy.stats
-
-
-def _logaddexp(x):
-  """Addition in the log space. Analogue of numpy.logaddexp for a list."""
-  m = max(x)
-  return m + math.log(sum(np.exp(x - m)))
-
-
-def _log1mexp(x):
-  """Numerically stable computation of log(1-exp(x))."""
-  if x < -1:
-    return math.log1p(-math.exp(x))
-  elif x < 0:
-    return math.log(-math.expm1(x))
-  elif x == 0:
-    return -np.inf
-  else:
-    raise ValueError("Argument must be non-positive.")
-
-
-def compute_eps_from_delta(orders, rdp, delta):
-  """Translates between RDP and (eps, delta)-DP.
-
-  Args:
-    orders: A list (or a scalar) of orders.
-    rdp: A list of RDP guarantees (of the same length as orders).
-    delta: Target delta.
-
-  Returns:
-    Pair of (eps, optimal_order).
-
-  Raises:
-    ValueError: If input is malformed.
-  """
-  if len(orders) != len(rdp):
-    raise ValueError("Input lists must have the same length.")
-  eps = np.array(rdp) - math.log(delta) / (np.array(orders) - 1)
-  idx_opt = np.argmin(eps)
-  return eps[idx_opt], orders[idx_opt]
-
-
-#####################
-# RDP FOR THE GNMAX #
-#####################
-
-
-def compute_logq_gaussian(counts, sigma):
-  """Returns an upper bound on ln Pr[outcome != argmax] for GNMax.
-
-  Implementation of Proposition 7.
-
-  Args:
-    counts: A numpy array of scores.
-    sigma: The standard deviation of the Gaussian noise in the GNMax mechanism.
-
-  Returns:
-    logq: Natural log of the probability that outcome is different from argmax.
-  """
-  n = len(counts)
-  variance = sigma**2
-  idx_max = np.argmax(counts)
-  counts_normalized = counts[idx_max] - counts
-  counts_rest = counts_normalized[np.arange(n) != idx_max]  # exclude one index
-  # Upper bound q via a union bound rather than a more precise calculation.
-  logq = _logaddexp(
-      scipy.stats.norm.logsf(counts_rest, scale=math.sqrt(2 * variance)))
-
-  # A sketch of a more accurate estimate, which is currently disabled for two
-  # reasons:
-  # 1. Numerical instability;
-  # 2. Not covered by smooth sensitivity analysis.
-  # covariance = variance * (np.ones((n - 1, n - 1)) + np.identity(n - 1))
-  # logq = np.log1p(-statsmodels.sandbox.distributions.extras.mvnormcdf(
-  #     counts_rest, np.zeros(n - 1), covariance, maxpts=1e4))
-
-  return min(logq, math.log(1 - (1 / n)))
-
-
-def rdp_data_independent_gaussian(sigma, orders):
-  """Computes a data-independent RDP curve for GNMax.
-
-  Implementation of Proposition 8.
-
-  Args:
-    sigma: Standard deviation of Gaussian noise.
-    orders: An array_like list of Renyi orders.
-
-  Returns:
-    Upper bound on RDP for all orders. A scalar if orders is a scalar.
-
-  Raises:
-    ValueError: If the input is malformed.
-  """
-  if sigma < 0 or np.any(orders <= 1):  # not defined for alpha=1
-    raise ValueError("Inputs are malformed.")
-
-  variance = sigma**2
-  if np.isscalar(orders):
-    return orders / variance
-  else:
-    return np.atleast_1d(orders) / variance
-
-
-def rdp_gaussian(logq, sigma, orders):
-  """Bounds the RDP of GNMax from above given an upper bound on q (Theorem 6).
-
-  Args:
-    logq: Natural logarithm of the probability of a non-argmax outcome.
-    sigma: Standard deviation of Gaussian noise.
-    orders: An array_like list of Renyi orders.
-
-  Returns:
-    Upper bound on RDP for all orders. A scalar if orders is a scalar.
-
-  Raises:
-    ValueError: If the input is malformed.
-  """
-  if logq > 0 or sigma < 0 or np.any(orders <= 1):  # not defined for alpha=1
-    raise ValueError("Inputs are malformed.")
-
-  if np.isneginf(logq):  # If the mechanism's output is fixed, it has 0-DP.
-    if np.isscalar(orders):
-      return 0.
-    else:
-      return np.full_like(orders, 0., dtype=np.float)
-
-  variance = sigma**2
-
-  # Use two different higher orders: mu_hi1 and mu_hi2 computed according to
-  # Proposition 10.
-  mu_hi2 = math.sqrt(variance * -logq)
-  mu_hi1 = mu_hi2 + 1
-
-  orders_vec = np.atleast_1d(orders)
-
-  ret = orders_vec / variance  # baseline: data-independent bound
-
-  # Filter out entries where data-dependent bound does not apply.
-  mask = np.logical_and(mu_hi1 > orders_vec, mu_hi2 > 1)
-
-  rdp_hi1 = mu_hi1 / variance
-  rdp_hi2 = mu_hi2 / variance
-
-  log_a2 = (mu_hi2 - 1) * rdp_hi2
-
-  # Make sure q is in the increasing wrt q range and A is positive.
-  if (np.any(mask) and logq <= log_a2 - mu_hi2 *
-      (math.log(1 + 1 / (mu_hi1 - 1)) + math.log(1 + 1 / (mu_hi2 - 1))) and
-      -logq > rdp_hi2):
-    # Use log1p(x) = log(1 + x) to avoid catastrophic cancellations when x ~ 0.
-    log1q = _log1mexp(logq)  # log1q = log(1-q)
-    log_a = (orders - 1) * (
-        log1q - _log1mexp((logq + rdp_hi2) * (1 - 1 / mu_hi2)))
-    log_b = (orders - 1) * (rdp_hi1 - logq / (mu_hi1 - 1))
-
-    # Use logaddexp(x, y) = log(e^x + e^y) to avoid overflow for large x, y.
-    log_s = np.logaddexp(log1q + log_a, logq + log_b)
-    ret[mask] = np.minimum(ret, log_s / (orders - 1))[mask]
-
-  assert np.all(ret >= 0)
-
-  if np.isscalar(orders):
-    return np.asscalar(ret)
-  else:
-    return ret
-
-
-def is_data_independent_always_opt_gaussian(num_teachers, num_classes, sigma,
-                                            orders):
-  """Tests whether data-ind bound is always optimal for GNMax.
-
-  Args:
-    num_teachers: Number of teachers.
-    num_classes: Number of classes.
-    sigma: Standard deviation of the Gaussian noise.
-    orders: An array_like list of Renyi orders.
-
-  Returns:
-    Boolean array of length |orders| (a scalar if orders is a scalar). True if
-    the data-independent bound is always the same as the data-dependent bound.
-
-  """
-  unanimous = np.array([num_teachers] + [0] * (num_classes - 1))
-  logq = compute_logq_gaussian(unanimous, sigma)
-
-  rdp_dep = rdp_gaussian(logq, sigma, orders)
-  rdp_ind = rdp_data_independent_gaussian(sigma, orders)
-  return np.isclose(rdp_dep, rdp_ind)
-
-
-###################################
-# RDP FOR THE THRESHOLD MECHANISM #
-###################################
-
-
-def compute_logpr_answered(t, sigma, counts):
-  """Computes log of the probability that a noisy threshold is crossed.
-
-  Args:
-    t: The threshold.
-    sigma: The stdev of the Gaussian noise added to the threshold.
-    counts: An array of votes.
-
-  Returns:
-    Natural log of the probability that max is larger than a noisy threshold.
-  """
-  # Compared to the paper, max(counts) is rounded to the nearest integer. This
-  # is done to facilitate computation of smooth sensitivity for the case of
-  # the interactive mechanism, where votes are not necessarily integer.
-  return scipy.stats.norm.logsf(t - round(max(counts)), scale=sigma)
-
-
-def compute_rdp_data_independent_threshold(sigma, orders):
-  # The input to the threshold mechanism has stability 1, compared to
-  # GNMax, which has stability = 2. Hence the sqrt(2) factor below.
-  return rdp_data_independent_gaussian(2**.5 * sigma, orders)
-
-
-def compute_rdp_threshold(log_pr_answered, sigma, orders):
-  logq = min(log_pr_answered, _log1mexp(log_pr_answered))
-  # The input to the threshold mechanism has stability 1, compared to
-  # GNMax, which has stability = 2. Hence the sqrt(2) factor below.
-  return rdp_gaussian(logq, 2**.5 * sigma, orders)
-
-
-def is_data_independent_always_opt_threshold(num_teachers, threshold, sigma,
-                                             orders):
-  """Tests whether data-ind bound is always optimal for the threshold mechanism.
-
-  Args:
-    num_teachers: Number of teachers.
-    threshold: The cut-off threshold.
-    sigma: Standard deviation of the Gaussian noise.
-    orders: An array_like list of Renyi orders.
-
-  Returns:
-    Boolean array of length |orders| (a scalar if orders is a scalar). True if
-    the data-independent bound is always the same as the data-dependent bound.
-  """
-
-  # Since the data-dependent bound depends only on max(votes), it suffices to
-  # check whether the data-dependent bounds are better than data-independent
-  # bounds in the extreme cases when max(votes) is minimal or maximal.
-  # For both Confident GNMax and Interactive GNMax it holds that
-  #   0 <= max(votes) <= num_teachers.
-  # The upper bound is trivial in both cases.
-  # The lower bound is trivial for Confident GNMax (and a stronger one, based on
-  # the pigeonhole principle, is possible).
-  # For Interactive GNMax (Algorithm 2), the lower bound follows from the
-  # following argument. Since the votes vector is the difference between the
-  # actual teachers' votes and the student's baseline, we need to argue that
-  # max(n_j - M * p_j) >= 0.
-  # The bound holds because sum_j n_j = sum M * p_j = M. Thus,
-  # sum_j (n_j - M * p_j) = 0, and max_j (n_j - M * p_j) >= 0 as needed.
-  logq1 = compute_logpr_answered(threshold, sigma, [0])
-  logq2 = compute_logpr_answered(threshold, sigma, [num_teachers])
-
-  rdp_dep1 = compute_rdp_threshold(logq1, sigma, orders)
-  rdp_dep2 = compute_rdp_threshold(logq2, sigma, orders)
-
-  rdp_ind = compute_rdp_data_independent_threshold(sigma, orders)
-  return np.isclose(rdp_dep1, rdp_ind) and np.isclose(rdp_dep2, rdp_ind)
-
-
-#############################
-# RDP FOR THE LAPLACE NOISE #
-#############################
-
-
-def compute_logq_laplace(counts, lmbd):
-  """Computes an upper bound on log Pr[outcome != argmax] for LNMax.
-
-  Args:
-    counts: A list of scores.
-    lmbd: The lambda parameter of the Laplace distribution ~exp(-|x| / lambda).
-
-  Returns:
-    logq: Natural log of the probability that outcome is different from argmax.
-  """
-  # For noisy max, we only get an upper bound via the union bound. See Lemma 4
-  # in https://arxiv.org/abs/1610.05755.
-  #
-  # Pr[j beats i*] = (2 + gap(j, i*)) / (4 * exp(gap(j, i*)))
-  # proof at http://mathoverflow.net/questions/66763/
-
-  idx_max = np.argmax(counts)
-  counts_normalized = (counts - counts[idx_max]) / lmbd
-  counts_rest = np.array(
-      [counts_normalized[i] for i in range(len(counts)) if i != idx_max])
-
-  logq = _logaddexp(np.log(2 - counts_rest) + math.log(.25) + counts_rest)
-
-  return min(logq, math.log(1 - (1 / len(counts))))
-
-
-def rdp_pure_eps(logq, pure_eps, orders):
-  """Computes the RDP value given logq and pure privacy eps.
-
-  Implementation of https://arxiv.org/abs/1610.05755, Theorem 3.
-
-  The bound used is the min of three terms. The first term is from
-  https://arxiv.org/pdf/1605.02065.pdf.
-  The second term is based on the fact that when event has probability (1-q) for
-  q close to zero, q can only change by exp(eps), which corresponds to a
-  much smaller multiplicative change in (1-q)
-  The third term comes directly from the privacy guarantee.
-
-  Args:
-    logq: Natural logarithm of the probability of a non-optimal outcome.
-    pure_eps: eps parameter for DP
-    orders: array_like list of moments to compute.
-
-  Returns:
-    Array of upper bounds on rdp (a scalar if orders is a scalar).
-  """
-  orders_vec = np.atleast_1d(orders)
-  q = math.exp(logq)
-  log_t = np.full_like(orders_vec, np.inf)
-  if q <= 1 / (math.exp(pure_eps) + 1):
-    logt_one = math.log1p(-q) + (
-        math.log1p(-q) - _log1mexp(pure_eps + logq)) * (
-            orders_vec - 1)
-    logt_two = logq + pure_eps * (orders_vec - 1)
-    log_t = np.logaddexp(logt_one, logt_two)
-
-  ret = np.minimum(
-      np.minimum(0.5 * pure_eps * pure_eps * orders_vec,
-                 log_t / (orders_vec - 1)), pure_eps)
-  if np.isscalar(orders):
-    return np.asscalar(ret)
-  else:
-    return ret
-
-
-def main(argv):
-  del argv  # Unused.
-
-
-if __name__ == "__main__":
-  app.run(main)
--- a/research/differential_privacy/pate/core_test.py
+++ b/research/differential_privacy/pate/core_test.py
-# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for pate.core."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-import unittest
-import numpy as np
-
-import core as pate
-
-
-class PateTest(unittest.TestCase):
-
-  def _test_rdp_gaussian_value_errors(self):
-    # Test for ValueErrors.
-    with self.assertRaises(ValueError):
-      pate.rdp_gaussian(1.0, 1.0, np.array([2, 3, 4]))
-    with self.assertRaises(ValueError):
-      pate.rdp_gaussian(np.log(0.5), -1.0, np.array([2, 3, 4]))
-    with self.assertRaises(ValueError):
-      pate.rdp_gaussian(np.log(0.5), 1.0, np.array([1, 3, 4]))
-
-  def _test_rdp_gaussian_as_function_of_q(self):
-    # Test for data-independent and data-dependent ranges over q.
-    # The following corresponds to orders 1.1, 2.5, 32, 250
-    # sigmas 1.5, 15, 1500, 15000.
-    # Hand calculated -log(q0)s arranged in a 'sigma major' ordering.
-    neglogq0s = [
-        2.8, 2.6, 427, None, 4.8, 4.0, 4.7, 275, 9.6, 8.8, 6.0, 4, 12, 11.2,
-        8.6, 6.4
-    ]
-    idx_neglogq0s = 0  # To iterate through neglogq0s.
-    orders = [1.1, 2.5, 32, 250]
-    sigmas = [1.5, 15, 1500, 15000]
-    for sigma in sigmas:
-      for order in orders:
-        curr_neglogq0 = neglogq0s[idx_neglogq0s]
-        idx_neglogq0s += 1
-        if curr_neglogq0 is None:  # sigma == 1.5 and order == 250:
-          continue
-
-        rdp_at_q0 = pate.rdp_gaussian(-curr_neglogq0, sigma, order)
-
-        # Data-dependent range. (Successively halve the value of q.)
-        logq_dds = (-curr_neglogq0 - np.array(
-            [0, np.log(2), np.log(4), np.log(8)]))
-        # Check that in q_dds, rdp is decreasing.
-        for idx in range(len(logq_dds) - 1):
-          self.assertGreater(
-              pate.rdp_gaussian(logq_dds[idx], sigma, order),
-              pate.rdp_gaussian(logq_dds[idx + 1], sigma, order))
-
-        # Data-independent range.
-        q_dids = np.exp(-curr_neglogq0) + np.array([0.1, 0.2, 0.3, 0.4])
-        # Check that in q_dids, rdp is constant.
-        for q in q_dids:
-          self.assertEqual(rdp_at_q0, pate.rdp_gaussian(
-              np.log(q), sigma, order))
-
-  def _test_compute_eps_from_delta_value_error(self):
-    # Test for ValueError.
-    with self.assertRaises(ValueError):
-      pate.compute_eps_from_delta([1.1, 2, 3, 4], [1, 2, 3], 0.001)
-
-  def _test_compute_eps_from_delta_monotonicity(self):
-    # Test for monotonicity with respect to delta.
-    orders = [1.1, 2.5, 250.0]
-    sigmas = [1e-3, 1.0, 1e5]
-    deltas = [1e-60, 1e-6, 0.1, 0.999]
-    for sigma in sigmas:
-      list_of_eps = []
-      rdps_for_gaussian = np.array(orders) / (2 * sigma**2)
-      for delta in deltas:
-        list_of_eps.append(
-            pate.compute_eps_from_delta(orders, rdps_for_gaussian, delta)[0])
-
-      # Check that in list_of_eps, epsilons are decreasing (as delta increases).
-      sorted_list_of_eps = list(list_of_eps)
-      sorted_list_of_eps.sort(reverse=True)
-      self.assertEqual(list_of_eps, sorted_list_of_eps)
-
-  def _test_compute_q0(self):
-    # Stub code to search a logq space and figure out logq0 by eyeballing
-    # results. This code does not run with the tests. Remove underscore to run.
-    sigma = 15
-    order = 250
-    logqs = np.arange(-290, -270, 1)
-    count = 0
-    for logq in logqs:
-      count += 1
-      sys.stdout.write("\t%0.5g: %0.10g" %
-                       (logq, pate.rdp_gaussian(logq, sigma, order)))
-      sys.stdout.flush()
-      if count % 5 == 0:
-        print("")
-
-  def test_rdp_gaussian(self):
-    self._test_rdp_gaussian_value_errors()
-    self._test_rdp_gaussian_as_function_of_q()
-
-  def test_compute_eps_from_delta(self):
-    self._test_compute_eps_from_delta_value_error()
-    self._test_compute_eps_from_delta_monotonicity()
-
-
-if __name__ == "__main__":
-  unittest.main()
--- a/research/differential_privacy/pate/smooth_sensitivity.py
+++ b/research/differential_privacy/pate/smooth_sensitivity.py
--- a/research/differential_privacy/pate/smooth_sensitivity_test.py
+++ b/research/differential_privacy/pate/smooth_sensitivity_test.py
-# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for pate.smooth_sensitivity."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import unittest
-import numpy as np
-
-import smooth_sensitivity as pate_ss
-
-
-class PateSmoothSensitivityTest(unittest.TestCase):
-
-  def test_check_conditions(self):
-    self.assertEqual(pate_ss.check_conditions(20, 10, 25.), (True, False))
-    self.assertEqual(pate_ss.check_conditions(30, 10, 25.), (True, True))
-
-  def _assert_all_close(self, x, y):
-    """Asserts that two numpy arrays are close."""
-    self.assertEqual(len(x), len(y))
-    self.assertTrue(np.allclose(x, y, rtol=1e-8, atol=0))
-
-  def test_compute_local_sensitivity_bounds_gnmax(self):
-    counts1 = np.array([10, 0, 0])
-    sigma1 = .5
-    order1 = 1.5
-
-    answer1 = np.array(
-        [3.13503646e-17, 1.60178280e-08, 5.90681786e-03] + [5.99981308e+00] * 7)
-
-    # Test for "going right" in the smooth sensitivity computation.
-    out1 = pate_ss.compute_local_sensitivity_bounds_gnmax(
-        counts1, 10, sigma1, order1)
-
-    self._assert_all_close(out1, answer1)
-
-    counts2 = np.array([1000, 500, 300, 200, 0])
-    sigma2 = 250.
-    order2 = 10.
-
-    # Test for "going left" in the smooth sensitivity computation.
-    out2 = pate_ss.compute_local_sensitivity_bounds_gnmax(
-        counts2, 2000, sigma2, order2)
-
-    answer2 = np.array([0.] * 298 + [2.77693450548e-7, 2.10853979548e-6] +
-                       [2.73113623988e-6] * 1700)
-    self._assert_all_close(out2, answer2)
-
-  def test_compute_local_sensitivity_bounds_threshold(self):
-    counts1_3 = np.array([20, 10, 0])
-    num_teachers = sum(counts1_3)
-    t1 = 16  # high threshold
-    sigma = 2
-    order = 10
-
-    out1 = pate_ss.compute_local_sensitivity_bounds_threshold(
-        counts1_3, num_teachers, t1, sigma, order)
-    answer1 = np.array([0] * 3 + [
-        1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02,
-        9.01543247e-02, 1.16054002e-01, 1.42180452e-01, 1.42180452e-01,
-        1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02,
-        9.01543266e-02, 1.16054000e-01, 1.42180452e-01, 1.68302106e-01,
-        1.93127860e-01
-    ] + [0] * 10)
-    self._assert_all_close(out1, answer1)
-
-    t2 = 2  # low threshold
-
-    out2 = pate_ss.compute_local_sensitivity_bounds_threshold(
-        counts1_3, num_teachers, t2, sigma, order)
-    answer2 = np.array([
-        1.60212079e-01, 2.07021132e-01, 2.07021132e-01, 1.93127860e-01,
-        1.68302106e-01, 1.42180452e-01, 1.16054002e-01, 9.01543247e-02,
-        6.45775697e-02, 3.94153241e-02, 1.47826870e-02, 1.48454129e-04
-    ] + [0] * 18)
-    self._assert_all_close(out2, answer2)
-
-    t3 = 50  # very high threshold (larger than the number of teachers).
-
-    out3 = pate_ss.compute_local_sensitivity_bounds_threshold(
-        counts1_3, num_teachers, t3, sigma, order)
-
-    answer3 = np.array([
-        1.35750725752e-19, 1.88990500499e-17, 2.05403154065e-15,
-        1.74298153642e-13, 1.15489723995e-11, 5.97584949325e-10,
-        2.41486826748e-08, 7.62150641922e-07, 1.87846248741e-05,
-        0.000360973025976, 0.000360973025976, 2.76377015215e-50,
-        1.00904975276e-53, 2.87254164748e-57, 6.37583360761e-61,
-        1.10331620211e-64, 1.48844393335e-68, 1.56535552444e-72,
-        1.28328011060e-76, 8.20047697109e-81
-    ] + [0] * 10)
-
-    self._assert_all_close(out3, answer3)
-
-    # Fractional values.
-    counts4 = np.array([19.5, -5.1, 0])
-    t4 = 10.1
-    out4 = pate_ss.compute_local_sensitivity_bounds_threshold(
-        counts4, num_teachers, t4, sigma, order)
-
-    answer4 = np.array([
-        0.0620410301, 0.0875807131, 0.113451958, 0.139561671, 0.1657074530,
-        0.1908244840, 0.2070270720, 0.207027072, 0.169718100, 0.0575152142,
-        0.00678695871
-    ] + [0] * 6 + [0.000536304908, 0.0172181073, 0.041909870] + [0] * 10)
-    self._assert_all_close(out4, answer4)
-
-
-if __name__ == "__main__":
-  unittest.main()