Commit 6ce86cd8 authored by Ilya Mironov's avatar Ilya Mironov
Browse files

Changing accountant from log_mgf language to RDP. Adding more tests.

parent 73dbdd9a
...@@ -12,43 +12,49 @@ ...@@ -12,43 +12,49 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""A standalone utility for tracking the RDP accountant. """RDP analysis of the Gaussian-with-sampling mechanism.
The utility for computing Renyi differential privacy. Its public interface Functionality for computing Renyi differential privacy of an additive Gaussian
consists of two methods: mechanism with sampling. Its public interface consists of two methods:
compute_rdp(q, sigma, T, order) computes RDP with sampling probability q, compute_rdp(q, sigma, T, order) computes RDP with sampling probability q,
noise sigma, T steps at order. noise sigma, T steps at a given order.
get_privacy_spent computes delta (or eps) given RDP and eps (or delta). get_privacy_spent computes delta (or eps) given RDP and eps (or delta).
Example use: Example use:
Suppose that we have run an algorithm with parameters, an array of Suppose that we have run an algorithm with parameters, an array of
(q1, sigma1, T1) ... (qk, sigmak, Tk), and we wish to compute eps for a given (q1, sigma1, T1) ... (qk, sigma_k, Tk), and we wish to compute eps for a given
delta. The example code would be: delta. The example code would be:
max_order = 32 max_order = 32
orders = range(1, max_order + 1) orders = range(2, max_order + 1)
rdp_list = [] rdp = np.zeros_like(orders, dtype=float)
for order in orders: for q, sigma, T in parameters:
rdp = 0 rdp += rdp_accountant.compute_rdp(q, sigma, T, orders)
for q, sigma, T in parameters: eps, _, opt_order = rdp_accountant.get_privacy_spent(rdp, target_delta=delta)
rdp += compute_rdp(q, sigma, T, order)
rdp_list.append((order, rdp))
eps, delta, opt_order = get_privacy_spent(rdp_list, target_delta=delta)
""" """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from absl import app
from absl import flags
import math import math
import numpy as np
import sys import sys
import numpy as np
from scipy import special from scipy import special
###################### FLAGS = flags.FLAGS
# FLOAT64 ARITHMETIC # flags.DEFINE_boolean("rdp_verbose", False,
###################### "Output intermediate results for RDP computation.")
FLAGS(sys.argv) # Load the flags (including on import)
########################
# LOG-SPACE ARITHMETIC #
########################
def _log_add(logx, logy): def _log_add(logx, logy):
...@@ -56,8 +62,8 @@ def _log_add(logx, logy): ...@@ -56,8 +62,8 @@ def _log_add(logx, logy):
a, b = min(logx, logy), max(logx, logy) a, b = min(logx, logy), max(logx, logy)
if a == -np.inf: # adding 0 if a == -np.inf: # adding 0
return b return b
# Apply exp(a) + exp(b) = (exp(a - b) + 1.) * exp(b) # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
return np.log(np.exp(a - b) + 1.) + b return math.log1p(math.exp(a - b)) + b # log1p(x) = log(x + 1)
def _log_sub(logx, logy): def _log_sub(logx, logy):
...@@ -65,78 +71,81 @@ def _log_sub(logx, logy): ...@@ -65,78 +71,81 @@ def _log_sub(logx, logy):
if logy == -np.inf: # subtracting 0 if logy == -np.inf: # subtracting 0
return logx return logx
assert logx > logy assert logx > logy
with np.errstate(over="raise"):
try: try:
return np.log(np.exp(logx - logy) - 1.) + logy # Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
except FloatingPointError: return math.log(math.expm1(logx - logy)) + logy # expm1(x) = exp(x) - 1
return logx except OverflowError:
return logx
def _log_print(logx): def _log_print(logx):
"""Pretty print.""" """Pretty print."""
if logx < np.log(sys.float_info.max): if logx < math.log(sys.float_info.max):
return "{}".format(np.exp(logx)) return "{}".format(math.exp(logx))
else: else:
return "exp({})".format(logx) return "exp({})".format(logx)
def _compute_log_a_int(q, sigma, alpha, verbose=False): def _compute_log_a_int(q, sigma, alpha):
"""Compute log(A_alpha) for integer alpha.""" """Compute log(A_alpha) for integer alpha."""
assert isinstance(alpha, (int, long)) assert isinstance(alpha, (int, long))
log_a1, log_a2 = -np.inf, -np.inf # log of the first and second terms of A_alpha
# The first and second terms of A_alpha in the log space:
log_a1, log_a2 = -np.inf, -np.inf
for i in range(alpha + 1): for i in range(alpha + 1):
# Do computation in the log space. Extra care needed for q = 0 or 1. # Compute in the log space. Extra care needed for q = 0 or 1.
log_coef_i = np.log(special.binom(alpha, i)) log_coef_i = math.log(special.binom(alpha, i))
if q > 0: if q > 0:
log_coef_i += i * np.log(q) log_coef_i += i * math.log(q)
elif i > 0: elif i > 0:
continue # the term is 0, skip the rest continue # The term is 0, skip the rest.
if q < 1.0: if q < 1.0:
log_coef_i += (alpha - i) * np.log(1 - q) log_coef_i += (alpha - i) * math.log(1 - q)
elif i < alpha: elif i < alpha:
continue # the term is 0, skip the rest continue # The term is 0, skip the rest.
s1 = log_coef_i + (i * i - i) / (2.0 * (sigma**2)) s1 = log_coef_i + (i * i - i) / (2.0 * (sigma ** 2))
s2 = log_coef_i + (i * i + i) / (2.0 * (sigma**2)) s2 = log_coef_i + (i * i + i) / (2.0 * (sigma ** 2))
log_a1 = _log_add(log_a1, s1) log_a1 = _log_add(log_a1, s1)
log_a2 = _log_add(log_a2, s2) log_a2 = _log_add(log_a2, s2)
log_a = _log_add(np.log(1.0 - q) + log_a1, np.log(q) + log_a2) log_a = _log_add(math.log(1 - q) + log_a1, math.log(q) + log_a2)
if verbose: if FLAGS.rdp_verbose:
print("A: by binomial expansion {} = {} + {}".format( print("A: by binomial expansion {} = {} + {}".format(
_log_print(log_a), _log_print(log_a),
_log_print(np.log(1.0 - q) + log_a1), _log_print(np.log(q) + log_a2))) _log_print(math.log(1 - q) + log_a1), _log_print(math.log(q) + log_a2)))
return np.float64(log_a) return float(log_a)
def _compute_log_a_frac(q, sigma, alpha, verbose=False): def _compute_log_a_frac(q, sigma, alpha):
"""Compute log(A_alpha) for fractional alpha.""" """Compute log(A_alpha) for fractional alpha."""
# The four parts of A_alpha: # The four parts of A_alpha in the log space:
log_a11, log_a12 = -np.inf, -np.inf log_a11, log_a12 = -np.inf, -np.inf
log_a21, log_a22 = -np.inf, -np.inf log_a21, log_a22 = -np.inf, -np.inf
i = 0 i = 0
z0, _ = _compute_zs(sigma, q) z0, _ = _compute_zs(sigma, q)
while i == 0 or max(log_s11, log_s21, log_s21, log_s22) > -30: while True: # do ... until loop
coef = special.binom(alpha, i) coef = special.binom(alpha, i)
log_coef = np.log(abs(coef)) log_coef = math.log(abs(coef))
j = alpha - i j = alpha - i
log_t1 = log_coef + i * np.log(q) + j * np.log(1 - q) log_t1 = log_coef + i * math.log(q) + j * math.log(1 - q)
log_t2 = log_coef + j * np.log(q) + i * np.log(1 - q) log_t2 = log_coef + j * math.log(q) + i * math.log(1 - q)
log_e11 = np.log(.5) + _log_erfc((i - z0) / (2**.5 * sigma)) log_e11 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
log_e12 = np.log(.5) + _log_erfc((z0 - j) / (2**.5 * sigma)) log_e12 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))
log_e21 = np.log(.5) + _log_erfc((i - (z0 - 1)) / (2**.5 * sigma)) log_e21 = math.log(.5) + _log_erfc((i - (z0 - 1)) / (math.sqrt(2) * sigma))
log_e22 = np.log(.5) + _log_erfc((z0 - 1 - j) / (2**.5 * sigma)) log_e22 = math.log(.5) + _log_erfc((z0 - 1 - j) / (math.sqrt(2) * sigma))
log_s11 = log_t1 + (i * i - i) / (2.0 * (sigma**2)) + log_e11 log_s11 = log_t1 + (i * i - i) / (2 * (sigma ** 2)) + log_e11
log_s12 = log_t2 + (j * j - j) / (2.0 * (sigma**2)) + log_e12 log_s12 = log_t2 + (j * j - j) / (2 * (sigma ** 2)) + log_e12
log_s21 = log_t1 + (i * i + i) / (2.0 * (sigma**2)) + log_e21 log_s21 = log_t1 + (i * i + i) / (2 * (sigma ** 2)) + log_e21
log_s22 = log_t2 + (j * j + j) / (2.0 * (sigma**2)) + log_e22 log_s22 = log_t2 + (j * j + j) / (2 * (sigma ** 2)) + log_e22
if coef > 0: if coef > 0:
log_a11 = _log_add(log_a11, log_s11) log_a11 = _log_add(log_a11, log_s11)
...@@ -150,18 +159,20 @@ def _compute_log_a_frac(q, sigma, alpha, verbose=False): ...@@ -150,18 +159,20 @@ def _compute_log_a_frac(q, sigma, alpha, verbose=False):
log_a22 = _log_sub(log_a22, log_s22) log_a22 = _log_sub(log_a22, log_s22)
i += 1 i += 1
if max(log_s11, log_s21, log_s21, log_s22) < -30:
break
log_a = _log_add( log_a = _log_add(
np.log(1. - q) + _log_add(log_a11, log_a12), math.log(1. - q) + _log_add(log_a11, log_a12),
np.log(q) + _log_add(log_a21, log_a22)) math.log(q) + _log_add(log_a21, log_a22))
return np.float64(log_a) return log_a
def compute_log_a(q, sigma, alpha, verbose=False): def _compute_log_a(q, sigma, alpha):
if float(alpha).is_integer(): if float(alpha).is_integer():
return _compute_log_a_int(q, sigma, int(alpha), verbose) return _compute_log_a_int(q, sigma, int(alpha))
else: else:
return _compute_log_a_frac(q, sigma, alpha, verbose) return _compute_log_a_frac(q, sigma, alpha)
def _log_erfc(x): def _log_erfc(x):
...@@ -173,14 +184,14 @@ def _log_erfc(x): ...@@ -173,14 +184,14 @@ def _log_erfc(x):
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5) # erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
# To verify in Mathematica: # To verify in Mathematica:
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}] # Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
return (-np.log(np.pi) / 2 - np.log(x) - x**2 - .5 * x**-2 + .625 * x**-4 - return (-math.log(math.pi) / 2 - math.log(x) - x ** 2 - .5 * x ** -2 +
37. / 24. * x**-6 + 353. / 64. * x**-8) .625 * x ** -4 - 37. / 24. * x ** -6 + 353. / 64. * x ** -8)
else: else:
return np.log(r) return math.log(r)
def _compute_zs(sigma, q): def _compute_zs(sigma, q):
z0 = sigma**2 * np.log(1. / q - 1) + .5 z0 = sigma ** 2 * math.log(1 / q - 1) + .5
z1 = min(z0 - 2, z0 / 2) z1 = min(z0 - 2, z0 / 2)
return z0, z1 return z0, z1
...@@ -191,40 +202,41 @@ def _compute_log_b0(sigma, q, alpha, z1): ...@@ -191,40 +202,41 @@ def _compute_log_b0(sigma, q, alpha, z1):
s, log_term, log_b0, k, sign, max_log_term = 0, 1., 0, 0, 1, -np.inf s, log_term, log_b0, k, sign, max_log_term = 0, 1., 0, 0, 1, -np.inf
# Keep adding new terms until precision is no longer preserved. # Keep adding new terms until precision is no longer preserved.
# Don't stop on the negative. # Don't stop on the negative.
while k < alpha or (log_term > max_log_term - 36 and log_term > -30 while (k < alpha or (log_term > max_log_term - 36 and log_term > -30) or
) or sign < 0.: sign < 0.):
log_b1 = k * (k - 2 * z0) / (2 * sigma**2) log_b1 = k * (k - 2 * z0) / (2 * sigma ** 2)
log_b2 = _log_erfc((k - z1) / (np.sqrt(2) * sigma)) log_b2 = _log_erfc((k - z1) / (math.sqrt(2) * sigma))
log_term = log_b0 + log_b1 + log_b2 log_term = log_b0 + log_b1 + log_b2
max_log_term = max(max_log_term, log_term) max_log_term = max(max_log_term, log_term)
s += sign * np.exp(log_term) s += sign * math.exp(log_term)
k += 1 k += 1
# Maintain invariant: sign * exp(log_b0) = {-alpha choose k} # Maintain invariant: sign * exp(log_b0) = {-alpha choose k}
log_b0 += np.log(np.abs(-alpha - k + 1)) - np.log(k) log_b0 += math.log(abs(-alpha - k + 1)) - math.log(k)
sign *= -1 sign *= -1
if s == 0: # May happen if all terms are < 1e-324. if s == 0: # May happen if all terms are < 1e-324.
return -np.inf return -np.inf
if s < 0 or np.log(s) < max_log_term - 25: # the series failed to converge if s < 0 or math.log(s) < max_log_term - 25: # The series failed to converge.
return None return None
c = np.log(.5) - np.log(1 - q) * alpha c = math.log(.5) - math.log(1 - q) * alpha
return c + np.log(s) return c + math.log(s)
def _bound_log_b1(sigma, q, alpha, z1): def _bound_log_b1(sigma, q, alpha, z1):
log_c = _log_add(np.log(1 - q), np.log(q) + (2 * z1 - 1.) / (2 * sigma**2)) log_c = _log_add(math.log(1 - q),
return np.log(.5) - log_c * alpha + _log_erfc(z1 / (2**.5 * sigma)) math.log(q) + (2 * z1 - 1.) / (2 * sigma ** 2))
return math.log(.5) - log_c * alpha + _log_erfc(z1 / (math.sqrt(2) * sigma))
def bound_log_b(q, sigma, alpha, verbose=False): def _bound_log_b(q, sigma, alpha):
"""Compute a numerically stable bound on log(B_alpha).""" """Compute a numerically stable bound on log(B_alpha)."""
if q == 1.: # If the sampling rate is 100%, A and B are symmetric. if q == 1.: # If the sampling rate is 100%, A and B are symmetric.
return compute_log_a(q, sigma, alpha, verbose) return _compute_log_a(q, sigma, alpha)
z0, z1 = _compute_zs(sigma, q) z0, z1 = _compute_zs(sigma, q)
log_b_bound = np.inf log_b_bound = np.inf
# log_b1 cannot be less than its value at z0 # Puts a lower bound on B1: it cannot be less than its value at z0.
log_lb_b1 = _bound_log_b1(sigma, q, alpha, z0) log_lb_b1 = _bound_log_b1(sigma, q, alpha, z0)
while z0 - z1 > 1e-3: while z0 - z1 > 1e-3:
...@@ -236,117 +248,150 @@ def bound_log_b(q, sigma, alpha, verbose=False): ...@@ -236,117 +248,150 @@ def bound_log_b(q, sigma, alpha, verbose=False):
log_b1 = _bound_log_b1(sigma, q, alpha, m) log_b1 = _bound_log_b1(sigma, q, alpha, m)
log_b_bound = min(log_b_bound, _log_add(log_b0, log_b1)) log_b_bound = min(log_b_bound, _log_add(log_b0, log_b1))
log_b_min_bound = _log_add(log_b0, log_lb_b1) log_b_min_bound = _log_add(log_b0, log_lb_b1)
if log_b_bound < 0 or log_b_min_bound < 0 or log_b_bound > log_b_min_bound + .01: if (log_b_bound < 0 or
log_b_min_bound < 0 or
log_b_bound > log_b_min_bound + .01):
# If the bound is likely to be too loose, move z1 closer to z0 and repeat. # If the bound is likely to be too loose, move z1 closer to z0 and repeat.
z1 = m z1 = m
else: else:
break break
return np.float64(log_b_bound) return log_b_bound
def _log_bound_b_elementary(q, alpha): def _log_bound_b_elementary(q, alpha):
return -np.log(1 - q) * alpha return -math.log(1 - q) * alpha
def _compute_delta(log_moments, eps): def _compute_delta(orders, rdp, eps):
"""Compute delta for given log_moments and eps. """Compute delta given an RDP curve and target epsilon.
Args: Args:
log_moments: the log moments of privacy loss, in the form of pairs orders: An array (or a scalar) of orders.
of (moment_order, log_moment) rdp: A list (or a scalar) of RDP guarantees.
eps: the target epsilon. eps: The target epsilon.
Returns: Returns:
delta, order Pair of (delta, optimal_order).
Raises:
ValueError: If input is malformed.
""" """
min_delta, opt_order = 1.0, float("NaN") orders_vec = np.atleast_1d(orders)
for moment_order, log_moment in log_moments: rdp_vec = np.atleast_1d(rdp)
if moment_order == 0:
continue if len(orders_vec) != len(rdp_vec):
if math.isinf(log_moment) or math.isnan(log_moment): raise ValueError("Input lists must have the same length.")
sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
continue deltas = np.exp((rdp_vec - eps) * (orders_vec - 1))
if log_moment < moment_order * eps: idx_opt = np.argmin(deltas)
delta = math.exp(log_moment - moment_order * eps) return min(deltas[idx_opt], 1.), orders_vec[idx_opt]
if delta < min_delta:
min_delta, opt_order = delta, moment_order
return min_delta, opt_order
def _compute_eps(log_moments, delta): def _compute_eps(orders, rdp, delta):
"""Compute epsilon for given log_moments and delta. """Compute epsilon given an RDP curve and target delta.
Args: Args:
log_moments: the log moments of privacy loss, in the form of pairs orders: An array (or a scalar) of orders.
of (moment_order, log_moment) rdp: A list (or a scalar) of RDP guarantees.
delta: the target delta. delta: The target delta.
Returns: Returns:
epsilon, order Pair of (eps, optimal_order).
Raises:
ValueError: If input is malformed.
""" """
min_eps, opt_order = float("inf"), float("NaN") orders_vec = np.atleast_1d(orders)
for moment_order, log_moment in log_moments: rdp_vec = np.atleast_1d(rdp)
if moment_order == 0:
continue
if math.isinf(log_moment) or math.isnan(log_moment):
sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
continue
eps = (log_moment - math.log(delta)) / moment_order
if eps < min_eps:
min_eps, opt_order = eps, moment_order
return min_eps, opt_order
if len(orders_vec) != len(rdp_vec):
raise ValueError("Input lists must have the same length.")
def compute_log_moment(q, sigma, steps, alpha, verbose=False): eps = rdp_vec - math.log(delta) / (orders_vec - 1)
"""Compute the log moment of Gaussian mechanism for given parameters.
Args: idx_opt = np.nanargmin(eps) # Ignore NaNs
q: the sampling ratio. return eps[idx_opt], orders_vec[idx_opt]
sigma: the noise sigma.
steps: the number of steps.
alpha: the moment order. def _compute_rdp(q, sigma, steps, alpha):
verbose: if True, print out debug information. log_moment_a = _compute_log_a(q, sigma, alpha - 1)
Returns:
the log moment with type np.float64, could be np.inf.
"""
log_moment_a = compute_log_a(q, sigma, alpha, verbose=verbose)
log_bound_b = _log_bound_b_elementary(q, alpha) # does not require sigma log_bound_b = _log_bound_b_elementary(q, alpha - 1) # does not require sigma
if log_bound_b < log_moment_a: if log_bound_b < log_moment_a:
if verbose: if FLAGS.rdp_verbose:
print("Elementary bound suffices : {} < {}".format( print("Elementary bound suffices : {} < {}".format(
_log_print(log_bound_b), _log_print(log_moment_a))) _log_print(log_bound_b), _log_print(log_moment_a)))
else: else:
log_bound_b2 = bound_log_b(q, sigma, alpha, verbose=verbose) log_bound_b2 = _bound_log_b(q, sigma, alpha - 1)
if np.isnan(log_bound_b2): if math.isnan(log_bound_b2):
if verbose: if FLAGS.rdp_verbose:
print("B bound failed to converge") print("B bound failed to converge")
else: else:
if verbose and (log_bound_b2 < log_bound_b): if FLAGS.rdp_verbose and (log_bound_b2 < log_bound_b):
print("Elementary bound is stronger: {} < {}".format( print("Elementary bound is stronger: {} < {}".format(
_log_print(log_bound_b2), _log_print(log_bound_b))) _log_print(log_bound_b2), _log_print(log_bound_b)))
log_bound_b = min(log_bound_b, log_bound_b2) log_bound_b = min(log_bound_b, log_bound_b2)
return max(log_moment_a, log_bound_b) * steps return max(log_moment_a, log_bound_b) * steps / (alpha - 1)
def compute_rdp(q, sigma, steps, orders):
"""Compute RDP of Gaussian mechanism with sampling for given parameters.
Args:
q: The sampling ratio.
sigma: The noise sigma.
steps: The number of steps.
orders: An array (or a scalar) of RDP orders.
Returns:
The RDPs at all orders, can be np.inf.
"""
if np.isscalar(orders):
return _compute_rdp(q, sigma, steps, orders)
else:
rdp = np.zeros_like(orders, dtype=float)
for i, order in enumerate(orders):
rdp[i] = _compute_rdp(q, sigma, steps, order)
return rdp
def get_privacy_spent(log_moments, target_eps=None, target_delta=None):
"""Compute delta (or eps) for given eps (or delta) from log moments. def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
"""Compute delta (or eps) for given eps (or delta) from the RDP curve.
Args: Args:
log_moments: array of (moment_order, log_moment) pairs. orders: An array (or a scalar) of RDP orders.
target_eps: if not None, the epsilon for which we would like to compute rdp: An array of RDP values.
corresponding delta value. target_eps: If not None, the epsilon for which we compute the corresponding
target_delta: if not None, the delta for which we would like to compute delta.
corresponding epsilon value. Exactly one of target_eps and target_delta target_delta: If not None, the delta for which we compute the corresponding
is None. epsilon. Exactly one of target_eps and target_delta must be None.
Returns: Returns:
eps, delta, opt_order eps, delta, opt_order.
""" """
assert bool(target_eps is None) ^ bool(target_delta is None) if target_eps is None and target_delta is None:
raise ValueError(
"Exactly one out of eps and delta must be None. (Both are).")
if target_eps is not None and target_delta is not None:
raise ValueError(
"Exactly one out of eps and delta must be None. (None is).")
if target_eps is not None: if target_eps is not None:
delta, opt_order = _compute_delta(log_moments, target_eps) delta, opt_order = _compute_delta(orders, rdp, target_eps)
return target_eps, delta, opt_order return target_eps, delta, opt_order
else: else:
eps, opt_order = _compute_eps(log_moments, target_delta) eps, opt_order = _compute_eps(orders, rdp, target_delta)
return eps, target_delta, opt_order return eps, target_delta, opt_order
def main(_): pass
if __name__ == "__main__":
app.run(main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -18,20 +19,22 @@ from __future__ import absolute_import ...@@ -18,20 +19,22 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from absl.testing import absltest
import numpy as np import numpy as np
import mpmath as mp import mpmath as mp
from absl.testing import absltest
import rdp_accountant import rdp_accountant
class TestGaussianMoments(absltest.TestCase): class TestGaussianMoments(absltest.TestCase):
# MULTI-PRECISION ROUTINES ##############################
# MULTI-PRECISION ARITHMETIC #
##############################
def _pdf_gauss_mp(self, x, sigma, mean): def _pdf_gauss_mp(self, x, sigma, mean):
return 1. / mp.sqrt(2. * sigma ** 2 * mp.pi) * mp.exp(-(x - mean) return 1. / mp.sqrt(2. * sigma ** 2 * mp.pi) * mp.exp(-(x - mean)
** 2 / ( ** 2 / (
2. * sigma ** 2)) 2. * sigma ** 2))
def _integral_inf_mp(self, fn): def _integral_inf_mp(self, fn):
integral, _ = mp.quad( integral, _ = mp.quad(
...@@ -85,88 +88,86 @@ class TestGaussianMoments(absltest.TestCase): ...@@ -85,88 +88,86 @@ class TestGaussianMoments(absltest.TestCase):
print("B: numerically {} = {} + {}".format(b_numeric, b0_numeric, print("B: numerically {} = {} + {}".format(b_numeric, b0_numeric,
b1_numeric)) b1_numeric))
return np.float64(b_numeric) return float(b_numeric)
def _compute_log_moment_mp(self, q, sigma, order): def _compute_rdp_mp(self, q, sigma, order):
log_a_mp = np.float64(mp.log(self.compute_a_mp(sigma, q, order))) log_a_mp = float(mp.log(self.compute_a_mp(sigma, q, order)))
log_b_mp = np.float64(mp.log(self.compute_b_mp(sigma, q, order))) log_b_mp = float(mp.log(self.compute_b_mp(sigma, q, order)))
return log_a_mp, log_b_mp return log_a_mp, log_b_mp
# TEST ROUTINES # TEST ROUTINES
def _almost_equal(self, a, b, rtol): def _almost_equal(self, a, b, rtol, atol=0):
# Analogue of np.testing.assert_allclose(a, b, rtol). # Analogue of np.testing.assert_allclose(a, b, rtol, atol).
self.assertBetween(a, b * (1 - rtol), b * (1 + rtol)) self.assertBetween(a, b * (1 - rtol) - atol, b * (1 + rtol) + atol)
def _compare_bounds(self, q, sigma, order): def _compare_bounds(self, q, sigma, order):
log_a_mp, log_b_mp = self._compute_log_moment_mp(q, sigma, order) log_a_mp, log_b_mp = self._compute_rdp_mp(q, sigma, order)
log_a = rdp_accountant.compute_log_a(q, sigma, order) log_a = rdp_accountant._compute_log_a(q, sigma, order)
log_bound_b = rdp_accountant.bound_log_b(q, sigma, order) log_bound_b = rdp_accountant._bound_log_b(q, sigma, order)
if log_a_mp < 1000 and log_a_mp > 1e-6: if log_a_mp < 1000 and log_a_mp > 1e-6:
self._almost_equal(log_a, log_a_mp, rtol=1e-6) self._almost_equal(log_a, log_a_mp, rtol=1e-6)
else: # be more tolerant for _very_ large or small logarithms else: # be more tolerant for _very_ large or small logarithms
if log_a_mp > 1e-12: self._almost_equal(log_a, log_a_mp, rtol=1e-3, atol=1e-14)
self._almost_equal(log_a, log_a_mp, rtol=1e-2)
else: if np.isfinite(log_bound_b):
print("Bounds on A are too small to compare: {}, {}".format(
log_a, log_a_mp))
if np.isfinite(log_bound_b) and log_bound_b > 1e-12:
# Ignore divergence between the bound and exact value of B if # Ignore divergence between the bound and exact value of B if
# they don't matter anyway (bound on A is larger) or q > .5 # they don't matter anyway (bound on A is larger) or q > .5
if log_bound_b > log_a and q <= .5: if log_bound_b > log_a and q <= .5:
self._almost_equal(log_b_mp, log_bound_b, rtol=1e-2) self._almost_equal(log_b_mp, log_bound_b, rtol=1e-6, atol=1e-14)
if np.isfinite(log_a_mp) and np.isfinite(log_b_mp): if np.isfinite(log_a_mp) and np.isfinite(log_b_mp):
# We hypothesize that this assertion is always true; no proof yet. # We hypothesize that this assertion is always true; no proof yet.
self.assertLessEqual(log_b_mp, log_a_mp + 1e-6) self.assertLessEqual(log_b_mp, log_a_mp + 1e-6)
def test_compute_log_moments(self): def test_compute_rdp(self):
log_moment = rdp_accountant.compute_log_moment(0.1, 2, 10, 4) rdp_scalar = rdp_accountant.compute_rdp(0.1, 2, 10, 5)
self.assertAlmostEqual(log_moment, 0.30948, places=5) self.assertAlmostEqual(rdp_scalar, 0.07737, places=5)
rdp_vec = rdp_accountant.compute_rdp(0.01, 2.5, 50, [1.5, 2.5, 5, 50, 100])
correct = [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307]
for i in range(len(rdp_vec)):
self.assertAlmostEqual(rdp_vec[i], correct[i], places=5)
def test_compare_with_mp(self):
# Compare the cheap computation with an expensive, multi-precision
# computation for a few parameters. Takes a few seconds.
self._compare_bounds(q=.01, sigma=.1, order=.5) self._compare_bounds(q=.01, sigma=.1, order=.5)
self._compare_bounds(q=.1, sigma=1., order=5) self._compare_bounds(q=.1, sigma=1., order=5)
self._compare_bounds(q=.5, sigma=2., order=32.5) self._compare_bounds(q=.5, sigma=2., order=32.5)
# Compare the cheap computation with expensive, multi-precision for q in (1e-6, .1, .999):
# computation for a few parameters. Takes about a minute. for sigma in (.1, 10., 100.):
# for q in (1e-6, .1, .999): for order in (1.01, 2, 255.9, 256):
# for sigma in (.1, 10.): self._compare_bounds(q, sigma, order)
# for order in (.5, 1., 1.5, 256.):
# self._compare_bounds(q, sigma, order)
def test_get_privacy_spent(self): def test_get_privacy_spent(self):
orders = range(1, 33) orders = range(2, 33)
log_moments = [] rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
for order in orders: eps, delta, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
log_moment = rdp_accountant.compute_log_moment(0.01, 4, 10000, order)
log_moments.append((order, log_moment))
eps, delta, opt_order = rdp_accountant.get_privacy_spent(log_moments,
target_delta=1e-5) target_delta=1e-5)
self.assertAlmostEqual(eps, 1.258575, places=5) self.assertAlmostEqual(eps, 1.258575, places=5)
self.assertEqual(opt_order, 19) self.assertEqual(opt_order, 20)
eps, delta, _ = rdp_accountant.get_privacy_spent(log_moments, eps, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp,
target_eps=1.258575) target_eps=1.258575)
self.assertAlmostEqual(delta, 1e-5) self.assertAlmostEqual(delta, 1e-5)
def test_compute_privacy_loss(self): def test_compute_privacy_loss(self):
parameters = [(0.01, 4, 10000), (0.1, 2, 100)] parameters = [(0.01, 4, 10000), (0.1, 2, 100)]
delta = 1e-5 delta = 1e-5
orders = ( orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
1, 1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14., 16., 20., 24., 28., 32., 64., 256.)
16., 20., 24., 28., 32., 64., 256.)
log_moments = [] rdp = np.zeros_like(orders, dtype=float)
for order in orders: for q, sigma, steps in parameters:
log_moment = 0 rdp += rdp_accountant.compute_rdp(q, sigma, steps, orders)
for q, sigma, steps in parameters: eps, delta, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
log_moment += rdp_accountant.compute_log_moment(q, sigma, steps, order) target_delta=delta)
log_moments.append((order, log_moment))
eps, delta, opt_order = rdp_accountant.get_privacy_spent(
log_moments, target_delta=delta)
self.assertAlmostEqual(eps, 3.276237, places=5) self.assertAlmostEqual(eps, 3.276237, places=5)
self.assertEqual(opt_order, 7) self.assertEqual(opt_order, 8)
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment