Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
911a0d23
Unverified
Commit
911a0d23
authored
May 08, 2018
by
Ilya Mironov
Committed by
GitHub
May 08, 2018
Browse files
Merge pull request #4197 from ilyamironov/master
Updates to differential_privacy/pate following the Banff workshop.
parents
bc0edaf8
004dd361
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
874 additions
and
340 deletions
+874
-340
research/differential_privacy/pate/ICLR2018/README.md
research/differential_privacy/pate/ICLR2018/README.md
+3
-1
research/differential_privacy/pate/ICLR2018/plot_partition.py
...arch/differential_privacy/pate/ICLR2018/plot_partition.py
+9
-7
research/differential_privacy/pate/ICLR2018/plots_for_slides.py
...ch/differential_privacy/pate/ICLR2018/plots_for_slides.py
+283
-0
research/differential_privacy/pate/README.md
research/differential_privacy/pate/README.md
+4
-4
research/differential_privacy/pate/core_test.py
research/differential_privacy/pate/core_test.py
+1
-1
research/differential_privacy/pate/smooth_sensitivity_test.py
...arch/differential_privacy/pate/smooth_sensitivity_test.py
+1
-1
research/differential_privacy/privacy_accountant/python/gaussian_moments.py
...ial_privacy/privacy_accountant/python/gaussian_moments.py
+0
-326
research/differential_privacy/privacy_accountant/python/rdp_accountant.py
...ntial_privacy/privacy_accountant/python/rdp_accountant.py
+400
-0
research/differential_privacy/privacy_accountant/python/rdp_accountant_test.py
..._privacy/privacy_accountant/python/rdp_accountant_test.py
+173
-0
No files found.
research/differential_privacy/pate/ICLR2018/README.md
View file @
911a0d23
...
...
@@ -51,9 +51,11 @@ The output is written to the console.
*
plot_partition.py
—
Script for producing partition.pdf, a detailed breakdown of privacy
costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours).
*
rdp_flow.py and plot_ls_q.py are currently not used.
*
plots_for_slides.py
—
Script for producing several plots for the slide deck.
*
download.py
—
Utility script for populating the data/ directory.
*
plot_ls_q.py is not used.
All Python files take flags. Run script_name.py --help for help on flags.
research/differential_privacy/pate/ICLR2018/plot_partition.py
View file @
911a0d23
...
...
@@ -186,7 +186,7 @@ def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
ss
=
rdp_ss
[
order_idx
],
delta
=-
math
.
log
(
delta
)
/
(
order_opt
[
i
]
-
1
))
ss_std_opt
[
i
]
=
ss_std
[
order_idx
]
if
i
>
0
and
(
i
+
1
)
%
1
0
==
0
:
if
i
>
0
and
(
i
+
1
)
%
1
==
0
:
print
(
'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
'step2 = {:.3f}, ss = {:.3f}'
.
format
(
...
...
@@ -292,6 +292,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
fig
,
ax
=
plt
.
subplots
()
fig
.
set_figheight
(
4.5
)
fig
.
set_figwidth
(
4.7
)
fig
.
patch
.
set_alpha
(
0
)
l1
=
ax
.
plot
(
x
,
y3
,
color
=
'b'
,
ls
=
'-'
,
label
=
r
'Total privacy cost'
,
linewidth
=
1
).
pop
()
...
...
@@ -311,8 +313,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
plt
.
xlim
([
0
,
xlim
])
ax
.
set_ylim
([
0
,
3.
])
ax
.
set_xlabel
(
'Number of queries answered'
,
fontsize
=
1
0
)
ax
.
set_ylabel
(
r
'Privacy cost $\varepsilon$ at $\delta=10^{-8}$'
,
fontsize
=
1
0
)
ax
.
set_xlabel
(
'Number of queries answered'
,
fontsize
=
1
6
)
ax
.
set_ylabel
(
r
'Privacy cost $\varepsilon$ at $\delta=10^{-8}$'
,
fontsize
=
1
6
)
# Merging legends.
if
print_order
:
...
...
@@ -321,11 +323,11 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
x
,
y_right
,
'r'
,
ls
=
'-'
,
label
=
r
'Optimal order'
,
linewidth
=
5
,
alpha
=
.
5
).
pop
()
ax2
.
grid
(
False
)
ax2
.
set_ylabel
(
r
'Optimal Renyi order'
,
fontsize
=
16
)
#
ax2.set_ylabel(r'Optimal Renyi order', fontsize=16)
ax2
.
set_ylim
([
0
,
200.
])
ax
.
legend
((
l1
,
l2
),
(
l1
.
get_label
(),
l2
.
get_label
()),
loc
=
0
,
fontsize
=
13
)
#
ax.legend((l1, l2), (l1.get_label(), l2.get_label()), loc=0, fontsize=13)
ax
.
tick_params
(
labelsize
=
1
0
)
ax
.
tick_params
(
labelsize
=
1
4
)
plot_filename
=
os
.
path
.
join
(
figures_dir
,
'partition.pdf'
)
print
(
'Saving the graph to '
+
plot_filename
)
fig
.
savefig
(
plot_filename
,
bbox_inches
=
'tight'
,
dpi
=
800
)
...
...
@@ -387,7 +389,7 @@ def main(argv):
figures_dir
=
os
.
path
.
expanduser
(
FLAGS
.
figures_dir
)
plot_comparison
(
figures_dir
,
simple_ind
,
conf_ind
,
simple_dep
,
conf_dep
)
plot_partition
(
figures_dir
,
conf_dep
,
Fals
e
)
plot_partition
(
figures_dir
,
conf_dep
,
Tru
e
)
plt
.
close
(
'all'
)
...
...
research/differential_privacy/pate/ICLR2018/
rdp_flow
.py
→
research/differential_privacy/pate/ICLR2018/
plots_for_slides
.py
View file @
911a0d23
...
...
@@ -13,9 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Plots two graphs illustrating cost of privacy per answered query.
PRESENTLY NOT USED.
"""Plots graphs for the slide deck.
A script in support of the PATE2 paper. The input is a file containing a numpy
array of votes, one query per row, one class per column. Ex:
...
...
@@ -23,7 +21,7 @@ array of votes, one query per row, one class per column. Ex:
31, 16, ..., 0
...
0, 86, ..., 438
The output
is written to a specified directory and consists of two pdf files
.
The output
graphs are visualized using the TkAgg backend
.
"""
from
__future__
import
absolute_import
from
__future__
import
division
...
...
@@ -35,43 +33,51 @@ import sys
sys
.
path
.
append
(
'..'
)
# Main modules reside in the parent directory.
from
absl
import
app
from
absl
import
flags
import
matplotlib
matplotlib
.
use
(
'TkAgg'
)
import
matplotlib.pyplot
as
plt
# pylint: disable=g-import-not-at-top
import
numpy
as
np
import
core
as
pate
import
random
plt
.
style
.
use
(
'ggplot'
)
FLAGS
=
flags
.
FLAGS
flags
.
DEFINE_string
(
'counts_file'
,
''
,
'Counts file.'
)
flags
.
DEFINE_string
(
'counts_file'
,
None
,
'Counts file.'
)
flags
.
DEFINE_string
(
'figures_dir'
,
''
,
'Path where figures are written to.'
)
flags
.
DEFINE_boolean
(
'transparent'
,
False
,
'Set background to transparent.'
)
flags
.
mark_flag_as_required
(
'counts_file'
)
def
plot_rdp_curve_per_example
(
votes
,
sigmas
):
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.5
def
setup_plot
():
fig
,
ax
=
plt
.
subplots
()
fig
.
set_figheight
(
4.5
)
fig
.
set_figwidth
(
4.7
)
styles
=
[
':'
,
'-'
]
labels
=
[
'ex1'
,
'ex2'
]
if
FLAGS
.
transparent
:
fig
.
patch
.
set_alpha
(
0
)
return
fig
,
ax
for
i
in
xrange
(
votes
.
shape
[
0
]):
print
(
sorted
(
votes
[
i
,],
reverse
=
True
)[:
10
])
def
plot_rdp_curve_per_example
(
votes
,
sigmas
):
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.001
fig
,
ax
=
setup_plot
()
for
i
in
range
(
votes
.
shape
[
0
]):
for
sigma
in
sigmas
:
logq
=
pate
.
compute_logq_gaussian
(
votes
[
i
,],
sigma
)
rdp
=
pate
.
rdp_gaussian
(
logq
,
sigma
,
orders
)
ax
.
plot
(
orders
,
rdp
,
label
=
r
'{} $\sigma$={}'
.
format
(
labels
[
i
],
int
(
sigma
))
,
l
inestyle
=
styles
[
i
]
,
alpha
=
1.
,
l
abel
=
r
'Data-dependent bound, $\sigma$={}'
.
format
(
int
(
sigma
))
,
linewidth
=
5
)
for
sigma
in
sigmas
:
...
...
@@ -79,23 +85,47 @@ def plot_rdp_curve_per_example(votes, sigmas):
orders
,
pate
.
rdp_data_independent_gaussian
(
sigma
,
orders
),
alpha
=
.
3
,
label
=
r
'Data-ind bound $\sigma$={}'
.
format
(
int
(
sigma
)),
label
=
r
'Data-ind
ependent
bound
,
$\sigma$={}'
.
format
(
int
(
sigma
)),
linewidth
=
10
)
plt
.
yticks
([
0
,
.
01
])
plt
.
xlabel
(
r
'Order $\lambda$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\lambda$'
,
fontsize
=
16
)
plt
.
xlim
(
xmin
=
1
,
xmax
=
100
)
plt
.
ylim
(
ymin
=
0
)
plt
.
xticks
([
1
,
20
,
40
,
60
,
80
,
100
])
plt
.
yticks
([
0
,
.
0025
,
.
005
,
.
0075
,
.
01
])
plt
.
xlabel
(
r
'Order $\alpha$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\alpha$'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
fout_name
=
os
.
path
.
join
(
FLAGS
.
figures_dir
,
'rdp_flow1.pdf'
)
print
(
'Saving the graph to '
+
fout_name
)
fig
.
savefig
(
fout_name
,
bbox_inches
=
'tight'
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
show
()
def
plot_rdp_of_sigma
(
v
,
order
):
sigmas
=
np
.
linspace
(
1.
,
1000.
,
endpoint
=
True
,
num
=
1000
)
fig
,
ax
=
setup_plot
()
y
=
np
.
zeros
(
len
(
sigmas
))
for
i
,
sigma
in
enumerate
(
sigmas
):
logq
=
pate
.
compute_logq_gaussian
(
v
,
sigma
)
y
[
i
]
=
pate
.
rdp_gaussian
(
logq
,
sigma
,
order
)
ax
.
plot
(
sigmas
,
y
,
alpha
=
.
8
,
linewidth
=
5
)
plt
.
xlim
(
xmin
=
1
,
xmax
=
1000
)
plt
.
ylim
(
ymin
=
0
)
# plt.yticks([0, .0004, .0008, .0012])
ax
.
tick_params
(
labelleft
=
'off'
)
plt
.
xlabel
(
r
'Noise $\sigma$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP at order $\alpha={}$'
.
format
(
order
),
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
# plt.legend(loc=0, fontsize=13)
plt
.
show
()
def
compute_rdp_curve
(
votes
,
threshold
,
sigma1
,
sigma2
,
orders
,
target_answered
):
target_answered
):
rdp_cum
=
np
.
zeros
(
len
(
orders
))
answered
=
0
for
i
,
v
in
enumerate
(
votes
):
...
...
@@ -115,46 +145,138 @@ def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
def
plot_rdp_total
(
votes
,
sigmas
):
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
100
)
orders
[
0
]
=
1.
5
orders
[
0
]
=
1.
1
fig
,
ax
=
plt
.
sub
plot
s
()
fig
.
set_figheight
(
4.5
)
fig
.
set_figwidth
(
4.7
)
fig
,
ax
=
setup_
plot
()
target_answered
=
2000
for
sigma
in
sigmas
:
rdp
=
compute_rdp_curve
(
votes
,
5000
,
1000
,
sigma
,
orders
,
2000
)
rdp
=
compute_rdp_curve
(
votes
,
5000
,
1000
,
sigma
,
orders
,
target_answered
)
ax
.
plot
(
orders
,
rdp
,
alpha
=
.
8
,
label
=
r
'$\sigma$={}'
.
format
(
int
(
sigma
)),
label
=
r
'
Data-dependent bound,
$\sigma$={}'
.
format
(
int
(
sigma
)),
linewidth
=
5
)
plt
.
xlabel
(
r
'Order $\lambda$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\lambda$'
,
fontsize
=
16
)
# for sigma in sigmas:
# ax.plot(
# orders,
# target_answered * pate.rdp_data_independent_gaussian(sigma, orders),
# alpha=.3,
# label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)),
# linewidth=10)
plt
.
xlim
(
xmin
=
1
,
xmax
=
100
)
plt
.
ylim
(
ymin
=
0
)
plt
.
xticks
([
1
,
20
,
40
,
60
,
80
,
100
])
plt
.
yticks
([
0
,
.
0005
,
.
001
,
.
0015
,
.
002
])
plt
.
xlabel
(
r
'Order $\alpha$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\alpha$'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
show
()
def
plot_data_ind_curve
():
fig
,
ax
=
setup_plot
()
orders
=
np
.
linspace
(
1.
,
10.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.01
ax
.
plot
(
orders
,
pate
.
rdp_data_independent_gaussian
(
1.
,
orders
),
alpha
=
.
5
,
color
=
'gray'
,
linewidth
=
10
)
# plt.yticks([])
plt
.
xlim
(
xmin
=
1
,
xmax
=
10
)
plt
.
ylim
(
ymin
=
0
)
plt
.
xticks
([
1
,
3
,
5
,
7
,
9
])
ax
.
tick_params
(
labelsize
=
14
)
plt
.
show
()
def
plot_two_data_ind_curves
():
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.001
fig
,
ax
=
setup_plot
()
for
sigma
in
[
100
,
150
]:
ax
.
plot
(
orders
,
pate
.
rdp_data_independent_gaussian
(
sigma
,
orders
),
alpha
=
.
3
,
label
=
r
'Data-independent bound, $\sigma$={}'
.
format
(
int
(
sigma
)),
linewidth
=
10
)
plt
.
xlim
(
xmin
=
1
,
xmax
=
100
)
plt
.
ylim
(
ymin
=
0
)
plt
.
xticks
([
1
,
20
,
40
,
60
,
80
,
100
])
plt
.
yticks
([
0
,
.
0025
,
.
005
,
.
0075
,
.
01
])
plt
.
xlabel
(
r
'Order $\alpha$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\alpha$'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
fout_name
=
os
.
path
.
join
(
FLAGS
.
figures_dir
,
'rdp_flow2.pdf'
)
print
(
'Saving the graph to '
+
fout_name
)
fig
.
savefig
(
fout_name
,
bbox_inches
=
'tight'
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
show
()
def
scatter_plot
(
votes
,
threshold
,
sigma1
,
sigma2
,
order
):
fig
,
ax
=
setup_plot
()
x
=
[]
y
=
[]
for
i
,
v
in
enumerate
(
votes
):
if
threshold
is
not
None
and
sigma1
is
not
None
:
q_step1
=
math
.
exp
(
pate
.
compute_logpr_answered
(
threshold
,
sigma1
,
v
))
else
:
q_step1
=
1.
if
random
.
random
()
<
q_step1
:
logq_step2
=
pate
.
compute_logq_gaussian
(
v
,
sigma2
)
x
.
append
(
max
(
v
))
y
.
append
(
pate
.
rdp_gaussian
(
logq_step2
,
sigma2
,
order
))
print
(
'Selected {} queries.'
.
format
(
len
(
x
)))
# Plot the data-independent curve:
# data_ind = pate.rdp_data_independent_gaussian(sigma, order)
# plt.plot([0, 5000], [data_ind, data_ind], color='tab:blue', linestyle='-', linewidth=2)
ax
.
set_yscale
(
'log'
)
plt
.
xlim
(
xmin
=
0
,
xmax
=
5000
)
plt
.
ylim
(
ymin
=
1e-300
,
ymax
=
1
)
plt
.
yticks
([
1
,
1e-100
,
1e-200
,
1e-300
])
plt
.
scatter
(
x
,
y
,
s
=
1
,
alpha
=
0.5
)
plt
.
ylabel
(
r
'RDP at $\alpha={}$'
.
format
(
order
),
fontsize
=
16
)
plt
.
xlabel
(
r
'max count'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
plt
.
show
()
def
main
(
argv
):
del
argv
# Unused.
fin_name
=
os
.
path
.
expanduser
(
FLAGS
.
counts_file
)
print
(
'Reading raw votes from '
+
fin_name
)
sys
.
stdout
.
flush
()
votes
=
np
.
load
(
fin_name
)
votes
=
votes
[:
12000
,]
# truncate to 4000 samples
plot_data_ind_curve
(
)
plot_two_data_ind_curves
()
v1
=
[
2550
,
2200
,
250
]
# based on votes[2,]
v2
=
[
2600
,
2200
,
200
]
# based on votes[381,]
plot_rdp_curve_per_example
(
np
.
array
([
v1
,
v2
]),
(
100.
,
150.
))
# v2 = [2600, 2200, 200] # based on votes[381,]
plot_rdp_curve_per_example
(
np
.
array
([
v1
]),
(
100.
,
150.
))
plot_rdp_of_sigma
(
np
.
array
(
v1
),
20.
)
votes
=
np
.
load
(
fin_name
)
plot_rdp_total
(
votes
,
(
100.
,
150.
))
plot_rdp_total
(
votes
[:
12000
,
],
(
100.
,
150.
))
scatter_plot
(
votes
[:
6000
,
],
None
,
None
,
100
,
20
)
# w/o thresholding
scatter_plot
(
votes
[:
6000
,
],
3500
,
1500
,
100
,
20
)
# with thresholding
if
__name__
==
'__main__'
:
...
...
research/differential_privacy/pate/README.md
View file @
911a0d23
...
...
@@ -15,7 +15,7 @@ dataset.
The framework consists of _teachers_, the _student_ model, and the _aggregator_. The
teachers are models trained on disjoint subsets of the training datasets. The student
model has access to an insensitive (
i.e
., public) unlabelled dataset, which is labelled by
model has access to an insensitive (
e.g
., public) unlabelled dataset, which is labelled by
interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies
outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or
refuses to answer.
...
...
@@ -57,13 +57,13 @@ $ python smooth_sensitivity_test.py
## Files in this directory
*
core.py
---
RDP privacy accountant for several vote aggregators (GNMax,
*
core.py
—
RDP privacy accountant for several vote aggregators (GNMax,
Threshold, Laplace).
*
smooth_sensitivity.py
---
Smooth sensitivity analysis for GNMax and
*
smooth_sensitivity.py
—
Smooth sensitivity analysis for GNMax and
Threshold mechanisms.
*
core_test.py and smooth_sensitivity_test.py
---
Unit tests for the
*
core_test.py and smooth_sensitivity_test.py
—
Unit tests for the
files above.
## Contact information
...
...
research/differential_privacy/pate/core_test.py
View file @
911a0d23
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Tests for
google3.experimental.brain.privacy.
pate.
pat
e."""
"""Tests for pate.
cor
e."""
from
__future__
import
absolute_import
from
__future__
import
division
...
...
research/differential_privacy/pate/smooth_sensitivity_test.py
View file @
911a0d23
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Tests for
google3.experimental.brain.privacy.pate.pate_
smooth_sensitivity."""
"""Tests for
pate.
smooth_sensitivity."""
from
__future__
import
absolute_import
from
__future__
import
division
...
...
research/differential_privacy/privacy_accountant/python/gaussian_moments.py
deleted
100644 → 0
View file @
bc0edaf8
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A standalone utility for computing the log moments.
The utility for computing the log moments. It consists of two methods.
compute_log_moment(q, sigma, T, lmbd) computes the log moment with sampling
probability q, noise sigma, order lmbd, and T steps. get_privacy_spent computes
delta (or eps) given log moments and eps (or delta).
Example use:
Suppose that we have run an algorithm with parameters, an array of
(q1, sigma1, T1) ... (qk, sigmak, Tk), and we wish to compute eps for a given
delta. The example code would be:
max_lmbd = 32
lmbds = xrange(1, max_lmbd + 1)
log_moments = []
for lmbd in lmbds:
log_moment = 0
for q, sigma, T in parameters:
log_moment += compute_log_moment(q, sigma, T, lmbd)
log_moments.append((lmbd, log_moment))
eps, delta = get_privacy_spent(log_moments, target_delta=delta)
To verify that the I1 >= I2 (see comments in GaussianMomentsAccountant in
accountant.py for the context), run the same loop above with verify=True
passed to compute_log_moment.
"""
from
__future__
import
print_function
import
math
import
sys
import
numpy
as
np
import
scipy.integrate
as
integrate
import
scipy.stats
from
six.moves
import
xrange
from
sympy.mpmath
import
mp
def
_to_np_float64
(
v
):
if
math
.
isnan
(
v
)
or
math
.
isinf
(
v
):
return
np
.
inf
return
np
.
float64
(
v
)
######################
# FLOAT64 ARITHMETIC #
######################
def
pdf_gauss
(
x
,
sigma
,
mean
=
0
):
return
scipy
.
stats
.
norm
.
pdf
(
x
,
loc
=
mean
,
scale
=
sigma
)
def
cropped_ratio
(
a
,
b
):
if
a
<
1E-50
and
b
<
1E-50
:
return
1.
else
:
return
a
/
b
def
integral_inf
(
fn
):
integral
,
_
=
integrate
.
quad
(
fn
,
-
np
.
inf
,
np
.
inf
)
return
integral
def
integral_bounded
(
fn
,
lb
,
ub
):
integral
,
_
=
integrate
.
quad
(
fn
,
lb
,
ub
)
return
integral
def
distributions
(
sigma
,
q
):
mu0
=
lambda
y
:
pdf_gauss
(
y
,
sigma
=
sigma
,
mean
=
0.0
)
mu1
=
lambda
y
:
pdf_gauss
(
y
,
sigma
=
sigma
,
mean
=
1.0
)
mu
=
lambda
y
:
(
1
-
q
)
*
mu0
(
y
)
+
q
*
mu1
(
y
)
return
mu0
,
mu1
,
mu
def
compute_a
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
lmbd_int
=
int
(
math
.
ceil
(
lmbd
))
if
lmbd_int
==
0
:
return
1.0
a_lambda_first_term_exact
=
0
a_lambda_second_term_exact
=
0
for
i
in
xrange
(
lmbd_int
+
1
):
coef_i
=
scipy
.
special
.
binom
(
lmbd_int
,
i
)
*
(
q
**
i
)
s1
,
s2
=
0
,
0
for
j
in
xrange
(
i
+
1
):
coef_j
=
scipy
.
special
.
binom
(
i
,
j
)
*
(
-
1
)
**
(
i
-
j
)
s1
+=
coef_j
*
np
.
exp
((
j
*
j
-
j
)
/
(
2.0
*
(
sigma
**
2
)))
s2
+=
coef_j
*
np
.
exp
((
j
*
j
+
j
)
/
(
2.0
*
(
sigma
**
2
)))
a_lambda_first_term_exact
+=
coef_i
*
s1
a_lambda_second_term_exact
+=
coef_i
*
s2
a_lambda_exact
=
((
1.0
-
q
)
*
a_lambda_first_term_exact
+
q
*
a_lambda_second_term_exact
)
if
verbose
:
print
(
"A: by binomial expansion {} = {} + {}"
.
format
(
a_lambda_exact
,
(
1.0
-
q
)
*
a_lambda_first_term_exact
,
q
*
a_lambda_second_term_exact
))
return
_to_np_float64
(
a_lambda_exact
)
def
compute_b
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
mu0
,
_
,
mu
=
distributions
(
sigma
,
q
)
b_lambda_fn
=
lambda
z
:
mu0
(
z
)
*
np
.
power
(
cropped_ratio
(
mu0
(
z
),
mu
(
z
)),
lmbd
)
b_lambda
=
integral_inf
(
b_lambda_fn
)
m
=
sigma
**
2
*
(
np
.
log
((
2.
-
q
)
/
(
1.
-
q
))
+
1.
/
(
2
*
sigma
**
2
))
b_fn
=
lambda
z
:
(
np
.
power
(
mu0
(
z
)
/
mu
(
z
),
lmbd
)
-
np
.
power
(
mu
(
-
z
)
/
mu0
(
z
),
lmbd
))
if
verbose
:
print
(
"M ="
,
m
)
print
(
"f(-M) = {} f(M) = {}"
.
format
(
b_fn
(
-
m
),
b_fn
(
m
)))
assert
b_fn
(
-
m
)
<
0
and
b_fn
(
m
)
<
0
b_lambda_int1_fn
=
lambda
z
:
(
mu0
(
z
)
*
np
.
power
(
cropped_ratio
(
mu0
(
z
),
mu
(
z
)),
lmbd
))
b_lambda_int2_fn
=
lambda
z
:
(
mu0
(
z
)
*
np
.
power
(
cropped_ratio
(
mu
(
z
),
mu0
(
z
)),
lmbd
))
b_int1
=
integral_bounded
(
b_lambda_int1_fn
,
-
m
,
m
)
b_int2
=
integral_bounded
(
b_lambda_int2_fn
,
-
m
,
m
)
a_lambda_m1
=
compute_a
(
sigma
,
q
,
lmbd
-
1
)
b_bound
=
a_lambda_m1
+
b_int1
-
b_int2
if
verbose
:
print
(
"B: by numerical integration"
,
b_lambda
)
print
(
"B must be no more than "
,
b_bound
)
print
(
b_lambda
,
b_bound
)
return
_to_np_float64
(
b_lambda
)
###########################
# MULTIPRECISION ROUTINES #
###########################
def
pdf_gauss_mp
(
x
,
sigma
,
mean
):
return
mp
.
mpf
(
1.
)
/
mp
.
sqrt
(
mp
.
mpf
(
"2."
)
*
sigma
**
2
*
mp
.
pi
)
*
mp
.
exp
(
-
(
x
-
mean
)
**
2
/
(
mp
.
mpf
(
"2."
)
*
sigma
**
2
))
def
integral_inf_mp
(
fn
):
integral
,
_
=
mp
.
quad
(
fn
,
[
-
mp
.
inf
,
mp
.
inf
],
error
=
True
)
return
integral
def
integral_bounded_mp
(
fn
,
lb
,
ub
):
integral
,
_
=
mp
.
quad
(
fn
,
[
lb
,
ub
],
error
=
True
)
return
integral
def
distributions_mp
(
sigma
,
q
):
mu0
=
lambda
y
:
pdf_gauss_mp
(
y
,
sigma
=
sigma
,
mean
=
mp
.
mpf
(
0
))
mu1
=
lambda
y
:
pdf_gauss_mp
(
y
,
sigma
=
sigma
,
mean
=
mp
.
mpf
(
1
))
mu
=
lambda
y
:
(
1
-
q
)
*
mu0
(
y
)
+
q
*
mu1
(
y
)
return
mu0
,
mu1
,
mu
def
compute_a_mp
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
lmbd_int
=
int
(
math
.
ceil
(
lmbd
))
if
lmbd_int
==
0
:
return
1.0
mu0
,
mu1
,
mu
=
distributions_mp
(
sigma
,
q
)
a_lambda_fn
=
lambda
z
:
mu
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
a_lambda_first_term_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
a_lambda_second_term_fn
=
lambda
z
:
mu1
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
a_lambda
=
integral_inf_mp
(
a_lambda_fn
)
a_lambda_first_term
=
integral_inf_mp
(
a_lambda_first_term_fn
)
a_lambda_second_term
=
integral_inf_mp
(
a_lambda_second_term_fn
)
if
verbose
:
print
(
"A: by numerical integration {} = {} + {}"
.
format
(
a_lambda
,
(
1
-
q
)
*
a_lambda_first_term
,
q
*
a_lambda_second_term
))
return
_to_np_float64
(
a_lambda
)
def
compute_b_mp
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
lmbd_int
=
int
(
math
.
ceil
(
lmbd
))
if
lmbd_int
==
0
:
return
1.0
mu0
,
_
,
mu
=
distributions_mp
(
sigma
,
q
)
b_lambda_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu0
(
z
)
/
mu
(
z
))
**
lmbd_int
b_lambda
=
integral_inf_mp
(
b_lambda_fn
)
m
=
sigma
**
2
*
(
mp
.
log
((
2
-
q
)
/
(
1
-
q
))
+
1
/
(
2
*
(
sigma
**
2
)))
b_fn
=
lambda
z
:
((
mu0
(
z
)
/
mu
(
z
))
**
lmbd_int
-
(
mu
(
-
z
)
/
mu0
(
z
))
**
lmbd_int
)
if
verbose
:
print
(
"M ="
,
m
)
print
(
"f(-M) = {} f(M) = {}"
.
format
(
b_fn
(
-
m
),
b_fn
(
m
)))
assert
b_fn
(
-
m
)
<
0
and
b_fn
(
m
)
<
0
b_lambda_int1_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu0
(
z
)
/
mu
(
z
))
**
lmbd_int
b_lambda_int2_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
b_int1
=
integral_bounded_mp
(
b_lambda_int1_fn
,
-
m
,
m
)
b_int2
=
integral_bounded_mp
(
b_lambda_int2_fn
,
-
m
,
m
)
a_lambda_m1
=
compute_a_mp
(
sigma
,
q
,
lmbd
-
1
)
b_bound
=
a_lambda_m1
+
b_int1
-
b_int2
if
verbose
:
print
(
"B by numerical integration"
,
b_lambda
)
print
(
"B must be no more than "
,
b_bound
)
assert
b_lambda
<
b_bound
+
1e-5
return
_to_np_float64
(
b_lambda
)
def
_compute_delta
(
log_moments
,
eps
):
"""Compute delta for given log_moments and eps.
Args:
log_moments: the log moments of privacy loss, in the form of pairs
of (moment_order, log_moment)
eps: the target epsilon.
Returns:
delta
"""
min_delta
=
1.0
for
moment_order
,
log_moment
in
log_moments
:
if
moment_order
==
0
:
continue
if
math
.
isinf
(
log_moment
)
or
math
.
isnan
(
log_moment
):
sys
.
stderr
.
write
(
"The %d-th order is inf or Nan
\n
"
%
moment_order
)
continue
if
log_moment
<
moment_order
*
eps
:
min_delta
=
min
(
min_delta
,
math
.
exp
(
log_moment
-
moment_order
*
eps
))
return
min_delta
def
_compute_eps
(
log_moments
,
delta
):
"""Compute epsilon for given log_moments and delta.
Args:
log_moments: the log moments of privacy loss, in the form of pairs
of (moment_order, log_moment)
delta: the target delta.
Returns:
epsilon
"""
min_eps
=
float
(
"inf"
)
for
moment_order
,
log_moment
in
log_moments
:
if
moment_order
==
0
:
continue
if
math
.
isinf
(
log_moment
)
or
math
.
isnan
(
log_moment
):
sys
.
stderr
.
write
(
"The %d-th order is inf or Nan
\n
"
%
moment_order
)
continue
min_eps
=
min
(
min_eps
,
(
log_moment
-
math
.
log
(
delta
))
/
moment_order
)
return
min_eps
def
compute_log_moment
(
q
,
sigma
,
steps
,
lmbd
,
verify
=
False
,
verbose
=
False
):
"""Compute the log moment of Gaussian mechanism for given parameters.
Args:
q: the sampling ratio.
sigma: the noise sigma.
steps: the number of steps.
lmbd: the moment order.
verify: if False, only compute the symbolic version. If True, computes
both symbolic and numerical solutions and verifies the results match.
verbose: if True, print out debug information.
Returns:
the log moment with type np.float64, could be np.inf.
"""
moment
=
compute_a
(
sigma
,
q
,
lmbd
,
verbose
=
verbose
)
if
verify
:
mp
.
dps
=
50
moment_a_mp
=
compute_a_mp
(
sigma
,
q
,
lmbd
,
verbose
=
verbose
)
moment_b_mp
=
compute_b_mp
(
sigma
,
q
,
lmbd
,
verbose
=
verbose
)
np
.
testing
.
assert_allclose
(
moment
,
moment_a_mp
,
rtol
=
1e-10
)
if
not
np
.
isinf
(
moment_a_mp
):
# The following test fails for (1, np.inf)!
np
.
testing
.
assert_array_less
(
moment_b_mp
,
moment_a_mp
)
if
np
.
isinf
(
moment
):
return
np
.
inf
else
:
return
np
.
log
(
moment
)
*
steps
def
get_privacy_spent
(
log_moments
,
target_eps
=
None
,
target_delta
=
None
):
"""Compute delta (or eps) for given eps (or delta) from log moments.
Args:
log_moments: array of (moment_order, log_moment) pairs.
target_eps: if not None, the epsilon for which we would like to compute
corresponding delta value.
target_delta: if not None, the delta for which we would like to compute
corresponding epsilon value. Exactly one of target_eps and target_delta
is None.
Returns:
eps, delta pair
"""
assert
(
target_eps
is
None
)
^
(
target_delta
is
None
)
assert
not
((
target_eps
is
None
)
and
(
target_delta
is
None
))
if
target_eps
is
not
None
:
return
(
target_eps
,
_compute_delta
(
log_moments
,
target_eps
))
else
:
return
(
_compute_eps
(
log_moments
,
target_delta
),
target_delta
)
research/differential_privacy/privacy_accountant/python/rdp_accountant.py
0 → 100644
View file @
911a0d23
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""RDP analysis of the Gaussian-with-sampling mechanism.
Functionality for computing Renyi differential privacy of an additive Gaussian
mechanism with sampling. Its public interface consists of two methods:
compute_rdp(q, sigma, T, order) computes RDP with sampling probability q,
noise sigma, T steps at a given order.
get_privacy_spent computes delta (or eps) given RDP and eps (or delta).
Example use:
Suppose that we have run an algorithm with parameters, an array of
(q1, sigma1, T1) ... (qk, sigma_k, Tk), and we wish to compute eps for a given
delta. The example code would be:
max_order = 32
orders = range(2, max_order + 1)
rdp = np.zeros_like(orders, dtype=float)
for q, sigma, T in parameters:
rdp += rdp_accountant.compute_rdp(q, sigma, T, orders)
eps, _, opt_order = rdp_accountant.get_privacy_spent(rdp, target_delta=delta)
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
sys
from
absl
import
app
from
absl
import
flags
import
numpy
as
np
from
scipy
import
special
FLAGS
=
flags
.
FLAGS
flags
.
DEFINE_boolean
(
"rdp_verbose"
,
False
,
"Output intermediate results for RDP computation."
)
FLAGS
(
sys
.
argv
)
# Load the flags (including on import)
########################
# LOG-SPACE ARITHMETIC #
########################
def
_log_add
(
logx
,
logy
):
"""Add two numbers in the log space."""
a
,
b
=
min
(
logx
,
logy
),
max
(
logx
,
logy
)
if
a
==
-
np
.
inf
:
# adding 0
return
b
# Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
return
math
.
log1p
(
math
.
exp
(
a
-
b
))
+
b
# log1p(x) = log(x + 1)
def
_log_sub
(
logx
,
logy
):
"""Subtract two numbers in the log space. Answer must be positive."""
if
logy
==
-
np
.
inf
:
# subtracting 0
return
logx
assert
logx
>
logy
try
:
# Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
return
math
.
log
(
math
.
expm1
(
logx
-
logy
))
+
logy
# expm1(x) = exp(x) - 1
except
OverflowError
:
return
logx
def
_log_print
(
logx
):
"""Pretty print."""
if
logx
<
math
.
log
(
sys
.
float_info
.
max
):
return
"{}"
.
format
(
math
.
exp
(
logx
))
else
:
return
"exp({})"
.
format
(
logx
)
def
_compute_log_a_int
(
q
,
sigma
,
alpha
):
"""Compute log(A_alpha) for integer alpha."""
assert
isinstance
(
alpha
,
(
int
,
long
))
# The first and second terms of A_alpha in the log space:
log_a1
,
log_a2
=
-
np
.
inf
,
-
np
.
inf
for
i
in
range
(
alpha
+
1
):
# Compute in the log space. Extra care needed for q = 0 or 1.
log_coef_i
=
math
.
log
(
special
.
binom
(
alpha
,
i
))
if
q
>
0
:
log_coef_i
+=
i
*
math
.
log
(
q
)
elif
i
>
0
:
continue
# The term is 0, skip the rest.
if
q
<
1.0
:
log_coef_i
+=
(
alpha
-
i
)
*
math
.
log
(
1
-
q
)
elif
i
<
alpha
:
continue
# The term is 0, skip the rest.
s1
=
log_coef_i
+
(
i
*
i
-
i
)
/
(
2.0
*
(
sigma
**
2
))
s2
=
log_coef_i
+
(
i
*
i
+
i
)
/
(
2.0
*
(
sigma
**
2
))
log_a1
=
_log_add
(
log_a1
,
s1
)
log_a2
=
_log_add
(
log_a2
,
s2
)
log_a
=
_log_add
(
math
.
log
(
1
-
q
)
+
log_a1
,
math
.
log
(
q
)
+
log_a2
)
if
FLAGS
.
rdp_verbose
:
print
(
"A: by binomial expansion {} = {} + {}"
.
format
(
_log_print
(
log_a
),
_log_print
(
math
.
log
(
1
-
q
)
+
log_a1
),
_log_print
(
math
.
log
(
q
)
+
log_a2
)))
return
float
(
log_a
)
def
_compute_log_a_frac
(
q
,
sigma
,
alpha
):
"""Compute log(A_alpha) for fractional alpha."""
# The four parts of A_alpha in the log space:
log_a11
,
log_a12
=
-
np
.
inf
,
-
np
.
inf
log_a21
,
log_a22
=
-
np
.
inf
,
-
np
.
inf
i
=
0
z0
,
_
=
_compute_zs
(
sigma
,
q
)
while
True
:
# do ... until loop
coef
=
special
.
binom
(
alpha
,
i
)
log_coef
=
math
.
log
(
abs
(
coef
))
j
=
alpha
-
i
log_t1
=
log_coef
+
i
*
math
.
log
(
q
)
+
j
*
math
.
log
(
1
-
q
)
log_t2
=
log_coef
+
j
*
math
.
log
(
q
)
+
i
*
math
.
log
(
1
-
q
)
log_e11
=
math
.
log
(.
5
)
+
_log_erfc
((
i
-
z0
)
/
(
math
.
sqrt
(
2
)
*
sigma
))
log_e12
=
math
.
log
(.
5
)
+
_log_erfc
((
z0
-
j
)
/
(
math
.
sqrt
(
2
)
*
sigma
))
log_e21
=
math
.
log
(.
5
)
+
_log_erfc
((
i
-
(
z0
-
1
))
/
(
math
.
sqrt
(
2
)
*
sigma
))
log_e22
=
math
.
log
(.
5
)
+
_log_erfc
((
z0
-
1
-
j
)
/
(
math
.
sqrt
(
2
)
*
sigma
))
log_s11
=
log_t1
+
(
i
*
i
-
i
)
/
(
2
*
(
sigma
**
2
))
+
log_e11
log_s12
=
log_t2
+
(
j
*
j
-
j
)
/
(
2
*
(
sigma
**
2
))
+
log_e12
log_s21
=
log_t1
+
(
i
*
i
+
i
)
/
(
2
*
(
sigma
**
2
))
+
log_e21
log_s22
=
log_t2
+
(
j
*
j
+
j
)
/
(
2
*
(
sigma
**
2
))
+
log_e22
if
coef
>
0
:
log_a11
=
_log_add
(
log_a11
,
log_s11
)
log_a12
=
_log_add
(
log_a12
,
log_s12
)
log_a21
=
_log_add
(
log_a21
,
log_s21
)
log_a22
=
_log_add
(
log_a22
,
log_s22
)
else
:
log_a11
=
_log_sub
(
log_a11
,
log_s11
)
log_a12
=
_log_sub
(
log_a12
,
log_s12
)
log_a21
=
_log_sub
(
log_a21
,
log_s21
)
log_a22
=
_log_sub
(
log_a22
,
log_s22
)
i
+=
1
if
max
(
log_s11
,
log_s21
,
log_s21
,
log_s22
)
<
-
30
:
break
log_a
=
_log_add
(
math
.
log
(
1.
-
q
)
+
_log_add
(
log_a11
,
log_a12
),
math
.
log
(
q
)
+
_log_add
(
log_a21
,
log_a22
))
return
log_a
def
_compute_log_a
(
q
,
sigma
,
alpha
):
if
float
(
alpha
).
is_integer
():
return
_compute_log_a_int
(
q
,
sigma
,
int
(
alpha
))
else
:
return
_compute_log_a_frac
(
q
,
sigma
,
alpha
)
def
_log_erfc
(
x
):
# Can be replaced with a single call to log_ntdr if available:
# return np.log(2.) + special.log_ntdr(-x * 2**.5)
r
=
special
.
erfc
(
x
)
if
r
==
0.0
:
# Using the Laurent series at infinity for the tail of the erfc function:
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
# To verify in Mathematica:
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
return
(
-
math
.
log
(
math
.
pi
)
/
2
-
math
.
log
(
x
)
-
x
**
2
-
.
5
*
x
**
-
2
+
.
625
*
x
**
-
4
-
37.
/
24.
*
x
**
-
6
+
353.
/
64.
*
x
**
-
8
)
else
:
return
math
.
log
(
r
)
def
_compute_zs
(
sigma
,
q
):
z0
=
sigma
**
2
*
math
.
log
(
1
/
q
-
1
)
+
.
5
z1
=
min
(
z0
-
2
,
z0
/
2
)
return
z0
,
z1
def
_compute_log_b0
(
sigma
,
q
,
alpha
,
z1
):
"""Return an approximation to B0 or None if failed to converge."""
z0
,
_
=
_compute_zs
(
sigma
,
q
)
s
,
log_term
,
log_b0
,
k
,
sign
,
max_log_term
=
0
,
1.
,
0
,
0
,
1
,
-
np
.
inf
# Keep adding new terms until precision is no longer preserved.
# Don't stop on the negative.
while
(
k
<
alpha
or
(
log_term
>
max_log_term
-
36
and
log_term
>
-
30
)
or
sign
<
0.
):
log_b1
=
k
*
(
k
-
2
*
z0
)
/
(
2
*
sigma
**
2
)
log_b2
=
_log_erfc
((
k
-
z1
)
/
(
math
.
sqrt
(
2
)
*
sigma
))
log_term
=
log_b0
+
log_b1
+
log_b2
max_log_term
=
max
(
max_log_term
,
log_term
)
s
+=
sign
*
math
.
exp
(
log_term
)
k
+=
1
# Maintain invariant: sign * exp(log_b0) = {-alpha choose k}
log_b0
+=
math
.
log
(
abs
(
-
alpha
-
k
+
1
))
-
math
.
log
(
k
)
sign
*=
-
1
if
s
==
0
:
# May happen if all terms are < 1e-324.
return
-
np
.
inf
if
s
<
0
or
math
.
log
(
s
)
<
max_log_term
-
25
:
# The series failed to converge.
return
None
c
=
math
.
log
(.
5
)
-
math
.
log
(
1
-
q
)
*
alpha
return
c
+
math
.
log
(
s
)
def
_bound_log_b1
(
sigma
,
q
,
alpha
,
z1
):
log_c
=
_log_add
(
math
.
log
(
1
-
q
),
math
.
log
(
q
)
+
(
2
*
z1
-
1.
)
/
(
2
*
sigma
**
2
))
return
math
.
log
(.
5
)
-
log_c
*
alpha
+
_log_erfc
(
z1
/
(
math
.
sqrt
(
2
)
*
sigma
))
def
_bound_log_b
(
q
,
sigma
,
alpha
):
"""Compute a numerically stable bound on log(B_alpha)."""
if
q
==
1.
:
# If the sampling rate is 100%, A and B are symmetric.
return
_compute_log_a
(
q
,
sigma
,
alpha
)
z0
,
z1
=
_compute_zs
(
sigma
,
q
)
log_b_bound
=
np
.
inf
# Puts a lower bound on B1: it cannot be less than its value at z0.
log_lb_b1
=
_bound_log_b1
(
sigma
,
q
,
alpha
,
z0
)
while
z0
-
z1
>
1e-3
:
m
=
(
z0
+
z1
)
/
2
log_b0
=
_compute_log_b0
(
sigma
,
q
,
alpha
,
m
)
if
log_b0
is
None
:
z0
=
m
continue
log_b1
=
_bound_log_b1
(
sigma
,
q
,
alpha
,
m
)
log_b_bound
=
min
(
log_b_bound
,
_log_add
(
log_b0
,
log_b1
))
log_b_min_bound
=
_log_add
(
log_b0
,
log_lb_b1
)
if
(
log_b_bound
<
0
or
log_b_min_bound
<
0
or
log_b_bound
>
log_b_min_bound
+
.
01
):
# If the bound is likely to be too loose, move z1 closer to z0 and repeat.
z1
=
m
else
:
break
return
log_b_bound
def
_log_bound_b_elementary
(
q
,
alpha
):
return
-
math
.
log
(
1
-
q
)
*
alpha
def
_compute_delta
(
orders
,
rdp
,
eps
):
"""Compute delta given an RDP curve and target epsilon.
Args:
orders: An array (or a scalar) of orders.
rdp: A list (or a scalar) of RDP guarantees.
eps: The target epsilon.
Returns:
Pair of (delta, optimal_order).
Raises:
ValueError: If input is malformed.
"""
orders_vec
=
np
.
atleast_1d
(
orders
)
rdp_vec
=
np
.
atleast_1d
(
rdp
)
if
len
(
orders_vec
)
!=
len
(
rdp_vec
):
raise
ValueError
(
"Input lists must have the same length."
)
deltas
=
np
.
exp
((
rdp_vec
-
eps
)
*
(
orders_vec
-
1
))
idx_opt
=
np
.
argmin
(
deltas
)
return
min
(
deltas
[
idx_opt
],
1.
),
orders_vec
[
idx_opt
]
def
_compute_eps
(
orders
,
rdp
,
delta
):
"""Compute epsilon given an RDP curve and target delta.
Args:
orders: An array (or a scalar) of orders.
rdp: A list (or a scalar) of RDP guarantees.
delta: The target delta.
Returns:
Pair of (eps, optimal_order).
Raises:
ValueError: If input is malformed.
"""
orders_vec
=
np
.
atleast_1d
(
orders
)
rdp_vec
=
np
.
atleast_1d
(
rdp
)
if
len
(
orders_vec
)
!=
len
(
rdp_vec
):
raise
ValueError
(
"Input lists must have the same length."
)
eps
=
rdp_vec
-
math
.
log
(
delta
)
/
(
orders_vec
-
1
)
idx_opt
=
np
.
nanargmin
(
eps
)
# Ignore NaNs
return
eps
[
idx_opt
],
orders_vec
[
idx_opt
]
def
_compute_rdp
(
q
,
sigma
,
steps
,
alpha
):
log_moment_a
=
_compute_log_a
(
q
,
sigma
,
alpha
-
1
)
log_bound_b
=
_log_bound_b_elementary
(
q
,
alpha
-
1
)
# does not require sigma
if
log_bound_b
<
log_moment_a
:
if
FLAGS
.
rdp_verbose
:
print
(
"Elementary bound suffices : {} < {}"
.
format
(
_log_print
(
log_bound_b
),
_log_print
(
log_moment_a
)))
else
:
log_bound_b2
=
_bound_log_b
(
q
,
sigma
,
alpha
-
1
)
if
math
.
isnan
(
log_bound_b2
):
if
FLAGS
.
rdp_verbose
:
print
(
"B bound failed to converge"
)
else
:
if
FLAGS
.
rdp_verbose
and
(
log_bound_b2
<
log_bound_b
):
print
(
"Elementary bound is stronger: {} < {}"
.
format
(
_log_print
(
log_bound_b2
),
_log_print
(
log_bound_b
)))
log_bound_b
=
min
(
log_bound_b
,
log_bound_b2
)
return
max
(
log_moment_a
,
log_bound_b
)
*
steps
/
(
alpha
-
1
)
def
compute_rdp
(
q
,
sigma
,
steps
,
orders
):
"""Compute RDP of Gaussian mechanism with sampling for given parameters.
Args:
q: The sampling ratio.
sigma: The noise sigma.
steps: The number of steps.
orders: An array (or a scalar) of RDP orders.
Returns:
The RDPs at all orders, can be np.inf.
"""
if
np
.
isscalar
(
orders
):
return
_compute_rdp
(
q
,
sigma
,
steps
,
orders
)
else
:
rdp
=
np
.
zeros_like
(
orders
,
dtype
=
float
)
for
i
,
order
in
enumerate
(
orders
):
rdp
[
i
]
=
_compute_rdp
(
q
,
sigma
,
steps
,
order
)
return
rdp
def
get_privacy_spent
(
orders
,
rdp
,
target_eps
=
None
,
target_delta
=
None
):
"""Compute delta (or eps) for given eps (or delta) from the RDP curve.
Args:
orders: An array (or a scalar) of RDP orders.
rdp: An array of RDP values.
target_eps: If not None, the epsilon for which we compute the corresponding
delta.
target_delta: If not None, the delta for which we compute the corresponding
epsilon. Exactly one of target_eps and target_delta must be None.
Returns:
eps, delta, opt_order.
Raises:
ValueError: If target_eps and target_delta are messed up.
"""
if
target_eps
is
None
and
target_delta
is
None
:
raise
ValueError
(
"Exactly one out of eps and delta must be None. (Both are)."
)
if
target_eps
is
not
None
and
target_delta
is
not
None
:
raise
ValueError
(
"Exactly one out of eps and delta must be None. (None is)."
)
if
target_eps
is
not
None
:
delta
,
opt_order
=
_compute_delta
(
orders
,
rdp
,
target_eps
)
return
target_eps
,
delta
,
opt_order
else
:
eps
,
opt_order
=
_compute_eps
(
orders
,
rdp
,
target_delta
)
return
eps
,
target_delta
,
opt_order
def
main
(
_
):
pass
if
__name__
==
"__main__"
:
app
.
run
(
main
)
research/differential_privacy/privacy_accountant/python/rdp_accountant_test.py
0 → 100644
View file @
911a0d23
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for rdp_accountant.py."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
absl.testing
import
absltest
import
mpmath
as
mp
import
numpy
as
np
import
rdp_accountant
class
TestGaussianMoments
(
absltest
.
TestCase
):
##############################
# MULTI-PRECISION ARITHMETIC #
##############################
def
_pdf_gauss_mp
(
self
,
x
,
sigma
,
mean
):
return
1.
/
mp
.
sqrt
(
2.
*
sigma
**
2
*
mp
.
pi
)
*
mp
.
exp
(
-
(
x
-
mean
)
**
2
/
(
2.
*
sigma
**
2
))
def
_integral_inf_mp
(
self
,
fn
):
integral
,
_
=
mp
.
quad
(
fn
,
[
-
mp
.
inf
,
mp
.
inf
],
error
=
True
,
maxdegree
=
7
)
# maxdegree = 6 is not enough
return
integral
def
_integral_bounded_mp
(
self
,
fn
,
lb
,
ub
):
integral
,
_
=
mp
.
quad
(
fn
,
[
lb
,
ub
],
error
=
True
)
return
integral
def
_distributions_mp
(
self
,
sigma
,
q
):
mu0
=
lambda
y
:
self
.
_pdf_gauss_mp
(
y
,
sigma
=
sigma
,
mean
=
0
)
mu1
=
lambda
y
:
self
.
_pdf_gauss_mp
(
y
,
sigma
=
sigma
,
mean
=
1.
)
mu
=
lambda
y
:
(
1
-
q
)
*
mu0
(
y
)
+
q
*
mu1
(
y
)
return
mu0
,
mu1
,
mu
def
compute_a_mp
(
self
,
sigma
,
q
,
order
,
verbose
=
False
):
"""Compute A_lambda for arbitrary lambda by numerical integration."""
mp
.
dps
=
100
mu0
,
mu1
,
mu
=
self
.
_distributions_mp
(
sigma
,
q
)
a_lambda_fn
=
lambda
z
:
mu
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
order
a_lambda
=
self
.
_integral_inf_mp
(
a_lambda_fn
)
if
verbose
:
a_lambda_first_term_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
order
a_lambda_second_term_fn
=
lambda
z
:
mu1
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
order
a_lambda_first_term
=
self
.
_integral_inf_mp
(
a_lambda_first_term_fn
)
a_lambda_second_term
=
self
.
_integral_inf_mp
(
a_lambda_second_term_fn
)
print
(
"A: by numerical integration {} = {} + {}"
.
format
(
a_lambda
,
(
1
-
q
)
*
a_lambda_first_term
,
q
*
a_lambda_second_term
))
return
a_lambda
def
compute_b_mp
(
self
,
sigma
,
q
,
order
,
verbose
=
False
):
"""Compute B_lambda for arbitrary lambda by numerical integration."""
mu0
,
_
,
mu
=
self
.
_distributions_mp
(
sigma
,
q
)
b_lambda_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu0
(
z
)
/
mu
(
z
))
**
order
b_numeric
=
self
.
_integral_inf_mp
(
b_lambda_fn
)
if
verbose
:
_
,
z1
=
rdp_accountant
.
_compute_zs
(
sigma
,
q
)
print
(
"z1 = "
,
z1
)
print
(
"x in the Taylor series = "
,
q
/
(
1
-
q
)
*
np
.
exp
(
(
2
*
z1
-
1
)
/
(
2
*
sigma
**
2
)))
b0_numeric
=
self
.
_integral_bounded_mp
(
b_lambda_fn
,
-
np
.
inf
,
z1
)
b1_numeric
=
self
.
_integral_bounded_mp
(
b_lambda_fn
,
z1
,
+
np
.
inf
)
print
(
"B: numerically {} = {} + {}"
.
format
(
b_numeric
,
b0_numeric
,
b1_numeric
))
return
float
(
b_numeric
)
def
_compute_rdp_mp
(
self
,
q
,
sigma
,
order
):
log_a_mp
=
float
(
mp
.
log
(
self
.
compute_a_mp
(
sigma
,
q
,
order
)))
log_b_mp
=
float
(
mp
.
log
(
self
.
compute_b_mp
(
sigma
,
q
,
order
)))
return
log_a_mp
,
log_b_mp
# TEST ROUTINES
def
_almost_equal
(
self
,
a
,
b
,
rtol
,
atol
=
0
):
# Analogue of np.testing.assert_allclose(a, b, rtol, atol).
self
.
assertBetween
(
a
,
b
*
(
1
-
rtol
)
-
atol
,
b
*
(
1
+
rtol
)
+
atol
)
def
_compare_bounds
(
self
,
q
,
sigma
,
order
):
log_a_mp
,
log_b_mp
=
self
.
_compute_rdp_mp
(
q
,
sigma
,
order
)
log_a
=
rdp_accountant
.
_compute_log_a
(
q
,
sigma
,
order
)
log_bound_b
=
rdp_accountant
.
_bound_log_b
(
q
,
sigma
,
order
)
if
log_a_mp
<
1000
and
log_a_mp
>
1e-6
:
self
.
_almost_equal
(
log_a
,
log_a_mp
,
rtol
=
1e-6
)
else
:
# be more tolerant for _very_ large or small logarithms
self
.
_almost_equal
(
log_a
,
log_a_mp
,
rtol
=
1e-3
,
atol
=
1e-14
)
if
np
.
isfinite
(
log_bound_b
):
# Ignore divergence between the bound and exact value of B if
# they don't matter anyway (bound on A is larger) or q > .5
if
log_bound_b
>
log_a
and
q
<=
.
5
:
self
.
_almost_equal
(
log_b_mp
,
log_bound_b
,
rtol
=
1e-6
,
atol
=
1e-14
)
if
np
.
isfinite
(
log_a_mp
)
and
np
.
isfinite
(
log_b_mp
):
# We hypothesize that this assertion is always true; no proof yet.
self
.
assertLessEqual
(
log_b_mp
,
log_a_mp
+
1e-6
)
def
test_compute_rdp
(
self
):
rdp_scalar
=
rdp_accountant
.
compute_rdp
(
0.1
,
2
,
10
,
5
)
self
.
assertAlmostEqual
(
rdp_scalar
,
0.07737
,
places
=
5
)
rdp_vec
=
rdp_accountant
.
compute_rdp
(
0.01
,
2.5
,
50
,
[
1.5
,
2.5
,
5
,
50
,
100
])
correct
=
[
0.00065
,
0.001085
,
0.00218075
,
0.023846
,
167.416307
]
for
i
in
range
(
len
(
rdp_vec
)):
self
.
assertAlmostEqual
(
rdp_vec
[
i
],
correct
[
i
],
places
=
5
)
def
test_compare_with_mp
(
self
):
# Compare the cheap computation with an expensive, multi-precision
# computation for a few parameters. Takes a few seconds.
self
.
_compare_bounds
(
q
=
.
01
,
sigma
=
.
1
,
order
=
.
5
)
self
.
_compare_bounds
(
q
=
.
1
,
sigma
=
1.
,
order
=
5
)
self
.
_compare_bounds
(
q
=
.
5
,
sigma
=
2.
,
order
=
32.5
)
for
q
in
(
1e-6
,
.
1
,
.
999
):
for
sigma
in
(.
1
,
10.
,
100.
):
for
order
in
(
1.01
,
2
,
255.9
,
256
):
self
.
_compare_bounds
(
q
,
sigma
,
order
)
def
test_get_privacy_spent
(
self
):
orders
=
range
(
2
,
33
)
rdp
=
rdp_accountant
.
compute_rdp
(
0.01
,
4
,
10000
,
orders
)
eps
,
delta
,
opt_order
=
rdp_accountant
.
get_privacy_spent
(
orders
,
rdp
,
target_delta
=
1e-5
)
self
.
assertAlmostEqual
(
eps
,
1.258575
,
places
=
5
)
self
.
assertEqual
(
opt_order
,
20
)
eps
,
delta
,
_
=
rdp_accountant
.
get_privacy_spent
(
orders
,
rdp
,
target_eps
=
1.258575
)
self
.
assertAlmostEqual
(
delta
,
1e-5
)
def
test_compute_privacy_loss
(
self
):
parameters
=
[(
0.01
,
4
,
10000
),
(
0.1
,
2
,
100
)]
delta
=
1e-5
orders
=
(
1.25
,
1.5
,
1.75
,
2.
,
2.5
,
3.
,
4.
,
5.
,
6.
,
7.
,
8.
,
10.
,
12.
,
14.
,
16.
,
20.
,
24.
,
28.
,
32.
,
64.
,
256.
)
rdp
=
np
.
zeros_like
(
orders
,
dtype
=
float
)
for
q
,
sigma
,
steps
in
parameters
:
rdp
+=
rdp_accountant
.
compute_rdp
(
q
,
sigma
,
steps
,
orders
)
eps
,
delta
,
opt_order
=
rdp_accountant
.
get_privacy_spent
(
orders
,
rdp
,
target_delta
=
delta
)
self
.
assertAlmostEqual
(
eps
,
3.276237
,
places
=
5
)
self
.
assertEqual
(
opt_order
,
8
)
if
__name__
==
"__main__"
:
absltest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment