Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
911a0d23
Unverified
Commit
911a0d23
authored
May 08, 2018
by
Ilya Mironov
Committed by
GitHub
May 08, 2018
Browse files
Merge pull request #4197 from ilyamironov/master
Updates to differential_privacy/pate following the Banff workshop.
parents
bc0edaf8
004dd361
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
874 additions
and
340 deletions
+874
-340
research/differential_privacy/pate/ICLR2018/README.md
research/differential_privacy/pate/ICLR2018/README.md
+3
-1
research/differential_privacy/pate/ICLR2018/plot_partition.py
...arch/differential_privacy/pate/ICLR2018/plot_partition.py
+9
-7
research/differential_privacy/pate/ICLR2018/plots_for_slides.py
...ch/differential_privacy/pate/ICLR2018/plots_for_slides.py
+283
-0
research/differential_privacy/pate/README.md
research/differential_privacy/pate/README.md
+4
-4
research/differential_privacy/pate/core_test.py
research/differential_privacy/pate/core_test.py
+1
-1
research/differential_privacy/pate/smooth_sensitivity_test.py
...arch/differential_privacy/pate/smooth_sensitivity_test.py
+1
-1
research/differential_privacy/privacy_accountant/python/gaussian_moments.py
...ial_privacy/privacy_accountant/python/gaussian_moments.py
+0
-326
research/differential_privacy/privacy_accountant/python/rdp_accountant.py
...ntial_privacy/privacy_accountant/python/rdp_accountant.py
+400
-0
research/differential_privacy/privacy_accountant/python/rdp_accountant_test.py
..._privacy/privacy_accountant/python/rdp_accountant_test.py
+173
-0
No files found.
research/differential_privacy/pate/ICLR2018/README.md
View file @
911a0d23
...
@@ -51,9 +51,11 @@ The output is written to the console.
...
@@ -51,9 +51,11 @@ The output is written to the console.
*
plot_partition.py
—
Script for producing partition.pdf, a detailed breakdown of privacy
*
plot_partition.py
—
Script for producing partition.pdf, a detailed breakdown of privacy
costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours).
costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours).
*
rdp_flow.py and plot_ls_q.py are currently not used.
*
plots_for_slides.py
—
Script for producing several plots for the slide deck.
*
download.py
—
Utility script for populating the data/ directory.
*
download.py
—
Utility script for populating the data/ directory.
*
plot_ls_q.py is not used.
All Python files take flags. Run script_name.py --help for help on flags.
All Python files take flags. Run script_name.py --help for help on flags.
research/differential_privacy/pate/ICLR2018/plot_partition.py
View file @
911a0d23
...
@@ -186,7 +186,7 @@ def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
...
@@ -186,7 +186,7 @@ def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta):
ss
=
rdp_ss
[
order_idx
],
ss
=
rdp_ss
[
order_idx
],
delta
=-
math
.
log
(
delta
)
/
(
order_opt
[
i
]
-
1
))
delta
=-
math
.
log
(
delta
)
/
(
order_opt
[
i
]
-
1
))
ss_std_opt
[
i
]
=
ss_std
[
order_idx
]
ss_std_opt
[
i
]
=
ss_std
[
order_idx
]
if
i
>
0
and
(
i
+
1
)
%
1
0
==
0
:
if
i
>
0
and
(
i
+
1
)
%
1
==
0
:
print
(
'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
print
(
'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} '
'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, '
'step2 = {:.3f}, ss = {:.3f}'
.
format
(
'step2 = {:.3f}, ss = {:.3f}'
.
format
(
...
@@ -292,6 +292,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
...
@@ -292,6 +292,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
fig
,
ax
=
plt
.
subplots
()
fig
,
ax
=
plt
.
subplots
()
fig
.
set_figheight
(
4.5
)
fig
.
set_figheight
(
4.5
)
fig
.
set_figwidth
(
4.7
)
fig
.
set_figwidth
(
4.7
)
fig
.
patch
.
set_alpha
(
0
)
l1
=
ax
.
plot
(
l1
=
ax
.
plot
(
x
,
y3
,
color
=
'b'
,
ls
=
'-'
,
label
=
r
'Total privacy cost'
,
linewidth
=
1
).
pop
()
x
,
y3
,
color
=
'b'
,
ls
=
'-'
,
label
=
r
'Total privacy cost'
,
linewidth
=
1
).
pop
()
...
@@ -311,8 +313,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
...
@@ -311,8 +313,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
plt
.
xlim
([
0
,
xlim
])
plt
.
xlim
([
0
,
xlim
])
ax
.
set_ylim
([
0
,
3.
])
ax
.
set_ylim
([
0
,
3.
])
ax
.
set_xlabel
(
'Number of queries answered'
,
fontsize
=
1
0
)
ax
.
set_xlabel
(
'Number of queries answered'
,
fontsize
=
1
6
)
ax
.
set_ylabel
(
r
'Privacy cost $\varepsilon$ at $\delta=10^{-8}$'
,
fontsize
=
1
0
)
ax
.
set_ylabel
(
r
'Privacy cost $\varepsilon$ at $\delta=10^{-8}$'
,
fontsize
=
1
6
)
# Merging legends.
# Merging legends.
if
print_order
:
if
print_order
:
...
@@ -321,11 +323,11 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
...
@@ -321,11 +323,11 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
x
,
y_right
,
'r'
,
ls
=
'-'
,
label
=
r
'Optimal order'
,
linewidth
=
5
,
x
,
y_right
,
'r'
,
ls
=
'-'
,
label
=
r
'Optimal order'
,
linewidth
=
5
,
alpha
=
.
5
).
pop
()
alpha
=
.
5
).
pop
()
ax2
.
grid
(
False
)
ax2
.
grid
(
False
)
ax2
.
set_ylabel
(
r
'Optimal Renyi order'
,
fontsize
=
16
)
#
ax2.set_ylabel(r'Optimal Renyi order', fontsize=16)
ax2
.
set_ylim
([
0
,
200.
])
ax2
.
set_ylim
([
0
,
200.
])
ax
.
legend
((
l1
,
l2
),
(
l1
.
get_label
(),
l2
.
get_label
()),
loc
=
0
,
fontsize
=
13
)
#
ax.legend((l1, l2), (l1.get_label(), l2.get_label()), loc=0, fontsize=13)
ax
.
tick_params
(
labelsize
=
1
0
)
ax
.
tick_params
(
labelsize
=
1
4
)
plot_filename
=
os
.
path
.
join
(
figures_dir
,
'partition.pdf'
)
plot_filename
=
os
.
path
.
join
(
figures_dir
,
'partition.pdf'
)
print
(
'Saving the graph to '
+
plot_filename
)
print
(
'Saving the graph to '
+
plot_filename
)
fig
.
savefig
(
plot_filename
,
bbox_inches
=
'tight'
,
dpi
=
800
)
fig
.
savefig
(
plot_filename
,
bbox_inches
=
'tight'
,
dpi
=
800
)
...
@@ -387,7 +389,7 @@ def main(argv):
...
@@ -387,7 +389,7 @@ def main(argv):
figures_dir
=
os
.
path
.
expanduser
(
FLAGS
.
figures_dir
)
figures_dir
=
os
.
path
.
expanduser
(
FLAGS
.
figures_dir
)
plot_comparison
(
figures_dir
,
simple_ind
,
conf_ind
,
simple_dep
,
conf_dep
)
plot_comparison
(
figures_dir
,
simple_ind
,
conf_ind
,
simple_dep
,
conf_dep
)
plot_partition
(
figures_dir
,
conf_dep
,
Fals
e
)
plot_partition
(
figures_dir
,
conf_dep
,
Tru
e
)
plt
.
close
(
'all'
)
plt
.
close
(
'all'
)
...
...
research/differential_privacy/pate/ICLR2018/
rdp_flow
.py
→
research/differential_privacy/pate/ICLR2018/
plots_for_slides
.py
View file @
911a0d23
...
@@ -13,9 +13,7 @@
...
@@ -13,9 +13,7 @@
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Plots two graphs illustrating cost of privacy per answered query.
"""Plots graphs for the slide deck.
PRESENTLY NOT USED.
A script in support of the PATE2 paper. The input is a file containing a numpy
A script in support of the PATE2 paper. The input is a file containing a numpy
array of votes, one query per row, one class per column. Ex:
array of votes, one query per row, one class per column. Ex:
...
@@ -23,7 +21,7 @@ array of votes, one query per row, one class per column. Ex:
...
@@ -23,7 +21,7 @@ array of votes, one query per row, one class per column. Ex:
31, 16, ..., 0
31, 16, ..., 0
...
...
0, 86, ..., 438
0, 86, ..., 438
The output
is written to a specified directory and consists of two pdf files
.
The output
graphs are visualized using the TkAgg backend
.
"""
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
@@ -35,43 +33,51 @@ import sys
...
@@ -35,43 +33,51 @@ import sys
sys
.
path
.
append
(
'..'
)
# Main modules reside in the parent directory.
sys
.
path
.
append
(
'..'
)
# Main modules reside in the parent directory.
from
absl
import
app
from
absl
import
app
from
absl
import
flags
from
absl
import
flags
import
matplotlib
import
matplotlib
matplotlib
.
use
(
'TkAgg'
)
matplotlib
.
use
(
'TkAgg'
)
import
matplotlib.pyplot
as
plt
# pylint: disable=g-import-not-at-top
import
matplotlib.pyplot
as
plt
# pylint: disable=g-import-not-at-top
import
numpy
as
np
import
numpy
as
np
import
core
as
pate
import
core
as
pate
import
random
plt
.
style
.
use
(
'ggplot'
)
plt
.
style
.
use
(
'ggplot'
)
FLAGS
=
flags
.
FLAGS
FLAGS
=
flags
.
FLAGS
flags
.
DEFINE_string
(
'counts_file'
,
''
,
'Counts file.'
)
flags
.
DEFINE_string
(
'counts_file'
,
None
,
'Counts file.'
)
flags
.
DEFINE_string
(
'figures_dir'
,
''
,
'Path where figures are written to.'
)
flags
.
DEFINE_string
(
'figures_dir'
,
''
,
'Path where figures are written to.'
)
flags
.
DEFINE_boolean
(
'transparent'
,
False
,
'Set background to transparent.'
)
flags
.
mark_flag_as_required
(
'counts_file'
)
def
plot_rdp_curve_per_example
(
votes
,
sigmas
):
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.5
def
setup_plot
():
fig
,
ax
=
plt
.
subplots
()
fig
,
ax
=
plt
.
subplots
()
fig
.
set_figheight
(
4.5
)
fig
.
set_figheight
(
4.5
)
fig
.
set_figwidth
(
4.7
)
fig
.
set_figwidth
(
4.7
)
styles
=
[
':'
,
'-'
]
if
FLAGS
.
transparent
:
labels
=
[
'ex1'
,
'ex2'
]
fig
.
patch
.
set_alpha
(
0
)
return
fig
,
ax
for
i
in
xrange
(
votes
.
shape
[
0
]):
def
plot_rdp_curve_per_example
(
votes
,
sigmas
):
print
(
sorted
(
votes
[
i
,],
reverse
=
True
)[:
10
])
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.001
fig
,
ax
=
setup_plot
()
for
i
in
range
(
votes
.
shape
[
0
]):
for
sigma
in
sigmas
:
for
sigma
in
sigmas
:
logq
=
pate
.
compute_logq_gaussian
(
votes
[
i
,],
sigma
)
logq
=
pate
.
compute_logq_gaussian
(
votes
[
i
,],
sigma
)
rdp
=
pate
.
rdp_gaussian
(
logq
,
sigma
,
orders
)
rdp
=
pate
.
rdp_gaussian
(
logq
,
sigma
,
orders
)
ax
.
plot
(
ax
.
plot
(
orders
,
orders
,
rdp
,
rdp
,
label
=
r
'{} $\sigma$={}'
.
format
(
labels
[
i
],
int
(
sigma
))
,
alpha
=
1.
,
l
inestyle
=
styles
[
i
]
,
l
abel
=
r
'Data-dependent bound, $\sigma$={}'
.
format
(
int
(
sigma
))
,
linewidth
=
5
)
linewidth
=
5
)
for
sigma
in
sigmas
:
for
sigma
in
sigmas
:
...
@@ -79,23 +85,47 @@ def plot_rdp_curve_per_example(votes, sigmas):
...
@@ -79,23 +85,47 @@ def plot_rdp_curve_per_example(votes, sigmas):
orders
,
orders
,
pate
.
rdp_data_independent_gaussian
(
sigma
,
orders
),
pate
.
rdp_data_independent_gaussian
(
sigma
,
orders
),
alpha
=
.
3
,
alpha
=
.
3
,
label
=
r
'Data-ind bound $\sigma$={}'
.
format
(
int
(
sigma
)),
label
=
r
'Data-ind
ependent
bound
,
$\sigma$={}'
.
format
(
int
(
sigma
)),
linewidth
=
10
)
linewidth
=
10
)
plt
.
yticks
([
0
,
.
01
])
plt
.
xlim
(
xmin
=
1
,
xmax
=
100
)
plt
.
xlabel
(
r
'Order $\lambda$'
,
fontsize
=
16
)
plt
.
ylim
(
ymin
=
0
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\lambda$'
,
fontsize
=
16
)
plt
.
xticks
([
1
,
20
,
40
,
60
,
80
,
100
])
plt
.
yticks
([
0
,
.
0025
,
.
005
,
.
0075
,
.
01
])
plt
.
xlabel
(
r
'Order $\alpha$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\alpha$'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
ax
.
tick_params
(
labelsize
=
14
)
fout_name
=
os
.
path
.
join
(
FLAGS
.
figures_dir
,
'rdp_flow1.pdf'
)
print
(
'Saving the graph to '
+
fout_name
)
fig
.
savefig
(
fout_name
,
bbox_inches
=
'tight'
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
show
()
plt
.
show
()
def
plot_rdp_of_sigma
(
v
,
order
):
sigmas
=
np
.
linspace
(
1.
,
1000.
,
endpoint
=
True
,
num
=
1000
)
fig
,
ax
=
setup_plot
()
y
=
np
.
zeros
(
len
(
sigmas
))
for
i
,
sigma
in
enumerate
(
sigmas
):
logq
=
pate
.
compute_logq_gaussian
(
v
,
sigma
)
y
[
i
]
=
pate
.
rdp_gaussian
(
logq
,
sigma
,
order
)
ax
.
plot
(
sigmas
,
y
,
alpha
=
.
8
,
linewidth
=
5
)
plt
.
xlim
(
xmin
=
1
,
xmax
=
1000
)
plt
.
ylim
(
ymin
=
0
)
# plt.yticks([0, .0004, .0008, .0012])
ax
.
tick_params
(
labelleft
=
'off'
)
plt
.
xlabel
(
r
'Noise $\sigma$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP at order $\alpha={}$'
.
format
(
order
),
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
# plt.legend(loc=0, fontsize=13)
plt
.
show
()
def
compute_rdp_curve
(
votes
,
threshold
,
sigma1
,
sigma2
,
orders
,
def
compute_rdp_curve
(
votes
,
threshold
,
sigma1
,
sigma2
,
orders
,
target_answered
):
target_answered
):
rdp_cum
=
np
.
zeros
(
len
(
orders
))
rdp_cum
=
np
.
zeros
(
len
(
orders
))
answered
=
0
answered
=
0
for
i
,
v
in
enumerate
(
votes
):
for
i
,
v
in
enumerate
(
votes
):
...
@@ -115,46 +145,138 @@ def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
...
@@ -115,46 +145,138 @@ def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
def
plot_rdp_total
(
votes
,
sigmas
):
def
plot_rdp_total
(
votes
,
sigmas
):
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
100
)
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
100
)
orders
[
0
]
=
1.
5
orders
[
0
]
=
1.
1
fig
,
ax
=
plt
.
sub
plot
s
()
fig
,
ax
=
setup_
plot
()
fig
.
set_figheight
(
4.5
)
fig
.
set_figwidth
(
4.7
)
target_answered
=
2000
for
sigma
in
sigmas
:
for
sigma
in
sigmas
:
rdp
=
compute_rdp_curve
(
votes
,
5000
,
1000
,
sigma
,
orders
,
2000
)
rdp
=
compute_rdp_curve
(
votes
,
5000
,
1000
,
sigma
,
orders
,
target_answered
)
ax
.
plot
(
ax
.
plot
(
orders
,
orders
,
rdp
,
rdp
,
alpha
=
.
8
,
alpha
=
.
8
,
label
=
r
'$\sigma$={}'
.
format
(
int
(
sigma
)),
label
=
r
'
Data-dependent bound,
$\sigma$={}'
.
format
(
int
(
sigma
)),
linewidth
=
5
)
linewidth
=
5
)
plt
.
xlabel
(
r
'Order $\lambda$'
,
fontsize
=
16
)
# for sigma in sigmas:
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\lambda$'
,
fontsize
=
16
)
# ax.plot(
# orders,
# target_answered * pate.rdp_data_independent_gaussian(sigma, orders),
# alpha=.3,
# label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)),
# linewidth=10)
plt
.
xlim
(
xmin
=
1
,
xmax
=
100
)
plt
.
ylim
(
ymin
=
0
)
plt
.
xticks
([
1
,
20
,
40
,
60
,
80
,
100
])
plt
.
yticks
([
0
,
.
0005
,
.
001
,
.
0015
,
.
002
])
plt
.
xlabel
(
r
'Order $\alpha$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\alpha$'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
show
()
def
plot_data_ind_curve
():
fig
,
ax
=
setup_plot
()
orders
=
np
.
linspace
(
1.
,
10.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.01
ax
.
plot
(
orders
,
pate
.
rdp_data_independent_gaussian
(
1.
,
orders
),
alpha
=
.
5
,
color
=
'gray'
,
linewidth
=
10
)
# plt.yticks([])
plt
.
xlim
(
xmin
=
1
,
xmax
=
10
)
plt
.
ylim
(
ymin
=
0
)
plt
.
xticks
([
1
,
3
,
5
,
7
,
9
])
ax
.
tick_params
(
labelsize
=
14
)
plt
.
show
()
def
plot_two_data_ind_curves
():
orders
=
np
.
linspace
(
1.
,
100.
,
endpoint
=
True
,
num
=
1000
)
orders
[
0
]
=
1.001
fig
,
ax
=
setup_plot
()
for
sigma
in
[
100
,
150
]:
ax
.
plot
(
orders
,
pate
.
rdp_data_independent_gaussian
(
sigma
,
orders
),
alpha
=
.
3
,
label
=
r
'Data-independent bound, $\sigma$={}'
.
format
(
int
(
sigma
)),
linewidth
=
10
)
plt
.
xlim
(
xmin
=
1
,
xmax
=
100
)
plt
.
ylim
(
ymin
=
0
)
plt
.
xticks
([
1
,
20
,
40
,
60
,
80
,
100
])
plt
.
yticks
([
0
,
.
0025
,
.
005
,
.
0075
,
.
01
])
plt
.
xlabel
(
r
'Order $\alpha$'
,
fontsize
=
16
)
plt
.
ylabel
(
r
'RDP value $\varepsilon$ at $\alpha$'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
ax
.
tick_params
(
labelsize
=
14
)
fout_name
=
os
.
path
.
join
(
FLAGS
.
figures_dir
,
'rdp_flow2.pdf'
)
print
(
'Saving the graph to '
+
fout_name
)
fig
.
savefig
(
fout_name
,
bbox_inches
=
'tight'
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
legend
(
loc
=
0
,
fontsize
=
13
)
plt
.
show
()
plt
.
show
()
def
scatter_plot
(
votes
,
threshold
,
sigma1
,
sigma2
,
order
):
fig
,
ax
=
setup_plot
()
x
=
[]
y
=
[]
for
i
,
v
in
enumerate
(
votes
):
if
threshold
is
not
None
and
sigma1
is
not
None
:
q_step1
=
math
.
exp
(
pate
.
compute_logpr_answered
(
threshold
,
sigma1
,
v
))
else
:
q_step1
=
1.
if
random
.
random
()
<
q_step1
:
logq_step2
=
pate
.
compute_logq_gaussian
(
v
,
sigma2
)
x
.
append
(
max
(
v
))
y
.
append
(
pate
.
rdp_gaussian
(
logq_step2
,
sigma2
,
order
))
print
(
'Selected {} queries.'
.
format
(
len
(
x
)))
# Plot the data-independent curve:
# data_ind = pate.rdp_data_independent_gaussian(sigma, order)
# plt.plot([0, 5000], [data_ind, data_ind], color='tab:blue', linestyle='-', linewidth=2)
ax
.
set_yscale
(
'log'
)
plt
.
xlim
(
xmin
=
0
,
xmax
=
5000
)
plt
.
ylim
(
ymin
=
1e-300
,
ymax
=
1
)
plt
.
yticks
([
1
,
1e-100
,
1e-200
,
1e-300
])
plt
.
scatter
(
x
,
y
,
s
=
1
,
alpha
=
0.5
)
plt
.
ylabel
(
r
'RDP at $\alpha={}$'
.
format
(
order
),
fontsize
=
16
)
plt
.
xlabel
(
r
'max count'
,
fontsize
=
16
)
ax
.
tick_params
(
labelsize
=
14
)
plt
.
show
()
def
main
(
argv
):
def
main
(
argv
):
del
argv
# Unused.
del
argv
# Unused.
fin_name
=
os
.
path
.
expanduser
(
FLAGS
.
counts_file
)
fin_name
=
os
.
path
.
expanduser
(
FLAGS
.
counts_file
)
print
(
'Reading raw votes from '
+
fin_name
)
print
(
'Reading raw votes from '
+
fin_name
)
sys
.
stdout
.
flush
()
sys
.
stdout
.
flush
()
votes
=
np
.
load
(
fin_name
)
plot_data_ind_curve
(
)
votes
=
votes
[:
12000
,]
# truncate to 4000 samples
plot_two_data_ind_curves
()
v1
=
[
2550
,
2200
,
250
]
# based on votes[2,]
v1
=
[
2550
,
2200
,
250
]
# based on votes[2,]
v2
=
[
2600
,
2200
,
200
]
# based on votes[381,]
# v2 = [2600, 2200, 200] # based on votes[381,]
plot_rdp_curve_per_example
(
np
.
array
([
v1
,
v2
]),
(
100.
,
150.
))
plot_rdp_curve_per_example
(
np
.
array
([
v1
]),
(
100.
,
150.
))
plot_rdp_of_sigma
(
np
.
array
(
v1
),
20.
)
votes
=
np
.
load
(
fin_name
)
plot_rdp_total
(
votes
,
(
100.
,
150.
))
plot_rdp_total
(
votes
[:
12000
,
],
(
100.
,
150.
))
scatter_plot
(
votes
[:
6000
,
],
None
,
None
,
100
,
20
)
# w/o thresholding
scatter_plot
(
votes
[:
6000
,
],
3500
,
1500
,
100
,
20
)
# with thresholding
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
research/differential_privacy/pate/README.md
View file @
911a0d23
...
@@ -15,7 +15,7 @@ dataset.
...
@@ -15,7 +15,7 @@ dataset.
The framework consists of _teachers_, the _student_ model, and the _aggregator_. The
The framework consists of _teachers_, the _student_ model, and the _aggregator_. The
teachers are models trained on disjoint subsets of the training datasets. The student
teachers are models trained on disjoint subsets of the training datasets. The student
model has access to an insensitive (
i.e
., public) unlabelled dataset, which is labelled by
model has access to an insensitive (
e.g
., public) unlabelled dataset, which is labelled by
interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies
interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies
outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or
outputs of the teacher models, and either forwards a (noisy) aggregate to the student, or
refuses to answer.
refuses to answer.
...
@@ -57,13 +57,13 @@ $ python smooth_sensitivity_test.py
...
@@ -57,13 +57,13 @@ $ python smooth_sensitivity_test.py
## Files in this directory
## Files in this directory
*
core.py
---
RDP privacy accountant for several vote aggregators (GNMax,
*
core.py
—
RDP privacy accountant for several vote aggregators (GNMax,
Threshold, Laplace).
Threshold, Laplace).
*
smooth_sensitivity.py
---
Smooth sensitivity analysis for GNMax and
*
smooth_sensitivity.py
—
Smooth sensitivity analysis for GNMax and
Threshold mechanisms.
Threshold mechanisms.
*
core_test.py and smooth_sensitivity_test.py
---
Unit tests for the
*
core_test.py and smooth_sensitivity_test.py
—
Unit tests for the
files above.
files above.
## Contact information
## Contact information
...
...
research/differential_privacy/pate/core_test.py
View file @
911a0d23
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Tests for
google3.experimental.brain.privacy.
pate.
pat
e."""
"""Tests for pate.
cor
e."""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
...
research/differential_privacy/pate/smooth_sensitivity_test.py
View file @
911a0d23
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Tests for
google3.experimental.brain.privacy.pate.pate_
smooth_sensitivity."""
"""Tests for
pate.
smooth_sensitivity."""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
...
research/differential_privacy/privacy_accountant/python/gaussian_moments.py
deleted
100644 → 0
View file @
bc0edaf8
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A standalone utility for computing the log moments.
The utility for computing the log moments. It consists of two methods.
compute_log_moment(q, sigma, T, lmbd) computes the log moment with sampling
probability q, noise sigma, order lmbd, and T steps. get_privacy_spent computes
delta (or eps) given log moments and eps (or delta).
Example use:
Suppose that we have run an algorithm with parameters, an array of
(q1, sigma1, T1) ... (qk, sigmak, Tk), and we wish to compute eps for a given
delta. The example code would be:
max_lmbd = 32
lmbds = xrange(1, max_lmbd + 1)
log_moments = []
for lmbd in lmbds:
log_moment = 0
for q, sigma, T in parameters:
log_moment += compute_log_moment(q, sigma, T, lmbd)
log_moments.append((lmbd, log_moment))
eps, delta = get_privacy_spent(log_moments, target_delta=delta)
To verify that the I1 >= I2 (see comments in GaussianMomentsAccountant in
accountant.py for the context), run the same loop above with verify=True
passed to compute_log_moment.
"""
from
__future__
import
print_function
import
math
import
sys
import
numpy
as
np
import
scipy.integrate
as
integrate
import
scipy.stats
from
six.moves
import
xrange
from
sympy.mpmath
import
mp
def
_to_np_float64
(
v
):
if
math
.
isnan
(
v
)
or
math
.
isinf
(
v
):
return
np
.
inf
return
np
.
float64
(
v
)
######################
# FLOAT64 ARITHMETIC #
######################
def
pdf_gauss
(
x
,
sigma
,
mean
=
0
):
return
scipy
.
stats
.
norm
.
pdf
(
x
,
loc
=
mean
,
scale
=
sigma
)
def
cropped_ratio
(
a
,
b
):
if
a
<
1E-50
and
b
<
1E-50
:
return
1.
else
:
return
a
/
b
def
integral_inf
(
fn
):
integral
,
_
=
integrate
.
quad
(
fn
,
-
np
.
inf
,
np
.
inf
)
return
integral
def
integral_bounded
(
fn
,
lb
,
ub
):
integral
,
_
=
integrate
.
quad
(
fn
,
lb
,
ub
)
return
integral
def
distributions
(
sigma
,
q
):
mu0
=
lambda
y
:
pdf_gauss
(
y
,
sigma
=
sigma
,
mean
=
0.0
)
mu1
=
lambda
y
:
pdf_gauss
(
y
,
sigma
=
sigma
,
mean
=
1.0
)
mu
=
lambda
y
:
(
1
-
q
)
*
mu0
(
y
)
+
q
*
mu1
(
y
)
return
mu0
,
mu1
,
mu
def
compute_a
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
lmbd_int
=
int
(
math
.
ceil
(
lmbd
))
if
lmbd_int
==
0
:
return
1.0
a_lambda_first_term_exact
=
0
a_lambda_second_term_exact
=
0
for
i
in
xrange
(
lmbd_int
+
1
):
coef_i
=
scipy
.
special
.
binom
(
lmbd_int
,
i
)
*
(
q
**
i
)
s1
,
s2
=
0
,
0
for
j
in
xrange
(
i
+
1
):
coef_j
=
scipy
.
special
.
binom
(
i
,
j
)
*
(
-
1
)
**
(
i
-
j
)
s1
+=
coef_j
*
np
.
exp
((
j
*
j
-
j
)
/
(
2.0
*
(
sigma
**
2
)))
s2
+=
coef_j
*
np
.
exp
((
j
*
j
+
j
)
/
(
2.0
*
(
sigma
**
2
)))
a_lambda_first_term_exact
+=
coef_i
*
s1
a_lambda_second_term_exact
+=
coef_i
*
s2
a_lambda_exact
=
((
1.0
-
q
)
*
a_lambda_first_term_exact
+
q
*
a_lambda_second_term_exact
)
if
verbose
:
print
(
"A: by binomial expansion {} = {} + {}"
.
format
(
a_lambda_exact
,
(
1.0
-
q
)
*
a_lambda_first_term_exact
,
q
*
a_lambda_second_term_exact
))
return
_to_np_float64
(
a_lambda_exact
)
def
compute_b
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
mu0
,
_
,
mu
=
distributions
(
sigma
,
q
)
b_lambda_fn
=
lambda
z
:
mu0
(
z
)
*
np
.
power
(
cropped_ratio
(
mu0
(
z
),
mu
(
z
)),
lmbd
)
b_lambda
=
integral_inf
(
b_lambda_fn
)
m
=
sigma
**
2
*
(
np
.
log
((
2.
-
q
)
/
(
1.
-
q
))
+
1.
/
(
2
*
sigma
**
2
))
b_fn
=
lambda
z
:
(
np
.
power
(
mu0
(
z
)
/
mu
(
z
),
lmbd
)
-
np
.
power
(
mu
(
-
z
)
/
mu0
(
z
),
lmbd
))
if
verbose
:
print
(
"M ="
,
m
)
print
(
"f(-M) = {} f(M) = {}"
.
format
(
b_fn
(
-
m
),
b_fn
(
m
)))
assert
b_fn
(
-
m
)
<
0
and
b_fn
(
m
)
<
0
b_lambda_int1_fn
=
lambda
z
:
(
mu0
(
z
)
*
np
.
power
(
cropped_ratio
(
mu0
(
z
),
mu
(
z
)),
lmbd
))
b_lambda_int2_fn
=
lambda
z
:
(
mu0
(
z
)
*
np
.
power
(
cropped_ratio
(
mu
(
z
),
mu0
(
z
)),
lmbd
))
b_int1
=
integral_bounded
(
b_lambda_int1_fn
,
-
m
,
m
)
b_int2
=
integral_bounded
(
b_lambda_int2_fn
,
-
m
,
m
)
a_lambda_m1
=
compute_a
(
sigma
,
q
,
lmbd
-
1
)
b_bound
=
a_lambda_m1
+
b_int1
-
b_int2
if
verbose
:
print
(
"B: by numerical integration"
,
b_lambda
)
print
(
"B must be no more than "
,
b_bound
)
print
(
b_lambda
,
b_bound
)
return
_to_np_float64
(
b_lambda
)
###########################
# MULTIPRECISION ROUTINES #
###########################
def
pdf_gauss_mp
(
x
,
sigma
,
mean
):
return
mp
.
mpf
(
1.
)
/
mp
.
sqrt
(
mp
.
mpf
(
"2."
)
*
sigma
**
2
*
mp
.
pi
)
*
mp
.
exp
(
-
(
x
-
mean
)
**
2
/
(
mp
.
mpf
(
"2."
)
*
sigma
**
2
))
def
integral_inf_mp
(
fn
):
integral
,
_
=
mp
.
quad
(
fn
,
[
-
mp
.
inf
,
mp
.
inf
],
error
=
True
)
return
integral
def
integral_bounded_mp
(
fn
,
lb
,
ub
):
integral
,
_
=
mp
.
quad
(
fn
,
[
lb
,
ub
],
error
=
True
)
return
integral
def
distributions_mp
(
sigma
,
q
):
mu0
=
lambda
y
:
pdf_gauss_mp
(
y
,
sigma
=
sigma
,
mean
=
mp
.
mpf
(
0
))
mu1
=
lambda
y
:
pdf_gauss_mp
(
y
,
sigma
=
sigma
,
mean
=
mp
.
mpf
(
1
))
mu
=
lambda
y
:
(
1
-
q
)
*
mu0
(
y
)
+
q
*
mu1
(
y
)
return
mu0
,
mu1
,
mu
def
compute_a_mp
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
lmbd_int
=
int
(
math
.
ceil
(
lmbd
))
if
lmbd_int
==
0
:
return
1.0
mu0
,
mu1
,
mu
=
distributions_mp
(
sigma
,
q
)
a_lambda_fn
=
lambda
z
:
mu
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
a_lambda_first_term_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
a_lambda_second_term_fn
=
lambda
z
:
mu1
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
a_lambda
=
integral_inf_mp
(
a_lambda_fn
)
a_lambda_first_term
=
integral_inf_mp
(
a_lambda_first_term_fn
)
a_lambda_second_term
=
integral_inf_mp
(
a_lambda_second_term_fn
)
if
verbose
:
print
(
"A: by numerical integration {} = {} + {}"
.
format
(
a_lambda
,
(
1
-
q
)
*
a_lambda_first_term
,
q
*
a_lambda_second_term
))
return
_to_np_float64
(
a_lambda
)
def
compute_b_mp
(
sigma
,
q
,
lmbd
,
verbose
=
False
):
lmbd_int
=
int
(
math
.
ceil
(
lmbd
))
if
lmbd_int
==
0
:
return
1.0
mu0
,
_
,
mu
=
distributions_mp
(
sigma
,
q
)
b_lambda_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu0
(
z
)
/
mu
(
z
))
**
lmbd_int
b_lambda
=
integral_inf_mp
(
b_lambda_fn
)
m
=
sigma
**
2
*
(
mp
.
log
((
2
-
q
)
/
(
1
-
q
))
+
1
/
(
2
*
(
sigma
**
2
)))
b_fn
=
lambda
z
:
((
mu0
(
z
)
/
mu
(
z
))
**
lmbd_int
-
(
mu
(
-
z
)
/
mu0
(
z
))
**
lmbd_int
)
if
verbose
:
print
(
"M ="
,
m
)
print
(
"f(-M) = {} f(M) = {}"
.
format
(
b_fn
(
-
m
),
b_fn
(
m
)))
assert
b_fn
(
-
m
)
<
0
and
b_fn
(
m
)
<
0
b_lambda_int1_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu0
(
z
)
/
mu
(
z
))
**
lmbd_int
b_lambda_int2_fn
=
lambda
z
:
mu0
(
z
)
*
(
mu
(
z
)
/
mu0
(
z
))
**
lmbd_int
b_int1
=
integral_bounded_mp
(
b_lambda_int1_fn
,
-
m
,
m
)
b_int2
=
integral_bounded_mp
(
b_lambda_int2_fn
,
-
m
,
m
)
a_lambda_m1
=
compute_a_mp
(
sigma
,
q
,
lmbd
-
1
)
b_bound
=
a_lambda_m1
+
b_int1
-
b_int2
if
verbose
:
print
(
"B by numerical integration"
,
b_lambda
)
print
(
"B must be no more than "
,
b_bound
)
assert
b_lambda
<
b_bound
+
1e-5
return
_to_np_float64
(
b_lambda
)
def
_compute_delta
(
log_moments
,
eps
):
"""Compute delta for given log_moments and eps.
Args:
log_moments: the log moments of privacy loss, in the form of pairs
of (moment_order, log_moment)
eps: the target epsilon.
Returns:
delta
"""
min_delta
=
1.0
for
moment_order
,
log_moment
in
log_moments
:
if
moment_order
==
0
:
continue
if
math
.
isinf
(
log_moment
)
or
math
.
isnan
(
log_moment
):
sys
.
stderr
.
write
(
"The %d-th order is inf or Nan
\n
"
%
moment_order
)
continue
if
log_moment
<
moment_order
*
eps
:
min_delta
=
min
(
min_delta
,
math
.
exp
(
log_moment
-
moment_order
*
eps
))
return
min_delta
def
_compute_eps
(
log_moments
,
delta
):
"""Compute epsilon for given log_moments and delta.
Args:
log_moments: the log moments of privacy loss, in the form of pairs
of (moment_order, log_moment)
delta: the target delta.
Returns:
epsilon
"""
min_eps
=
float
(
"inf"
)
for
moment_order
,
log_moment
in
log_moments
:
if
moment_order
==
0
:
continue
if
math
.
isinf
(
log_moment
)
or
math
.
isnan
(
log_moment
):
sys
.
stderr
.
write
(
"The %d-th order is inf or Nan
\n
"
%
moment_order
)
continue
min_eps
=
min
(
min_eps
,
(
log_moment
-
math
.
log
(
delta
))
/
moment_order
)
return
min_eps
def
compute_log_moment
(
q
,
sigma
,
steps
,
lmbd
,
verify
=
False
,
verbose
=
False
):
"""Compute the log moment of Gaussian mechanism for given parameters.
Args:
q: the sampling ratio.
sigma: the noise sigma.
steps: the number of steps.
lmbd: the moment order.
verify: if False, only compute the symbolic version. If True, computes
both symbolic and numerical solutions and verifies the results match.
verbose: if True, print out debug information.
Returns:
the log moment with type np.float64, could be np.inf.
"""
moment
=
compute_a
(
sigma
,
q
,
lmbd
,
verbose
=
verbose
)
if
verify
:
mp
.
dps
=
50
moment_a_mp
=
compute_a_mp
(
sigma
,
q
,
lmbd
,
verbose
=
verbose
)
moment_b_mp
=
compute_b_mp
(
sigma
,
q
,
lmbd
,
verbose
=
verbose
)
np
.
testing
.
assert_allclose
(
moment
,
moment_a_mp
,
rtol
=
1e-10
)
if
not
np
.
isinf
(
moment_a_mp
):
# The following test fails for (1, np.inf)!
np
.
testing
.
assert_array_less
(
moment_b_mp
,
moment_a_mp
)
if
np
.
isinf
(
moment
):
return
np
.
inf
else
:
return
np
.
log
(
moment
)
*
steps
def
get_privacy_spent
(
log_moments
,
target_eps
=
None
,
target_delta
=
None
):
"""Compute delta (or eps) for given eps (or delta) from log moments.
Args:
log_moments: array of (moment_order, log_moment) pairs.
target_eps: if not None, the epsilon for which we would like to compute
corresponding delta value.
target_delta: if not None, the delta for which we would like to compute
corresponding epsilon value. Exactly one of target_eps and target_delta
is None.
Returns:
eps, delta pair
"""
assert
(
target_eps
is
None
)
^
(
target_delta
is
None
)
assert
not
((
target_eps
is
None
)
and
(
target_delta
is
None
))
if
target_eps
is
not
None
:
return
(
target_eps
,
_compute_delta
(
log_moments
,
target_eps
))
else
:
return
(
_compute_eps
(
log_moments
,
target_delta
),
target_delta
)
research/differential_privacy/privacy_accountant/python/rdp_accountant.py
0 → 100644
View file @
911a0d23
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""RDP analysis of the Gaussian-with-sampling mechanism.
Functionality for computing Renyi differential privacy of an additive Gaussian
mechanism with sampling. Its public interface consists of two methods:
compute_rdp(q, sigma, T, order) computes RDP with sampling probability q,
noise sigma, T steps at a given order.
get_privacy_spent computes delta (or eps) given RDP and eps (or delta).
Example use:
Suppose that we have run an algorithm with parameters, an array of
(q1, sigma1, T1) ... (qk, sigma_k, Tk), and we wish to compute eps for a given
delta. The example code would be:
max_order = 32
orders = range(2, max_order + 1)
rdp = np.zeros_like(orders, dtype=float)
for q, sigma, T in parameters:
rdp += rdp_accountant.compute_rdp(q, sigma, T, orders)
eps, _, opt_order = rdp_accountant.get_privacy_spent(rdp, target_delta=delta)
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
sys
from
absl
import
app
from
absl
import
flags
import
numpy
as
np
from
scipy
import
special
# Flag controlling debug output of the RDP computations below.
FLAGS = flags.FLAGS

flags.DEFINE_boolean("rdp_verbose", False,
                     "Output intermediate results for RDP computation.")

# NOTE(review): parsing flags at import time is a module-level side effect;
# it makes this library sensitive to unrelated entries in sys.argv of any
# importing program — confirm this is intended.
FLAGS(sys.argv)  # Load the flags (including on import)
########################
# LOG-SPACE ARITHMETIC #
########################
def
_log_add
(
logx
,
logy
):
"""Add two numbers in the log space."""
a
,
b
=
min
(
logx
,
logy
),
max
(
logx
,
logy
)
if
a
==
-
np
.
inf
:
# adding 0
return
b
# Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
return
math
.
log1p
(
math
.
exp
(
a
-
b
))
+
b
# log1p(x) = log(x + 1)
def
_log_sub
(
logx
,
logy
):
"""Subtract two numbers in the log space. Answer must be positive."""
if
logy
==
-
np
.
inf
:
# subtracting 0
return
logx
assert
logx
>
logy
try
:
# Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
return
math
.
log
(
math
.
expm1
(
logx
-
logy
))
+
logy
# expm1(x) = exp(x) - 1
except
OverflowError
:
return
logx
def
_log_print
(
logx
):
"""Pretty print."""
if
logx
<
math
.
log
(
sys
.
float_info
.
max
):
return
"{}"
.
format
(
math
.
exp
(
logx
))
else
:
return
"exp({})"
.
format
(
logx
)
def _compute_log_a_int(q, sigma, alpha):
  """Compute log(A_alpha) for integer alpha via binomial expansion.

  Args:
    q: The sampling ratio, in [0, 1].
    sigma: The noise sigma.
    alpha: The moment order; must be an integer.

  Returns:
    log(A_alpha) as a float.
  """
  # Fix: the original `isinstance(alpha, (int, long))` raises NameError on
  # Python 3, where the `long` type was removed (ints are unbounded). A plain
  # int check works on both versions for the orders used here.
  assert isinstance(alpha, int)

  # The first and second terms of A_alpha in the log space:
  log_a1, log_a2 = -np.inf, -np.inf

  for i in range(alpha + 1):
    # Compute in the log space. Extra care needed for q = 0 or 1.
    log_coef_i = math.log(special.binom(alpha, i))
    if q > 0:
      log_coef_i += i * math.log(q)
    elif i > 0:
      continue  # The term is 0, skip the rest.

    if q < 1.0:
      log_coef_i += (alpha - i) * math.log(1 - q)
    elif i < alpha:
      continue  # The term is 0, skip the rest.

    # Gaussian moment factors exp((i^2 -/+ i) / (2 sigma^2)) in log space.
    s1 = log_coef_i + (i * i - i) / (2.0 * (sigma ** 2))
    s2 = log_coef_i + (i * i + i) / (2.0 * (sigma ** 2))

    log_a1 = _log_add(log_a1, s1)
    log_a2 = _log_add(log_a2, s2)

  log_a = _log_add(math.log(1 - q) + log_a1, math.log(q) + log_a2)
  if FLAGS.rdp_verbose:
    print("A: by binomial expansion {} = {} + {}".format(
        _log_print(log_a),
        _log_print(math.log(1 - q) + log_a1),
        _log_print(math.log(q) + log_a2)))
  return float(log_a)
def _compute_log_a_frac(q, sigma, alpha):
  """Compute log(A_alpha) for fractional alpha by an infinite series.

  The series has four parts (a11, a12, a21, a22); terms are accumulated in
  log space until all four per-iteration contributions are negligible.

  Args:
    q: The sampling ratio, in (0, 1).
    sigma: The noise sigma.
    alpha: The (non-integer) moment order.

  Returns:
    log(A_alpha) as a float.
  """
  # The four parts of A_alpha in the log space:
  log_a11, log_a12 = -np.inf, -np.inf
  log_a21, log_a22 = -np.inf, -np.inf
  i = 0

  z0, _ = _compute_zs(sigma, q)

  while True:  # do ... until loop
    coef = special.binom(alpha, i)
    log_coef = math.log(abs(coef))
    j = alpha - i

    log_t1 = log_coef + i * math.log(q) + j * math.log(1 - q)
    log_t2 = log_coef + j * math.log(q) + i * math.log(1 - q)

    log_e11 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
    log_e12 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))
    log_e21 = math.log(.5) + _log_erfc((i - (z0 - 1)) / (math.sqrt(2) * sigma))
    log_e22 = math.log(.5) + _log_erfc((z0 - 1 - j) / (math.sqrt(2) * sigma))

    log_s11 = log_t1 + (i * i - i) / (2 * (sigma ** 2)) + log_e11
    log_s12 = log_t2 + (j * j - j) / (2 * (sigma ** 2)) + log_e12
    log_s21 = log_t1 + (i * i + i) / (2 * (sigma ** 2)) + log_e21
    log_s22 = log_t2 + (j * j + j) / (2 * (sigma ** 2)) + log_e22

    # For fractional alpha the binomial coefficients alternate in sign past
    # i > alpha; add or subtract each part accordingly.
    if coef > 0:
      log_a11 = _log_add(log_a11, log_s11)
      log_a12 = _log_add(log_a12, log_s12)
      log_a21 = _log_add(log_a21, log_s21)
      log_a22 = _log_add(log_a22, log_s22)
    else:
      log_a11 = _log_sub(log_a11, log_s11)
      log_a12 = _log_sub(log_a12, log_s12)
      log_a21 = _log_sub(log_a21, log_s21)
      log_a22 = _log_sub(log_a22, log_s22)

    i += 1
    # Fix: the termination check listed log_s21 twice and omitted log_s12,
    # so the loop could stop while the a12 series still had large terms.
    # All four per-iteration contributions must be small before stopping.
    if max(log_s11, log_s12, log_s21, log_s22) < -30:
      break

  log_a = _log_add(math.log(1. - q) + _log_add(log_a11, log_a12),
                   math.log(q) + _log_add(log_a21, log_a22))
  return log_a
def _compute_log_a(q, sigma, alpha):
  """Dispatch log(A_alpha) to the integer or fractional implementation."""
  if not float(alpha).is_integer():
    return _compute_log_a_frac(q, sigma, alpha)
  return _compute_log_a_int(q, sigma, int(alpha))
def
_log_erfc
(
x
):
# Can be replaced with a single call to log_ntdr if available:
# return np.log(2.) + special.log_ntdr(-x * 2**.5)
r
=
special
.
erfc
(
x
)
if
r
==
0.0
:
# Using the Laurent series at infinity for the tail of the erfc function:
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
# To verify in Mathematica:
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
return
(
-
math
.
log
(
math
.
pi
)
/
2
-
math
.
log
(
x
)
-
x
**
2
-
.
5
*
x
**
-
2
+
.
625
*
x
**
-
4
-
37.
/
24.
*
x
**
-
6
+
353.
/
64.
*
x
**
-
8
)
else
:
return
math
.
log
(
r
)
def
_compute_zs
(
sigma
,
q
):
z0
=
sigma
**
2
*
math
.
log
(
1
/
q
-
1
)
+
.
5
z1
=
min
(
z0
-
2
,
z0
/
2
)
return
z0
,
z1
def _compute_log_b0(sigma, q, alpha, z1):
  """Return an approximation to B0 or None if failed to converge.

  B0 is approximated by an alternating series; terms are accumulated in
  linear space (accumulator s) with the sign tracked separately, while the
  magnitude of the binomial coefficient {-alpha choose k} is carried in log
  space (log_b0).

  Args:
    sigma: The noise sigma.
    q: The sampling ratio.
    alpha: The moment order.
    z1: The split point bounding the B0 piece.

  Returns:
    An approximation of log(B0); -np.inf if every term underflowed; None if
    the alternating series cancelled too much precision to be trusted.
  """
  z0, _ = _compute_zs(sigma, q)
  s, log_term, log_b0, k, sign, max_log_term = 0, 1., 0, 0, 1, -np.inf
  # Keep adding new terms until precision is no longer preserved.
  # Don't stop on the negative.
  while (k < alpha or (log_term > max_log_term - 36 and log_term > -30) or
         sign < 0.):
    log_b1 = k * (k - 2 * z0) / (2 * sigma ** 2)
    log_b2 = _log_erfc((k - z1) / (math.sqrt(2) * sigma))
    log_term = log_b0 + log_b1 + log_b2
    max_log_term = max(max_log_term, log_term)
    s += sign * math.exp(log_term)
    k += 1
    # Maintain invariant: sign * exp(log_b0) = {-alpha choose k}
    log_b0 += math.log(abs(-alpha - k + 1)) - math.log(k)
    sign *= -1

  if s == 0:  # May happen if all terms are < 1e-324.
    return -np.inf
  if s < 0 or math.log(s) < max_log_term - 25:  # The series failed to converge.
    return None
  c = math.log(.5) - math.log(1 - q) * alpha
  return c + math.log(s)
def _bound_log_b1(sigma, q, alpha, z1):
  """Closed-form upper bound on log(B1), the piece of B beyond z1."""
  mixture_term = math.log(q) + (2 * z1 - 1.) / (2 * sigma ** 2)
  log_c = _log_add(math.log(1 - q), mixture_term)
  return math.log(.5) - log_c * alpha + _log_erfc(z1 / (math.sqrt(2) * sigma))
def _bound_log_b(q, sigma, alpha):
  """Compute a numerically stable bound on log(B_alpha).

  Bisects the split point m between z1 and z0: B is bounded by the series
  approximation of B0 (below m) plus the closed-form bound on B1 (above m).
  Moving m towards z0 tightens the bound at the cost of slower series
  convergence.

  Args:
    q: The sampling ratio (q == 1 short-circuits to A, by symmetry).
    sigma: The noise sigma.
    alpha: The moment order.

  Returns:
    An upper bound on log(B_alpha); np.inf if no split point converged.
  """
  if q == 1.:  # If the sampling rate is 100%, A and B are symmetric.
    return _compute_log_a(q, sigma, alpha)
  z0, z1 = _compute_zs(sigma, q)
  log_b_bound = np.inf

  # Puts a lower bound on B1: it cannot be less than its value at z0.
  log_lb_b1 = _bound_log_b1(sigma, q, alpha, z0)

  while z0 - z1 > 1e-3:
    m = (z0 + z1) / 2
    log_b0 = _compute_log_b0(sigma, q, alpha, m)
    if log_b0 is None:
      # Series did not converge at this split; retreat towards z1.
      z0 = m
      continue
    log_b1 = _bound_log_b1(sigma, q, alpha, m)
    log_b_bound = min(log_b_bound, _log_add(log_b0, log_b1))
    log_b_min_bound = _log_add(log_b0, log_lb_b1)
    if (log_b_bound < 0 or
        log_b_min_bound < 0 or
        log_b_bound > log_b_min_bound + .01):
      # If the bound is likely to be too loose, move z1 closer to z0 and repeat.
      z1 = m
    else:
      break
  return log_b_bound
def
_log_bound_b_elementary
(
q
,
alpha
):
return
-
math
.
log
(
1
-
q
)
*
alpha
def
_compute_delta
(
orders
,
rdp
,
eps
):
"""Compute delta given an RDP curve and target epsilon.
Args:
orders: An array (or a scalar) of orders.
rdp: A list (or a scalar) of RDP guarantees.
eps: The target epsilon.
Returns:
Pair of (delta, optimal_order).
Raises:
ValueError: If input is malformed.
"""
orders_vec
=
np
.
atleast_1d
(
orders
)
rdp_vec
=
np
.
atleast_1d
(
rdp
)
if
len
(
orders_vec
)
!=
len
(
rdp_vec
):
raise
ValueError
(
"Input lists must have the same length."
)
deltas
=
np
.
exp
((
rdp_vec
-
eps
)
*
(
orders_vec
-
1
))
idx_opt
=
np
.
argmin
(
deltas
)
return
min
(
deltas
[
idx_opt
],
1.
),
orders_vec
[
idx_opt
]
def
_compute_eps
(
orders
,
rdp
,
delta
):
"""Compute epsilon given an RDP curve and target delta.
Args:
orders: An array (or a scalar) of orders.
rdp: A list (or a scalar) of RDP guarantees.
delta: The target delta.
Returns:
Pair of (eps, optimal_order).
Raises:
ValueError: If input is malformed.
"""
orders_vec
=
np
.
atleast_1d
(
orders
)
rdp_vec
=
np
.
atleast_1d
(
rdp
)
if
len
(
orders_vec
)
!=
len
(
rdp_vec
):
raise
ValueError
(
"Input lists must have the same length."
)
eps
=
rdp_vec
-
math
.
log
(
delta
)
/
(
orders_vec
-
1
)
idx_opt
=
np
.
nanargmin
(
eps
)
# Ignore NaNs
return
eps
[
idx_opt
],
orders_vec
[
idx_opt
]
def _compute_rdp(q, sigma, steps, alpha):
  """Compute RDP at a single order alpha for the sampled Gaussian mechanism.

  The moment at order (alpha - 1) is max(A, bound on B); RDP is that log
  moment scaled by steps / (alpha - 1). The cheapest valid bound on B is
  used: the elementary one when it already beats A, otherwise the tighter
  numerical bound.

  Args:
    q: The sampling ratio.
    sigma: The noise sigma.
    steps: The number of steps.
    alpha: The RDP order.

  Returns:
    The RDP guarantee at order alpha (may be np.inf).
  """
  log_moment_a = _compute_log_a(q, sigma, alpha - 1)

  log_bound_b = _log_bound_b_elementary(q, alpha - 1)  # does not require sigma

  if log_bound_b < log_moment_a:
    if FLAGS.rdp_verbose:
      print("Elementary bound suffices : {} < {}".format(
          _log_print(log_bound_b), _log_print(log_moment_a)))
  else:
    # Elementary bound is too weak; try the numerical bound on B.
    log_bound_b2 = _bound_log_b(q, sigma, alpha - 1)
    if math.isnan(log_bound_b2):
      if FLAGS.rdp_verbose:
        print("B bound failed to converge")
    else:
      if FLAGS.rdp_verbose and (log_bound_b2 < log_bound_b):
        print("Elementary bound is stronger: {} < {}".format(
            _log_print(log_bound_b2), _log_print(log_bound_b)))
      # Keep whichever of the two valid bounds is tighter.
      log_bound_b = min(log_bound_b, log_bound_b2)

  return max(log_moment_a, log_bound_b) * steps / (alpha - 1)
def compute_rdp(q, sigma, steps, orders):
  """Compute RDP of Gaussian mechanism with sampling for given parameters.

  Args:
    q: The sampling ratio.
    sigma: The noise sigma.
    steps: The number of steps.
    orders: An array (or a scalar) of RDP orders.

  Returns:
    The RDPs at all orders, can be np.inf.
  """
  if np.isscalar(orders):
    return _compute_rdp(q, sigma, steps, orders)
  # Vectorized case: evaluate each requested order independently.
  rdp = np.zeros_like(orders, dtype=float)
  for idx, alpha in enumerate(orders):
    rdp[idx] = _compute_rdp(q, sigma, steps, alpha)
  return rdp
def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
  """Compute delta (or eps) for given eps (or delta) from the RDP curve.

  Args:
    orders: An array (or a scalar) of RDP orders.
    rdp: An array of RDP values.
    target_eps: If not None, the epsilon for which we compute the corresponding
      delta.
    target_delta: If not None, the delta for which we compute the corresponding
      epsilon. Exactly one of target_eps and target_delta must be None.

  Returns:
    eps, delta, opt_order.

  Raises:
    ValueError: If target_eps and target_delta are messed up.
  """
  # Validate that exactly one target was supplied.
  if target_eps is None and target_delta is None:
    raise ValueError(
        "Exactly one out of eps and delta must be None. (Both are).")
  if target_eps is not None and target_delta is not None:
    raise ValueError(
        "Exactly one out of eps and delta must be None. (None is).")

  if target_eps is not None:
    delta, opt_order = _compute_delta(orders, rdp, target_eps)
    return target_eps, delta, opt_order
  eps, opt_order = _compute_eps(orders, rdp, target_delta)
  return eps, target_delta, opt_order
def main(_):
  # This module is a library; the app entry point intentionally does nothing
  # beyond triggering flag parsing via app.run below.
  pass


if __name__ == "__main__":
  app.run(main)
research/differential_privacy/privacy_accountant/python/rdp_accountant_test.py
0 → 100644
View file @
911a0d23
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for rdp_accountant.py."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
absl.testing
import
absltest
import
mpmath
as
mp
import
numpy
as
np
import
rdp_accountant
class TestGaussianMoments(absltest.TestCase):
  """Validates rdp_accountant against multi-precision numerical integration.

  The A/B moments are recomputed with mpmath quadrature and compared with the
  cheap closed-form / series implementations in rdp_accountant.
  """

  ##############################
  # MULTI-PRECISION ARITHMETIC #
  ##############################

  def _pdf_gauss_mp(self, x, sigma, mean):
    # Gaussian density N(mean, sigma^2) evaluated with mpmath precision.
    return 1. / mp.sqrt(2. * sigma ** 2 * mp.pi) * mp.exp(
        -(x - mean) ** 2 / (2. * sigma ** 2))

  def _integral_inf_mp(self, fn):
    # Quadrature over the whole real line.
    integral, _ = mp.quad(
        fn, [-mp.inf, mp.inf], error=True,
        maxdegree=7)  # maxdegree = 6 is not enough
    return integral

  def _integral_bounded_mp(self, fn, lb, ub):
    # Quadrature over the finite (or half-infinite) interval [lb, ub].
    integral, _ = mp.quad(fn, [lb, ub], error=True)
    return integral

  def _distributions_mp(self, sigma, q):
    # mu0: noise only; mu1: noise around 1; mu: the q-sampled mixture.
    mu0 = lambda y: self._pdf_gauss_mp(y, sigma=sigma, mean=0)
    mu1 = lambda y: self._pdf_gauss_mp(y, sigma=sigma, mean=1.)
    mu = lambda y: (1 - q) * mu0(y) + q * mu1(y)
    return mu0, mu1, mu

  def compute_a_mp(self, sigma, q, order, verbose=False):
    """Compute A_lambda for arbitrary lambda by numerical integration."""
    mp.dps = 100
    mu0, mu1, mu = self._distributions_mp(sigma, q)
    a_lambda_fn = lambda z: mu(z) * (mu(z) / mu0(z)) ** order
    a_lambda = self._integral_inf_mp(a_lambda_fn)

    if verbose:
      # Also report A split into its two mixture components.
      a_lambda_first_term_fn = lambda z: mu0(z) * (mu(z) / mu0(z)) ** order
      a_lambda_second_term_fn = lambda z: mu1(z) * (mu(z) / mu0(z)) ** order

      a_lambda_first_term = self._integral_inf_mp(a_lambda_first_term_fn)
      a_lambda_second_term = self._integral_inf_mp(a_lambda_second_term_fn)

      print("A: by numerical integration {} = {} + {}".format(
          a_lambda, (1 - q) * a_lambda_first_term,
          q * a_lambda_second_term))

    return a_lambda

  def compute_b_mp(self, sigma, q, order, verbose=False):
    """Compute B_lambda for arbitrary lambda by numerical integration."""
    mu0, _, mu = self._distributions_mp(sigma, q)
    b_lambda_fn = lambda z: mu0(z) * (mu0(z) / mu(z)) ** order
    b_numeric = self._integral_inf_mp(b_lambda_fn)

    if verbose:
      # Report B split at z1, mirroring the B0/B1 split in rdp_accountant.
      _, z1 = rdp_accountant._compute_zs(sigma, q)
      print("z1 = ", z1)
      print("x in the Taylor series = ",
            q / (1 - q) * np.exp((2 * z1 - 1) / (2 * sigma ** 2)))
      b0_numeric = self._integral_bounded_mp(b_lambda_fn, -np.inf, z1)
      b1_numeric = self._integral_bounded_mp(b_lambda_fn, z1, +np.inf)
      print("B: numerically {} = {} + {}".format(b_numeric, b0_numeric,
                                                 b1_numeric))
    return float(b_numeric)

  def _compute_rdp_mp(self, q, sigma, order):
    # Reference log-moments computed entirely with mpmath.
    log_a_mp = float(mp.log(self.compute_a_mp(sigma, q, order)))
    log_b_mp = float(mp.log(self.compute_b_mp(sigma, q, order)))
    return log_a_mp, log_b_mp

  # TEST ROUTINES
  def _almost_equal(self, a, b, rtol, atol=0):
    # Analogue of np.testing.assert_allclose(a, b, rtol, atol).
    self.assertBetween(a, b * (1 - rtol) - atol, b * (1 + rtol) + atol)

  def _compare_bounds(self, q, sigma, order):
    # Compare the fast implementation with the mpmath reference at one
    # parameter setting.
    log_a_mp, log_b_mp = self._compute_rdp_mp(q, sigma, order)
    log_a = rdp_accountant._compute_log_a(q, sigma, order)
    log_bound_b = rdp_accountant._bound_log_b(q, sigma, order)

    if log_a_mp < 1000 and log_a_mp > 1e-6:
      self._almost_equal(log_a, log_a_mp, rtol=1e-6)
    else:  # be more tolerant for _very_ large or small logarithms
      self._almost_equal(log_a, log_a_mp, rtol=1e-3, atol=1e-14)

    if np.isfinite(log_bound_b):
      # Ignore divergence between the bound and exact value of B if
      # they don't matter anyway (bound on A is larger) or q > .5
      if log_bound_b > log_a and q <= .5:
        self._almost_equal(log_b_mp, log_bound_b, rtol=1e-6, atol=1e-14)

    if np.isfinite(log_a_mp) and np.isfinite(log_b_mp):
      # We hypothesize that this assertion is always true; no proof yet.
      self.assertLessEqual(log_b_mp, log_a_mp + 1e-6)

  def test_compute_rdp(self):
    # Spot-check compute_rdp against precomputed constants, scalar and vector.
    rdp_scalar = rdp_accountant.compute_rdp(0.1, 2, 10, 5)
    self.assertAlmostEqual(rdp_scalar, 0.07737, places=5)

    rdp_vec = rdp_accountant.compute_rdp(0.01, 2.5, 50, [1.5, 2.5, 5, 50, 100])
    correct = [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307]
    for i in range(len(rdp_vec)):
      self.assertAlmostEqual(rdp_vec[i], correct[i], places=5)

  def test_compare_with_mp(self):
    # Compare the cheap computation with an expensive, multi-precision
    # computation for a few parameters. Takes a few seconds.
    self._compare_bounds(q=.01, sigma=.1, order=.5)
    self._compare_bounds(q=.1, sigma=1., order=5)
    self._compare_bounds(q=.5, sigma=2., order=32.5)

    for q in (1e-6, .1, .999):
      for sigma in (.1, 10., 100.):
        for order in (1.01, 2, 255.9, 256):
          self._compare_bounds(q, sigma, order)

  def test_get_privacy_spent(self):
    # Round-trip: eps from delta, then delta back from that eps.
    orders = range(2, 33)
    rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
    eps, delta, opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=1e-5)
    self.assertAlmostEqual(eps, 1.258575, places=5)
    self.assertEqual(opt_order, 20)

    eps, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp,
                                                     target_eps=1.258575)
    self.assertAlmostEqual(delta, 1e-5)

  def test_compute_privacy_loss(self):
    # Composition across heterogeneous (q, sigma, steps) phases.
    parameters = [(0.01, 4, 10000), (0.1, 2, 100)]
    delta = 1e-5
    orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
              16., 20., 24., 28., 32., 64., 256.)
    rdp = np.zeros_like(orders, dtype=float)
    for q, sigma, steps in parameters:
      rdp += rdp_accountant.compute_rdp(q, sigma, steps, orders)
    eps, delta, opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=delta)
    self.assertAlmostEqual(eps, 3.276237, places=5)
    self.assertEqual(opt_order, 8)
# Standard absl test entry point.
if __name__ == "__main__":
  absltest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment