ModelZoo / ResNet50_tensorflow · Commit fe89fdab
Authored Oct 19, 2016 by npapernot (parent 77080ad1)

    add privacy analysis script and teacher labels required to predict the epsilon

Showing 5 changed files with 282 additions and 2 deletions:

- privacy/README.md (+15 −2)
- privacy/analysis.py (+267 −0)
- privacy/mnist_250_teachers_100_indices_used_by_student.npy (+0 −0)
- privacy/mnist_250_teachers_labels.npy (+0 −0)
- privacy/svhn_250_teachers_labels.npy (+0 −0)
privacy/README.md

@@ -8,8 +8,7 @@ Knowledge acquired by teachers is transferred to the student in a differentially
 private manner by noisily aggregating the teacher decisions before feeding them
 to the student during training.
-A paper describing the approach is in preparation. A link will be added to this
-README when available.
+The paper describing the approach is
+[arXiv:1610.05755](https://arxiv.org/abs/1610.05755).

 ## Dependencies

@@ -72,6 +71,20 @@ functions `inference` and `inference_deeper`. Use the flag `--deeper=true`
 to switch to that model when launching `train_teachers.py` and
 `train_student.py`.

+## Privacy analysis
+
+In the paper, we detail how data-dependent differential privacy bounds can be
+computed to estimate the cost of training the student. In order to reproduce
+the bounds given in the paper, we include the labels predicted by our two
+teacher ensembles: MNIST and SVHN. You can run the privacy analysis for each
+dataset with the following commands:
+
+```
+python analysis.py --counts_file=mnist_250_teachers_labels.npy --indices_file=mnist_250_teachers_100_indices_used_by_student.npy
+python analysis.py --counts_file=svhn_250_teachers_labels.npy --max_examples=1000 --delta=1e-6
+```
+
 ## Contact

 To ask questions, please email `nicolas@papernot.fr` or open an issue on
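As the comments in `analysis.py` below spell out, these commands sum per-query log moment bounds $\alpha(l)$ over all label queries, then convert the total into a single guarantee by setting $\delta = \exp(\alpha(l) - \varepsilon l)$ and solving for $\varepsilon$:

$$\varepsilon = \min_{1 \le l \le \text{moments}} \frac{\alpha(l) - \ln \delta}{l},$$

with `--moments=8` and `--delta=1e-5` by default (the SVHN command overrides `--delta=1e-6`).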
privacy/analysis.py 0 → 100644

```
"""
This script computes bounds on the privacy cost of training the
student model from noisy aggregation of labels predicted by teachers.
It should be used only after training the student (and therefore the
teachers as well). We however include the label files required to
reproduce key results from our paper (https://arxiv.org/abs/1610.05755):
the epsilon bounds for MNIST and SVHN students.

The command that computes the epsilon bound associated
with the training of the MNIST student model (100 label queries
with a (1/20)*2=0.1 epsilon bound each) is:

python analysis.py
  --counts_file=mnist_250_teachers_labels.npy
  --indices_file=mnist_250_teachers_100_indices_used_by_student.npy

The command that computes the epsilon bound associated
with the training of the SVHN student model (1000 label queries
with a (1/20)*2=0.1 epsilon bound each) is:

python analysis.py
  --counts_file=svhn_250_teachers_labels.npy
  --max_examples=1000
  --delta=1e-6
"""
import math

import numpy as np
import tensorflow as tf

# These parameters can be changed to compute bounds for different failure rates
# or different model predictions.
tf.flags.DEFINE_integer("moments", 8, "Number of moments")
tf.flags.DEFINE_float("noise_eps", 0.1, "Eps value for each call to noisymax.")
tf.flags.DEFINE_float("delta", 1e-5, "Target value of delta.")
tf.flags.DEFINE_float("beta", 0.09, "Value of beta for smooth sensitivity")
tf.flags.DEFINE_string("counts_file", "", "Numpy matrix with raw counts")
tf.flags.DEFINE_string("indices_file", "",
                       "File containing a numpy matrix with indices used."
                       "Optional. Use the first max_examples indices if this"
                       " is not provided.")
tf.flags.DEFINE_integer("max_examples", 1000,
                        "Number of examples to use. We will use the first"
                        " max_examples many examples from the counts_file"
                        " or indices_file to do the privacy cost estimate")
tf.flags.DEFINE_float("too_small", 1e-10, "Small threshold to avoid log of 0")
tf.flags.DEFINE_bool("input_is_counts", False, "False if labels, True if counts")

FLAGS = tf.flags.FLAGS


def compute_q_noisy_max(counts, noise_eps):
  """returns ~ Pr[outcome != winner].

  Args:
    counts: a list of scores
    noise_eps: privacy parameter for noisy_max
  Returns:
    q: the probability that the outcome is different from the true winner.
  """
  # For noisy max, we only get an upper bound:
  # Pr[j beats i*] <= (2 + gap(j, i*)) / (4 exp(gap(j, i*))),
  # proof at http://mathoverflow.net/questions/66763/
  # tight-bounds-on-probability-of-sum-of-laplace-random-variables
  winner = np.argmax(counts)
  counts_normalized = noise_eps * (counts - counts[winner])
  counts_rest = np.array(
      [counts_normalized[i] for i in xrange(len(counts)) if i != winner])
  q = 0.0
  for c in counts_rest:
    gap = -c
    q += (gap + 2.0) / (4.0 * math.exp(gap))
  return min(q, 1.0 - (1.0 / len(counts)))


def compute_q_noisy_max_approx(counts, noise_eps):
  """returns ~ Pr[outcome != winner].

  Args:
    counts: a list of scores
    noise_eps: privacy parameter for noisy_max
  Returns:
    q: the probability that the outcome is different from the true winner.
  """
  # For noisy max, we only get an upper bound:
  # Pr[j beats i*] <= (2 + gap(j, i*)) / (4 exp(gap(j, i*))),
  # proof at http://mathoverflow.net/questions/66763/
  # tight-bounds-on-probability-of-sum-of-laplace-random-variables
  # This code uses an approximation that is faster and easier
  # to get a local sensitivity bound on.
  winner = np.argmax(counts)
  counts_normalized = noise_eps * (counts - counts[winner])
  counts_rest = np.array(
      [counts_normalized[i] for i in xrange(len(counts)) if i != winner])
  gap = -max(counts_rest)
  q = (len(counts) - 1) * (gap + 2.0) / (4.0 * math.exp(gap))
  return min(q, 1.0 - (1.0 / len(counts)))


def logmgf_exact(q, priv_eps, l):
  """Computes the logmgf value given q and privacy eps.

  The bound used is the min of three terms. The first term is from
  https://arxiv.org/pdf/1605.02065.pdf.
  The second term is based on the fact that when an event has probability
  (1 - q) for q close to zero, q can only change by exp(eps), which
  corresponds to a much smaller multiplicative change in (1 - q).
  The third term comes directly from the privacy guarantee.

  Args:
    q: probability of the non-optimal outcome
    priv_eps: eps parameter for DP
    l: moment to compute.
  Returns:
    Upper bound on logmgf
  """
  if q < 0.5:
    t_one = (1 - q) * math.pow((1 - q) / (1 - math.exp(priv_eps) * q), l)
    t_two = q * math.exp(priv_eps * l)
    t = t_one + t_two
    try:
      log_t = math.log(t)
    except ValueError:
      print "Got ValueError in math.log for values :" + str((q, priv_eps, l, t))
      log_t = priv_eps * l
  else:
    log_t = priv_eps * l
  return min(0.5 * priv_eps * priv_eps * l * (l + 1), log_t, priv_eps * l)


def logmgf_from_counts(counts, noise_eps, l):
  """ReportNoisyMax mechanism with noise_eps is 2*noise_eps-DP
  in our setting, where one count can go up by 1 and another
  can go down by 1.
  """
  q = compute_q_noisy_max(counts, noise_eps)
  return logmgf_exact(q, 2.0 * noise_eps, l)


def sens_at_k(counts, noise_eps, l, k):
  """Return sensitivity at distance k.

  Args:
    counts: an array of scores
    noise_eps: noise parameter used
    l: moment whose sensitivity is being computed
    k: distance
  Returns:
    sensitivity: at distance k
  """
  counts_sorted = sorted(counts, reverse=True)
  if 0.5 * noise_eps * l > 1:
    print "l too large to compute sensitivity"
    return 0
  # Now we can assume that at k, the gap remains positive,
  # or we have reached the point where logmgf_exact is
  # determined by the first term and independent of q.
  if counts[0] < counts[1] + k:
    return 0
  counts_sorted[0] -= k
  counts_sorted[1] += k
  val = logmgf_from_counts(counts_sorted, noise_eps, l)
  counts_sorted[0] -= 1
  counts_sorted[1] += 1
  val_changed = logmgf_from_counts(counts_sorted, noise_eps, l)
  return val_changed - val


def smoothed_sens(counts, noise_eps, l, beta):
  """Compute beta-smooth sensitivity.

  Args:
    counts: array of scores
    noise_eps: noise parameter
    l: moment of interest
    beta: smoothness parameter
  Returns:
    smooth_sensitivity: a beta-smooth upper bound
  """
  k = 0
  smoothed_sensitivity = sens_at_k(counts, noise_eps, l, k)
  while k < max(counts):
    k += 1
    sensitivity_at_k = sens_at_k(counts, noise_eps, l, k)
    smoothed_sensitivity = max(
        smoothed_sensitivity,
        math.exp(-beta * k) * sensitivity_at_k)
    if sensitivity_at_k == 0.0:
      break
  return smoothed_sensitivity


def main(unused_argv):
  input_mat = np.load(FLAGS.counts_file)
  if FLAGS.input_is_counts:
    counts_mat = input_mat
  else:
    # In this case, the input is the raw predictions. Transform
    # them into one vote count per class for each example.
    num_teachers, n = input_mat.shape
    counts_mat = np.zeros((n, 10)).astype(np.int32)
    for i in range(n):
      for j in range(num_teachers):
        counts_mat[i, input_mat[j, i]] += 1
  n = counts_mat.shape[0]
  num_examples = min(n, FLAGS.max_examples)

  if not FLAGS.indices_file:
    indices = np.array(range(num_examples))
  else:
    index_list = np.load(FLAGS.indices_file)
    indices = index_list[:num_examples]

  l_list = 1.0 + np.array(xrange(FLAGS.moments))
  beta = FLAGS.beta
  total_log_mgf_nm = np.array([0.0 for _ in l_list])
  total_ss_nm = np.array([0.0 for _ in l_list])
  noise_eps = FLAGS.noise_eps

  for i in indices:
    total_log_mgf_nm += np.array(
        [logmgf_from_counts(counts_mat[i], noise_eps, l) for l in l_list])
    total_ss_nm += np.array(
        [smoothed_sens(counts_mat[i], noise_eps, l, beta) for l in l_list])
  delta = FLAGS.delta

  # We want delta = exp(alpha - eps l).
  # Solving gives eps = (alpha - ln(delta)) / l
  eps_list_nm = (total_log_mgf_nm - math.log(delta)) / l_list

  print "Epsilons (Noisy Max): " + str(eps_list_nm)
  print "Smoothed sensitivities (Noisy Max): " + str(total_ss_nm / l_list)

  # If beta < eps / 2 ln(1/delta), then adding noise Lap(1) * 2 SS/eps
  # is (eps, delta)-DP.
  # Also, if beta < eps / 2(gamma + 1), then adding noise
  # 2(gamma + 1) SS eta / eps, where eta has density proportional to
  # 1 / (1 + |z|^gamma), is eps-DP.
  # Both from Corollary 2.4 in
  # http://www.cse.psu.edu/~ads22/pubs/NRS07/NRS07-full-draft-v1.pdf
  # Print the first one's scale.
  ss_eps = 2.0 * beta * math.log(1 / delta)
  ss_scale = 2.0 / ss_eps
  print "To get an " + str(ss_eps) + "-DP estimate of epsilon, "
  print "..add noise ~ " + str(ss_scale)
  print "... times " + str(total_ss_nm / l_list)
  print "Epsilon = " + str(min(eps_list_nm)) + "."
  if min(eps_list_nm) == eps_list_nm[-1]:
    print "Warning: May not have used enough values of l"

  # Data-independent bound, as the mechanism is 2*noise_eps-DP.
  data_ind_log_mgf = np.array([0.0 for _ in l_list])
  data_ind_log_mgf += num_examples * np.array(
      [logmgf_exact(1.0, 2.0 * noise_eps, l) for l in l_list])

  data_ind_eps_list = (data_ind_log_mgf - math.log(delta)) / l_list
  print "Data independent bound = " + str(min(data_ind_eps_list)) + "."

  return


if __name__ == "__main__":
  tf.app.run()
```
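To sanity-check the accounting above on a single query, the per-example step in `main` can be exercised in isolation. The sketch below is illustrative only: the vote counts are hypothetical (not taken from the released `.npy` files), and it assumes it runs next to `analysis.py` under Python 2, like the script itself.

```
# Illustrative sketch: hypothetical vote histogram for one student query,
# 250 teachers over 10 classes. Not data from the paper.
import math
import numpy as np
from analysis import compute_q_noisy_max, logmgf_exact

counts = np.array([2, 1, 0, 3, 1, 2, 1, 235, 3, 2])
noise_eps = 0.1                      # default of the --noise_eps flag
l_list = 1.0 + np.array(range(8))    # moments l = 1..8, as in main()

# q bounds Pr[noisy argmax != true argmax]; each query is 2*noise_eps-DP
# since one count can go up by 1 and another can go down by 1.
q = compute_q_noisy_max(counts, noise_eps)
log_mgf = np.array([logmgf_exact(q, 2.0 * noise_eps, l) for l in l_list])

# Log moments add over queries; with a single query, solving
# delta = exp(alpha - eps * l) for eps and minimizing over l gives:
delta = 1e-5
eps_list = (log_mgf - math.log(delta)) / l_list
print min(eps_list)
```

Summing `log_mgf` over all queried indices before the final division is exactly what `main` does; the large vote margin in this histogram is what drives the data-dependent bound below the data-independent one printed at the end.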
privacy/mnist_250_teachers_100_indices_used_by_student.npy 0 → 100644
File added.

privacy/mnist_250_teachers_labels.npy 0 → 100644
File added.

privacy/svhn_250_teachers_labels.npy 0 → 100644
File added.
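The three `.npy` payloads are not rendered by the diff viewer, but `main` in `analysis.py` documents their expected layout: a labels file holds one predicted label per teacher and per example (shape `(num_teachers, n)`, transformed into per-class vote counts unless `--input_is_counts` is set), and the indices file lists which examples the student queried. A quick inspection sketch, with the shapes stated as what the script expects rather than verified here:

```
import numpy as np

labels = np.load("mnist_250_teachers_labels.npy")
indices = np.load("mnist_250_teachers_100_indices_used_by_student.npy")
print labels.shape    # expected (num_teachers, n), per main() in analysis.py
print indices[:10]    # first few example indices queried by the student
```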