"git@developer.sourcefind.cn:change/sglang.git" did not exist on "fc419b62e8fec1e9c87d205008f1d3cb71e46bfd"
Commit a90db800 authored by Ilya Mironov
Browse files

Changing plots generating scripts for the slide deck.

parent 7f351c62
......@@ -292,6 +292,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
fig, ax = plt.subplots()
fig.set_figheight(4.5)
fig.set_figwidth(4.7)
fig.patch.set_alpha(0)
l1 = ax.plot(
x, y3, color='b', ls='-', label=r'Total privacy cost', linewidth=1).pop()
......@@ -311,8 +313,8 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
plt.xlim([0, xlim])
ax.set_ylim([0, 3.])
ax.set_xlabel('Number of queries answered', fontsize=10)
ax.set_ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=10)
ax.set_xlabel('Number of queries answered', fontsize=16)
ax.set_ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16)
# Merging legends.
if print_order:
......@@ -321,11 +323,11 @@ def plot_partition(figures_dir, gnmax_conf, print_order):
x, y_right, 'r', ls='-', label=r'Optimal order', linewidth=5,
alpha=.5).pop()
ax2.grid(False)
ax2.set_ylabel(r'Optimal Renyi order', fontsize=16)
# ax2.set_ylabel(r'Optimal Renyi order', fontsize=16)
ax2.set_ylim([0, 200.])
ax.legend((l1, l2), (l1.get_label(), l2.get_label()), loc=0, fontsize=13)
# ax.legend((l1, l2), (l1.get_label(), l2.get_label()), loc=0, fontsize=13)
ax.tick_params(labelsize=10)
ax.tick_params(labelsize=14)
plot_filename = os.path.join(figures_dir, 'partition.pdf')
print('Saving the graph to ' + plot_filename)
fig.savefig(plot_filename, bbox_inches='tight', dpi=800)
......@@ -387,7 +389,7 @@ def main(argv):
figures_dir = os.path.expanduser(FLAGS.figures_dir)
plot_comparison(figures_dir, simple_ind, conf_ind, simple_dep, conf_dep)
plot_partition(figures_dir, conf_dep, False)
plot_partition(figures_dir, conf_dep, True)
plt.close('all')
......
......@@ -43,6 +43,7 @@ matplotlib.use('TkAgg')
import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top
import numpy as np
import core as pate
import random
plt.style.use('ggplot')
......@@ -53,25 +54,23 @@ flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.')
def plot_rdp_curve_per_example(votes, sigmas):
orders = np.linspace(1., 100., endpoint=True, num=1000)
orders[0] = 1.5
orders[0] = 1.001
fig, ax = plt.subplots()
fig.set_figheight(4.5)
fig.set_figwidth(4.7)
styles = [':', '-']
labels = ['ex1', 'ex2']
fig.patch.set_alpha(0)
for i in xrange(votes.shape[0]):
print(sorted(votes[i,], reverse=True)[:10])
for sigma in sigmas:
logq = pate.compute_logq_gaussian(votes[i,], sigma)
rdp = pate.rdp_gaussian(logq, sigma, orders)
ax.plot(
orders,
rdp,
label=r'{} $\sigma$={}'.format(labels[i], int(sigma)),
linestyle=styles[i],
alpha=1.,
label=r'Data-dependent bound, $\sigma$={}'.format(int(sigma)),
linewidth=5)
for sigma in sigmas:
......@@ -79,21 +78,52 @@ def plot_rdp_curve_per_example(votes, sigmas):
orders,
pate.rdp_data_independent_gaussian(sigma, orders),
alpha=.3,
label=r'Data-ind bound $\sigma$={}'.format(int(sigma)),
label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)),
linewidth=10)
plt.yticks([0, .01])
plt.xlabel(r'Order $\lambda$', fontsize=16)
plt.ylabel(r'RDP value $\varepsilon$ at $\lambda$', fontsize=16)
plt.xlim(xmin=1, xmax=100)
plt.ylim(ymin=0)
plt.xticks([1, 20, 40, 60, 80, 100])
plt.yticks([0, .0025, .005, .0075, .01])
plt.xlabel(r'Order $\alpha$', fontsize=16)
plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
ax.tick_params(labelsize=14)
fout_name = os.path.join(FLAGS.figures_dir, 'rdp_flow1.pdf')
print('Saving the graph to ' + fout_name)
fig.savefig(fout_name, bbox_inches='tight')
plt.legend(loc=0, fontsize=13)
plt.show()
def plot_rdp_of_sigma(v, order):
    """Plot the data-dependent RDP of one vote vector as a function of noise.

    For each of 1000 evenly spaced noise levels in [1, 1000] this evaluates
    pate.compute_logq_gaussian(v, sigma) and feeds the result to
    pate.rdp_gaussian(logq, sigma, order). The resulting curve is drawn on a
    4.7 x 4.5 inch figure with a transparent background and shown
    interactively; nothing is written to disk.

    Args:
      v: Vote counts, passed unchanged to pate.compute_logq_gaussian
        (presumably per-class counts for a single query -- confirm against
        the `core` module).
      order: RDP order at which the bound is evaluated; it also appears in the
        y-axis label.
    """
    sigmas = np.linspace(1., 1000., endpoint=True, num=1000)

    fig, ax = plt.subplots()
    fig.set_figheight(4.5)
    fig.set_figwidth(4.7)
    fig.patch.set_alpha(0)

    y = np.zeros(len(sigmas))
    for i, sigma in enumerate(sigmas):
        logq = pate.compute_logq_gaussian(v, sigma)
        y[i] = pate.rdp_gaussian(logq, sigma, order)

    ax.plot(
        sigmas,
        y,
        alpha=.8,
        linewidth=5)

    plt.xlim(xmin=1, xmax=1000)
    plt.ylim(ymin=0)
    # plt.yticks([0, .0004, .0008, .0012])
    # Hide the y tick labels. A real boolean is required here: the string
    # 'off' is no longer converted to False by Matplotlib >= 3.0 and, being a
    # non-empty string, would leave the labels visible.
    ax.tick_params(labelleft=False)
    plt.xlabel(r'Noise $\sigma$', fontsize=16)
    plt.ylabel(r'RDP at order $\alpha={}$'.format(order), fontsize=16)
    ax.tick_params(labelsize=14)
    # plt.legend(loc=0, fontsize=13)
    plt.show()
def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
target_answered):
rdp_cum = np.zeros(len(orders))
......@@ -115,46 +145,154 @@ def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders,
def plot_rdp_total(votes, sigmas):
    """Plot the cumulative data-dependent RDP curve for each noise level.

    For every sigma in `sigmas` the curve is obtained from
    compute_rdp_curve(votes, 5000, 1000, sigma, orders, target_answered),
    i.e. threshold 5000, sigma1 1000, sigma2 = sigma, over 100 orders spanning
    (1, 100]. The figure is saved as 'rdp_flow2.pdf' under FLAGS.figures_dir
    and then shown interactively.

    Each setting below appears exactly once. Assigning `orders[0]`, `rdp` or
    the axis labels twice only leaves dead statements behind, and repeating
    the `label=` keyword inside a single ax.plot(...) call is a SyntaxError.

    Args:
      votes: Vote data forwarded unchanged to compute_rdp_curve.
      sigmas: Iterable of noise levels; each is used as `sigma2` and shown in
        the legend as an integer.
    """
    orders = np.linspace(1., 100., endpoint=True, num=100)
    # The grid starts at exactly 1; use a value slightly above it instead
    # (presumably because the RDP bound is only defined for orders > 1).
    orders[0] = 1.1

    fig, ax = plt.subplots()
    fig.set_figheight(4.5)
    fig.set_figwidth(4.7)
    fig.patch.set_alpha(0)

    target_answered = 2000

    for sigma in sigmas:
        rdp = compute_rdp_curve(votes, 5000, 1000, sigma, orders,
                                target_answered)
        ax.plot(
            orders,
            rdp,
            alpha=.8,
            label=r'Data-dependent bound, $\sigma$={}'.format(int(sigma)),
            linewidth=5)

    # Data-independent baseline, intentionally disabled:
    # for sigma in sigmas:
    #   ax.plot(
    #       orders,
    #       target_answered * pate.rdp_data_independent_gaussian(sigma, orders),
    #       alpha=.3,
    #       label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)),
    #       linewidth=10)

    plt.xlim(xmin=1, xmax=100)
    plt.ylim(ymin=0)
    plt.xticks([1, 20, 40, 60, 80, 100])
    plt.yticks([0, .0005, .001, .0015, .002])
    plt.xlabel(r'Order $\alpha$', fontsize=16)
    plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
    ax.tick_params(labelsize=14)

    fout_name = os.path.join(FLAGS.figures_dir, 'rdp_flow2.pdf')
    print('Saving the graph to ' + fout_name)
    # NOTE(review): the legend is created after savefig, so the saved PDF has
    # no legend while the interactive window does -- confirm this is intended.
    fig.savefig(fout_name, bbox_inches='tight')
    plt.legend(loc=0, fontsize=13)
    plt.show()
def plot_one_curve():
    """Show the data-independent Gaussian RDP bound for sigma = 1.

    The bound pate.rdp_data_independent_gaussian(1., orders) is drawn as a
    thick, half-transparent gray line over 1000 orders spanning (1, 10], on a
    4.7 x 4.5 inch figure with a transparent background. The figure is only
    displayed, never saved.
    """
    alphas = np.linspace(1., 10., endpoint=True, num=1000)
    # The first grid point is exactly 1; nudge it just above 1.
    alphas[0] = 1.01
    bound = pate.rdp_data_independent_gaussian(1., alphas)

    fig, ax = plt.subplots()
    fig.set_size_inches(4.7, 4.5)  # (width, height)
    fig.patch.set_alpha(0)

    ax.plot(alphas, bound, alpha=.5, color='gray', linewidth=10)

    # ax.set_yticks([])
    ax.set_xlim(xmin=1, xmax=10)
    ax.set_ylim(ymin=0)

    plt.show()
def plot_two_curves():
    """Show the data-independent Gaussian RDP bounds for sigma = 100 and 150.

    Both curves are evaluated with pate.rdp_data_independent_gaussian over
    1000 orders spanning (1, 100] and drawn as wide, mostly transparent lines
    with a legend. The figure is only displayed, never saved.
    """
    alphas = np.linspace(1., 100., endpoint=True, num=1000)
    # The first grid point is exactly 1; nudge it just above 1.
    alphas[0] = 1.001

    fig, ax = plt.subplots()
    fig.set_size_inches(4.7, 4.5)  # (width, height)
    fig.patch.set_alpha(0)

    # Two empty plot calls consume the first two entries of the colour cycle,
    # presumably so these curves get the same colours as the data-independent
    # curves in the other figures.
    for _ in range(2):
        ax.plot([], [])

    for noise in (100, 150):
        curve_label = r'Data-independent bound, $\sigma$={}'.format(int(noise))
        ax.plot(
            alphas,
            pate.rdp_data_independent_gaussian(noise, alphas),
            alpha=.3,
            label=curve_label,
            linewidth=10)

    ax.set_xlim(xmin=1, xmax=100)
    ax.set_ylim(ymin=0)
    ax.set_xticks([1, 20, 40, 60, 80, 100])
    ax.set_yticks([0, .0025, .005, .0075, .01])
    ax.set_xlabel(r'Order $\alpha$', fontsize=16)
    ax.set_ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16)
    ax.tick_params(labelsize=14)
    ax.legend(loc=0, fontsize=13)

    plt.show()
def scatter_plot(votes, threshold, sigma1, sigma2, order):
    """Scatter per-query RDP cost against the query's largest vote count.

    Each row of `votes` is kept with probability
    exp(pate.compute_logpr_answered(threshold, sigma1, row)); when either
    `threshold` or `sigma1` is None every row is kept. For each kept row the
    point (max(row), pate.rdp_gaussian(logq, sigma2, order)) is plotted, with
    logq = pate.compute_logq_gaussian(row, sigma2). The y axis is logarithmic
    and the figure is only displayed, never saved.

    Selection uses the global `random` generator, so the set of plotted points
    differs between runs unless the caller seeds it.

    Args:
      votes: Iterable of vote-count rows.
      threshold: Threshold of the selection step, or None to disable selection.
      sigma1: Noise of the selection step, or None to disable selection.
      sigma2: Noise used for the per-query RDP computation.
      order: RDP order at which each query's cost is evaluated.
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(4.7, 4.5)  # (width, height)
    fig.patch.set_alpha(0)

    selection_enabled = threshold is not None and sigma1 is not None

    max_counts = []
    rdp_costs = []
    for row in votes:
        if selection_enabled:
            pr_answered = math.exp(
                pate.compute_logpr_answered(threshold, sigma1, row))
        else:
            pr_answered = 1.

        # One random draw per row, even when selection is disabled.
        if not random.random() < pr_answered:
            continue

        logq = pate.compute_logq_gaussian(row, sigma2)
        max_counts.append(max(row))
        rdp_costs.append(pate.rdp_gaussian(logq, sigma2, order))

    print('Selected {} queries.'.format(len(max_counts)))

    # Data-independent reference line, intentionally disabled:
    # data_ind = pate.rdp_data_independent_gaussian(sigma, order)
    # plt.plot([0, 5000], [data_ind, data_ind], color='tab:blue',
    #          linestyle='-', linewidth=2)

    ax.set_yscale('log')
    ax.set_xlim(xmin=0, xmax=5000)
    ax.set_ylim(ymin=1e-300, ymax=1)
    ax.set_yticks([1, 1e-100, 1e-200, 1e-300])
    ax.scatter(max_counts, rdp_costs, s=1, alpha=0.5)
    ax.set_ylabel(r'RDP at $\alpha={}$'.format(order), fontsize=16)
    ax.set_xlabel(r'max count', fontsize=16)
    ax.tick_params(labelsize=14)

    plt.show()
def main(argv):
    """Load the raw vote counts and produce the slide-deck figures.

    Reads the array stored at FLAGS.counts_file, keeps its first 12000 rows,
    and calls the plotting helpers in sequence. Calls that are commented out
    are alternative figures that are currently disabled.

    Args:
      argv: Command-line arguments; unused.
    """
    del argv  # Unused.
    fin_name = os.path.expanduser(FLAGS.counts_file)
    print('Reading raw votes from ' + fin_name)
    sys.stdout.flush()
    #plot_one_curve()
    #plot_two_curves()
    votes = np.load(fin_name)
    votes = votes[:12000,]  # keep only the first 12000 rows
    v1 = [2550, 2200, 250]  # based on votes[2,]
    v2 = [2600, 2200, 200]  # based on votes[381,]
    plot_rdp_curve_per_example(np.array([v1, v2]), (100., 150.))
    #v2 = [2600, 2200, 200] # based on votes[381,]
    #plot_rdp_curve_per_example(np.array([v1]), (100., 150.))
    #plot_rdp_of_sigma(np.array(v1), 20.)
    plot_rdp_total(votes, (100., 150.))
    #plot_rdp_total(votes, (100., 150.))
    # Same 6000 rows, sigma2=100 and order 20: first with no selection step,
    # then with threshold 3500 and sigma1 1500.
    scatter_plot(votes[:6000, ], None, None, 100, 20)
    scatter_plot(votes[:6000, ], 3500, 1500, 100, 20)
if __name__ == '__main__':
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment