Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
5a3c97b9
Commit
5a3c97b9
authored
Jul 23, 2018
by
Carlos Riquelme
Browse files
Added new model.
parent
ae8e0f53
Changes
40
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2810 additions
and
0 deletions
+2810
-0
research/deep_contextual_bandits/bandits/algorithms/multitask_gp.py
...eep_contextual_bandits/bandits/algorithms/multitask_gp.py
+374
-0
research/deep_contextual_bandits/bandits/algorithms/neural_bandit_model.py
...textual_bandits/bandits/algorithms/neural_bandit_model.py
+220
-0
research/deep_contextual_bandits/bandits/algorithms/neural_linear_sampling.py
...tual_bandits/bandits/algorithms/neural_linear_sampling.py
+180
-0
research/deep_contextual_bandits/bandits/algorithms/parameter_noise_sampling.py
...al_bandits/bandits/algorithms/parameter_noise_sampling.py
+187
-0
research/deep_contextual_bandits/bandits/algorithms/posterior_bnn_sampling.py
...tual_bandits/bandits/algorithms/posterior_bnn_sampling.py
+92
-0
research/deep_contextual_bandits/bandits/algorithms/uniform_sampling.py
...contextual_bandits/bandits/algorithms/uniform_sampling.py
+43
-0
research/deep_contextual_bandits/bandits/algorithms/variational_neural_bandit_model.py
...its/bandits/algorithms/variational_neural_bandit_model.py
+346
-0
research/deep_contextual_bandits/bandits/core/__pycache__/bandit_algorithm.cpython-36.pyc
.../bandits/core/__pycache__/bandit_algorithm.cpython-36.pyc
+0
-0
research/deep_contextual_bandits/bandits/core/__pycache__/bayesian_nn.cpython-36.pyc
...ndits/bandits/core/__pycache__/bayesian_nn.cpython-36.pyc
+0
-0
research/deep_contextual_bandits/bandits/core/__pycache__/contextual_bandit.cpython-36.pyc
...bandits/core/__pycache__/contextual_bandit.cpython-36.pyc
+0
-0
research/deep_contextual_bandits/bandits/core/__pycache__/contextual_dataset.cpython-36.pyc
...andits/core/__pycache__/contextual_dataset.cpython-36.pyc
+0
-0
research/deep_contextual_bandits/bandits/core/bandit_algorithm.py
.../deep_contextual_bandits/bandits/core/bandit_algorithm.py
+34
-0
research/deep_contextual_bandits/bandits/core/bayesian_nn.py
research/deep_contextual_bandits/bandits/core/bayesian_nn.py
+36
-0
research/deep_contextual_bandits/bandits/core/contextual_bandit.py
...deep_contextual_bandits/bandits/core/contextual_bandit.py
+125
-0
research/deep_contextual_bandits/bandits/core/contextual_dataset.py
...eep_contextual_bandits/bandits/core/contextual_dataset.py
+166
-0
research/deep_contextual_bandits/bandits/data/__pycache__/data_sampler.cpython-36.pyc
...dits/bandits/data/__pycache__/data_sampler.cpython-36.pyc
+0
-0
research/deep_contextual_bandits/bandits/data/__pycache__/synthetic_data_sampler.cpython-36.pyc
...ts/data/__pycache__/synthetic_data_sampler.cpython-36.pyc
+0
-0
research/deep_contextual_bandits/bandits/data/data_sampler.py
...arch/deep_contextual_bandits/bandits/data/data_sampler.py
+374
-0
research/deep_contextual_bandits/bandits/data/synthetic_data_sampler.py
...contextual_bandits/bandits/data/synthetic_data_sampler.py
+179
-0
research/deep_contextual_bandits/example_main.py
research/deep_contextual_bandits/example_main.py
+454
-0
No files found.
research/deep_contextual_bandits/bandits/algorithms/multitask_gp.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A Multitask Gaussian process."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
absl
import
flags
from
absl
import
logging
import
numpy
as
np
import
tensorflow
as
tf
from
bandits.core.bayesian_nn
import
BayesianNN
FLAGS
=
flags
.
FLAGS
tfd
=
tf
.
contrib
.
distributions
class MultitaskGP(BayesianNN):
  """Implements a Gaussian process with multi-task outputs.

  Optimizes the hyperparameters over the log marginal likelihood.
  Uses a Matern 3/2 + linear covariance and returns
  sampled predictions for test inputs. The outputs are optionally
  correlated where the correlation structure is learned through latent
  embeddings of the tasks.
  """

  def __init__(self, hparams):
    """Stores hyper-parameters and builds the TF graph and session.

    Args:
      hparams: Hyper-parameter object; this block reads context_dim,
        num_outputs, keep_fixed_after_max_obs, show_training, freq_summary,
        task_latent_dim, max_num_points and learn_embeddings from it.
    """
    self.name = "MultiTaskGP"
    self.hparams = hparams

    self.n_in = self.hparams.context_dim
    self.n_out = self.hparams.num_outputs
    self.keep_fixed_after_max_obs = self.hparams.keep_fixed_after_max_obs

    self._show_training = self.hparams.show_training
    self._freq_summary = self.hparams.freq_summary

    # Dimensionality of the latent task vectors
    self.task_latent_dim = self.hparams.task_latent_dim

    # Maximum number of observations to include
    self.max_num_points = self.hparams.max_num_points

    # Falsy hparams.learn_embeddings is normalized to False.
    if self.hparams.learn_embeddings:
      self.learn_embeddings = self.hparams.learn_embeddings
    else:
      self.learn_embeddings = False

    # create the graph corresponding to the BNN instance
    self.graph = tf.Graph()
    with self.graph.as_default():
      # store a new session for the graph
      self.sess = tf.Session()

      with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        # n: number of observations fed at train time (scalar).
        self.n = tf.placeholder(shape=[], dtype=tf.float64)
        # x: contexts used for test-time predictions.
        self.x = tf.placeholder(shape=[None, self.n_in], dtype=tf.float64)
        # x_in: contexts used as GP training inputs.
        self.x_in = tf.placeholder(shape=[None, self.n_in], dtype=tf.float64)
        # y: observed rewards, one column per task/output.
        self.y = tf.placeholder(shape=[None, self.n_out], dtype=tf.float64)
        # weights: one-hot mask of which output was actually observed.
        self.weights = tf.placeholder(shape=[None, self.n_out],
                                      dtype=tf.float64)

        self.build_model()
      self.sess.run(tf.global_variables_initializer())
def
atleast_2d
(
self
,
x
,
dims
):
return
tf
.
reshape
(
tf
.
expand_dims
(
x
,
axis
=
0
),
(
-
1
,
dims
))
def
sq_dist
(
self
,
x
,
x2
):
a2
=
tf
.
reduce_sum
(
tf
.
square
(
x
),
1
)
b2
=
tf
.
reduce_sum
(
tf
.
square
(
x2
),
1
)
sqdists
=
tf
.
expand_dims
(
a2
,
1
)
+
b2
-
2.0
*
tf
.
matmul
(
x
,
tf
.
transpose
(
x2
))
return
sqdists
# Covariance between outputs
def
task_cov
(
self
,
x
,
x2
):
"""Squared Exponential Covariance Kernel over latent task embeddings."""
# Index into latent task vectors
x_vecs
=
tf
.
gather
(
self
.
task_vectors
,
tf
.
argmax
(
x
,
axis
=
1
),
axis
=
0
)
x2_vecs
=
tf
.
gather
(
self
.
task_vectors
,
tf
.
argmax
(
x2
,
axis
=
1
),
axis
=
0
)
r
=
self
.
sq_dist
(
self
.
atleast_2d
(
x_vecs
,
self
.
task_latent_dim
),
self
.
atleast_2d
(
x2_vecs
,
self
.
task_latent_dim
))
return
tf
.
exp
(
-
r
)
  def cov(self, x, x2):
    """Matern 3/2 + Linear Gaussian Process Covariance Function."""
    # Length scales live in log-ish space; clip before softplus to keep the
    # effective scales in a numerically safe range.
    ls = tf.clip_by_value(self.length_scales, -5.0, 5.0)
    ls_lin = tf.clip_by_value(self.length_scales_lin, -5.0, 5.0)
    # Squared distances between inputs rescaled per-dimension.
    r = self.sq_dist(self.atleast_2d(x, self.n_in) / tf.nn.softplus(ls),
                     self.atleast_2d(x2, self.n_in) / tf.nn.softplus(ls))
    # Guard against tiny negative values from floating-point cancellation.
    r = tf.clip_by_value(r, 0, 1e8)

    # Matern 3/2 Covariance; 1e-16 keeps sqrt differentiable at r == 0.
    matern = (1.0 + tf.sqrt(3.0 * r + 1e-16)) * tf.exp(
        -tf.sqrt(3.0 * r + 1e-16))
    # Linear Covariance
    lin = tf.matmul(x / tf.nn.softplus(ls_lin),
                    x2 / tf.nn.softplus(ls_lin),
                    transpose_b=True)
    # Each component gets its own (softplus-positive) amplitude.
    return (tf.nn.softplus(self.amplitude) * matern +
            tf.nn.softplus(self.amplitude_linear) * lin)
  def build_model(self):
    """Defines the GP model.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """

    logging.info("Initializing model %s.", self.name)
    self.global_step = tf.train.get_or_create_global_step()

    # Define state for the model (inputs, etc.)
    # validate_shape=False because the number of stored observations grows
    # over time; the variables are re-assigned with differently-sized data.
    self.x_train = tf.get_variable(
        "training_data",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_in], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.y_train = tf.get_variable(
        "training_labels",
        initializer=tf.zeros([self.hparams.batch_size, 1], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.weights_train = tf.get_variable(
        "weights_train",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_out], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_op = tf.assign(self.x_train, self.x_in, validate_shape=False)
    self.input_w_op = tf.assign(
        self.weights_train, self.weights, validate_shape=False)

    self.input_std = tf.get_variable(
        "data_standard_deviation",
        initializer=tf.ones([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=False)
    # NOTE(review): input_mean is trainable and is optimized below through
    # the loss; mean_op additionally assigns the empirical mean to it.
    self.input_mean = tf.get_variable(
        "data_mean",
        initializer=tf.zeros([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=True)

    # GP Hyperparameters (stored pre-softplus, hence the 0.0/1.0 inits).
    self.noise = tf.get_variable(
        "noise", initializer=tf.cast(0.0, dtype=tf.float64))
    self.amplitude = tf.get_variable(
        "amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.amplitude_linear = tf.get_variable(
        "linear_amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.length_scales = tf.get_variable(
        "length_scales", initializer=tf.zeros([1, self.n_in],
                                              dtype=tf.float64))
    self.length_scales_lin = tf.get_variable(
        "length_scales_linear",
        initializer=tf.zeros([1, self.n_in], dtype=tf.float64))

    # Latent embeddings of the different outputs for task covariance
    self.task_vectors = tf.get_variable(
        "latent_task_vectors",
        initializer=tf.random_normal(
            [self.n_out, self.task_latent_dim], dtype=tf.float64))

    # Normalize outputs across each dimension
    # Since we have different numbers of observations across each task, we
    # normalize by their respective counts.
    index_counts = self.atleast_2d(
        tf.reduce_sum(self.weights, axis=0), self.n_out)
    # Avoid division by zero for tasks with no observations yet.
    index_counts = tf.where(index_counts > 0, index_counts,
                            tf.ones(tf.shape(index_counts),
                                    dtype=tf.float64))
    self.mean_op = tf.assign(
        self.input_mean, tf.reduce_sum(self.y, axis=0) / index_counts)
    self.var_op = tf.assign(
        self.input_std,
        tf.sqrt(1e-4 + tf.reduce_sum(
            tf.square(self.y -
                      tf.reduce_sum(self.y, axis=0) / index_counts),
            axis=0) / index_counts))

    with tf.control_dependencies([self.var_op]):
      y_normed = self.atleast_2d(
          (self.y - self.input_mean) / self.input_std, self.n_out)
      # Keep only the entries whose output was actually observed.
      y_normed = self.atleast_2d(
          tf.boolean_mask(y_normed, self.weights > 0), 1)
    self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

    # Observation noise (softplus keeps it positive; 1e-6 is jitter).
    alpha = tf.nn.softplus(self.noise) + 1e-6

    # Covariance
    with tf.control_dependencies(
        [self.input_op, self.input_w_op, self.out_op]):
      self.self_cov = (self.cov(self.x_in, self.x_in) *
                       self.task_cov(self.weights, self.weights) +
                       tf.eye(tf.shape(self.x_in)[0],
                              dtype=tf.float64) * alpha)

    self.chol = tf.cholesky(self.self_cov)
    self.kinv = tf.cholesky_solve(
        self.chol, tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64))

    self.input_inv = tf.Variable(
        tf.eye(self.hparams.batch_size, dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_cov_op = tf.assign(
        self.input_inv, self.kinv, validate_shape=False)

    # Log determinant by taking the singular values along the diagonal
    # of self.chol
    with tf.control_dependencies([self.input_cov_op]):
      logdet = 2.0 * tf.reduce_sum(
          tf.log(tf.diag_part(self.chol) + 1e-16))

    # Log Marginal likelihood (negated so it can be minimized as a loss).
    self.marginal_ll = -tf.reduce_sum(
        -0.5 * tf.matmul(tf.transpose(y_normed),
                         tf.matmul(self.kinv, y_normed)) -
        0.5 * logdet - 0.5 * self.n * np.log(2 * np.pi))

    zero = tf.cast(0., dtype=tf.float64)
    one = tf.cast(1., dtype=tf.float64)
    standard_normal = tfd.Normal(loc=zero, scale=one)

    # Loss is marginal likelihood and priors
    self.loss = tf.reduce_sum(
        self.marginal_ll -
        (standard_normal.log_prob(self.amplitude) +
         standard_normal.log_prob(tf.exp(self.noise)) +
         standard_normal.log_prob(self.amplitude_linear) +
         tfd.Normal(loc=zero, scale=one * 10.).log_prob(
             self.task_vectors)))

    # Optimizer for hyperparameters
    optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
    vars_to_optimize = [
        self.amplitude, self.length_scales, self.length_scales_lin,
        self.amplitude_linear, self.noise, self.input_mean
    ]
    # Task embeddings are only optimized when requested.
    if self.learn_embeddings:
      vars_to_optimize.append(self.task_vectors)
    grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
    self.train_op = optimizer.apply_gradients(
        grads, global_step=self.global_step)

    # Predictions for test data
    self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

    # create tensorboard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter(
        "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)
    self.check = tf.add_check_numerics_ops()
def
posterior_mean_and_sample
(
self
,
candidates
):
"""Draw samples for test predictions.
Given a Tensor of 'candidates' inputs, returns samples from the posterior
and the posterior mean prediction for those inputs.
Args:
candidates: A (num-examples x num-dims) Tensor containing the inputs for
which to return predictions.
Returns:
y_mean: The posterior mean prediction given these inputs
y_sample: A sample from the posterior of the outputs given these inputs
"""
# Cross-covariance for test predictions
w
=
tf
.
identity
(
self
.
weights_train
)
inds
=
tf
.
squeeze
(
tf
.
reshape
(
tf
.
tile
(
tf
.
reshape
(
tf
.
range
(
self
.
n_out
),
(
self
.
n_out
,
1
)),
(
1
,
tf
.
shape
(
candidates
)[
0
])),
(
-
1
,
1
)))
cross_cov
=
self
.
cov
(
tf
.
tile
(
candidates
,
[
self
.
n_out
,
1
]),
self
.
x_train
)
cross_task_cov
=
self
.
task_cov
(
tf
.
one_hot
(
inds
,
self
.
n_out
),
w
)
cross_cov
*=
cross_task_cov
# Test mean prediction
y_mean
=
tf
.
matmul
(
cross_cov
,
tf
.
matmul
(
self
.
input_inv
,
self
.
y_train
))
# Test sample predictions
# Note this can be done much more efficiently using Kronecker products
# if all tasks are fully observed (which we won't assume)
test_cov
=
(
self
.
cov
(
tf
.
tile
(
candidates
,
[
self
.
n_out
,
1
]),
tf
.
tile
(
candidates
,
[
self
.
n_out
,
1
]))
*
self
.
task_cov
(
tf
.
one_hot
(
inds
,
self
.
n_out
),
tf
.
one_hot
(
inds
,
self
.
n_out
))
-
tf
.
matmul
(
cross_cov
,
tf
.
matmul
(
self
.
input_inv
,
tf
.
transpose
(
cross_cov
))))
# Get the matrix square root through an SVD for drawing samples
# This seems more numerically stable than the Cholesky
s
,
_
,
v
=
tf
.
svd
(
test_cov
,
full_matrices
=
True
)
test_sqrt
=
tf
.
matmul
(
v
,
tf
.
matmul
(
tf
.
diag
(
s
),
tf
.
transpose
(
v
)))
y_sample
=
(
tf
.
matmul
(
test_sqrt
,
tf
.
random_normal
([
tf
.
shape
(
test_sqrt
)[
0
],
1
],
dtype
=
tf
.
float64
))
+
y_mean
)
y_sample
=
(
tf
.
transpose
(
tf
.
reshape
(
y_sample
,
(
self
.
n_out
,
-
1
)))
*
self
.
input_std
+
self
.
input_mean
)
return
y_mean
,
y_sample
def
create_summaries
(
self
):
with
self
.
graph
.
as_default
():
tf
.
summary
.
scalar
(
"loss"
,
self
.
loss
)
tf
.
summary
.
scalar
(
"log_noise"
,
self
.
noise
)
tf
.
summary
.
scalar
(
"log_amp"
,
self
.
amplitude
)
tf
.
summary
.
scalar
(
"log_amp_lin"
,
self
.
amplitude_linear
)
tf
.
summary
.
histogram
(
"length_scales"
,
self
.
length_scales
)
tf
.
summary
.
histogram
(
"length_scales_lin"
,
self
.
length_scales_lin
)
self
.
summary_op
=
tf
.
summary
.
merge_all
()
  def train(self, data, num_steps):
    """Trains the GP for num_steps, using the data in 'data'.

    Args:
      data: ContextualDataset object that provides the data.
      num_steps: Number of minibatches to train the network for.
    """
    logging.info("Training %s for %d steps...", self.name, num_steps)
    for step in range(num_steps):
      numpts = min(data.num_points(None), self.max_num_points)
      if numpts >= self.max_num_points and self.keep_fixed_after_max_obs:
        # Once the observation budget is reached (and we chose to freeze),
        # always train on the same fixed prefix of the data.
        x = data.contexts[:numpts, :]
        y = data.rewards[:numpts, :]
        # One-hot weights marking which action's reward was observed.
        weights = np.zeros((x.shape[0], self.n_out))
        for i, val in enumerate(data.actions[:numpts]):
          weights[i, val] = 1.0
      else:
        x, y, weights = data.get_batch_with_weights(numpts)

      # Running the assign ops (input_cov_op, input_op, var_op, input_w_op,
      # out_op) alongside train_op keeps the cached training state in sync.
      ops = [
          self.global_step, self.summary_op, self.loss, self.noise,
          self.amplitude, self.amplitude_linear, self.length_scales,
          self.length_scales_lin, self.input_cov_op, self.input_op,
          self.var_op, self.input_w_op, self.out_op, self.train_op
      ]

      res = self.sess.run(
          ops,
          feed_dict={
              self.x: x,
              self.x_in: x,
              self.y: y,
              self.weights: weights,
              self.n: numpts,
          })

      if step % self._freq_summary == 0:
        if self._show_training:
          logging.info("step: %d, loss: %g noise: %f amp: %f amp_lin: %f",
                       step, res[2], res[3], res[4], res[5])
        summary = res[1]
        global_step = res[0]
        self.summary_writer.add_summary(summary, global_step=global_step)
research/deep_contextual_bandits/bandits/algorithms/neural_bandit_model.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define a family of neural network architectures for bandits.
The network accepts different type of optimizers that could lead to different
approximations of the posterior distribution or simply to point estimates.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow
as
tf
from
absl
import
flags
from
bandits.core.bayesian_nn
import
BayesianNN
FLAGS
=
flags
.
FLAGS
class NeuralBanditModel(BayesianNN):
  """Implements a neural network for bandit problems."""

  def __init__(self, optimizer, hparams, name):
    """Saves hyper-params and builds the Tensorflow graph.

    Args:
      optimizer: Name of the optimizer to use (stored; see select_optimizer).
      hparams: Hyper-parameter object for the network.
      name: Name used for scoping and summaries.
    """
    self.opt_name = optimizer
    self.name = name
    self.hparams = hparams
    # Optional hparams default to sensible values when absent.
    self.verbose = getattr(self.hparams, "verbose", True)
    self.times_trained = 0
    self.build_model()
  def build_layer(self, x, num_units):
    """Builds a layer with input x; dropout and layer norm if specified."""
    init_s = self.hparams.init_scale
    # Optional features default to off when the hparam is missing.
    layer_n = getattr(self.hparams, "layer_norm", False)
    dropout = getattr(self.hparams, "use_dropout", False)

    nn = tf.contrib.layers.fully_connected(
        x,
        num_units,
        activation_fn=self.hparams.activation,
        normalizer_fn=None if not layer_n else tf.contrib.layers.layer_norm,
        normalizer_params={},
        weights_initializer=tf.random_uniform_initializer(-init_s, init_s)
    )
    if dropout:
      nn = tf.nn.dropout(nn, self.hparams.keep_prob)
    return nn
  def forward_pass(self):
    """Builds the network body and prediction head.

    Returns:
      nn: The last hidden representation (used as features elsewhere).
      y_pred: Linear predictions, one per action.
    """
    init_s = self.hparams.init_scale
    scope_name = "prediction_{}".format(self.name)
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
      nn = self.x
      for num_units in self.hparams.layer_sizes:
        # A non-positive size skips the layer.
        if num_units > 0:
          nn = self.build_layer(nn, num_units)

      y_pred = tf.layers.dense(
          nn,
          self.hparams.num_actions,
          kernel_initializer=tf.random_uniform_initializer(-init_s, init_s))

    return nn, y_pred
  def build_model(self):
    """Defines the actual NN model with fully connected layers.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """

    # create and store the graph corresponding to the BNN instance
    self.graph = tf.Graph()

    with self.graph.as_default():

      # create and store a new session for the graph
      self.sess = tf.Session()

      with tf.name_scope(self.name):

        self.global_step = tf.train.get_or_create_global_step()

        # context
        self.x = tf.placeholder(
            shape=[None, self.hparams.context_dim],
            dtype=tf.float32,
            name="{}_x".format(self.name))

        # reward vector
        self.y = tf.placeholder(
            shape=[None, self.hparams.num_actions],
            dtype=tf.float32,
            name="{}_y".format(self.name))

        # weights (1 for selected action, 0 otherwise)
        self.weights = tf.placeholder(
            shape=[None, self.hparams.num_actions],
            dtype=tf.float32,
            name="{}_w".format(self.name))

        # with tf.variable_scope("prediction_{}".format(self.name)):
        self.nn, self.y_pred = self.forward_pass()
        self.loss = tf.squared_difference(self.y_pred, self.y)
        # Mask the loss so only the observed action's error backpropagates.
        self.weighted_loss = tf.multiply(self.weights, self.loss)
        self.cost = tf.reduce_sum(self.weighted_loss) / self.hparams.batch_size

        if self.hparams.activate_decay:
          self.lr = tf.train.inverse_time_decay(
              self.hparams.initial_lr, self.global_step,
              1, self.hparams.lr_decay_rate)
        else:
          self.lr = tf.Variable(self.hparams.initial_lr, trainable=False)

        # create tensorboard metrics
        self.create_summaries()
        self.summary_writer = tf.summary.FileWriter(
            "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, tvars), self.hparams.max_grad_norm)

        self.optimizer = self.select_optimizer()

        self.train_op = self.optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step)

        self.init = tf.global_variables_initializer()
        self.initialize_graph()
  def initialize_graph(self):
    """Initializes all variables."""

    with self.graph.as_default():
      if self.verbose:
        print("Initializing model {}.".format(self.name))
      self.sess.run(self.init)
  def assign_lr(self):
    """Resets the learning rate in dynamic schedules for subsequent trainings.

    In bandits settings, we do expand our dataset over time. Then, we need to
    re-train the network with the new data. The algorithms that do not keep
    the step constant, can reset it at the start of each *training* process.
    """

    decay_steps = 1
    if self.hparams.activate_decay:
      current_gs = self.sess.run(self.global_step)
      with self.graph.as_default():
        # Offsetting by the current global step restarts the decay schedule
        # from its initial value for this round of training.
        self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr,
                                              self.global_step - current_gs,
                                              decay_steps,
                                              self.hparams.lr_decay_rate)
def
select_optimizer
(
self
):
"""Selects optimizer. To be extended (SGLD, KFAC, etc)."""
return
tf
.
train
.
RMSPropOptimizer
(
self
.
lr
)
def
create_summaries
(
self
):
"""Defines summaries including mean loss, learning rate, and global step."""
with
self
.
graph
.
as_default
():
with
tf
.
name_scope
(
self
.
name
+
"_summaries"
):
tf
.
summary
.
scalar
(
"cost"
,
self
.
cost
)
tf
.
summary
.
scalar
(
"lr"
,
self
.
lr
)
tf
.
summary
.
scalar
(
"global_step"
,
self
.
global_step
)
self
.
summary_op
=
tf
.
summary
.
merge_all
()
  def train(self, data, num_steps):
    """Trains the network for num_steps, using the provided data.

    Args:
      data: ContextualDataset object that provides the data.
      num_steps: Number of minibatches to train the network for.
    """

    if self.verbose:
      print("Training {} for {} steps...".format(self.name, num_steps))

    with self.graph.as_default():

      for step in range(num_steps):
        x, y, w = data.get_batch_with_weights(self.hparams.batch_size)
        _, cost, summary, lr = self.sess.run(
            [self.train_op, self.cost, self.summary_op, self.lr],
            feed_dict={self.x: x, self.y: y, self.weights: w})

        if step % self.hparams.freq_summary == 0:
          if self.hparams.show_training:
            print("{} | step: {}, lr: {}, loss: {}".format(
                self.name, step, lr, cost))
          self.summary_writer.add_summary(summary, step)

      self.times_trained += 1
research/deep_contextual_bandits/bandits/algorithms/neural_linear_sampling.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Thompson Sampling with linear posterior over a learnt deep representation."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
from
scipy.stats
import
invgamma
from
bandits.core.bandit_algorithm
import
BanditAlgorithm
from
bandits.core.contextual_dataset
import
ContextualDataset
from
bandits.algorithms.neural_bandit_model
import
NeuralBanditModel
class NeuralLinearPosteriorSampling(BanditAlgorithm):
  """Full Bayesian linear regression on the last layer of a deep neural net."""

  def __init__(self, name, hparams, optimizer='RMS'):
    """Sets up priors, datasets, and the underlying neural network.

    Args:
      name: Name of the algorithm instance.
      hparams: Hyper-parameter object; reads layer_sizes, lambda_prior,
        num_actions, a0, b0, training_freq, training_freq_network,
        training_epochs, and context_dim from it.
      optimizer: Optimizer name passed to the NeuralBanditModel.
    """
    self.name = name
    self.hparams = hparams
    # Linear regression operates on the last hidden layer's representation.
    self.latent_dim = self.hparams.layer_sizes[-1]

    # Gaussian prior for each beta_i
    self._lambda_prior = self.hparams.lambda_prior

    # One posterior (mu, cov, precision) per action.
    self.mu = [
        np.zeros(self.latent_dim)
        for _ in range(self.hparams.num_actions)
    ]

    self.cov = [(1.0 / self.lambda_prior) * np.eye(self.latent_dim)
                for _ in range(self.hparams.num_actions)]

    self.precision = [
        self.lambda_prior * np.eye(self.latent_dim)
        for _ in range(self.hparams.num_actions)
    ]

    # Inverse Gamma prior for each sigma2_i
    self._a0 = self.hparams.a0
    self._b0 = self.hparams.b0

    self.a = [self._a0 for _ in range(self.hparams.num_actions)]
    self.b = [self._b0 for _ in range(self.hparams.num_actions)]

    # Regression and NN Update Frequency
    self.update_freq_lr = hparams.training_freq
    self.update_freq_nn = hparams.training_freq_network

    self.t = 0
    self.optimizer_n = optimizer

    self.num_epochs = hparams.training_epochs
    # data_h holds raw contexts; latent_h holds their NN representations.
    self.data_h = ContextualDataset(hparams.context_dim,
                                    hparams.num_actions,
                                    intercept=False)
    self.latent_h = ContextualDataset(self.latent_dim,
                                      hparams.num_actions,
                                      intercept=False)
    self.bnn = NeuralBanditModel(optimizer, hparams, '{}-bnn'.format(name))
def
action
(
self
,
context
):
"""Samples beta's from posterior, and chooses best action accordingly."""
# Round robin until each action has been selected "initial_pulls" times
if
self
.
t
<
self
.
hparams
.
num_actions
*
self
.
hparams
.
initial_pulls
:
return
self
.
t
%
self
.
hparams
.
num_actions
# Sample sigma2, and beta conditional on sigma2
sigma2_s
=
[
self
.
b
[
i
]
*
invgamma
.
rvs
(
self
.
a
[
i
])
for
i
in
range
(
self
.
hparams
.
num_actions
)
]
try
:
beta_s
=
[
np
.
random
.
multivariate_normal
(
self
.
mu
[
i
],
sigma2_s
[
i
]
*
self
.
cov
[
i
])
for
i
in
range
(
self
.
hparams
.
num_actions
)
]
except
np
.
linalg
.
LinAlgError
as
e
:
# Sampling could fail if covariance is not positive definite
print
(
'Exception when sampling for {}.'
.
format
(
self
.
name
))
print
(
'Details: {} | {}.'
.
format
(
e
.
message
,
e
.
args
))
d
=
self
.
latent_dim
beta_s
=
[
np
.
random
.
multivariate_normal
(
np
.
zeros
((
d
)),
np
.
eye
(
d
))
for
i
in
range
(
self
.
hparams
.
num_actions
)
]
# Compute last-layer representation for the current context
with
self
.
bnn
.
graph
.
as_default
():
c
=
context
.
reshape
((
1
,
self
.
hparams
.
context_dim
))
z_context
=
self
.
bnn
.
sess
.
run
(
self
.
bnn
.
nn
,
feed_dict
=
{
self
.
bnn
.
x
:
c
})
# Apply Thompson Sampling to last-layer representation
vals
=
[
np
.
dot
(
beta_s
[
i
],
z_context
.
T
)
for
i
in
range
(
self
.
hparams
.
num_actions
)
]
return
np
.
argmax
(
vals
)
  def update(self, context, action, reward):
    """Updates the posterior using linear bayesian regression formula."""

    self.t += 1
    self.data_h.add(context, action, reward)
    c = context.reshape((1, self.hparams.context_dim))
    z_context = self.bnn.sess.run(self.bnn.nn, feed_dict={self.bnn.x: c})
    self.latent_h.add(z_context, action, reward)

    # Retrain the network on the original data (data_h)
    if self.t % self.update_freq_nn == 0:

      if self.hparams.reset_lr:
        self.bnn.assign_lr()
      self.bnn.train(self.data_h, self.num_epochs)

      # Update the latent representation of every datapoint collected so far
      new_z = self.bnn.sess.run(self.bnn.nn,
                                feed_dict={self.bnn.x: self.data_h.contexts})
      self.latent_h.replace_data(contexts=new_z)

    # Update the Bayesian Linear Regression
    if self.t % self.update_freq_lr == 0:

      # Find all the actions to update
      # NOTE(review): this slice drops the LAST update_freq_lr actions rather
      # than selecting them; verify against the intended refresh policy.
      actions_to_update = self.latent_h.actions[:-self.update_freq_lr]

      for action_v in np.unique(actions_to_update):

        # Update action posterior with formulas: \beta | z,y ~ N(mu_q, cov_q)
        z, y = self.latent_h.get_data(action_v)

        # The algorithm could be improved with sequential formulas (cheaper)
        s = np.dot(z.T, z)

        # Some terms are removed as we assume prior mu_0 = 0.
        precision_a = s + self.lambda_prior * np.eye(self.latent_dim)
        cov_a = np.linalg.inv(precision_a)
        mu_a = np.dot(cov_a, np.dot(z.T, y))

        # Inverse Gamma posterior update
        a_post = self.a0 + z.shape[0] / 2.0
        b_upd = 0.5 * np.dot(y.T, y)
        b_upd -= 0.5 * np.dot(mu_a.T, np.dot(precision_a, mu_a))
        b_post = self.b0 + b_upd

        # Store new posterior distributions
        self.mu[action_v] = mu_a
        self.cov[action_v] = cov_a
        self.precision[action_v] = precision_a
        self.a[action_v] = a_post
        self.b[action_v] = b_post
@
property
def
a0
(
self
):
return
self
.
_a0
@
property
def
b0
(
self
):
return
self
.
_b0
  @property
  def lambda_prior(self):
    """Scale of the isotropic Gaussian precision prior on the weights.

    `update` adds `lambda_prior * I` to `z^T z` when forming the posterior
    precision of each action's linear model.
    """
    return self._lambda_prior
research/deep_contextual_bandits/bandits/algorithms/parameter_noise_sampling.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contextual algorithm based on Thompson Sampling + direct noise injection."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
from
scipy.special
import
logsumexp
import
tensorflow
as
tf
from
absl
import
flags
from
bandits.core.bandit_algorithm
import
BanditAlgorithm
from
bandits.core.contextual_dataset
import
ContextualDataset
from
bandits.algorithms.neural_bandit_model
import
NeuralBanditModel
FLAGS
=
flags
.
FLAGS
class ParameterNoiseSampling(BanditAlgorithm):
  """Parameter Noise Sampling algorithm based on adding noise to net params.

  Described in https://arxiv.org/abs/1706.01905
  """

  def __init__(self, name, hparams):
    """Creates the algorithm, and sets up the adaptive Gaussian noise.

    Args:
      name: Name of the algorithm (used to label the underlying network).
      hparams: Hyper-parameters object; must provide training_freq,
        training_epochs, context_dim, num_actions, and buffer_s. The
        optional keys verbose, noise_std, eps, d_samples, and optimizer
        fall back to defaults via getattr below.
    """
    self.name = name
    self.hparams = hparams
    self.verbose = getattr(self.hparams, 'verbose', True)
    # Initial std of the Gaussian perturbation added to every weight;
    # adapted over time by update_noise().
    self.noise_std = getattr(self.hparams, 'noise_std', 0.005)
    # Target disagreement level used to derive the KL threshold delta.
    self.eps = getattr(self.hparams, 'eps', 0.05)
    # Number of stored datapoints sampled when estimating the KL distance.
    self.d_samples = getattr(self.hparams, 'd_samples', 300)
    self.optimizer = getattr(self.hparams, 'optimizer', 'RMS')

    # keep track of noise heuristic statistics
    self.std_h = [self.noise_std]
    self.eps_h = [self.eps]
    self.kl_h = []

    self.t = 0  # number of (context, action, reward) triples observed so far
    self.freq_update = hparams.training_freq
    self.num_epochs = hparams.training_epochs

    self.data_h = ContextualDataset(hparams.context_dim, hparams.num_actions,
                                    hparams.buffer_s)
    self.bnn = NeuralBanditModel(self.optimizer, hparams,
                                 '{}-bnn'.format(name))

    with self.bnn.graph.as_default():
      # noise-injection std placeholder
      self.bnn.noise_std_ph = tf.placeholder(tf.float32, shape=())

      # create noise corruption op; adds noise to all weights
      tvars = tf.trainable_variables()
      self.bnn.noisy_grads = [
          tf.random_normal(v.get_shape(), 0, self.bnn.noise_std_ph)
          for v in tvars
      ]

      # add noise to all params, then compute prediction, then subtract.
      # The nested control_dependencies enforce the order:
      # sample noise -> add to weights -> forward pass -> remove noise.
      with tf.control_dependencies(self.bnn.noisy_grads):
        self.bnn.noise_add_ops = [
            tvars[i].assign_add(n)
            for i, n in enumerate(self.bnn.noisy_grads)
        ]
        with tf.control_dependencies(self.bnn.noise_add_ops):
          # we force the prediction for 'y' to be recomputed after adding noise
          self.bnn.noisy_nn, self.bnn.noisy_pred_val = self.bnn.forward_pass()
          self.bnn.noisy_pred = tf.identity(self.bnn.noisy_pred_val)
          with tf.control_dependencies([tf.identity(self.bnn.noisy_pred)]):
            # Subtracting the same sampled noise tensors restores the
            # original weights after the noisy prediction is taken.
            self.bnn.noise_sub_ops = [
                tvars[i].assign_add(-n)
                for i, n in enumerate(self.bnn.noisy_grads)
            ]

  def action(self, context):
    """Selects action based on Thompson Sampling *after* adding noise.

    Args:
      context: Context vector; reshaped to (1, context_dim) before feeding.

    Returns:
      Index of the action with the highest noisy prediction.
    """
    if self.t < self.hparams.num_actions * self.hparams.initial_pulls:
      # round robin until each action has been taken "initial_pulls" times
      return self.t % self.hparams.num_actions

    with self.bnn.graph.as_default():
      # run noise prediction op to choose action, and subtract noise op after.
      c = context.reshape((1, self.hparams.context_dim))
      output, _ = self.bnn.sess.run(
          [self.bnn.noisy_pred, self.bnn.noise_sub_ops],
          feed_dict={self.bnn.x: c,
                     self.bnn.noise_std_ph: self.noise_std})
      return np.argmax(output)

  def update(self, context, action, reward):
    """Updates the data buffer, and re-trains the BNN and noise level."""
    self.t += 1
    self.data_h.add(context, action, reward)

    if self.t % self.freq_update == 0:
      self.bnn.train(self.data_h, self.num_epochs)
      self.update_noise()

  def update_noise(self):
    """Increase noise if distance btw original and corrupted distrib small."""
    kl = self.compute_distance()
    # Threshold from the adaptive parameter-noise heuristic; shrinks as
    # eps decays, so the accepted perturbation level tightens over time.
    delta = -np.log1p(-self.eps + self.eps / self.hparams.num_actions)

    if kl < delta:
      self.noise_std *= 1.01  # distributions too close: inject more noise
    else:
      self.noise_std /= 1.01  # distributions too far apart: inject less

    self.eps *= 0.99

    if self.verbose:
      print('Update eps={} | kl={} | std={} | delta={} | increase={}.'.format(
          self.eps, kl, self.noise_std, delta, kl < delta))

    # store noise-injection statistics for inspection: std, KL, eps.
    self.std_h.append(self.noise_std)
    self.kl_h.append(kl)
    self.eps_h.append(self.eps)

  def compute_distance(self):
    """Computes empirical KL for original and corrupted output distributions."""
    random_inputs, _ = self.data_h.get_batch(self.d_samples)
    # Clean prediction. NOTE(review): noise_std_ph is fed here too even
    # though y_pred presumably does not depend on it — harmless extra feed.
    y_model = self.bnn.sess.run(
        self.bnn.y_pred,
        feed_dict={self.bnn.x: random_inputs,
                   self.bnn.noise_std_ph: self.noise_std})
    # Noisy prediction; noise_sub_ops restores the weights afterwards.
    y_noisy, _ = self.bnn.sess.run(
        [self.bnn.noisy_pred, self.bnn.noise_sub_ops],
        feed_dict={self.bnn.x: random_inputs,
                   self.bnn.noise_std_ph: self.noise_std})

    if self.verbose:
      # display how often original & perturbed models propose different actions
      s = np.sum([np.argmax(y_model[i, :]) == np.argmax(y_noisy[i, :])
                  for i in range(y_model.shape[0])])
      print('{} | % of agreement btw original / corrupted actions: {}.'.format(
          self.name, s / self.d_samples))

    kl = self.compute_kl_with_logits(y_model, y_noisy)
    return kl

  def compute_kl_with_logits(self, logits1, logits2):
    """Computes KL from logits samples from two distributions.

    Args:
      logits1: Array of logits, one row per sample (reference distribution).
      logits2: Array of logits with the same shape (perturbed distribution).

    Returns:
      Mean per-sample KL estimate, clipped at 0 and with NaNs zeroed out.
    """

    def exp_times_diff(a, b):
      # Elementwise exp(a) * (a - b), a building block of the KL sum.
      return np.multiply(np.exp(a), a - b)

    logsumexp1 = logsumexp(logits1, axis=1)
    logsumexp2 = logsumexp(logits2, axis=1)
    logsumexp_diff = logsumexp2 - logsumexp1

    exp_diff = exp_times_diff(logits1, logits2)
    exp_diff = np.sum(exp_diff, axis=1)

    inv_exp_sum = np.sum(np.exp(logits1), axis=1)
    term1 = np.divide(exp_diff, inv_exp_sum)

    kl = term1 + logsumexp_diff
    # KL is non-negative by definition; clip numerical noise and NaNs.
    kl = np.maximum(kl, 0.0)
    kl = np.nan_to_num(kl)
    return np.mean(kl)
research/deep_contextual_bandits/bandits/algorithms/posterior_bnn_sampling.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contextual bandit algorithm based on Thompson Sampling and a Bayesian NN."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
from
bandits.core.bandit_algorithm
import
BanditAlgorithm
from
bandits.algorithms.bb_alpha_divergence_model
import
BBAlphaDivergence
from
bandits.algorithms.bf_variational_neural_bandit_model
import
BfVariationalNeuralBanditModel
from
bandits.core.contextual_dataset
import
ContextualDataset
from
bandits.algorithms.multitask_gp
import
MultitaskGP
from
bandits.algorithms.neural_bandit_model
import
NeuralBanditModel
from
bandits.algorithms.variational_neural_bandit_model
import
VariationalNeuralBanditModel
class PosteriorBNNSampling(BanditAlgorithm):
  """Posterior Sampling algorithm based on a Bayesian neural network."""

  def __init__(self, name, hparams, bnn_model='RMSProp'):
    """Creates a PosteriorBNNSampling object based on a specific optimizer.

    The algorithm has two basic tools: an Approx BNN and a Contextual Dataset.
    The Bayesian Network keeps the posterior based on the optimizer iterations.

    Args:
      name: Name of the algorithm.
      hparams: Hyper-parameters of the algorithm.
      bnn_model: Type of BNN. By default RMSProp (point estimate).
    """
    self.name = name
    self.hparams = hparams
    self.optimizer_n = hparams.optimizer

    self.training_freq = hparams.training_freq
    self.training_epochs = hparams.training_epochs
    self.t = 0
    self.data_h = ContextualDataset(hparams.context_dim, hparams.num_actions,
                                    hparams.buffer_s)

    # to be extended with more BNNs (BB alpha-div, GPs, SGFS, constSGD...)
    bnn_name = '{}-bnn'.format(name)
    builders = {
        'Variational':
            lambda: VariationalNeuralBanditModel(hparams, bnn_name),
        'AlphaDiv':
            lambda: BBAlphaDivergence(hparams, bnn_name),
        'Variational_BF':
            lambda: BfVariationalNeuralBanditModel(hparams, bnn_name),
        'GP':
            lambda: MultitaskGP(hparams),
    }
    # Unknown model names fall back to the point-estimate network.
    fallback = lambda: NeuralBanditModel(self.optimizer_n, hparams, bnn_name)
    self.bnn = builders.get(bnn_model, fallback)()

  def action(self, context):
    """Selects action for context based on Thompson Sampling using the BNN."""
    num_actions = self.hparams.num_actions
    if self.t < num_actions * self.hparams.initial_pulls:
      # round robin until each action has been taken "initial_pulls" times
      return self.t % num_actions

    with self.bnn.graph.as_default():
      reshaped_context = context.reshape((1, self.hparams.context_dim))
      scores = self.bnn.sess.run(self.bnn.y_pred,
                                 feed_dict={self.bnn.x: reshaped_context})
      return np.argmax(scores)

  def update(self, context, action, reward):
    """Updates data buffer, and re-trains the BNN every training_freq steps."""
    self.t += 1
    self.data_h.add(context, action, reward)
    if self.t % self.training_freq != 0:
      return
    if self.hparams.reset_lr:
      self.bnn.assign_lr()
    self.bnn.train(self.data_h, self.training_epochs)
research/deep_contextual_bandits/bandits/algorithms/uniform_sampling.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contextual bandit algorithm that selects an action uniformly at random."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
from
bandits.core.bandit_algorithm
import
BanditAlgorithm
class UniformSampling(BanditAlgorithm):
  """Defines a baseline; returns one action uniformly at random."""

  def __init__(self, name, hparams):
    """Creates a UniformSampling object.

    Args:
      name: Name of the algorithm.
      hparams: Hyper-parameters, including the number of arms (num_actions).
    """
    self.name = name
    self.hparams = hparams

  def action(self, context):
    """Selects an action uniformly at random.

    Args:
      context: Observed context vector (ignored by this baseline).

    Returns:
      An integer action index drawn uniformly from [0, num_actions).
    """
    # np.random.randint(n) samples from [0, n) directly, instead of
    # materializing range(n) into an array as np.random.choice(range(n)) did.
    return np.random.randint(self.hparams.num_actions)
research/deep_contextual_bandits/bandits/algorithms/variational_neural_bandit_model.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bayesian NN using factorized VI (Bayes By Backprop. Blundell et al. 2014).
See https://arxiv.org/abs/1505.05424 for details.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
tensorflow
as
tf
from
absl
import
flags
from
bandits.core.bayesian_nn
import
BayesianNN
FLAGS
=
flags
.
FLAGS
def log_gaussian(x, mu, sigma, reduce_sum=True):
  """Returns log Gaussian pdf.

  Args:
    x: Tensor of evaluation points.
    mu: Mean tensor (broadcastable against x).
    sigma: Standard deviation tensor (broadcastable against x).
    reduce_sum: If True, sums the elementwise log-densities into a scalar.

  Returns:
    Elementwise log N(x; mu, sigma), or its total sum when reduce_sum is True.
  """
  normalizer = -0.5 * np.log(2 * np.pi) - tf.log(sigma)
  quadratic = tf.square(x - mu) / (2 * tf.square(sigma))
  log_density = normalizer - quadratic
  if reduce_sum:
    log_density = tf.reduce_sum(log_density)
  return log_density
def analytic_kl(mu_1, sigma_1, mu_2, sigma_2):
  """KL for two Gaussian distributions with diagonal covariance matrix.

  Args:
    mu_1: Mean of the first Gaussian.
    sigma_1: Standard deviation of the first Gaussian.
    mu_2: Mean of the second Gaussian.
    sigma_2: Standard deviation of the second Gaussian.

  Returns:
    Scalar tensor: sum over dimensions of KL(N(mu_1, sigma_1) || N(mu_2, sigma_2)).
  """
  var_1 = tf.square(sigma_1)
  var_2 = tf.square(sigma_2)
  # Squared-mean-difference term of the closed-form diagonal-Gaussian KL.
  mean_term = tf.square(mu_1 - mu_2) / (2. * var_2)
  # Variance-ratio and log-variance terms.
  ratio_term = (var_1 / var_2 - 1. - tf.log(var_1) + tf.log(var_2)) / 2.
  return tf.reduce_sum(mean_term + ratio_term)
class VariationalNeuralBanditModel(BayesianNN):
  """Implements an approximate Bayesian NN using Variational Inference."""

  def __init__(self, hparams, name="BBBNN"):
    """Stores hyper-parameters, picks the sigma transform, and builds the graph.

    Args:
      hparams: Hyper-parameters; must provide context_dim, num_actions,
        layer_sizes, init_scale, cleared_times_trained,
        initial_training_steps, training_epochs, and
        use_sigma_exp_transform. f_num_points and verbose are optional.
      name: Name used for variable scopes and summaries.
    """
    self.name = name
    self.hparams = hparams

    self.n_in = self.hparams.context_dim
    self.n_out = self.hparams.num_actions
    self.layers = self.hparams.layer_sizes
    self.init_scale = self.hparams.init_scale
    # Optional cap on the dataset size reported to the ELBO (see train()).
    self.f_num_points = None
    if "f_num_points" in hparams:
      self.f_num_points = self.hparams.f_num_points

    self.cleared_times_trained = self.hparams.cleared_times_trained
    self.initial_training_steps = self.hparams.initial_training_steps
    # Number of steps used for each of the first cleared_times_trained calls
    # to train(), linearly ramping up to training_epochs.
    self.training_schedule = np.linspace(self.initial_training_steps,
                                         self.hparams.training_epochs,
                                         self.cleared_times_trained)
    self.verbose = getattr(self.hparams, "verbose", True)

    # Per-layer variational parameters, keyed by layer id.
    self.weights_m = {}
    self.weights_std = {}
    self.biases_m = {}
    self.biases_std = {}

    self.times_trained = 0

    # sigma_transform maps the unconstrained pre-sigma variable to a positive
    # std; inverse_sigma_transform maps a positive std back (numpy side).
    if self.hparams.use_sigma_exp_transform:
      self.sigma_transform = tf.exp
      self.inverse_sigma_transform = np.log
    else:
      self.sigma_transform = tf.nn.softplus
      # Inverse of softplus: y + log(1 - exp(-y)).
      self.inverse_sigma_transform = lambda y: y + np.log(1. - np.exp(-y))

    # Whether to use the local reparameterization trick to compute the loss.
    # See details in https://arxiv.org/abs/1506.02557
    self.use_local_reparameterization = True

    self.build_graph()

  def build_mu_variable(self, shape):
    """Returns a mean variable initialized as N(0, 0.05)."""
    return tf.Variable(tf.random_normal(shape, 0.0, 0.05))

  def build_sigma_variable(self, shape, init=-5.):
    """Returns a sigma variable initialized as N(init, 0.05)."""
    # Initialize sigma to be very small initially to encourage MAP opt first
    return tf.Variable(tf.random_normal(shape, init, 0.05))

  def build_layer(self, input_x, input_x_local, shape, layer_id,
                  activation_fn=tf.nn.relu):
    """Builds a variational layer, and computes KL term.

    Args:
      input_x: Input to the variational layer.
      input_x_local: Input when the local reparameterization trick was applied.
      shape: [number_inputs, number_outputs] for the layer.
      layer_id: Number of layer in the architecture.
      activation_fn: Activation function to apply.

    Returns:
      output_h: Output of the variational layer.
      output_h_local: Output when local reparameterization trick was applied.
      neg_kl: Negative KL term for the layer.
    """
    w_mu = self.build_mu_variable(shape)
    w_sigma = self.sigma_transform(self.build_sigma_variable(shape))
    # Standard reparameterization: w = mu + sigma * standard normal noise.
    w_noise = tf.random_normal(shape)
    w = w_mu + w_sigma * w_noise

    b_mu = self.build_mu_variable([1, shape[1]])
    b_sigma = self.sigma_transform(self.build_sigma_variable([1, shape[1]]))
    # NOTE: the bias is used deterministically (its mean); b_sigma is stored
    # but not sampled here.
    b = b_mu

    # Store means and stds
    self.weights_m[layer_id] = w_mu
    self.weights_std[layer_id] = w_sigma
    self.biases_m[layer_id] = b_mu
    self.biases_std[layer_id] = b_sigma

    # Create outputs
    output_h = activation_fn(tf.matmul(input_x, w) + b)

    if self.use_local_reparameterization:
      # Use analytic KL divergence wrt the prior
      # (prior std sqrt(2 / fan_in), a He-style scaling).
      neg_kl = -analytic_kl(w_mu, w_sigma,
                            0., tf.to_float(np.sqrt(2. / shape[0])))
    else:
      # Create empirical KL loss terms
      log_p = log_gaussian(w, 0., tf.to_float(np.sqrt(2. / shape[0])))
      log_q = log_gaussian(w, tf.stop_gradient(w_mu),
                           tf.stop_gradient(w_sigma))
      neg_kl = log_p - log_q

    # Apply local reparameterization trick: sample activations pre nonlinearity
    m_h = tf.matmul(input_x_local, w_mu) + b
    v_h = tf.matmul(tf.square(input_x_local), tf.square(w_sigma))
    # 1e-6 guards the sqrt against zero variance.
    output_h_local = m_h + tf.sqrt(v_h + 1e-6) * tf.random_normal(tf.shape(v_h))
    output_h_local = activation_fn(output_h_local)

    return output_h, output_h_local, neg_kl

  def build_action_noise(self):
    """Defines a model for additive noise per action, and its KL term."""
    # Define mean and std variables (log-normal dist) for each action.
    noise_sigma_mu = (self.build_mu_variable([1, self.n_out])
                      + self.inverse_sigma_transform(self.hparams.noise_sigma))
    noise_sigma_sigma = self.sigma_transform(
        self.build_sigma_variable([1, self.n_out]))

    pre_noise_sigma = (noise_sigma_mu
                       + tf.random_normal([1, self.n_out]) * noise_sigma_sigma)
    self.noise_sigma = self.sigma_transform(pre_noise_sigma)

    # Compute KL for additive noise sigma terms.
    if getattr(self.hparams, "infer_noise_sigma", False):
      neg_kl_term = log_gaussian(
          pre_noise_sigma,
          self.inverse_sigma_transform(self.hparams.noise_sigma),
          self.hparams.prior_sigma)
      neg_kl_term -= log_gaussian(pre_noise_sigma, noise_sigma_mu,
                                  noise_sigma_sigma)
    else:
      # Noise level is fixed: no variational parameters, zero KL contribution.
      neg_kl_term = 0.

    return neg_kl_term

  def build_model(self, activation_fn=tf.nn.relu):
    """Defines the actual NN model with fully connected layers.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.

    Args:
      activation_fn: the activation function used in the nn layers.
    """

    if self.verbose:
      print("Initializing model {}.".format(self.name))
    neg_kl_term, l_number = 0, 0
    use_local_reparameterization = self.use_local_reparameterization

    # Compute model additive noise for each action with log-normal distribution
    neg_kl_term += self.build_action_noise()

    # Build network.
    input_x = self.x
    input_local = self.x
    n_in = self.n_in
    for l_number, n_nodes in enumerate(self.layers):
      # layer_sizes entries <= 0 are skipped (layer disabled).
      if n_nodes > 0:
        h, h_local, neg_kl = self.build_layer(input_x, input_local,
                                              [n_in, n_nodes], l_number)
        neg_kl_term += neg_kl
        input_x, input_local = h, h_local
        n_in = n_nodes

    # Create last linear layer
    h, h_local, neg_kl = self.build_layer(input_x, input_local,
                                          [n_in, self.n_out], l_number + 1,
                                          activation_fn=lambda x: x)
    neg_kl_term += neg_kl
    self.y_pred = h
    self.y_pred_local = h_local

    # Compute log likelihood (with learned or fixed noise level)
    if getattr(self.hparams, "infer_noise_sigma", False):
      log_likelihood = log_gaussian(self.y, self.y_pred_local,
                                    self.noise_sigma, reduce_sum=False)
    else:
      y_hat = self.y_pred_local if use_local_reparameterization else self.y_pred
      log_likelihood = log_gaussian(self.y, y_hat, self.hparams.noise_sigma,
                                    reduce_sum=False)

    # Only take into account observed outcomes (bandits setting)
    batch_size = tf.to_float(tf.shape(self.x)[0])
    weighted_log_likelihood = tf.reduce_sum(
        log_likelihood * self.weights) / batch_size

    # The objective is 1/n * (\sum_i log_like_i - KL); neg_kl_term estimates -KL
    elbo = weighted_log_likelihood + (neg_kl_term / self.n)

    self.loss = -elbo
    self.global_step = tf.train.get_or_create_global_step()
    self.train_op = tf.train.AdamOptimizer(self.hparams.initial_lr).minimize(
        self.loss, global_step=self.global_step)

    # Create tensorboard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter(
        "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)

  def build_graph(self):
    """Defines graph, session, placeholders, and model.

    Placeholders are: n (size of the dataset), x and y (context and observed
    reward for each action), and weights (one-hot encoding of selected action
    for each context, i.e., only possibly non-zero element in each y).
    """

    self.graph = tf.Graph()
    with self.graph.as_default():

      self.sess = tf.Session()

      self.n = tf.placeholder(shape=[], dtype=tf.float32)
      self.x = tf.placeholder(shape=[None, self.n_in], dtype=tf.float32)
      self.y = tf.placeholder(shape=[None, self.n_out], dtype=tf.float32)
      self.weights = tf.placeholder(shape=[None, self.n_out],
                                    dtype=tf.float32)

      self.build_model()
      self.sess.run(tf.global_variables_initializer())

  def create_summaries(self):
    """Defines summaries including mean loss, and global step."""

    with self.graph.as_default():
      with tf.name_scope(self.name + "_summaries"):
        tf.summary.scalar("loss", self.loss)
        tf.summary.scalar("global_step", self.global_step)
        self.summary_op = tf.summary.merge_all()

  def assign_lr(self):
    """Resets the learning rate in dynamic schedules for subsequent trainings.

    In bandits settings, we do expand our dataset over time. Then, we need to
    re-train the network with the new data. The algorithms that do not keep
    the step constant, can reset it at the start of each *training* process.
    """

    decay_steps = 1
    if self.hparams.activate_decay:
      current_gs = self.sess.run(self.global_step)
      with self.graph.as_default():
        # Offsetting by current_gs restarts the decay schedule from zero at
        # the beginning of this training round.
        self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr,
                                              self.global_step - current_gs,
                                              decay_steps,
                                              self.hparams.lr_decay_rate)

  def train(self, data, num_steps):
    """Trains the BNN for num_steps, using the data in 'data'.

    Args:
      data: ContextualDataset object that provides the data.
      num_steps: Number of minibatches to train the network for.

    Returns:
      losses: Loss history during training.
    """

    # Early training rounds use the ramped schedule instead of the caller's
    # num_steps, so initial models train longer.
    if self.times_trained < self.cleared_times_trained:
      num_steps = int(self.training_schedule[self.times_trained])
    self.times_trained += 1

    losses = []

    with self.graph.as_default():

      if self.verbose:
        print("Training {} for {} steps...".format(self.name, num_steps))

      for step in range(num_steps):
        x, y, weights = data.get_batch_with_weights(self.hparams.batch_size)
        _, summary, global_step, loss = self.sess.run(
            [self.train_op, self.summary_op, self.global_step, self.loss],
            feed_dict={
                self.x: x,
                self.y: y,
                self.weights: weights,
                self.n: data.num_points(self.f_num_points),
            })

        losses.append(loss)

        if step % self.hparams.freq_summary == 0:
          if self.hparams.show_training:
            print("{} | step: {}, loss: {}".format(
                self.name, global_step, loss))
          self.summary_writer.add_summary(summary, global_step)

    return losses
research/deep_contextual_bandits/bandits/core/__pycache__/bandit_algorithm.cpython-36.pyc
0 → 100644
View file @
5a3c97b9
File added
research/deep_contextual_bandits/bandits/core/__pycache__/bayesian_nn.cpython-36.pyc
0 → 100644
View file @
5a3c97b9
File added
research/deep_contextual_bandits/bandits/core/__pycache__/contextual_bandit.cpython-36.pyc
0 → 100644
View file @
5a3c97b9
File added
research/deep_contextual_bandits/bandits/core/__pycache__/contextual_dataset.cpython-36.pyc
0 → 100644
View file @
5a3c97b9
File added
research/deep_contextual_bandits/bandits/core/bandit_algorithm.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define the abstract class for contextual bandit algorithms."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
class BanditAlgorithm(object):
  """A bandit algorithm must be able to do two basic operations.

  1. Choose an action given a context.
  2. Update its internal model given a triple (context, played action, reward).
  """

  def action(self, context):
    """Returns the action to play for `context` (no-op; override)."""
    pass

  def update(self, context, action, reward):
    """Incorporates one (context, action, reward) triple (no-op; override)."""
    pass
research/deep_contextual_bandits/bandits/core/bayesian_nn.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define the abstract class for Bayesian Neural Networks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
class BayesianNN(object):
  """A Bayesian neural network keeps a distribution over neural nets."""

  def __init__(self, optimizer):
    """Creates the network; `optimizer` selects the training method (no-op)."""
    pass

  def build_model(self):
    """Constructs the underlying model/graph (no-op; override)."""
    pass

  def train(self, data):
    """Trains the network on `data` (no-op; override)."""
    pass

  def sample(self, steps):
    """Produces samples from the network posterior (no-op; override)."""
    pass
research/deep_contextual_bandits/bandits/core/contextual_bandit.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define a contextual bandit from which we can sample and compute rewards.
We can feed the data, sample a context, its reward for a specific action, and
also the optimal action for a given context.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
def run_contextual_bandit(context_dim, num_actions, dataset, algos):
  """Run a contextual bandit problem on a set of algorithms.

  Args:
    context_dim: Dimension of the context.
    num_actions: Number of available actions.
    dataset: Matrix where every row is a context + num_actions rewards.
    algos: List of algorithms to use in the contextual bandit instance.

  Returns:
    h_actions: Matrix with actions: size (num_context, num_algorithms).
    h_rewards: Matrix with rewards: size (num_context, num_algorithms).
  """

  num_contexts = dataset.shape[0]

  # Create contextual bandit
  cmab = ContextualBandit(context_dim, num_actions)
  cmab.feed_data(dataset)

  # Accumulate per-step rows in Python lists and stack once at the end.
  # (np.vstack inside the loop copies the whole history each iteration,
  # which is accidentally O(num_contexts^2).)
  action_rows = []
  reward_rows = []

  # Run the contextual bandit process
  for i in range(num_contexts):
    context = cmab.context(i)
    actions = [a.action(context) for a in algos]
    rewards = [cmab.reward(i, action) for action in actions]

    for j, a in enumerate(algos):
      a.update(context, actions[j], rewards[j])

    action_rows.append(actions)
    reward_rows.append(rewards)

  if not action_rows:
    # Preserve the original empty-result shape: (0, len(algos)) float arrays.
    return (np.empty((0, len(algos)), float),
            np.empty((0, len(algos)), float))

  h_actions = np.array(action_rows, dtype=float)
  h_rewards = np.array(reward_rows, dtype=float)
  return h_actions, h_rewards
class ContextualBandit(object):
  """Implements a Contextual Bandit with d-dimensional contexts and k arms."""

  def __init__(self, context_dim, num_actions):
    """Creates a contextual bandit object.

    Args:
      context_dim: Dimension of the contexts.
      num_actions: Number of arms for the multi-armed bandit.
    """
    self._context_dim = context_dim
    self._num_actions = num_actions

  def feed_data(self, data):
    """Feeds the data (contexts + rewards) to the bandit object.

    Args:
      data: Numpy array with shape [n, d+k], where n is the number of contexts,
        d is the dimension of each context, and k the number of arms (rewards).

    Raises:
      ValueError: when data dimensions do not correspond to the object values.
    """
    expected_cols = self.context_dim + self.num_actions
    if data.shape[1] != expected_cols:
      raise ValueError('Data dimensions do not match.')

    self._number_contexts = data.shape[0]
    self.data = data
    # Identity ordering until reset() shuffles it.
    self.order = range(self.number_contexts)

  def reset(self):
    """Randomly shuffle the order of the contexts to deliver."""
    self.order = np.random.permutation(self.number_contexts)

  def _row(self, number):
    """Returns the full data row for the number-th context (after ordering)."""
    return self.data[self.order[number]]

  def context(self, number):
    """Returns the number-th context."""
    return self._row(number)[:self.context_dim]

  def reward(self, number, action):
    """Returns the reward for the number-th context and action."""
    return self._row(number)[self.context_dim + action]

  def optimal(self, number):
    """Returns the optimal action (in hindsight) for the number-th context."""
    return np.argmax(self._row(number)[self.context_dim:])

  @property
  def context_dim(self):
    return self._context_dim

  @property
  def num_actions(self):
    return self._num_actions

  @property
  def number_contexts(self):
    return self._number_contexts
research/deep_contextual_bandits/bandits/core/contextual_dataset.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Define a data buffer for contextual bandit algorithms."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
class ContextualDataset(object):
  """The buffer is able to append new data, and sample random minibatches."""

  def __init__(self, context_dim, num_actions, buffer_s=-1, intercept=False):
    """Creates a ContextualDataset object.

    The data is stored in attributes: contexts and rewards.
    The sequence of taken actions are stored in attribute actions.

    Args:
      context_dim: Dimension of the contexts.
      num_actions: Number of arms for the multi-armed bandit.
      buffer_s: Size of buffer for training. Only last buffer_s will be
        returned as minibatch. If buffer_s = -1, all data will be used.
      intercept: If True, it adds a constant (1.0) dimension to each context X,
        at the end.
    """
    self._context_dim = context_dim
    self._num_actions = num_actions
    self._contexts = None
    self._rewards = None
    self.actions = []
    self.buffer_s = buffer_s
    self.intercept = intercept

  def add(self, context, action, reward):
    """Adds a new triplet (context, action, reward) to the dataset.

    The reward for the actions that weren't played is assumed to be zero.

    Args:
      context: A d-dimensional vector with the context.
      action: Integer between 0 and k-1 representing the chosen arm.
      reward: Real number representing the reward for the (context, action).
    """
    if self.intercept:
      c = np.array(context[:])
      c = np.append(c, 1.0).reshape((1, self.context_dim + 1))
    else:
      c = np.array(context[:]).reshape((1, self.context_dim))

    if self.contexts is None:
      self.contexts = c
    else:
      self.contexts = np.vstack((self.contexts, c))

    # Rewards for arms that were not pulled are recorded as zero.
    r = np.zeros((1, self.num_actions))
    r[0, action] = reward
    if self.rewards is None:
      self.rewards = r
    else:
      self.rewards = np.vstack((self.rewards, r))

    self.actions.append(action)

  def replace_data(self, contexts=None, actions=None, rewards=None):
    """Overwrites any subset of the stored (contexts, actions, rewards)."""
    if contexts is not None:
      self.contexts = contexts
    if actions is not None:
      self.actions = actions
    if rewards is not None:
      self.rewards = rewards

  def get_batch(self, batch_size):
    """Returns a random minibatch of (contexts, rewards) with batch_size."""
    n, _ = self.contexts.shape
    if self.buffer_s == -1:
      # use all the data
      ind = np.random.choice(range(n), batch_size)
    else:
      # use only buffer (last buffer_s observations)
      ind = np.random.choice(range(max(0, n - self.buffer_s), n), batch_size)
    return self.contexts[ind, :], self.rewards[ind, :]

  def get_data(self, action):
    """Returns all (context, reward) where the action was played."""
    n, _ = self.contexts.shape
    # BUG FIX: force an integer dtype. np.array([]) defaults to float64, which
    # is an invalid index when no row matches the action (raises IndexError).
    ind = np.array([i for i in range(n) if self.actions[i] == action],
                   dtype=np.int_)
    return self.contexts[ind, :], self.rewards[ind, action]

  def get_data_with_weights(self):
    """Returns all observations with one-hot weights for actions."""
    weights = np.zeros((self.contexts.shape[0], self.num_actions))
    a_ind = np.array([(i, val) for i, val in enumerate(self.actions)])
    weights[a_ind[:, 0], a_ind[:, 1]] = 1.0
    return self.contexts, self.rewards, weights

  def get_batch_with_weights(self, batch_size):
    """Returns a random mini-batch with one-hot weights for actions."""
    n, _ = self.contexts.shape
    if self.buffer_s == -1:
      # use all the data
      ind = np.random.choice(range(n), batch_size)
    else:
      # use only buffer (last buffer_s obs)
      ind = np.random.choice(range(max(0, n - self.buffer_s), n), batch_size)

    weights = np.zeros((batch_size, self.num_actions))
    sampled_actions = np.array(self.actions)[ind]
    a_ind = np.array([(i, val) for i, val in enumerate(sampled_actions)])
    weights[a_ind[:, 0], a_ind[:, 1]] = 1.0
    return self.contexts[ind, :], self.rewards[ind, :], weights

  def num_points(self, f=None):
    """Returns number of points in the buffer (after applying function f)."""
    if f is not None:
      return f(self.contexts.shape[0])
    return self.contexts.shape[0]

  @property
  def context_dim(self):
    return self._context_dim

  @property
  def num_actions(self):
    return self._num_actions

  @property
  def contexts(self):
    return self._contexts

  @contexts.setter
  def contexts(self, value):
    self._contexts = value

  @property
  def actions(self):
    return self._actions

  @actions.setter
  def actions(self, value):
    self._actions = value

  @property
  def rewards(self):
    return self._rewards

  @rewards.setter
  def rewards(self, value):
    self._rewards = value
research/deep_contextual_bandits/bandits/data/__pycache__/data_sampler.cpython-36.pyc
0 → 100644
View file @
5a3c97b9
File added
research/deep_contextual_bandits/bandits/data/__pycache__/synthetic_data_sampler.cpython-36.pyc
0 → 100644
View file @
5a3c97b9
File added
research/deep_contextual_bandits/bandits/data/data_sampler.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to create bandit problems from datasets."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
pandas
as
pd
import
tensorflow
as
tf
def one_hot(df, cols):
  """Returns one-hot encoding of DataFrame df including columns in cols."""
  for col in cols:
    # Expand the column into indicator columns, append them, drop the original.
    indicators = pd.get_dummies(df[col], prefix=col, drop_first=False)
    df = pd.concat([df, indicators], axis=1).drop(col, axis=1)
  return df
def sample_mushroom_data(file_name,
                         num_contexts,
                         r_noeat=0,
                         r_eat_safe=5,
                         r_eat_poison_bad=-35,
                         r_eat_poison_good=5,
                         prob_poison_bad=0.5):
  """Samples bandit game from Mushroom UCI Dataset.

  Args:
    file_name: Route of file containing the original Mushroom UCI dataset.
    num_contexts: Number of points to sample, i.e. (context, action rewards).
    r_noeat: Reward for not eating a mushroom.
    r_eat_safe: Reward for eating a non-poisonous mushroom.
    r_eat_poison_bad: Reward for eating a poisonous mushroom if harmed.
    r_eat_poison_good: Reward for eating a poisonous mushroom if not harmed.
    prob_poison_bad: Probability of being harmed by eating a poisonous mushroom.

  Returns:
    dataset: Sampled matrix with n rows: (context, eat_reward, no_eat_reward).
    opt_vals: Vector of expected optimal (reward, action) for each context.

  We assume r_eat_safe > r_noeat, and r_eat_poison_good > r_eat_poison_bad.
  """
  # first two cols of df encode whether mushroom is edible or poisonous
  df = pd.read_csv(file_name, header=None)
  df = one_hot(df, df.columns)
  ind = np.random.choice(range(df.shape[0]), num_contexts, replace=True)

  contexts = df.iloc[ind, 2:]
  # BUG FIX: work with plain numpy arrays instead of pandas Series.
  # Series with duplicate indices (ind samples with replacement) mis-align
  # under arithmetic, and Series.reshape was removed in modern pandas.
  edible = np.asarray(df.iloc[ind, 0], dtype=float)     # 1.0 iff edible
  poisonous = np.asarray(df.iloc[ind, 1], dtype=float)  # 1.0 iff poisonous

  no_eat_reward = r_noeat * np.ones((num_contexts, 1))
  random_poison = np.random.choice(
      [r_eat_poison_bad, r_eat_poison_good],
      p=[prob_poison_bad, 1 - prob_poison_bad],
      size=num_contexts)
  eat_reward = r_eat_safe * edible
  eat_reward += np.multiply(random_poison, poisonous)
  eat_reward = eat_reward.reshape((num_contexts, 1))

  # compute optimal expected reward and optimal actions
  exp_eat_poison_reward = r_eat_poison_bad * prob_poison_bad
  exp_eat_poison_reward += r_eat_poison_good * (1 - prob_poison_bad)
  opt_exp_reward = (r_eat_safe * edible
                    + max(r_noeat, exp_eat_poison_reward) * poisonous)

  if r_noeat > exp_eat_poison_reward:
    # actions: no eat = 0 ; eat = 1
    opt_actions = edible  # indicator of edible
  else:
    # should always eat (higher expected reward)
    opt_actions = np.ones((num_contexts,))

  # BUG FIX: the original called opt_actions.values here, which crashed with
  # AttributeError in the always-eat branch (np.ndarray has no .values).
  opt_vals = (opt_exp_reward, opt_actions)

  return np.hstack((contexts, no_eat_reward, eat_reward)), opt_vals
def sample_stock_data(file_name, context_dim, num_actions, num_contexts,
                      sigma, shuffle_rows=True):
  """Samples linear bandit game from stock prices dataset.

  Args:
    file_name: Route of file containing the stock prices dataset.
    context_dim: Context dimension (i.e. vector with the price of each stock).
    num_actions: Number of actions (different linear portfolio strategies).
    num_contexts: Number of contexts to sample.
    sigma: Vector with additive noise levels for each action.
    shuffle_rows: If True, rows from original dataset are shuffled.

  Returns:
    dataset: Sampled matrix with rows: (context, reward_1, ..., reward_k).
    opt_vals: Vector of expected optimal (reward, action) for each context.
  """
  # NOTE(review): tf.gfile is a TF1 API (tf.io.gfile in TF2) — confirm the
  # project's TensorFlow version before upgrading.
  with tf.gfile.Open(file_name, 'r') as f:
    contexts = np.loadtxt(f, skiprows=1)

  if shuffle_rows:
    np.random.shuffle(contexts)
  contexts = contexts[:num_contexts, :]

  # Random unit-norm linear portfolio weights define the expected rewards.
  betas = np.random.uniform(-1, 1, (context_dim, num_actions))
  betas /= np.linalg.norm(betas, axis=0)

  mean_rewards = np.dot(contexts, betas)
  noise = np.random.normal(scale=sigma, size=mean_rewards.shape)
  rewards = mean_rewards + noise

  opt_actions = np.argmax(mean_rewards, axis=1)
  rows = np.arange(mean_rewards.shape[0])
  opt_rewards = mean_rewards[rows, opt_actions]
  return np.hstack((contexts, rewards)), (np.array(opt_rewards), opt_actions)
def sample_jester_data(file_name, context_dim, num_actions, num_contexts,
                       shuffle_rows=True, shuffle_cols=False):
  """Samples bandit game from (user, joke) dense subset of Jester dataset.

  Args:
    file_name: Route of file containing the modified Jester dataset.
    context_dim: Context dimension (i.e. vector with some ratings from a user).
    num_actions: Number of actions (number of joke ratings to predict).
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    shuffle_cols: Whether or not context/action jokes are randomly shuffled.

  Returns:
    dataset: Sampled matrix with rows: (context, rating_1, ..., rating_k).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.
  """
  # NOTE(review): tf.gfile is a TF1 API (tf.io.gfile in TF2).
  with tf.gfile.Open(file_name, 'rb') as f:
    dataset = np.load(f)

  if shuffle_cols:
    col_order = np.random.permutation(dataset.shape[1])
    dataset = dataset[:, col_order]
  if shuffle_rows:
    np.random.shuffle(dataset)
  dataset = dataset[:num_contexts, :]

  assert context_dim + num_actions == dataset.shape[1], 'Wrong data dimensions.'

  # Rewards are deterministic ratings, so the best arm is the highest rating.
  opt_actions = np.argmax(dataset[:, context_dim:], axis=1)
  rows = np.arange(dataset.shape[0])
  opt_rewards = np.array(dataset[rows, context_dim + opt_actions])

  return dataset, (opt_rewards, opt_actions)
def sample_statlog_data(file_name, num_contexts, shuffle_rows=True,
                        remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI statlog data.

  Args:
    file_name: Route of file containing the Statlog dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  https://archive.ics.uci.edu/ml/datasets/Statlog+(Shuttle)
  """
  # NOTE(review): tf.gfile is a TF1 API (tf.io.gfile in TF2).
  with tf.gfile.Open(file_name, 'r') as f:
    raw = np.loadtxt(f)

  num_actions = 7  # some of the actions are very rarely optimal.

  # Shuffle data
  if shuffle_rows:
    np.random.shuffle(raw)
  raw = raw[:num_contexts, :]

  # Last column is label, rest are features
  features = raw[:, :-1]
  class_ids = raw[:, -1].astype(int) - 1  # convert to 0 based index

  if remove_underrepresented:
    features, class_ids = remove_underrepresented_classes(features, class_ids)
  return classification_to_bandit_problem(features, class_ids, num_actions)
def sample_adult_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI adult data.

  Args:
    file_name: Route of file containing the Adult dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  Preprocessing:
    * drop rows with missing values
    * convert categorical variables to 1 hot encoding

  https://archive.ics.uci.edu/ml/datasets/census+income
  """
  with tf.gfile.Open(file_name, 'r') as f:
    df = pd.read_csv(f, header=None, na_values=[' ?']).dropna()

  num_actions = 14

  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]

  # BUG FIX: .as_matrix() was removed in pandas 0.25; .values is the
  # equivalent, version-stable accessor.
  labels = df[6].astype('category').cat.codes.values
  df = df.drop([6], axis=1)

  # Convert categorical variables to 1 hot encoding
  cols_to_transform = [1, 3, 5, 7, 8, 9, 13, 14]
  df = pd.get_dummies(df, columns=cols_to_transform)

  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  contexts = df.values

  return classification_to_bandit_problem(contexts, labels, num_actions)
def sample_census_data(file_name, num_contexts, shuffle_rows=True,
                       remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI census data.

  Args:
    file_name: Route of file containing the Census dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  Preprocessing:
    * drop rows with missing labels
    * convert categorical variables to 1 hot encoding

  Note: this is the processed (not the 'raw') dataset. It contains a subset
  of the raw features and they've all been discretized.

  https://archive.ics.uci.edu/ml/datasets/US+Census+Data+%281990%29
  """
  # Note: this dataset is quite large. It will be slow to load and preprocess.
  with tf.gfile.Open(file_name, 'r') as f:
    df = (pd.read_csv(f, header=0, na_values=['?'])
          .dropna())

  num_actions = 9

  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]

  # Assuming what the paper calls response variable is the label?
  # BUG FIX: .as_matrix() was removed in pandas 0.25; .values is the
  # equivalent, version-stable accessor.
  labels = df['dOccup'].astype('category').cat.codes.values
  # In addition to label, also drop the (unique?) key.
  df = df.drop(['dOccup', 'caseid'], axis=1)

  # All columns are categorical. Convert to 1 hot encoding.
  df = pd.get_dummies(df, columns=df.columns)

  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  contexts = df.values

  return classification_to_bandit_problem(contexts, labels, num_actions)
def sample_covertype_data(file_name, num_contexts, shuffle_rows=True,
                          remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI Cover_Type data.

  Args:
    file_name: Route of file containing the Covertype dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  Preprocessing:
    * drop rows with missing labels
    * convert categorical variables to 1 hot encoding

  https://archive.ics.uci.edu/ml/datasets/Covertype
  """
  with tf.gfile.Open(file_name, 'r') as f:
    df = (pd.read_csv(f, header=0, na_values=['?'])
          .dropna())

  num_actions = 7

  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]

  # Assuming what the paper calls response variable is the label?
  # Last column is label.
  # BUG FIX: .as_matrix() was removed in pandas 0.25; .values is the
  # equivalent, version-stable accessor.
  labels = df[df.columns[-1]].astype('category').cat.codes.values
  df = df.drop([df.columns[-1]], axis=1)

  # All columns are either quantitative or already converted to 1 hot.
  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  contexts = df.values

  return classification_to_bandit_problem(contexts, labels, num_actions)
def classification_to_bandit_problem(contexts, labels, num_actions=None):
  """Normalize contexts and encode deterministic rewards."""
  if num_actions is None:
    num_actions = np.max(labels) + 1
  num_contexts = contexts.shape[0]

  # Due to random subsampling in small problems, some features may be constant
  col_means = np.mean(contexts, axis=0, keepdims=True)
  col_stds = np.std(contexts, axis=0, keepdims=True)[0, :]
  feature_scale = safe_std(col_stds)

  # Normalize features
  contexts = (contexts - col_means) / feature_scale

  # One hot encode labels as rewards
  rewards = np.zeros((num_contexts, num_actions))
  rewards[np.arange(num_contexts), labels] = 1.0

  # Every context has a deterministic optimal reward of 1 at its true label.
  return contexts, rewards, (np.ones(num_contexts), labels)
def safe_std(values):
  """Replaces every zero entry by 1.0, leaving other values untouched."""
  return np.array([1.0 if v == 0.0 else v for v in values])
def remove_underrepresented_classes(features, labels, thresh=0.0005):
  """Removes classes when number of datapoints fraction is below a threshold."""
  # Threshold doesn't seem to agree with https://arxiv.org/pdf/1706.04687.pdf
  # Example: for Covertype, they report 4 classes after filtering, we get 7?
  n_total = labels.shape[0]
  classes, class_counts = np.unique(labels, return_counts=True)
  ratio_by_class = dict(zip(classes, class_counts.astype('float') / n_total))
  print('Unique classes and their ratio of total: %s' % ratio_by_class)
  keep_mask = [ratio_by_class[lbl] >= thresh for lbl in labels]
  return features[keep_mask], labels[np.array(keep_mask)]
research/deep_contextual_bandits/bandits/data/synthetic_data_sampler.py
0 → 100644
View file @
5a3c97b9
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Several functions to sample contextual data."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
def sample_contextual_data(num_contexts, dim_context, num_actions, sigma):
  """Samples independent Gaussian data.

  There is nothing to learn here as the rewards do not depend on the context.

  Args:
    num_contexts: Number of contexts to sample.
    dim_context: Dimension of the contexts.
    num_actions: Number of arms for the multi-armed bandit.
    sigma: Standard deviation of the independent Gaussian samples.

  Returns:
    data: A [num_contexts, dim_context + num_actions] numpy array with the data.
  """
  shape = (num_contexts, dim_context + num_actions)
  return np.random.normal(scale=sigma, size=shape)
def sample_linear_data(num_contexts, dim_context, num_actions, sigma=0.0):
  """Samples data from linearly parameterized arms.

  The reward for context X and arm j is given by X^T beta_j, for some latent
  set of parameters {beta_j : j = 1, ..., k}. The beta's are sampled uniformly
  at random, the contexts are Gaussian, and sigma-noise is added to the rewards.

  Args:
    num_contexts: Number of contexts to sample.
    dim_context: Dimension of the contexts.
    num_actions: Number of arms for the multi-armed bandit.
    sigma: Standard deviation of the additive noise. Set to zero for no noise.

  Returns:
    data: A [n, d+k] numpy array with the data.
    betas: Latent parameters that determine expected reward for each arm.
    opt: (optimal_rewards, optimal_actions) for all contexts.
  """
  # Latent per-arm parameters, rescaled to unit norm per arm.
  betas = np.random.uniform(-1, 1, (dim_context, num_actions))
  betas /= np.linalg.norm(betas, axis=0)

  contexts = np.random.normal(size=[num_contexts, dim_context])
  rewards = np.dot(contexts, betas)

  # Optimal arms are decided on the noiseless rewards.
  opt_actions = np.argmax(rewards, axis=1)
  rewards += np.random.normal(scale=sigma, size=rewards.shape)
  opt_rewards = rewards[np.arange(num_contexts), opt_actions]

  return np.hstack((contexts, rewards)), betas, (opt_rewards, opt_actions)
def sample_sparse_linear_data(num_contexts, dim_context, num_actions,
                              sparse_dim, sigma=0.0):
  """Samples data from sparse linearly parameterized arms.

  The reward for context X and arm j is given by X^T beta_j, for some latent
  set of parameters {beta_j : j = 1, ..., k}. The beta's are sampled uniformly
  at random, the contexts are Gaussian, and sigma-noise is added to the rewards.
  Only s components out of d are non-zero for each arm's beta.

  Args:
    num_contexts: Number of contexts to sample.
    dim_context: Dimension of the contexts.
    num_actions: Number of arms for the multi-armed bandit.
    sparse_dim: Dimension of the latent subspace (sparsity pattern dimension).
    sigma: Standard deviation of the additive noise. Set to zero for no noise.

  Returns:
    data: A [num_contexts, dim_context+num_actions] numpy array with the data.
    betas: Latent parameters that determine expected reward for each arm.
    opt: (optimal_rewards, optimal_actions) for all contexts.
  """
  # Pick sparse_dim active components per arm and mask the rest to zero.
  mask = np.zeros((dim_context, num_actions))
  for arm in range(num_actions):
    active_rows = np.random.choice(range(dim_context), sparse_dim,
                                   replace=False)
    mask[active_rows, arm] = 1

  betas = np.random.uniform(-1, 1, (dim_context, num_actions))
  betas = np.multiply(betas, mask)
  betas /= np.linalg.norm(betas, axis=0)

  contexts = np.random.normal(size=[num_contexts, dim_context])
  rewards = np.dot(contexts, betas)

  # Optimal arms are decided on the noiseless rewards.
  opt_actions = np.argmax(rewards, axis=1)
  rewards += np.random.normal(scale=sigma, size=rewards.shape)
  opt_rewards = rewards[np.arange(num_contexts), opt_actions]

  return np.hstack((contexts, rewards)), betas, (opt_rewards, opt_actions)
def sample_wheel_bandit_data(num_contexts, delta, mean_v, std_v,
                             mu_large, std_large):
  """Samples from Wheel bandit game (see https://arxiv.org/abs/1802.09127).

  Args:
    num_contexts: Number of points to sample, i.e. (context, action rewards).
    delta: Exploration parameter: high reward in one region if norm above delta.
    mean_v: Mean reward for each action if context norm is below delta.
    std_v: Gaussian reward std for each action if context norm is below delta.
    mu_large: Mean reward for optimal action if context norm is above delta.
    std_large: Reward std for optimal action if context norm is above delta.

  Returns:
    dataset: Sampled matrix with n rows: (context, action rewards).
    opt_vals: Vector of expected optimal (reward, action) for each context.
  """
  context_dim = 2
  num_actions = 5

  # Rejection-sample contexts uniformly from the unit ball.
  accepted = []
  while len(accepted) < num_contexts:
    candidates = np.random.uniform(-1, 1,
                                   (int(num_contexts / 3), context_dim))
    for row in range(candidates.shape[0]):
      if np.linalg.norm(candidates[row, :]) <= 1:
        accepted.append(candidates[row, :])
  contexts = np.stack(accepted)[:num_contexts, :]

  rewards = []
  opt_actions = []
  opt_rewards = []
  for i in range(num_contexts):
    r = [np.random.normal(mean_v[j], std_v[j]) for j in range(num_actions)]
    if np.linalg.norm(contexts[i, :]) >= delta:
      # Outside the inner disk: one quadrant-specific arm pays a large reward.
      r_big = np.random.normal(mu_large, std_large)
      x_positive = contexts[i, 0] > 0
      y_positive = contexts[i, 1] > 0
      if x_positive and y_positive:
        best = 0
      elif x_positive:
        best = 1
      elif y_positive:
        best = 2
      else:
        best = 3
      r[best] = r_big
      opt_actions.append(best)
    else:
      # Inside the inner disk: the arm with the largest base mean is optimal.
      opt_actions.append(np.argmax(mean_v))
    opt_rewards.append(r[opt_actions[-1]])
    rewards.append(r)

  rewards = np.stack(rewards)
  return np.hstack((contexts, rewards)), (np.array(opt_rewards),
                                          np.array(opt_actions))
research/deep_contextual_bandits/example_main.py
0 → 100644
View file @
5a3c97b9
This diff is collapsed.
Click to expand it.
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment