Unverified Commit 05ec6d87 authored by Lukasz Kaiser's avatar Lukasz Kaiser Committed by GitHub
Browse files

Merge pull request #4871 from rikel/master

Deep Contextual Bandits code for tensorflow/models
parents ae8e0f53 6b5d9233
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
/research/brain_coder/ @danabo /research/brain_coder/ @danabo
/research/cognitive_mapping_and_planning/ @s-gupta /research/cognitive_mapping_and_planning/ @s-gupta
/research/compression/ @nmjohn /research/compression/ @nmjohn
/research/deep_contextual_bandits/ @rikel
/research/deeplab/ @aquariusjay @yknzhu @gpapan /research/deeplab/ @aquariusjay @yknzhu @gpapan
/research/delf/ @andrefaraujo /research/delf/ @andrefaraujo
/research/differential_privacy/ @ilyamironov @ananthr /research/differential_privacy/ @ilyamironov @ananthr
......
...@@ -22,6 +22,7 @@ request. ...@@ -22,6 +22,7 @@ request.
for visual navigation. for visual navigation.
- [compression](compression): compressing and decompressing images using a - [compression](compression): compressing and decompressing images using a
pre-trained Residual GRU network. pre-trained Residual GRU network.
- [deep_contextual_bandits](deep_contextual_bandits): code for a variety of contextual bandits algorithms using deep neural networks and Thompson sampling.
- [deep_speech](deep_speech): automatic speech recognition. - [deep_speech](deep_speech): automatic speech recognition.
- [deeplab](deeplab): deep labeling for semantic image segmentation. - [deeplab](deeplab): deep labeling for semantic image segmentation.
- [delf](delf): deep local features for image matching and retrieval. - [delf](delf): deep local features for image matching and retrieval.
......
This diff is collapsed.
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bayesian NN using expectation propagation (Black-Box Alpha-Divergence).
See https://arxiv.org/abs/1511.03243 for details.
All formulas used in this implementation are derived in:
https://www.overleaf.com/12837696kwzjxkyhdytk#/49028744/.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import tensorflow as tf
from absl import flags
from bandits.core.bayesian_nn import BayesianNN
FLAGS = flags.FLAGS
tfd = tf.contrib.distributions # update to: tensorflow_probability.distributions
def log_gaussian(x, mu, sigma, reduce_sum=True):
  """Evaluates the log density of x under Normal(mu, sigma).

  Args:
    x: Point(s) at which the density is evaluated.
    mu: Mean passed to tfd.Normal.
    sigma: Scale passed to tfd.Normal.
    reduce_sum: If True, all element-wise log densities are summed.

  Returns:
    The summed log density when reduce_sum is True, otherwise the
    element-wise log densities.
  """
  log_density = tfd.Normal(mu, sigma).log_prob(x)
  return tf.reduce_sum(log_density) if reduce_sum else log_density
class BBAlphaDivergence(BayesianNN):
  """Implements an approximate Bayesian NN via Black-Box Alpha-Divergence."""

  def __init__(self, hparams, name):
    """Reads the hyper-parameters and builds the TensorFlow graph.

    Args:
      hparams: Hyper-parameter object. `alpha`, `num_mc_nn_samples` and
        `verbose` are optional (defaults 1.0, 10 and True); every other
        attribute read below must be present.
      name: Name of the model, used in log messages and the summary path.
    """
    self.name = name
    self.hparams = hparams

    self.alpha = getattr(self.hparams, 'alpha', 1.0)
    self.num_mc_nn_samples = getattr(self.hparams, 'num_mc_nn_samples', 10)

    self.n_in = self.hparams.context_dim
    self.n_out = self.hparams.num_actions
    self.layers = self.hparams.layer_sizes
    self.batch_size = self.hparams.batch_size

    self.show_training = self.hparams.show_training
    self.freq_summary = self.hparams.freq_summary
    self.verbose = getattr(self.hparams, 'verbose', True)

    # The first `cleared_times_trained` calls to train() ignore the requested
    # number of steps and follow this linearly spaced schedule instead.
    self.cleared_times_trained = self.hparams.cleared_times_trained
    self.initial_training_steps = self.hparams.initial_training_steps
    self.training_schedule = np.linspace(self.initial_training_steps,
                                         self.hparams.training_epochs,
                                         self.cleared_times_trained)

    self.times_trained = 0
    self.initialize_model()

  def initialize_model(self):
    """Builds and initialize the model."""

    # Number of weights / biases; used by log_z_prior().
    self.num_w = 0
    self.num_b = 0

    # Per-layer posterior parameters, keyed by layer id.
    self.weights_m = {}
    self.weights_std = {}
    self.biases_m = {}
    self.biases_std = {}

    # History of the largest first-layer weight std after each train step.
    self.h_max_var = []

    if self.hparams.use_sigma_exp_transform:
      self.sigma_transform = tfd.bijectors.Exp()
    else:
      self.sigma_transform = tfd.bijectors.Softplus()

    # Build the graph corresponding to the Bayesian NN instance.
    self.graph = tf.Graph()

    with self.graph.as_default():

      self.sess = tf.Session()
      self.x = tf.placeholder(shape=[None, self.n_in],
                              dtype=tf.float32, name='x')
      self.y = tf.placeholder(shape=[None, self.n_out],
                              dtype=tf.float32, name='y')
      self.weights = tf.placeholder(shape=[None, self.n_out],
                                    dtype=tf.float32, name='w')
      self.data_size = tf.placeholder(tf.float32, shape=(), name='data_size')

      self.prior_variance = self.hparams.prior_variance
      if self.prior_variance < 0:
        # if not fixed, we learn the prior.
        self.prior_variance = self.sigma_transform.forward(
            self.build_mu_variable([1, 1]))

      self.build_model()
      self.sess.run(tf.global_variables_initializer())

  def build_mu_variable(self, shape):
    """Returns a mean variable initialized as N(0, 0.05)."""
    return tf.Variable(tf.random_normal(shape, 0.0, 0.05))

  def build_sigma_variable(self, shape, init=-5.):
    """Returns a sigma variable initialized as N(init, 0.05)."""
    # Initialize sigma to be very small initially to encourage MAP opt first
    return tf.Variable(tf.random_normal(shape, init, 0.05))

  def build_layer(self, input_x, shape, layer_id, activation_fn=tf.nn.relu):
    """Builds a layer with N(mean, std) for each weight, and samples from it.

    Args:
      input_x: Input tensor of the layer.
      shape: [number_inputs, number_outputs] of the layer.
      layer_id: Key under which the layer's means and stds are stored.
      activation_fn: Activation applied to the affine output.

    Returns:
      The activated output computed with one sample of weights and biases.
    """
    w_mu = self.build_mu_variable(shape)
    w_sigma = self.sigma_transform.forward(self.build_sigma_variable(shape))
    w_noise = tf.random_normal(shape)
    w = w_mu + w_sigma * w_noise

    b_mu = self.build_mu_variable([1, shape[1]])
    b_sigma = self.sigma_transform.forward(
        self.build_sigma_variable([1, shape[1]]))
    b_noise = tf.random_normal([1, shape[1]])
    b = b_mu + b_sigma * b_noise

    # Create outputs
    output_h = activation_fn(tf.matmul(input_x, w) + b)

    # Store means and stds
    self.weights_m[layer_id] = w_mu
    self.weights_std[layer_id] = w_sigma
    self.biases_m[layer_id] = b_mu
    self.biases_std[layer_id] = b_sigma

    return output_h

  def sample_neural_network(self, activation_fn=tf.nn.relu):
    """Samples a nn from posterior, computes data log lk and log f factor.

    Args:
      activation_fn: Activation applied to every layer except the last one.

    Returns:
      log_f: Scalar tensor, sum over all sampled weights and biases of the
        per-parameter log f contribution.
      weighted_log_likelihood: Log likelihood of self.y under the sampled
        network, multiplied by self.weights and summed over the last axis.
    """
    with self.graph.as_default():

      log_f = 0
      n = self.data_size
      input_x = self.x

      for layer_id in range(self.total_layers):

        # load mean and std of each weight
        w_mu = self.weights_m[layer_id]
        w_sigma = self.weights_std[layer_id]
        b_mu = self.biases_m[layer_id]
        b_sigma = self.biases_std[layer_id]

        # sample weights from Gaussian distribution
        shape = w_mu.shape
        w_noise = tf.random_normal(shape)
        b_noise = tf.random_normal([1, int(shape[1])])
        w = w_mu + w_sigma * w_noise
        b = b_mu + b_sigma * b_noise

        # compute contribution to log_f
        t1 = w * w_mu / (n * w_sigma ** 2)
        t2 = (0.5 * w ** 2 / n) * (1 / self.prior_variance - 1 / w_sigma ** 2)
        log_f += tf.reduce_sum(t1 + t2)

        t1 = b * b_mu / (n * b_sigma ** 2)
        t2 = (0.5 * b ** 2 / n) * (1 / self.prior_variance - 1 / b_sigma ** 2)
        log_f += tf.reduce_sum(t1 + t2)

        # the last layer is linear
        if layer_id < self.total_layers - 1:
          output_h = activation_fn(tf.matmul(input_x, w) + b)
        else:
          output_h = tf.matmul(input_x, w) + b

        input_x = output_h

      # compute log likelihood of the observed reward under the sampled nn
      log_likelihood = log_gaussian(
          self.y, output_h, self.noise_sigma, reduce_sum=False)
      weighted_log_likelihood = tf.reduce_sum(log_likelihood * self.weights, -1)

    return log_f, weighted_log_likelihood

  def log_z_q(self):
    """Computes log-partition function of current posterior parameters."""

    with self.graph.as_default():

      log_z_q = 0

      for layer_id in range(self.total_layers):

        w_mu = self.weights_m[layer_id]
        w_sigma = self.weights_std[layer_id]
        b_mu = self.biases_m[layer_id]
        b_sigma = self.biases_std[layer_id]

        w_term = 0.5 * tf.reduce_sum(w_mu ** 2 / w_sigma ** 2)
        w_term += 0.5 * tf.reduce_sum(tf.log(2 * np.pi) + 2 * tf.log(w_sigma))

        b_term = 0.5 * tf.reduce_sum(b_mu ** 2 / b_sigma ** 2)
        b_term += 0.5 * tf.reduce_sum(tf.log(2 * np.pi) + 2 * tf.log(b_sigma))

        log_z_q += w_term + b_term

      return log_z_q

  def log_z_prior(self):
    """Computes log-partition function of the prior parameters."""
    num_params = self.num_w + self.num_b
    # When the prior variance is learned it is a [1, 1] tensor; reduce_sum
    # collapses it so the energy (and the 'loss' scalar summary) stay scalar.
    # For a fixed (Python float) prior variance this is a no-op.
    return tf.reduce_sum(
        num_params * 0.5 * tf.log(2 * np.pi * self.prior_variance))

  def log_alpha_likelihood_ratio(self, activation_fn=tf.nn.relu):
    """Monte Carlo estimate of the alpha-likelihood-ratio term of the energy.

    Draws `num_mc_nn_samples` networks, and for every datapoint in the batch
    computes logsumexp over samples of alpha * (log lk - log f) minus log k.

    Args:
      activation_fn: Activation used by the sampled networks.

    Returns:
      Scalar tensor: the sum over the batch of the per-datapoint terms.
    """
    # each nn sample returns (log f, log likelihoods)
    nn_samples = [
        self.sample_neural_network(activation_fn)
        for _ in range(self.num_mc_nn_samples)
    ]
    nn_log_f_samples = [elt[0] for elt in nn_samples]
    nn_log_lk_samples = [elt[1] for elt in nn_samples]

    # we stack the (log f, log likelihoods) from the k nn samples
    nn_log_f_stack = tf.stack(nn_log_f_samples)    # k
    nn_log_lk_stack = tf.stack(nn_log_lk_samples)  # k x N

    # Subtract sample i's scalar log f from every entry of row i. Broadcasting
    # a k x 1 column does this directly; tiling the length-k vector and
    # reshaping it to k x N would interleave values of different samples
    # within a row, and would also require N == batch_size.
    # apply formula in https://www.overleaf.com/12837696kwzjxkyhdytk#/49028744/
    nn_log_ratio = nn_log_lk_stack - tf.expand_dims(nn_log_f_stack, -1)
    nn_log_ratio = self.alpha * tf.transpose(nn_log_ratio)   # N x k
    logsumexp_value = tf.reduce_logsumexp(nn_log_ratio, -1)  # N
    log_k = tf.log(tf.cast(self.num_mc_nn_samples, tf.float32))

    return tf.reduce_sum(logsumexp_value - log_k, -1)

  def build_model(self, activation_fn=tf.nn.relu):
    """Defines the actual NN model with fully connected layers.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.

    Args:
      activation_fn: Activation function for the neural network.
    """

    print('Initializing model {}.'.format(self.name))

    # Build terms for the noise sigma estimation for each action.
    noise_sigma_mu = (self.build_mu_variable([1, self.n_out])
                      + self.sigma_transform.inverse(self.hparams.noise_sigma))
    noise_sigma_sigma = self.sigma_transform.forward(
        self.build_sigma_variable([1, self.n_out]))

    pre_noise_sigma = noise_sigma_mu + tf.random_normal(
        [1, self.n_out]) * noise_sigma_sigma
    self.noise_sigma = self.sigma_transform.forward(pre_noise_sigma)

    # Build network
    input_x = self.x
    n_in = self.n_in
    self.total_layers = len(self.layers) + 1
    # A leading 0 means "no hidden layers"; an empty list is treated the same
    # way instead of raising IndexError.
    if self.layers and self.layers[0] == 0:
      self.total_layers = 1

    for l_number, n_nodes in enumerate(self.layers):
      if n_nodes > 0:
        h = self.build_layer(input_x, [n_in, n_nodes], l_number)
        # Count parameters with the layer's real fan-in, i.e. before n_in is
        # overwritten with this layer's width.
        self.num_w += n_in * n_nodes
        self.num_b += n_nodes
        input_x = h
        n_in = n_nodes

    self.y_pred = self.build_layer(input_x, [n_in, self.n_out],
                                   self.total_layers - 1,
                                   activation_fn=lambda x: x)
    # The output layer's parameters are part of log_z_q and log_f as well,
    # so they must be counted for the prior's log-partition function too.
    self.num_w += n_in * self.n_out
    self.num_b += self.n_out

    # Compute energy function based on sampled nn's
    log_coeff = self.data_size / (self.batch_size * self.alpha)
    log_ratio = log_coeff * self.log_alpha_likelihood_ratio(activation_fn)
    logzprior = self.log_z_prior()
    logzq = self.log_z_q()
    energy = logzprior - logzq - log_ratio

    self.loss = energy
    self.global_step = tf.train.get_or_create_global_step()
    self.train_op = tf.train.AdamOptimizer(self.hparams.initial_lr).minimize(
        self.loss, global_step=self.global_step)

    # Useful for debugging
    sq_loss = tf.squared_difference(self.y_pred, self.y)
    weighted_sq_loss = self.weights * sq_loss
    self.cost = tf.reduce_sum(weighted_sq_loss) / self.batch_size

    # Create tensorboard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter('{}/graph_{}'.format(
        FLAGS.logdir, self.name), self.sess.graph)

  def create_summaries(self):
    """Registers scalar summaries for loss and cost and merges all summaries."""
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('cost', self.cost)
    self.summary_op = tf.summary.merge_all()

  def assign_lr(self):
    """Resets the learning rate in dynamic schedules for subsequent trainings.

    In bandits settings, we do expand our dataset over time. Then, we need to
    re-train the network with the new data. Those algorithms that do not keep
    the step constant, can reset it at the start of each training process.
    """

    decay_steps = 1
    if self.hparams.activate_decay:
      current_gs = self.sess.run(self.global_step)
      with self.graph.as_default():
        # NOTE(review): the result is only stored in self.lr; the train_op
        # created in build_model was built with hparams.initial_lr.
        self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr,
                                              self.global_step - current_gs,
                                              decay_steps,
                                              self.hparams.lr_decay_rate)

  def train(self, data, num_steps):
    """Trains the BNN for num_steps, using the data in 'data'.

    Args:
      data: ContextualDataset object that provides the data.
      num_steps: Number of minibatches to train the network for. Overridden
        by the training schedule during the first `cleared_times_trained`
        calls.
    """

    if self.times_trained < self.cleared_times_trained:
      num_steps = int(self.training_schedule[self.times_trained])
    self.times_trained += 1

    if self.verbose:
      print('Training {} for {} steps...'.format(self.name, num_steps))

    with self.graph.as_default():

      for step in range(num_steps):
        x, y, w = data.get_batch_with_weights(self.hparams.batch_size)
        _, summary, global_step, loss = self.sess.run(
            [self.train_op, self.summary_op, self.global_step, self.loss],
            feed_dict={self.x: x, self.y: y, self.weights: w,
                       self.data_size: data.num_points()})

        # Track the largest posterior std among the first layer's weights.
        weights_l = self.sess.run(self.weights_std[0])
        self.h_max_var.append(np.max(weights_l))

        if step % self.freq_summary == 0:
          if self.show_training:
            print('step: {}, loss: {}'.format(step, loss))
            sys.stdout.flush()
          self.summary_writer.add_summary(summary, global_step)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bayesian NN using factorized VI (Bayes By Backprop. Blundell et al. 2014).
See https://arxiv.org/abs/1505.05424 for details.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
# import tensorflow_probability as tfp
from absl import flags
from bandits.core.bayesian_nn import BayesianNN
FLAGS = flags.FLAGS
# tfd = tfp.distributions
tfd = tf.contrib.distributions
tfl = tf.contrib.layers
def log_gaussian(x, mu, sigma, reduce_sum=True):
  """Returns the log pdf of x under a Gaussian with mean mu and scale sigma.

  Args:
    x: Point(s) at which the density is evaluated.
    mu: Mean passed to tfd.Normal.
    sigma: Scale passed to tfd.Normal.
    reduce_sum: Whether to add up all element-wise log densities.

  Returns:
    A single summed value if reduce_sum is True, else the element-wise result.
  """
  elementwise = tfd.Normal(mu, sigma).log_prob(x)
  if not reduce_sum:
    return elementwise
  return tf.reduce_sum(elementwise)
def analytic_kl(mu_1, sigma_1, mu_2, sigma_2):
  """KL for two Gaussian distributions with diagonal covariance matrix.

  Computes KL(N(mu_1, sigma_1^2) || N(mu_2, sigma_2^2)) element-wise in closed
  form and sums over all elements. The closed form is used because
  `tfd.MVNDiag` is not an attribute of tf.contrib.distributions, and because
  it accepts scalar `mu_2` / `sigma_2` through broadcasting.

  Args:
    mu_1: Means of the first distribution.
    sigma_1: Standard deviations of the first distribution.
    mu_2: Means of the second distribution (may be a scalar).
    sigma_2: Standard deviations of the second distribution (may be a scalar).

  Returns:
    Scalar tensor with the total KL divergence.
  """
  sigma_1_sq = tf.square(sigma_1)
  sigma_2_sq = tf.square(sigma_2)

  # Per element: (mu_1 - mu_2)^2 / (2 s2^2)
  #              + (s1^2 / s2^2 - 1 - log s1^2 + log s2^2) / 2
  mean_term = tf.square(mu_1 - mu_2) / (2. * sigma_2_sq)
  var_term = (sigma_1_sq / sigma_2_sq - 1.
              - tf.log(sigma_1_sq) + tf.log(sigma_2_sq)) / 2.
  return tf.reduce_sum(mean_term + var_term)
class BfVariationalNeuralBanditModel(BayesianNN):
  """Implements an approximate Bayesian NN using Variational Inference."""

  def __init__(self, hparams, name="BBBNN"):
    """Reads the hyper-parameters and builds the TensorFlow graph.

    Args:
      hparams: Hyper-parameter object. `f_num_points` and `verbose` are
        optional; every other attribute read below must be present.
      name: Name of the model, used in log messages and the summary path.
    """
    self.name = name
    self.hparams = hparams

    self.n_in = self.hparams.context_dim
    self.n_out = self.hparams.num_actions
    self.layers = self.hparams.layer_sizes
    self.init_scale = self.hparams.init_scale
    # Optional argument forwarded to data.num_points() in train().
    self.f_num_points = None
    if "f_num_points" in hparams:
      self.f_num_points = self.hparams.f_num_points

    # The first `cleared_times_trained` calls to train() ignore the requested
    # number of steps and follow this linearly spaced schedule instead.
    self.cleared_times_trained = self.hparams.cleared_times_trained
    self.initial_training_steps = self.hparams.initial_training_steps
    self.training_schedule = np.linspace(self.initial_training_steps,
                                         self.hparams.training_epochs,
                                         self.cleared_times_trained)
    self.verbose = getattr(self.hparams, "verbose", True)

    # Per-layer posterior parameters, keyed by layer id (see build_layer).
    self.weights_m = {}
    self.weights_std = {}
    self.biases_m = {}
    self.biases_std = {}

    self.times_trained = 0

    if self.hparams.use_sigma_exp_transform:
      self.sigma_transform = tf.exp
      self.inverse_sigma_transform = np.log
    else:
      self.sigma_transform = tf.nn.softplus
      # Inverse of softplus: log(exp(y) - 1) = y + log(1 - exp(-y)).
      self.inverse_sigma_transform = lambda y: y + np.log(1. - np.exp(-y))

    # Whether to use the local reparameterization trick to compute the loss.
    # See details in https://arxiv.org/abs/1506.02557
    self.use_local_reparameterization = True

    self.build_graph()

  def build_mu_variable(self, shape):
    """Returns a mean variable initialized as N(0, 0.05)."""
    return tf.Variable(tf.random_normal(shape, 0.0, 0.05))

  def build_sigma_variable(self, shape, init=-5.):
    """Returns a sigma variable initialized as N(init, 0.05)."""
    # Initialize sigma to be very small initially to encourage MAP opt first
    return tf.Variable(tf.random_normal(shape, init, 0.05))

  def build_layer(self, input_x, input_x_local, shape,
                  layer_id, activation_fn=tf.nn.relu):
    """Builds a variational layer, and computes KL term.

    Args:
      input_x: Input to the variational layer.
      input_x_local: Input when the local reparameterization trick was applied.
      shape: [number_inputs, number_outputs] for the layer.
      layer_id: Number of layer in the architecture.
      activation_fn: Activation function to apply.

    Returns:
      output_h: Output of the variational layer.
      output_h_local: Output when local reparameterization trick was applied.
      neg_kl: Negative KL term for the layer.
    """

    w_mu = self.build_mu_variable(shape)
    w_sigma = self.sigma_transform(self.build_sigma_variable(shape))
    w_noise = tf.random_normal(shape)
    w = w_mu + w_sigma * w_noise

    b_mu = self.build_mu_variable([1, shape[1]])
    b_sigma = self.sigma_transform(self.build_sigma_variable([1, shape[1]]))
    # The bias mean is used directly; no noise is added to the bias.
    b = b_mu

    # Store means and stds
    self.weights_m[layer_id] = w_mu
    self.weights_std[layer_id] = w_sigma
    self.biases_m[layer_id] = b_mu
    self.biases_std[layer_id] = b_sigma

    # Create outputs
    output_h = activation_fn(tf.matmul(input_x, w) + b)

    if self.use_local_reparameterization:
      # Use analytic KL divergence wrt the prior
      neg_kl = -analytic_kl(w_mu, w_sigma,
                            0., tf.to_float(np.sqrt(2./shape[0])))
    else:
      # Create empirical KL loss terms
      log_p = log_gaussian(w, 0., tf.to_float(np.sqrt(2./shape[0])))
      log_q = log_gaussian(w, tf.stop_gradient(w_mu), tf.stop_gradient(w_sigma))
      neg_kl = log_p - log_q

    # Apply local reparameterization trick: sample activations pre nonlinearity
    m_h = tf.matmul(input_x_local, w_mu) + b
    v_h = tf.matmul(tf.square(input_x_local), tf.square(w_sigma))
    output_h_local = m_h + tf.sqrt(v_h + 1e-6) * tf.random_normal(tf.shape(v_h))
    output_h_local = activation_fn(output_h_local)

    return output_h, output_h_local, neg_kl

  def build_action_noise(self):
    """Defines a model for additive noise per action, and its KL term.

    Sets self.noise_sigma to one sample of the per-action noise level.

    Returns:
      Negative KL estimate for the noise parameters when
      hparams.infer_noise_sigma is set, otherwise 0.
    """

    # Define mean and std variables (log-normal dist) for each action.
    noise_sigma_mu = (self.build_mu_variable([1, self.n_out])
                      + self.inverse_sigma_transform(self.hparams.noise_sigma))
    noise_sigma_sigma = self.sigma_transform(
        self.build_sigma_variable([1, self.n_out]))

    pre_noise_sigma = (noise_sigma_mu
                       + tf.random_normal([1, self.n_out]) * noise_sigma_sigma)
    self.noise_sigma = self.sigma_transform(pre_noise_sigma)

    # Compute KL for additive noise sigma terms.
    if getattr(self.hparams, "infer_noise_sigma", False):
      neg_kl_term = log_gaussian(
          pre_noise_sigma,
          self.inverse_sigma_transform(self.hparams.noise_sigma),
          self.hparams.prior_sigma
      )
      neg_kl_term -= log_gaussian(pre_noise_sigma,
                                  noise_sigma_mu,
                                  noise_sigma_sigma)
    else:
      neg_kl_term = 0.

    return neg_kl_term

  def build_model(self, activation_fn=tf.nn.relu):
    """Defines the actual NN model with fully connected layers.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.

    Args:
      activation_fn: the activation function used in the hidden nn layers.
    """

    def weight_prior(dtype, shape, c, d, e):
      """Zero-mean Gaussian prior with std sqrt(2 / fan_in) on the kernel."""
      del c, d, e
      # Same scale as the prior used by build_layer: sqrt(2 / shape[0]).
      return tfd.Independent(
          tfd.Normal(loc=tf.zeros(shape, dtype),
                     scale=tf.to_float(np.sqrt(2. / shape[0]))),
          reinterpreted_batch_ndims=tf.size(shape))

    if self.verbose:
      print("Initializing model {}.".format(self.name))

    # Compute model additive noise for each action with log-normal distribution
    neg_kl_term = self.build_action_noise()

    # Build variational network using self.x as input.
    input_x = self.x

    # Create Keras model using DenseLocalReparameterization with the
    # weight_prior defined above as kernel prior.
    # NOTE(review): `tfl` is bound to tf.contrib.layers at module level;
    # confirm DenseLocalReparameterization is available there (it is provided
    # by tensorflow_probability.layers).
    model_layers = [
        tfl.DenseLocalReparameterization(
            n_nodes,
            activation=activation_fn,  # honor the caller-supplied activation
            kernel_prior_fn=weight_prior
        )
        for n_nodes in self.layers if n_nodes > 0
    ]

    # Linear output layer with one unit per action.
    output_layer = tfl.DenseLocalReparameterization(
        self.n_out,
        activation=lambda x: x,
        kernel_prior_fn=weight_prior
    )
    model_layers.append(output_layer)

    model = tf.keras.Sequential(model_layers)
    self.y_pred = model(input_x)

    # Compute KL term
    neg_kl_term -= tf.add_n(model.losses)

    # Compute log likelihood (with learned or fixed noise level)
    if getattr(self.hparams, "infer_noise_sigma", False):
      log_likelihood = log_gaussian(
          self.y, self.y_pred, self.noise_sigma, reduce_sum=False)
    else:
      log_likelihood = log_gaussian(
          self.y, self.y_pred, self.hparams.noise_sigma, reduce_sum=False)

    # Only take into account observed outcomes (bandits setting)
    batch_size = tf.to_float(tf.shape(self.x)[0])
    weighted_log_likelihood = tf.reduce_sum(
        log_likelihood * self.weights) / batch_size

    # The objective is 1/n * (\sum_i log_like_i - KL); neg_kl_term estimates -KL
    elbo = weighted_log_likelihood + (neg_kl_term / self.n)

    self.loss = -elbo
    self.global_step = tf.train.get_or_create_global_step()
    self.train_op = tf.train.AdamOptimizer(self.hparams.initial_lr).minimize(
        self.loss, global_step=self.global_step)

    # Create tensorboard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter(
        "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)

  def build_graph(self):
    """Defines graph, session, placeholders, and model.

    Placeholders are: n (size of the dataset), x and y (context and observed
    reward for each action), and weights (one-hot encoding of selected action
    for each context, i.e., only possibly non-zero element in each y).
    """

    self.graph = tf.Graph()
    with self.graph.as_default():

      self.sess = tf.Session()

      self.n = tf.placeholder(shape=[], dtype=tf.float32)

      self.x = tf.placeholder(shape=[None, self.n_in], dtype=tf.float32)
      self.y = tf.placeholder(shape=[None, self.n_out], dtype=tf.float32)
      self.weights = tf.placeholder(shape=[None, self.n_out], dtype=tf.float32)

      self.build_model()
      self.sess.run(tf.global_variables_initializer())

  def create_summaries(self):
    """Defines summaries including mean loss, and global step."""

    with self.graph.as_default():
      with tf.name_scope(self.name + "_summaries"):
        tf.summary.scalar("loss", self.loss)
        tf.summary.scalar("global_step", self.global_step)
        self.summary_op = tf.summary.merge_all()

  def assign_lr(self):
    """Resets the learning rate in dynamic schedules for subsequent trainings.

    In bandits settings, we do expand our dataset over time. Then, we need to
    re-train the network with the new data. The algorithms that do not keep
    the step constant, can reset it at the start of each *training* process.
    """

    decay_steps = 1
    if self.hparams.activate_decay:
      current_gs = self.sess.run(self.global_step)
      with self.graph.as_default():
        # NOTE(review): the result is only stored in self.lr; the train_op
        # created in build_model was built with hparams.initial_lr.
        self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr,
                                              self.global_step - current_gs,
                                              decay_steps,
                                              self.hparams.lr_decay_rate)

  def train(self, data, num_steps):
    """Trains the BNN for num_steps, using the data in 'data'.

    Args:
      data: ContextualDataset object that provides the data.
      num_steps: Number of minibatches to train the network for. Overridden
        by the training schedule during the first `cleared_times_trained`
        calls.

    Returns:
      losses: Loss history during training.
    """

    if self.times_trained < self.cleared_times_trained:
      num_steps = int(self.training_schedule[self.times_trained])
    self.times_trained += 1

    losses = []

    with self.graph.as_default():

      if self.verbose:
        print("Training {} for {} steps...".format(self.name, num_steps))

      for step in range(num_steps):
        x, y, weights = data.get_batch_with_weights(self.hparams.batch_size)
        _, summary, global_step, loss = self.sess.run(
            [self.train_op, self.summary_op, self.global_step, self.loss],
            feed_dict={
                self.x: x,
                self.y: y,
                self.weights: weights,
                self.n: data.num_points(self.f_num_points),
            })

        losses.append(loss)

        if step % self.hparams.freq_summary == 0:
          if self.hparams.show_training:
            print("{} | step: {}, loss: {}".format(
                self.name, global_step, loss))
          self.summary_writer.add_summary(summary, global_step)

    return losses
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contextual algorithm based on bootstrapping neural networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from bandits.core.bandit_algorithm import BanditAlgorithm
from bandits.core.contextual_dataset import ContextualDataset
from bandits.algorithms.neural_bandit_model import NeuralBanditModel
class BootstrappedBNNSampling(BanditAlgorithm):
  """Thompson Sampling approximated by an ensemble of bootstrapped networks."""

  def __init__(self, name, hparams, optimizer='RMS'):
    """Creates a BootstrappedBNNSampling object based on a specific optimizer.

      hparams.q: Number of models that are independently trained.
      hparams.p: Prob of independently including each datapoint in each model.

    Args:
      name: Name given to the instance.
      hparams: Hyperparameters for each individual model.
      optimizer: Neural network optimization algorithm.
    """

    self.name = name
    self.hparams = hparams
    self.optimizer_n = optimizer

    self.training_freq = hparams.training_freq
    self.training_epochs = hparams.training_epochs
    self.t = 0

    self.q = hparams.q
    self.p = hparams.p

    # One private data buffer per ensemble member.
    self.datasets = []
    for _ in range(self.q):
      self.datasets.append(
          ContextualDataset(hparams.context_dim,
                            hparams.num_actions,
                            hparams.buffer_s))

    # One network per ensemble member, named "<name>-<index>-bnn".
    self.bnn_boot = []
    for i in range(self.q):
      model_name = '{}-{}-bnn'.format(name, i)
      self.bnn_boot.append(NeuralBanditModel(optimizer, hparams, model_name))

  def action(self, context):
    """Selects action for context based on Thompson Sampling using one BNN."""

    num_actions = self.hparams.num_actions
    if self.t < num_actions * self.hparams.initial_pulls:
      # round robin until each action has been taken "initial_pulls" times
      return self.t % num_actions

    # choose model uniformly at random
    chosen = self.bnn_boot[np.random.randint(self.q)]

    with chosen.graph.as_default():
      c = context.reshape((1, self.hparams.context_dim))
      output = chosen.sess.run(chosen.y_pred, feed_dict={chosen.x: c})
      return np.argmax(output)

  def update(self, context, action, reward):
    """Stores the new datapoint, re-training every self.training_freq steps."""

    self.t += 1
    for dataset in self.datasets:
      # include the data point with probability p independently in each dataset
      # (the very first point is always included)
      if np.random.random() < self.p or self.t < 2:
        dataset.add(context, action, reward)

    if self.t % self.training_freq != 0:
      return

    # update all the models:
    for model, dataset in zip(self.bnn_boot, self.datasets):
      if self.hparams.reset_lr:
        model.assign_lr()
      model.train(dataset, self.training_epochs)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contextual bandit algorithm that selects an action at random."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from bandits.core.bandit_algorithm import BanditAlgorithm
class FixedPolicySampling(BanditAlgorithm):
  """Baseline that draws every action from a fixed distribution p."""

  def __init__(self, name, p, hparams):
    """Creates a FixedPolicySampling object.

    Args:
      name: Name of the algorithm.
      p: Vector of normalized probabilities corresponding to sampling each arm.
      hparams: Hyper-parameters, including the number of arms (num_actions).

    Raises:
      ValueError: when p dimension does not match the number of actions.
    """

    self.name = name
    self.p = p
    self.hparams = hparams

    num_probabilities = len(p)
    if num_probabilities != self.hparams.num_actions:
      raise ValueError('Policy needs k probabilities.')

  def action(self, context):
    """Samples an arm according to p; the context is ignored."""
    arms = range(self.hparams.num_actions)
    return np.random.choice(arms, p=self.p)
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contextual algorithm that keeps a full linear posterior for each arm."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from scipy.stats import invgamma
from bandits.core.bandit_algorithm import BanditAlgorithm
from bandits.core.contextual_dataset import ContextualDataset
class LinearFullPosteriorSampling(BanditAlgorithm):
  """Thompson Sampling with independent linear models and unknown noise var."""

  def __init__(self, name, hparams):
    """Initialize posterior distributions and hyperparameters.

    Assume a linear model for each action i: reward = context^T beta_i + noise
    Each beta_i has a Gaussian prior (lambda parameter), each sigma2_i (noise
    level) has an inverse Gamma prior (a0, b0 parameters). Mean, covariance,
    and precision matrices are initialized, and the ContextualDataset created.

    Args:
      name: Name of the algorithm.
      hparams: Hyper-parameters of the algorithm.
    """

    self.name = name
    self.hparams = hparams

    # Gaussian prior for each beta_i
    self._lambda_prior = self.hparams.lambda_prior

    # All per-action matrices have dimension context_dim + 1: the extra
    # component is the intercept.
    self.mu = [
        np.zeros(self.hparams.context_dim + 1)
        for _ in range(self.hparams.num_actions)
    ]

    self.cov = [(1.0 / self.lambda_prior) * np.eye(self.hparams.context_dim + 1)
                for _ in range(self.hparams.num_actions)]

    self.precision = [
        self.lambda_prior * np.eye(self.hparams.context_dim + 1)
        for _ in range(self.hparams.num_actions)
    ]

    # Inverse Gamma prior for each sigma2_i
    self._a0 = self.hparams.a0
    self._b0 = self.hparams.b0

    self.a = [self._a0 for _ in range(self.hparams.num_actions)]
    self.b = [self._b0 for _ in range(self.hparams.num_actions)]

    self.t = 0
    self.data_h = ContextualDataset(hparams.context_dim,
                                    hparams.num_actions,
                                    intercept=True)

  def action(self, context):
    """Samples beta's from posterior, and chooses best action accordingly.

    Args:
      context: Context for which the action need to be chosen.

    Returns:
      action: Selected action for the context.
    """

    # Round robin until each action has been selected "initial_pulls" times
    if self.t < self.hparams.num_actions * self.hparams.initial_pulls:
      return self.t % self.hparams.num_actions

    # Sample sigma2, and beta conditional on sigma2
    sigma2_s = [
        self.b[i] * invgamma.rvs(self.a[i])
        for i in range(self.hparams.num_actions)
    ]

    try:
      beta_s = [
          np.random.multivariate_normal(self.mu[i], sigma2_s[i] * self.cov[i])
          for i in range(self.hparams.num_actions)
      ]
    except np.linalg.LinAlgError as e:
      # Sampling could fail if covariance is not positive definite
      print('Exception when sampling from {}.'.format(self.name))
      # Format the exception itself: exceptions have no `.message` attribute
      # on Python 3, so reading it would raise inside this handler.
      print('Details: {} | {}.'.format(e, e.args))
      # Fall back to a standard normal sample for every action.
      d = self.hparams.context_dim + 1
      beta_s = [
          np.random.multivariate_normal(np.zeros((d)), np.eye(d))
          for i in range(self.hparams.num_actions)
      ]

    # Compute sampled expected values, intercept is last component of beta
    vals = [
        np.dot(beta_s[i][:-1], context.T) + beta_s[i][-1]
        for i in range(self.hparams.num_actions)
    ]

    return np.argmax(vals)

  def update(self, context, action, reward):
    """Updates action posterior using the linear Bayesian regression formula.

    Args:
      context: Last observed context.
      action: Last observed action.
      reward: Last observed reward.
    """

    self.t += 1
    self.data_h.add(context, action, reward)

    # Update posterior of action with formulas: \beta | x,y ~ N(mu_q, cov_q)
    x, y = self.data_h.get_data(action)

    # The algorithm could be improved with sequential update formulas (cheaper)
    s = np.dot(x.T, x)

    # Some terms are removed as we assume prior mu_0 = 0.
    precision_a = s + self.lambda_prior * np.eye(self.hparams.context_dim + 1)
    cov_a = np.linalg.inv(precision_a)
    mu_a = np.dot(cov_a, np.dot(x.T, y))

    # Inverse Gamma posterior update
    a_post = self.a0 + x.shape[0] / 2.0
    b_upd = 0.5 * (np.dot(y.T, y) - np.dot(mu_a.T, np.dot(precision_a, mu_a)))
    b_post = self.b0 + b_upd

    # Store new posterior distributions
    self.mu[action] = mu_a
    self.cov[action] = cov_a
    self.precision[action] = precision_a
    self.a[action] = a_post
    self.b[action] = b_post

  @property
  def a0(self):
    """Shape parameter of the inverse Gamma prior."""
    return self._a0

  @property
  def b0(self):
    """Scale parameter of the inverse Gamma prior."""
    return self._b0

  @property
  def lambda_prior(self):
    """Precision of the Gaussian prior on each beta_i."""
    return self._lambda_prior
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment