Unverified Commit 78ddf6eb authored by cclauss's avatar cclauss Committed by GitHub
Browse files

Merge branch 'master' into patch-6

parents 50cb0365 1f34fcaf
......@@ -22,6 +22,7 @@ import math
import os
import numpy as np
from six.moves import xrange
import tensorflow as tf
import blocks_std
......
......@@ -25,6 +25,7 @@ from __future__ import print_function
from delf import feature_pb2
from delf import datum_io
import numpy as np
from six.moves import xrange
import tensorflow as tf
......
......@@ -22,6 +22,7 @@ import sys
import time
import numpy as np
from six.moves import xrange
import tensorflow as tf
from differential_privacy.dp_sgd.dp_optimizer import dp_optimizer
......
......@@ -17,6 +17,7 @@
import collections
from six.moves import xrange
import tensorflow as tf
OrderedDict = collections.OrderedDict
......
......@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange
def labels_from_probs(probs):
......@@ -127,5 +128,3 @@ def aggregation_most_frequent(logits):
result[i] = np.argmax(label_counts)
return np.asarray(result, dtype=np.int32)
......@@ -41,6 +41,7 @@ python analysis.py
import os
import math
import numpy as np
from six.moves import xrange
import tensorflow as tf
from differential_privacy.multiple_teachers.input import maybe_download
......@@ -139,7 +140,7 @@ def logmgf_exact(q, priv_eps, l):
try:
log_t = math.log(t)
except ValueError:
print "Got ValueError in math.log for values :" + str((q, priv_eps, l, t))
print("Got ValueError in math.log for values :" + str((q, priv_eps, l, t)))
log_t = priv_eps * l
else:
log_t = priv_eps * l
......@@ -171,7 +172,7 @@ def sens_at_k(counts, noise_eps, l, k):
"""
counts_sorted = sorted(counts, reverse=True)
if 0.5 * noise_eps * l > 1:
print "l too large to compute sensitivity"
print("l too large to compute sensitivity")
return 0
# Now we can assume that at k, gap remains positive
# or we have reached the point where logmgf_exact is
......@@ -268,8 +269,8 @@ def main(unused_argv):
# Solving gives eps = (alpha - ln (delta))/l
eps_list_nm = (total_log_mgf_nm - math.log(delta)) / l_list
print "Epsilons (Noisy Max): " + str(eps_list_nm)
print "Smoothed sensitivities (Noisy Max): " + str(total_ss_nm / l_list)
print("Epsilons (Noisy Max): " + str(eps_list_nm))
print("Smoothed sensitivities (Noisy Max): " + str(total_ss_nm / l_list))
# If beta < eps / 2 ln (1/delta), then adding noise Lap(1) * 2 SS/eps
# is eps,delta DP
......@@ -280,12 +281,12 @@ def main(unused_argv):
# Print the first one's scale
ss_eps = 2.0 * beta * math.log(1/delta)
ss_scale = 2.0 / ss_eps
print "To get an " + str(ss_eps) + "-DP estimate of epsilon, "
print "..add noise ~ " + str(ss_scale)
print "... times " + str(total_ss_nm / l_list)
print "Epsilon = " + str(min(eps_list_nm)) + "."
print("To get an " + str(ss_eps) + "-DP estimate of epsilon, ")
print("..add noise ~ " + str(ss_scale))
print("... times " + str(total_ss_nm / l_list))
print("Epsilon = " + str(min(eps_list_nm)) + ".")
if min(eps_list_nm) == eps_list_nm[-1]:
print "Warning: May not have used enough values of l"
print("Warning: May not have used enough values of l")
# Data independent bound, as mechanism is
# 2*noise_eps DP.
......@@ -294,7 +295,7 @@ def main(unused_argv):
[logmgf_exact(1.0, 2.0 * noise_eps, l) for l in l_list])
data_ind_eps_list = (data_ind_log_mgf - math.log(delta)) / l_list
print "Data independent bound = " + str(min(data_ind_eps_list)) + "."
print("Data independent bound = " + str(min(data_ind_eps_list)) + ".")
return
......
......@@ -20,6 +20,7 @@ from __future__ import print_function
from datetime import datetime
import math
import numpy as np
from six.moves import xrange
import tensorflow as tf
import time
......@@ -600,5 +601,3 @@ def softmax_preds(images, ckpt_path, return_logits=False):
tf.reset_default_graph()
return preds
......@@ -24,6 +24,7 @@ import numpy as np
import os
from scipy.io import loadmat as loadmat
from six.moves import urllib
from six.moves import xrange
import sys
import tarfile
......
......@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange
import tensorflow as tf
from differential_privacy.multiple_teachers import aggregation
......
......@@ -40,12 +40,15 @@ To verify that the I1 >= I2 (see comments in GaussianMomentsAccountant in
accountant.py for the context), run the same loop above with verify=True
passed to compute_log_moment.
"""
from __future__ import print_function
import math
import sys
import numpy as np
import scipy.integrate as integrate
import scipy.stats
from six.moves import xrange
from sympy.mpmath import mp
......@@ -108,10 +111,10 @@ def compute_a(sigma, q, lmbd, verbose=False):
a_lambda_exact = ((1.0 - q) * a_lambda_first_term_exact +
q * a_lambda_second_term_exact)
if verbose:
print "A: by binomial expansion {} = {} + {}".format(
print("A: by binomial expansion {} = {} + {}".format(
a_lambda_exact,
(1.0 - q) * a_lambda_first_term_exact,
q * a_lambda_second_term_exact)
q * a_lambda_second_term_exact))
return _to_np_float64(a_lambda_exact)
......@@ -125,8 +128,8 @@ def compute_b(sigma, q, lmbd, verbose=False):
b_fn = lambda z: (np.power(mu0(z) / mu(z), lmbd) -
np.power(mu(-z) / mu0(z), lmbd))
if verbose:
print "M =", m
print "f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m))
print("M =", m)
print("f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m)))
assert b_fn(-m) < 0 and b_fn(m) < 0
b_lambda_int1_fn = lambda z: (mu0(z) *
......@@ -140,9 +143,9 @@ def compute_b(sigma, q, lmbd, verbose=False):
b_bound = a_lambda_m1 + b_int1 - b_int2
if verbose:
print "B: by numerical integration", b_lambda
print "B must be no more than ", b_bound
print b_lambda, b_bound
print("B: by numerical integration", b_lambda)
print("B must be no more than ", b_bound)
print(b_lambda, b_bound)
return _to_np_float64(b_lambda)
......@@ -188,10 +191,10 @@ def compute_a_mp(sigma, q, lmbd, verbose=False):
a_lambda_second_term = integral_inf_mp(a_lambda_second_term_fn)
if verbose:
print "A: by numerical integration {} = {} + {}".format(
print("A: by numerical integration {} = {} + {}".format(
a_lambda,
(1 - q) * a_lambda_first_term,
q * a_lambda_second_term)
q * a_lambda_second_term))
return _to_np_float64(a_lambda)
......@@ -210,8 +213,8 @@ def compute_b_mp(sigma, q, lmbd, verbose=False):
b_fn = lambda z: ((mu0(z) / mu(z)) ** lmbd_int -
(mu(-z) / mu0(z)) ** lmbd_int)
if verbose:
print "M =", m
print "f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m))
print("M =", m)
print("f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m)))
assert b_fn(-m) < 0 and b_fn(m) < 0
b_lambda_int1_fn = lambda z: mu0(z) * (mu0(z) / mu(z)) ** lmbd_int
......@@ -223,8 +226,8 @@ def compute_b_mp(sigma, q, lmbd, verbose=False):
b_bound = a_lambda_m1 + b_int1 - b_int2
if verbose:
print "B by numerical integration", b_lambda
print "B must be no more than ", b_bound
print("B by numerical integration", b_lambda)
print("B must be no more than ", b_bound)
assert b_lambda < b_bound + 1e-5
return _to_np_float64(b_lambda)
......
......@@ -19,6 +19,7 @@
import math
import numpy as np
from six.moves import xrange
import tensorflow as tf
from domain_adaptation.datasets import dataset_factory
......
# Filtering Variational Objectives
This folder contains a TensorFlow implementation of the algorithms from
Chris J. Maddison\*, Dieterich Lawson\*, George Tucker\*, Nicolas Heess, Mohammad Norouzi, Andriy Mnih, Arnaud Doucet, and Yee Whye Teh. "Filtering Variational Objectives." NIPS 2017.
[https://arxiv.org/abs/1705.09279](https://arxiv.org/abs/1705.09279)
This code implements 3 different bounds for training sequential latent variable models: the evidence lower bound (ELBO), the importance weighted auto-encoder bound (IWAE), and our bound, the filtering variational objective (FIVO).
Additionally it contains an implementation of the variational recurrent neural network (VRNN), a sequential latent variable model that can be trained using these three objectives. This repo provides code for training a VRNN to do sequence modeling of pianoroll and speech data.
#### Directory Structure
The important parts of the code are organized as follows.
```
fivo.py # main script, contains flag definitions
runners.py # graph construction code for training and evaluation
bounds.py # code for computing each bound
data
├── datasets.py # readers for pianoroll and speech datasets
├── calculate_pianoroll_mean.py # preprocesses the pianoroll datasets
└── create_timit_dataset.py # preprocesses the TIMIT dataset
models
└── vrnn.py # variational RNN implementation
bin
├── run_train.sh # an example script that runs training
├── run_eval.sh # an example script that runs evaluation
└── download_pianorolls.sh # a script that downloads the pianoroll files
```
### Training on Pianorolls
Requirements before we start:
* TensorFlow (see [tensorflow.org](http://tensorflow.org) for how to install)
* [scipy](https://www.scipy.org/)
* [sonnet](https://github.com/deepmind/sonnet)
#### Download the Data
The pianoroll datasets are encoded as pickled sparse arrays and are available at [http://www-etud.iro.umontreal.ca/~boulanni/icml2012](http://www-etud.iro.umontreal.ca/~boulanni/icml2012). You can use the script `bin/download_pianorolls.sh` to download the files into a directory of your choosing.
```
export PIANOROLL_DIR=~/pianorolls
mkdir $PIANOROLL_DIR
sh bin/download_pianorolls.sh $PIANOROLL_DIR
```
#### Preprocess the Data
The script `calculate_pianoroll_mean.py` loads a pianoroll pickle file, calculates the mean, updates the pickle file to include the mean under the key `train_mean`, and writes the file back to disk in-place. You should do this for all pianoroll datasets you wish to train on.
```
python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/piano-midi.de.pkl
python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/nottingham.pkl
python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/musedata.pkl
python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/jsb.pkl
```
#### Training
Now we can train a model. Here is a standard training run, taken from `bin/run_train.sh`:
```
python fivo.py \
--mode=train \
--logdir=/tmp/fivo \
--model=vrnn \
--bound=fivo \
--summarize_every=100 \
--batch_size=4 \
--num_samples=4 \
--learning_rate=0.0001 \
--dataset_path="$PIANOROLL_DIR/jsb.pkl" \
--dataset_type="pianoroll"
```
You should see output that looks something like this (with a lot of extra logging cruft):
```
Step 1, fivo bound per timestep: -11.801050
global_step/sec: 9.89825
Step 101, fivo bound per timestep: -11.198309
global_step/sec: 9.55475
Step 201, fivo bound per timestep: -11.287262
global_step/sec: 9.68146
step 301, fivo bound per timestep: -11.316490
global_step/sec: 9.94295
Step 401, fivo bound per timestep: -11.151743
```
You will also see lines saying `Out of range: exceptions.StopIteration: Iteration finished`. This is not an error and is fine.
#### Evaluation
You can also evaluate saved checkpoints. The `eval` mode loads a model checkpoint, tests its performance on all items in a dataset, and reports the log-likelihood averaged over the dataset. For example here is a command, taken from `bin/run_eval.sh`, that will evaluate a JSB model on the test set:
```
python fivo.py \
--mode=eval \
--split=test \
--alsologtostderr \
--logdir=/tmp/fivo \
--model=vrnn \
--batch_size=4 \
--num_samples=4 \
--dataset_path="$PIANOROLL_DIR/jsb.pkl" \
--dataset_type="pianoroll"
```
You should see output like this:
```
Model restored from step 1, evaluating.
test elbo ll/t: -12.299635, iwae ll/t: -12.128336 fivo ll/t: -11.656939
test elbo ll/seq: -754.750312, iwae ll/seq: -744.238773 fivo ll/seq: -715.3121490
```
The evaluation script prints log-likelihood in both nats per timestep (ll/t) and nats per sequence (ll/seq) for all three bounds.
### Training on TIMIT
The TIMIT speech dataset is available at the [Linguistic Data Consortium website](https://catalog.ldc.upenn.edu/LDC93S1), but is unfortunately not free. These instructions will proceed assuming you have downloaded the TIMIT archive and extracted it into the directory `$RAW_TIMIT_DIR`.
#### Preprocess TIMIT
We preprocess TIMIT (as described in our paper) and write it out to a series of TFRecord files. To prepare the TIMIT dataset use the script `create_timit_dataset.py`
```
export TIMIT_DIR=~/timit_dataset
mkdir $TIMIT_DIR
python data/create_timit_dataset.py \
--raw_timit_dir=$RAW_TIMIT_DIR \
--out_dir=$TIMIT_DIR
```
You should see this exact output:
```
4389 train / 231 valid / 1680 test
train mean: 0.006060 train std: 548.136169
```
#### Training on TIMIT
This is very similar to training on pianoroll datasets, with just a few flags switched.
```
python fivo.py \
--mode=train \
--logdir=/tmp/fivo \
--model=vrnn \
--bound=fivo \
--summarize_every=100 \
--batch_size=4 \
--num_samples=4 \
--learning_rate=0.0001 \
--dataset_path="$TIMIT_DIR/train" \
--dataset_type="speech"
```
### Contact
This codebase is maintained by Dieterich Lawson, reachable via email at dieterichl@google.com. For questions and issues please open an issue on the tensorflow/models issues tracker and assign it to @dieterichlawson.
#!/bin/bash
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# A script to download the pianoroll datasets.
# Accepts one argument, the directory to put the files in.
if [ -z "$1" ]
then
  # Report the usage error on stderr and exit with a nonzero status so
  # callers can detect the failure (bare `exit` would return 0 here).
  echo "Error, must provide a directory to download the files to." >&2
  exit 1
fi
echo "Downloading datasets into $1"
# Quote "$1" so destination paths containing spaces work correctly.
curl -s "http://www-etud.iro.umontreal.ca/~boulanni/Piano-midi.de.pickle" > "$1/piano-midi.de.pkl"
curl -s "http://www-etud.iro.umontreal.ca/~boulanni/Nottingham.pickle" > "$1/nottingham.pkl"
curl -s "http://www-etud.iro.umontreal.ca/~boulanni/MuseData.pickle" > "$1/musedata.pkl"
curl -s "http://www-etud.iro.umontreal.ca/~boulanni/JSB%20Chorales.pickle" > "$1/jsb.pkl"
#!/bin/bash
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# An example of running evaluation.

# Directory holding the preprocessed pianoroll pickle files
# (see bin/download_pianorolls.sh and data/calculate_pianoroll_mean.py).
PIANOROLL_DIR=$HOME/pianorolls

# Evaluate the latest VRNN checkpoint in /tmp/fivo on the JSB test split.
python fivo.py \
  --mode=eval \
  --logdir=/tmp/fivo \
  --model=vrnn \
  --batch_size=4 \
  --num_samples=4 \
  --split=test \
  --dataset_path="$PIANOROLL_DIR/jsb.pkl" \
  --dataset_type="pianoroll"
#!/bin/bash
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# An example of running training.

# Directory holding the preprocessed pianoroll pickle files
# (see bin/download_pianorolls.sh and data/calculate_pianoroll_mean.py).
PIANOROLL_DIR=$HOME/pianorolls

# Train a VRNN on the JSB chorales with the FIVO bound, 4 particles,
# logging checkpoints and summaries to /tmp/fivo.
python fivo.py \
  --mode=train \
  --logdir=/tmp/fivo \
  --model=vrnn \
  --bound=fivo \
  --summarize_every=100 \
  --batch_size=4 \
  --num_samples=4 \
  --learning_rate=0.0001 \
  --dataset_path="$PIANOROLL_DIR/jsb.pkl" \
  --dataset_type="pianoroll"
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of objectives for training stochastic latent variable models.
Contains implementations of the Importance Weighted Autoencoder objective (IWAE)
and the Filtering Variational objective (FIVO).
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import nested_utils as nested
def iwae(cell,
         inputs,
         seq_lengths,
         num_samples=1,
         parallel_iterations=30,
         swap_memory=True):
  """Computes the IWAE lower bound on the log marginal probability.

  This method accepts a stochastic latent variable model and some observations
  and computes a stochastic lower bound on the log marginal probability of the
  observations. The IWAE estimator is defined by averaging multiple importance
  weights. For more details see "Importance Weighted Autoencoders" by Burda
  et al. https://arxiv.org/abs/1509.00519.

  When num_samples = 1, this bound becomes the evidence lower bound (ELBO).

  Args:
    cell: A callable that implements one timestep of the model. See
      models/vrnn.py for an example.
    inputs: The inputs to the model. A potentially nested list or tuple of
      Tensors each of shape [max_seq_len, batch_size, ...]. The Tensors must
      have a rank at least two and have matching shapes in the first two
      dimensions, which represent time and the batch respectively. At each
      timestep 'cell' will be called with a slice of the Tensors in inputs.
    seq_lengths: A [batch_size] Tensor of ints encoding the length of each
      sequence in the batch (sequences can be padded to a common length).
    num_samples: The number of samples to use.
    parallel_iterations: The number of parallel iterations to use for the
      internal while loop.
    swap_memory: Whether GPU-CPU memory swapping should be enabled for the
      internal while loop.

  Returns:
    log_p_hat: A Tensor of shape [batch_size] containing IWAE's estimate of the
      log marginal probability of the observations.
    kl: A Tensor of shape [batch_size] containing the kl divergence
      from q(z|x) to p(z), averaged over samples.
    log_weights: A Tensor of shape [max_seq_len, batch_size, num_samples]
      containing the log weights at each timestep. Will not be valid for
      timesteps past the end of a sequence.
    log_ess: A Tensor of shape [max_seq_len, batch_size] containing the log
      effective sample size at each timestep. Will not be valid for timesteps
      past the end of a sequence.
  """
  batch_size = tf.shape(seq_lengths)[0]
  max_seq_len = tf.reduce_max(seq_lengths)
  # seq_mask is [max_seq_len, batch_size]; 1.0 inside a sequence, 0.0 in
  # padding, so masked timesteps contribute nothing to the accumulators.
  seq_mask = tf.transpose(
      tf.sequence_mask(seq_lengths, maxlen=max_seq_len, dtype=tf.float32),
      perm=[1, 0])
  if num_samples > 1:
    inputs, seq_mask = nested.tile_tensors([inputs, seq_mask], [1, num_samples])
  inputs_ta, mask_ta = nested.tas_for_tensors([inputs, seq_mask], max_seq_len)
  t0 = tf.constant(0, tf.int32)
  # The batch dimension is flattened to num_samples copies per sequence.
  init_states = cell.zero_state(batch_size * num_samples, tf.float32)
  ta_names = ['log_weights', 'log_ess']
  tas = [tf.TensorArray(tf.float32, max_seq_len, name='%s_ta' % n)
         for n in ta_names]
  log_weights_acc = tf.zeros([num_samples, batch_size], dtype=tf.float32)
  kl_acc = tf.zeros([num_samples * batch_size], dtype=tf.float32)
  accs = (log_weights_acc, kl_acc)

  def while_predicate(t, *unused_args):
    # Iterate over every timestep up to the longest sequence in the batch.
    return t < max_seq_len

  def while_step(t, rnn_state, tas, accs):
    """Implements one timestep of IWAE computation."""
    log_weights_acc, kl_acc = accs
    cur_inputs, cur_mask = nested.read_tas([inputs_ta, mask_ta], t)
    # Run the cell for one step.
    log_q_z, log_p_z, log_p_x_given_z, kl, new_state = cell(
        cur_inputs,
        rnn_state,
        cur_mask,
    )
    # Compute the incremental weight and use it to update the current
    # accumulated weight.
    kl_acc += kl * cur_mask
    log_alpha = (log_p_x_given_z + log_p_z - log_q_z) * cur_mask
    log_alpha = tf.reshape(log_alpha, [num_samples, batch_size])
    log_weights_acc += log_alpha
    # Calculate the effective sample size.
    # ESS = (sum w)^2 / sum(w^2), computed here in log space.
    ess_num = 2 * tf.reduce_logsumexp(log_weights_acc, axis=0)
    ess_denom = tf.reduce_logsumexp(2 * log_weights_acc, axis=0)
    log_ess = ess_num - ess_denom
    # Update the Tensorarrays and accumulators.
    ta_updates = [log_weights_acc, log_ess]
    new_tas = [ta.write(t, x) for ta, x in zip(tas, ta_updates)]
    new_accs = (log_weights_acc, kl_acc)
    return t + 1, new_state, new_tas, new_accs

  _, _, tas, accs = tf.while_loop(
      while_predicate,
      while_step,
      loop_vars=(t0, init_states, tas, accs),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  log_weights, log_ess = [x.stack() for x in tas]
  final_log_weights, kl = accs
  # IWAE bound: log-mean-exp of the final importance weights.
  log_p_hat = (tf.reduce_logsumexp(final_log_weights, axis=0) -
               tf.log(tf.to_float(num_samples)))
  kl = tf.reduce_mean(tf.reshape(kl, [num_samples, batch_size]), axis=0)
  # Reorder to [max_seq_len, batch_size, num_samples] for the caller.
  log_weights = tf.transpose(log_weights, perm=[0, 2, 1])
  return log_p_hat, kl, log_weights, log_ess
def ess_criterion(num_samples, log_ess, unused_t):
  """Resamples whenever the effective sample size falls to half or below."""
  # Compare in log space: ESS <= num_samples / 2.
  log_threshold = tf.log(num_samples / 2.0)
  return log_ess <= log_threshold
def never_resample_criterion(unused_num_samples, log_ess, unused_t):
  """A criterion that never triggers resampling."""
  # An all-False boolean Tensor with the same shape as log_ess.
  all_zeros = tf.zeros_like(log_ess)
  return tf.cast(all_zeros, tf.bool)
def always_resample_criterion(unused_num_samples, log_ess, unused_t):
  """A criterion that resamples at every timestep."""
  # An all-True boolean Tensor with the same shape as log_ess.
  all_ones = tf.ones_like(log_ess)
  return tf.cast(all_ones, tf.bool)
def fivo(cell,
         inputs,
         seq_lengths,
         num_samples=1,
         resampling_criterion=ess_criterion,
         parallel_iterations=30,
         swap_memory=True,
         random_seed=None):
  """Computes the FIVO lower bound on the log marginal probability.

  This method accepts a stochastic latent variable model and some observations
  and computes a stochastic lower bound on the log marginal probability of the
  observations. The lower bound is defined by a particle filter's unbiased
  estimate of the marginal probability of the observations. For more details see
  "Filtering Variational Objectives" by Maddison et al.
  https://arxiv.org/abs/1705.09279.

  When the resampling criterion is "never resample", this bound becomes IWAE.

  Args:
    cell: A callable that implements one timestep of the model. See
      models/vrnn.py for an example.
    inputs: The inputs to the model. A potentially nested list or tuple of
      Tensors each of shape [max_seq_len, batch_size, ...]. The Tensors must
      have a rank at least two and have matching shapes in the first two
      dimensions, which represent time and the batch respectively. At each
      timestep 'cell' will be called with a slice of the Tensors in inputs.
    seq_lengths: A [batch_size] Tensor of ints encoding the length of each
      sequence in the batch (sequences can be padded to a common length).
    num_samples: The number of particles to use in each particle filter.
    resampling_criterion: The resampling criterion to use for this particle
      filter. Must accept the number of samples, the effective sample size,
      and the current timestep and return a boolean Tensor of shape [batch_size]
      indicating whether each particle filter should resample. See
      ess_criterion and related functions defined in this file for examples.
    parallel_iterations: The number of parallel iterations to use for the
      internal while loop. Note that values greater than 1 can introduce
      non-determinism even when random_seed is provided.
    swap_memory: Whether GPU-CPU memory swapping should be enabled for the
      internal while loop.
    random_seed: The random seed to pass to the resampling operations in
      the particle filter. Mainly useful for testing.

  Returns:
    log_p_hat: A Tensor of shape [batch_size] containing FIVO's estimate of the
      log marginal probability of the observations.
    kl: A Tensor of shape [batch_size] containing the sum over time of the kl
      divergence from q_t(z_t|x) to p_t(z_t), averaged over particles. Note that
      this includes kl terms from trajectories that are culled during resampling
      steps.
    log_weights: A Tensor of shape [max_seq_len, batch_size, num_samples]
      containing the log weights at each timestep of the particle filter. Note
      that on timesteps when a resampling operation is performed the log weights
      are reset to 0. Will not be valid for timesteps past the end of a
      sequence.
    log_ess: A Tensor of shape [max_seq_len, batch_size] containing the log
      effective sample size of each particle filter at each timestep. Will not
      be valid for timesteps past the end of a sequence.
    resampled: A Tensor of shape [max_seq_len, batch_size] indicating when the
      particle filters resampled. Will be 1.0 on timesteps when resampling
      occurred and 0.0 on timesteps when it did not.
  """
  # batch_size represents the number of particle filters running in parallel.
  batch_size = tf.shape(seq_lengths)[0]
  max_seq_len = tf.reduce_max(seq_lengths)
  # seq_mask is [max_seq_len, batch_size]; 1.0 inside a sequence, 0.0 in
  # padding, so masked timesteps contribute nothing to the accumulators.
  seq_mask = tf.transpose(
      tf.sequence_mask(seq_lengths, maxlen=max_seq_len, dtype=tf.float32),
      perm=[1, 0])
  # Each sequence in the batch will be the input data for a different
  # particle filter. The batch will be laid out as:
  #   particle 1 of particle filter 1
  #   particle 1 of particle filter 2
  #   ...
  #   particle 1 of particle filter batch_size
  #   particle 2 of particle filter 1
  #   ...
  #   particle num_samples of particle filter batch_size
  if num_samples > 1:
    inputs, seq_mask = nested.tile_tensors([inputs, seq_mask], [1, num_samples])
  inputs_ta, mask_ta = nested.tas_for_tensors([inputs, seq_mask], max_seq_len)
  t0 = tf.constant(0, tf.int32)
  init_states = cell.zero_state(batch_size * num_samples, tf.float32)
  ta_names = ['log_weights', 'log_ess', 'resampled']
  tas = [tf.TensorArray(tf.float32, max_seq_len, name='%s_ta' % n)
         for n in ta_names]
  log_weights_acc = tf.zeros([num_samples, batch_size], dtype=tf.float32)
  log_p_hat_acc = tf.zeros([batch_size], dtype=tf.float32)
  kl_acc = tf.zeros([num_samples * batch_size], dtype=tf.float32)
  accs = (log_weights_acc, log_p_hat_acc, kl_acc)

  def while_predicate(t, *unused_args):
    # Iterate over every timestep up to the longest sequence in the batch.
    return t < max_seq_len

  def while_step(t, rnn_state, tas, accs):
    """Implements one timestep of FIVO computation."""
    log_weights_acc, log_p_hat_acc, kl_acc = accs
    cur_inputs, cur_mask = nested.read_tas([inputs_ta, mask_ta], t)
    # Run the cell for one step.
    log_q_z, log_p_z, log_p_x_given_z, kl, new_state = cell(
        cur_inputs,
        rnn_state,
        cur_mask,
    )
    # Compute the incremental weight and use it to update the current
    # accumulated weight.
    kl_acc += kl * cur_mask
    log_alpha = (log_p_x_given_z + log_p_z - log_q_z) * cur_mask
    log_alpha = tf.reshape(log_alpha, [num_samples, batch_size])
    log_weights_acc += log_alpha
    # Calculate the effective sample size.
    # ESS = (sum w)^2 / sum(w^2), computed here in log space.
    ess_num = 2 * tf.reduce_logsumexp(log_weights_acc, axis=0)
    ess_denom = tf.reduce_logsumexp(2 * log_weights_acc, axis=0)
    log_ess = ess_num - ess_denom
    # Calculate the ancestor indices via resampling. Because we maintain the
    # log unnormalized weights, we pass the weights in as logits, allowing
    # the distribution object to apply a softmax and normalize them.
    resampling_dist = tf.contrib.distributions.Categorical(
        logits=tf.transpose(log_weights_acc, perm=[1, 0]))
    ancestor_inds = tf.stop_gradient(
        resampling_dist.sample(sample_shape=num_samples, seed=random_seed))
    # Because the batch is flattened and laid out as discussed
    # above, we must modify ancestor_inds to index the proper samples.
    # The particles in the ith filter are distributed every batch_size rows
    # in the batch, and offset i rows from the top. So, to correct the indices
    # we multiply by the batch_size and add the proper offset. Crucially,
    # when ancestor_inds is flattened the layout of the batch is maintained.
    offset = tf.expand_dims(tf.range(batch_size), 0)
    ancestor_inds = tf.reshape(ancestor_inds * batch_size + offset, [-1])
    noresample_inds = tf.range(num_samples * batch_size)
    # Decide whether or not we should resample; don't resample if we are past
    # the end of a sequence.
    should_resample = resampling_criterion(num_samples, log_ess, t)
    should_resample = tf.logical_and(should_resample,
                                     cur_mask[:batch_size] > 0.)
    float_should_resample = tf.to_float(should_resample)
    # Keep the identity permutation for filters that did not resample.
    ancestor_inds = tf.where(
        tf.tile(should_resample, [num_samples]),
        ancestor_inds,
        noresample_inds)
    new_state = nested.gather_tensors(new_state, ancestor_inds)
    # Update the TensorArrays before we reset the weights so that we capture
    # the incremental weights and not zeros.
    ta_updates = [log_weights_acc, log_ess, float_should_resample]
    new_tas = [ta.write(t, x) for ta, x in zip(tas, ta_updates)]
    # For the particle filters that resampled, update log_p_hat and
    # reset weights to zero.
    log_p_hat_update = tf.reduce_logsumexp(
        log_weights_acc, axis=0) - tf.log(tf.to_float(num_samples))
    log_p_hat_acc += log_p_hat_update * float_should_resample
    log_weights_acc *= (1. - tf.tile(float_should_resample[tf.newaxis, :],
                                     [num_samples, 1]))
    new_accs = (log_weights_acc, log_p_hat_acc, kl_acc)
    return t + 1, new_state, new_tas, new_accs

  _, _, tas, accs = tf.while_loop(
      while_predicate,
      while_step,
      loop_vars=(t0, init_states, tas, accs),
      parallel_iterations=parallel_iterations,
      swap_memory=swap_memory)

  log_weights, log_ess, resampled = [x.stack() for x in tas]
  final_log_weights, log_p_hat, kl = accs
  # Add in the final weight update to log_p_hat.
  log_p_hat += (tf.reduce_logsumexp(final_log_weights, axis=0) -
                tf.log(tf.to_float(num_samples)))
  kl = tf.reduce_mean(tf.reshape(kl, [num_samples, batch_size]), axis=0)
  # Reorder to [max_seq_len, batch_size, num_samples] for the caller.
  log_weights = tf.transpose(log_weights, perm=[0, 2, 1])
  return log_p_hat, kl, log_weights, log_ess, resampled
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to calculate the mean of a pianoroll dataset.
Given a pianoroll pickle file, this script loads the dataset and
calculates the mean of the training set. Then it updates the pickle file
so that the key "train_mean" points to the mean vector.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pickle
import numpy as np
import tensorflow as tf
from datasets import sparse_pianoroll_to_dense
# Path to the pickled pianoroll dataset to update; required.
tf.app.flags.DEFINE_string('in_file', None,
                           'Filename of the pickled pianoroll dataset to load.')
# Optional output path; when unset, main() writes back to in_file in place.
tf.app.flags.DEFINE_string('out_file', None,
                           'Name of the output pickle file. Defaults to in_file, '
                           'updating the input pickle file.')
tf.app.flags.mark_flag_as_required('in_file')
FLAGS = tf.app.flags.FLAGS
# MIDI pitch bounds of the pianoroll representation; 21..108 is the
# standard 88-key piano range.
MIN_NOTE = 21
MAX_NOTE = 108
NUM_NOTES = MAX_NOTE - MIN_NOTE + 1
def main(unused_argv):
  """Adds the training-set mean to a pickled pianoroll dataset.

  Loads the pianoroll pickle from FLAGS.in_file, densifies every sequence in
  the 'train' split, computes the elementwise mean over all timesteps, stores
  it under the key 'train_mean', and writes the dataset to FLAGS.out_file
  (defaulting to FLAGS.in_file, i.e. updating in place).
  """
  if FLAGS.out_file is None:
    FLAGS.out_file = FLAGS.in_file
  # Pickle data is binary: open in 'rb' so this also works under Python 3
  # (text mode would attempt to decode the bytes and fail).
  with tf.gfile.Open(FLAGS.in_file, 'rb') as f:
    pianorolls = pickle.load(f)
  dense_pianorolls = [sparse_pianoroll_to_dense(p, MIN_NOTE, NUM_NOTES)[0]
                      for p in pianorolls['train']]
  # Concatenate all elements along the time axis.
  concatenated = np.concatenate(dense_pianorolls, axis=0)
  # Mean over time: one value per note dimension.
  mean = np.mean(concatenated, axis=0)
  pianorolls['train_mean'] = mean
  # Write out the whole pickle file, including the train mean. Use a context
  # manager so the output handle is closed (the original leaked it).
  with open(FLAGS.out_file, 'wb') as out_f:
    pickle.dump(pianorolls, out_f)


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocesses TIMIT from raw wavfiles to create a set of TFRecords.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import glob
import os
import random
import re
import numpy as np
import tensorflow as tf
# Command-line flags for locating the raw TIMIT archive and the output dir.
tf.app.flags.DEFINE_string("raw_timit_dir", None,
                           "Directory containing TIMIT files.")
tf.app.flags.DEFINE_string("out_dir", None,
                           "Output directory for TFRecord files.")
tf.app.flags.DEFINE_float("valid_frac", 0.05,
                          "Fraction of train set to use as valid set. "
                          "Must be between 0.0 and 1.0.")
tf.app.flags.mark_flag_as_required("raw_timit_dir")
tf.app.flags.mark_flag_as_required("out_dir")

FLAGS = tf.app.flags.FLAGS

# Expected file counts for the TIMIT release; main() asserts against these
# as a sanity check after globbing.
NUM_TRAIN_FILES = 4620
NUM_TEST_FILES = 1680
# Number of raw audio samples grouped into one model timestep.
SAMPLES_PER_TIMESTEP = 200

# Regexes for reading SPHERE header files.
SAMPLE_COUNT_REGEX = re.compile(r"sample_count -i (\d+)")
SAMPLE_MIN_REGEX = re.compile(r"sample_min -i (-?\d+)")
SAMPLE_MAX_REGEX = re.compile(r"sample_max -i (-?\d+)")
def get_filenames(split):
  """Returns a sorted list of wav file paths for the given TIMIT split."""
  pattern = os.path.join(FLAGS.raw_timit_dir, "TIMIT", split, "*", "*", "*.WAV")
  # Sorting makes the ordering deterministic across runs and filesystems.
  return sorted(glob.glob(pattern))
def load_timit_wav(filename):
  """Loads a TIMIT wavfile into a numpy array.

  TIMIT wavfiles include a SPHERE header, detailed in the TIMIT docs. The first
  line is the header type and the second is the length of the header in bytes.
  After the header, the remaining bytes are actual WAV data.

  The header includes information about the WAV data such as the number of
  samples and minimum and maximum amplitude. This function asserts that the
  loaded wav data matches the header.

  Args:
    filename: The name of the TIMIT wavfile to load.
  Returns:
    wav: A float32 numpy array containing the loaded wav data.
  """
  # Use a context manager so the file handle is always closed (the original
  # leaked it).
  with open(filename, "rb") as wav_file:
    header_type = wav_file.readline()
    header_length_str = wav_file.readline()
    # The header length includes the length of the first two lines.
    header_remaining_bytes = (int(header_length_str) - len(header_type) -
                              len(header_length_str))
    # Decode the header so the module-level (str) regexes match under both
    # Python 2 and Python 3; SPHERE headers are plain ASCII.
    header = wav_file.read(header_remaining_bytes).decode("ascii")
    # Read the relevant header fields.
    sample_count = int(SAMPLE_COUNT_REGEX.search(header).group(1))
    sample_min = int(SAMPLE_MIN_REGEX.search(header).group(1))
    sample_max = int(SAMPLE_MAX_REGEX.search(header).group(1))
    # np.frombuffer replaces the deprecated np.fromstring.
    wav = np.frombuffer(wav_file.read(), dtype="int16").astype("float32")
  # Check that the loaded data conforms to the header description.
  assert len(wav) == sample_count
  assert wav.min() == sample_min
  assert wav.max() == sample_max
  return wav
def preprocess(wavs, block_size, mean, std):
  """Normalizes wav data and reshapes each sequence into fixed-size blocks.

  Each wav is standardized with the provided mean and std, zero-padded at the
  end so its length is a multiple of block_size, and reshaped to
  [num_blocks, block_size].

  Args:
    wavs: A list of 1-D numpy arrays of audio samples.
    block_size: The number of samples per output row.
    mean: The mean to subtract from every sample.
    std: The standard deviation to divide every sample by.
  Returns:
    A list of 2-D numpy arrays, one per input wav.
  """
  processed = []
  for raw in wavs:
    normalized = (raw - mean) / std
    remainder = normalized.shape[0] % block_size
    if remainder:
      # Zero-pad the tail so the sequence divides evenly into blocks.
      normalized = np.pad(normalized, (0, block_size - remainder), "constant")
    assert normalized.shape[0] % block_size == 0
    processed.append(normalized.reshape((-1, block_size)))
  return processed
def create_tfrecord_from_wavs(wavs, output_file):
  """Writes each processed wav to output_file as a raw float32 TFRecord."""
  writer = tf.python_io.TFRecordWriter(output_file)
  try:
    for wav in wavs:
      # Each record is the raw little-endian float32 bytes of one sequence.
      writer.write(wav.astype(np.float32).tobytes())
  finally:
    writer.close()
def main(unused_argv):
  """Builds train/valid/test TFRecord files from the raw TIMIT wavs."""
  train_filenames = get_filenames("TRAIN")
  test_filenames = get_filenames("TEST")

  num_train_files = len(train_filenames)
  num_test_files = len(test_filenames)
  num_valid_files = int(num_train_files * FLAGS.valid_frac)
  num_train_files -= num_valid_files
  print("%d train / %d valid / %d test" % (
      num_train_files, num_valid_files, num_test_files))

  # Fixed seed so the train/valid split is reproducible across runs.
  random.seed(1234)
  random.shuffle(train_filenames)
  valid_filenames = train_filenames[:num_valid_files]
  train_filenames = train_filenames[num_valid_files:]

  # The three splits must be pairwise disjoint.
  assert set(train_filenames).isdisjoint(test_filenames)
  assert set(train_filenames).isdisjoint(valid_filenames)
  assert set(valid_filenames).isdisjoint(test_filenames)

  train_wavs = [load_timit_wav(f) for f in train_filenames]
  valid_wavs = [load_timit_wav(f) for f in valid_filenames]
  test_wavs = [load_timit_wav(f) for f in test_filenames]
  assert len(train_wavs) + len(valid_wavs) == NUM_TRAIN_FILES
  assert len(test_wavs) == NUM_TEST_FILES

  # Normalization statistics are computed from the training split only.
  train_stacked = np.hstack(train_wavs)
  train_mean = np.mean(train_stacked)
  train_std = np.std(train_stacked)
  print("train mean: %f train std: %f" % (train_mean, train_std))

  # Normalize every split with the train statistics and write it to disk.
  for wavs, name in [(train_wavs, "train"),
                     (valid_wavs, "valid"),
                     (test_wavs, "test")]:
    processed = preprocess(wavs, SAMPLES_PER_TIMESTEP, train_mean, train_std)
    create_tfrecord_from_wavs(processed, os.path.join(FLAGS.out_dir, name))


if __name__ == "__main__":
  tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Code for creating sequence datasets.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pickle
from scipy.sparse import coo_matrix
import tensorflow as tf
# The default number of threads used to process data in parallel. Used as the
# default for the num_parallel_calls arguments of the dataset builders below.
DEFAULT_PARALLELISM = 12
def sparse_pianoroll_to_dense(pianoroll, min_note, num_notes):
  """Converts a sparse pianoroll to a dense numpy array.

  Given a sparse pianoroll, converts it to a dense numpy array of shape
  [num_timesteps, num_notes] where entry i,j is 1.0 if note j is active on
  timestep i and 0.0 otherwise.

  Args:
    pianoroll: A sparse pianoroll object, a list of tuples where the i'th tuple
      contains the indices of the notes active at timestep i.
    min_note: The minimum note in the pianoroll, subtracted from all notes so
      that the minimum note becomes 0.
    num_notes: The number of possible different note indices, determines the
      second dimension of the resulting dense array.
  Returns:
    dense_pianoroll: A [num_timesteps, num_notes] numpy array of floats.
    num_timesteps: A python int, the number of timesteps in the pianoroll.
  """
  num_timesteps = len(pianoroll)
  row_inds = []
  col_inds = []
  for timestep, active_notes in enumerate(pianoroll):
    for note in active_notes:
      row_inds.append(timestep)
      # Re-index the notes so that min_note maps to column 0.
      col_inds.append(note - min_note)
  dense_pianoroll = coo_matrix(
      ([1.] * len(row_inds), (row_inds, col_inds)),
      shape=[num_timesteps, num_notes]).toarray()
  return dense_pianoroll, num_timesteps
def create_pianoroll_dataset(path,
                             split,
                             batch_size,
                             num_parallel_calls=DEFAULT_PARALLELISM,
                             shuffle=False,
                             repeat=False,
                             min_note=21,
                             max_note=108):
  """Creates a pianoroll dataset.

  Args:
    path: The path of a pickle file containing the dataset to load.
    split: The split to use, can be train, test, or valid.
    batch_size: The batch size. If repeat is False then it is not guaranteed
      that the true batch size will match for all batches since batch_size
      may not necessarily evenly divide the number of elements.
    num_parallel_calls: The number of threads to use for parallel processing of
      the data.
    shuffle: If true, shuffles the order of the dataset.
    repeat: If true, repeats the dataset endlessly.
    min_note: The minimum note number of the dataset. For all pianoroll datasets
      the minimum note is number 21, and changing this affects the dimension of
      the data. This is useful mostly for testing.
    max_note: The maximum note number of the dataset. For all pianoroll datasets
      the maximum note is number 108, and changing this affects the dimension of
      the data. This is useful mostly for testing.
  Returns:
    inputs: A batch of input sequences represented as a dense Tensor of shape
      [time, batch_size, data_dimension]. The sequences in inputs are the
      sequences in targets shifted one timestep into the future, padded with
      zeros. This tensor is mean-centered, with the mean taken from the pickle
      file key 'train_mean'.
    targets: A batch of target sequences represented as a dense Tensor of
      shape [time, batch_size, data_dimension].
    lens: An int Tensor of shape [batch_size] representing the lengths of each
      sequence in the batch.
    mean: A float Tensor of shape [data_dimension] containing the mean loaded
      from the pickle file.
  """
  # Load the data from disk. Pickles are binary, so open in 'rb' mode:
  # text mode breaks under Python 3.
  num_notes = max_note - min_note + 1
  with tf.gfile.Open(path, "rb") as f:
    raw_data = pickle.load(f)
  pianorolls = raw_data[split]
  mean = raw_data["train_mean"]
  num_examples = len(pianorolls)

  def pianoroll_generator():
    # Densify lazily, one sequence at a time, as the dataset is consumed.
    for sparse_pianoroll in pianorolls:
      yield sparse_pianoroll_to_dense(sparse_pianoroll, min_note, num_notes)

  dataset = tf.data.Dataset.from_generator(
      pianoroll_generator,
      output_types=(tf.float64, tf.int64),
      output_shapes=([None, num_notes], []))

  if repeat: dataset = dataset.repeat()
  if shuffle: dataset = dataset.shuffle(num_examples)

  # Batch sequences together, padding them to a common length in time.
  dataset = dataset.padded_batch(batch_size,
                                 padded_shapes=([None, num_notes], []))

  def process_pianoroll_batch(data, lengths):
    """Create mean-centered and time-major next-step prediction Tensors."""
    data = tf.to_float(tf.transpose(data, perm=[1, 0, 2]))
    lengths = tf.to_int32(lengths)
    targets = data
    # Mean center the inputs.
    inputs = data - tf.constant(mean, dtype=tf.float32,
                                shape=[1, 1, mean.shape[0]])
    # Shift the inputs one step forward in time. Also remove the last timestep
    # so that targets and inputs are the same length.
    inputs = tf.pad(inputs, [[1, 0], [0, 0], [0, 0]], mode="CONSTANT")[:-1]
    # Mask out unused timesteps.
    inputs *= tf.expand_dims(tf.transpose(
        tf.sequence_mask(lengths, dtype=inputs.dtype)), 2)
    return inputs, targets, lengths

  dataset = dataset.map(process_pianoroll_batch,
                        num_parallel_calls=num_parallel_calls)
  dataset = dataset.prefetch(num_examples)

  itr = dataset.make_one_shot_iterator()
  inputs, targets, lengths = itr.get_next()
  return inputs, targets, lengths, tf.constant(mean, dtype=tf.float32)
def create_speech_dataset(path,
                          batch_size,
                          samples_per_timestep=200,
                          num_parallel_calls=DEFAULT_PARALLELISM,
                          prefetch_buffer_size=2048,
                          shuffle=False,
                          repeat=False):
  """Creates a speech dataset.

  Args:
    path: The path of a possibly sharded TFRecord file containing the data.
    batch_size: The batch size. If repeat is False then it is not guaranteed
      that the true batch size will match for all batches since batch_size
      may not necessarily evenly divide the number of elements.
    samples_per_timestep: The number of audio samples per timestep. Used to
      reshape the data into sequences of shape [time, samples_per_timestep].
      Should not change except for testing -- in all speech datasets 200 is the
      number of samples per timestep.
    num_parallel_calls: The number of threads to use for parallel processing of
      the data.
    prefetch_buffer_size: The size of the prefetch queues to use after reading
      and processing the raw data.
    shuffle: If true, shuffles the order of the dataset.
    repeat: If true, repeats the dataset endlessly.
  Returns:
    inputs: A batch of input sequences represented as a dense Tensor of shape
      [time, batch_size, samples_per_timestep]. The sequences in inputs are the
      sequences in targets shifted one timestep into the future, padded with
      zeros.
    targets: A batch of target sequences represented as a dense Tensor of
      shape [time, batch_size, samples_per_timestep].
    lens: An int Tensor of shape [batch_size] representing the lengths of each
      sequence in the batch.
  """
  # TFRecordDataset accepts a list of filenames; a single path works for the
  # unsharded files written by the preprocessing script in this project.
  filenames = [path]

  def read_speech_example(value):
    """Parses a single tf.Example from the TFRecord file."""
    # Each record is the raw float32 bytes of one sequence, so decode and
    # reshape into [time, samples_per_timestep].
    decoded = tf.decode_raw(value, out_type=tf.float32)
    example = tf.reshape(decoded, [-1, samples_per_timestep])
    length = tf.shape(example)[0]
    return example, length

  # Create the dataset from the TFRecord files
  dataset = tf.data.TFRecordDataset(filenames).map(
      read_speech_example, num_parallel_calls=num_parallel_calls)
  dataset = dataset.prefetch(prefetch_buffer_size)

  if repeat: dataset = dataset.repeat()
  if shuffle: dataset = dataset.shuffle(prefetch_buffer_size)

  # Pad every sequence in the batch to the length of the longest one.
  dataset = dataset.padded_batch(
      batch_size, padded_shapes=([None, samples_per_timestep], []))

  def process_speech_batch(data, lengths):
    """Creates Tensors for next step prediction."""
    # Transpose to time-major layout: [time, batch, samples_per_timestep].
    data = tf.transpose(data, perm=[1, 0, 2])
    lengths = tf.to_int32(lengths)
    targets = data
    # Shift the inputs one step forward in time. Also remove the last timestep
    # so that targets and inputs are the same length.
    inputs = tf.pad(data, [[1, 0], [0, 0], [0, 0]], mode="CONSTANT")[:-1]
    # Mask out unused timesteps.
    inputs *= tf.expand_dims(
        tf.transpose(tf.sequence_mask(lengths, dtype=inputs.dtype)), 2)
    return inputs, targets, lengths

  dataset = dataset.map(process_speech_batch,
                        num_parallel_calls=num_parallel_calls)
  dataset = dataset.prefetch(prefetch_buffer_size)

  itr = dataset.make_one_shot_iterator()
  inputs, targets, lengths = itr.get_next()
  return inputs, targets, lengths
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment