/PAD/PAD/_')),
- FLAGS.max_pathlen)
- for path in paths)
-
- # Split the lemma, POS, dependency label, and direction each into a
- # separate feature.
- lemmas, postags, deplabels, dirs = zip(
- *(part.split('/') for part in itertools.chain(*paths)))
-
- lemmas = [self.vocab.get(lemma, 1) for lemma in lemmas]
- postags = [lexnet_common.POSTAG_TO_ID[pos] for pos in postags]
- deplabels = [lexnet_common.DEPLABEL_TO_ID.get(dep, 1) for dep in deplabels]
- dirs = [lexnet_common.DIR_TO_ID.get(d, 0) for d in dirs]
-
- return tf.train.Example(features=tf.train.Features(feature={
- 'pair': _bytes_features(['::'.join((mod, head))]),
- 'rel': _bytes_features([rel]),
- 'rel_id': _int64_features([self.relations[rel]]),
- 'reprs': _bytes_features(raw_paths),
- 'pathlens': _int64_features(pathlens),
- 'counts': _int64_features(counts),
- 'lemmas': _int64_features(lemmas),
- 'dirs': _int64_features(dirs),
- 'deplabels': _int64_features(deplabels),
- 'postags': _int64_features(postags),
- 'x_embedding_id': _int64_features([self.vocab[mod]]),
- 'y_embedding_id': _int64_features([self.vocab[head]]),
- }))
-
-
-def main(_):
- # Read the splits file, if there is one.
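- # Each line of the splits file is: mod<TAB>head<TAB>split.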
- assignments = {}
- if FLAGS.splits:
- with tf.gfile.GFile(FLAGS.splits) as fh:
- parts = (line.split('\t') for line in fh.read().splitlines())
- assignments = {(mod, head): split for mod, head, split in parts}
-
- splits = set(assignments.itervalues())
- if FLAGS.default_split:
- default_split = FLAGS.default_split
- splits.add(FLAGS.default_split)
- elif splits:
- default_split = iter(splits).next()
- else:
- print('Please specify --splits, --default_split, or both', file=sys.stderr)
- return 1
-
- last_mod, last_head, last_label = None, None, None
- raw_paths = collections.Counter()
-
- # Keep track of pairs we've seen so we can detect unsorted input data.
- seen_labeled_pairs = set()
-
- # Set up output compression
- compression_type = getattr(
- tf.python_io.TFRecordCompressionType, FLAGS.compression)
- options = tf.python_io.TFRecordOptions(compression_type=compression_type)
-
- writers = {
- split: tf.python_io.TFRecordWriter(
- os.path.join(FLAGS.output_dir, '%s.tfrecs.gz' % split),
- options=options)
- for split in splits}
-
- create_example = CreateExampleFn()
-
- in_fh = sys.stdin if not FLAGS.input else tf.gfile.GFile(FLAGS.input)
- for lineno, line in enumerate(in_fh, start=1):
- if lineno % 100 == 0:
- print('\rProcessed %d lines...' % lineno, end='', file=sys.stderr)
-
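- # Each input line should have 5 tab-separated columns:
- # modifier, head, label, dependency path, and source.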
- parts = line.decode('utf-8').strip().split('\t')
- if len(parts) != 5:
- print('Skipping line %d: %d columns (expected 5)' % (
- lineno, len(parts)), file=sys.stderr)
-
- continue
-
- mod, head, label, raw_path, source = parts
- if mod == last_mod and head == last_head and label == last_label:
- raw_paths.update([raw_path])
- continue
-
- if last_mod and last_head and last_label and raw_paths:
- if (last_mod, last_head, last_label) in seen_labeled_pairs:
- print('It looks like the input data is not sorted; ignoring extra '
- 'record for (%s::%s, %s) at line %d' % (
- last_mod, last_head, last_label, lineno), file=sys.stderr)
- else:
- ex = create_example(last_mod, last_head, last_label, raw_paths)
- if ex:
- split = assignments.get((last_mod, last_head), default_split)
- writers[split].write(ex.SerializeToString())
-
- seen_labeled_pairs.add((last_mod, last_head, last_label))
-
- last_mod, last_head, last_label = mod, head, label
- raw_paths = collections.Counter()
-
- if last_mod and last_head and last_label and raw_paths:
- ex = create_example(last_mod, last_head, last_label, raw_paths)
- if ex:
- split = assignments.get((last_mod, last_head), default_split)
- writers[split].write(ex.SerializeToString())
-
- for writer in writers.itervalues():
- writer.close()
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/research/lexnet_nc/text_embeddings_to_binary.py b/research/lexnet_nc/text_embeddings_to_binary.py
deleted file mode 100755
index 8226a7654e6da733ba1e8c46810a8ec8afd7a2c0..0000000000000000000000000000000000000000
--- a/research/lexnet_nc/text_embeddings_to_binary.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2017, 2018 Google, Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Converts a text embedding file into a binary format for quicker loading."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import tensorflow as tf
-
-tf.flags.DEFINE_string('input', '', 'text file containing embeddings')
-tf.flags.DEFINE_string('output_vocab', '', 'output file for vocabulary')
-tf.flags.DEFINE_string('output_npy', '', 'output file for binary')
-FLAGS = tf.flags.FLAGS
-
-def main(_):
- vecs = []
- vocab = []
- with tf.gfile.GFile(FLAGS.input) as fh:
- for line in fh:
- parts = line.strip().split()
- vocab.append(parts[0])
- vecs.append([float(x) for x in parts[1:]])
-
- with tf.gfile.GFile(FLAGS.output_vocab, 'w') as fh:
- fh.write('\n'.join(vocab))
- fh.write('\n')
-
- vecs = np.array(vecs, dtype=np.float32)
- np.save(FLAGS.output_npy, vecs, allow_pickle=False)
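- # A usage sketch (an assumption, not part of this script): the two outputs
- # can be read back with, e.g.,
- #   vocab = open(vocab_path).read().splitlines()
- #   vecs = np.load(npy_path)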
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/research/lfads/README.md b/research/lfads/README.md
deleted file mode 100644
index c75b656e4746894c42251e29a530271bb6484e4f..0000000000000000000000000000000000000000
--- a/research/lfads/README.md
+++ /dev/null
@@ -1,224 +0,0 @@
-
-
-
-# LFADS - Latent Factor Analysis via Dynamical Systems
-
-This code implements the model from the paper "[LFADS - Latent Factor Analysis via Dynamical Systems](http://biorxiv.org/content/early/2017/06/20/152884)". It is a sequential variational auto-encoder designed specifically for investigating neuroscience data, but can be applied widely to any time series data. In an unsupervised setting, LFADS is able to decompose time series data into various factors, such as an initial condition, a generative dynamical system, control inputs to that generator, and a low dimensional description of the observed data, called the factors. Additionally, the observation model is a loss on a probability distribution, so when LFADS processes a dataset, a denoised version of the dataset is also created. For example, if the dataset is raw spike counts, then under a negative log-likelihood loss with a Poisson observation distribution, the denoised data would be the inferred Poisson rates.
-
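-As a purely illustrative sketch of that observation model (not code from this
-repository), the per-bin Poisson negative log-likelihood for counts k given
-rates r is r - k*log(r) + log(k!), and the inferred rates that minimize this
-loss over the data are the denoised output:
-
-```python
-import numpy as np
-from scipy.special import gammaln  # log(k!) == gammaln(k + 1)
-
-def poisson_nll(counts, rates):
-  """Per-bin -log P(counts | rates) for independent Poisson observations."""
-  counts = np.asarray(counts, dtype=np.float64)
-  return rates - counts * np.log(rates) + gammaln(counts + 1.0)
-```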
-
-## Prerequisites
-
-The code is written in Python 2.7.6. You will also need:
-
-* **TensorFlow** version 1.5 ([install](https://www.tensorflow.org/install/))
-* **NumPy, SciPy, Matplotlib** ([install SciPy stack](https://www.scipy.org/install.html), contains all of them)
-* **h5py** ([install](https://pypi.python.org/pypi/h5py))
-
-
-## Getting started
-
-Before starting, run the following:
-
-```sh
-$ export PYTHONPATH=$PYTHONPATH:/path/to/your/directory/lfads/
-```
-
-where "path/to/your/directory" is replaced with the path to the LFADS repository (you can get this path by using the `pwd` command). This allows the nested directories to access modules from their parent directory.
-
-## Generate synthetic data
-
-To generate the synthetic datasets, run the following from the top-level lfads directory:
-
-```sh
-$ cd synth_data
-$ ./run_generate_synth_data.sh
-$ cd ..
-```
-
-These synthetic datasets are provided 1. to give insight into how the LFADS algorithm operates, and 2. to provide reasonable starting points for analyses you might be interested in for your own data.
-
-## Train an LFADS model
-
-Now that we have our example datasets, we can train some models! To spin up an LFADS model on the synthetic data, run any of the following commands. For the examples that are in the paper, the important hyperparameters are roughly replicated. Most hyperparameters are insensitive to small changes or won't ever be changed unless you want a very fine level of control. In the first example, all hyperparameter flags are enumerated for easy copy-pasting, but for the rest of the examples only the most important flags (~the first 9) are specified for brevity. For a full list of flags, their descriptions, and their default values, refer to the top of `run_lfads.py`. Please see Table 1 in the Online Methods of the associated paper for definitions of the most important hyperparameters.
-
-```sh
-# Run LFADS on chaotic rnn data with no input pulses (g = 1.5) with spiking noise
-$ python run_lfads.py --kind=train \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnn_no_inputs \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_no_inputs \
---co_dim=0 \
---factors_dim=20 \
---ext_input_dim=0 \
---controller_input_lag=1 \
---output_dist=poisson \
---do_causal_controller=false \
---batch_size=128 \
---learning_rate_init=0.01 \
---learning_rate_stop=1e-05 \
---learning_rate_decay_factor=0.95 \
---learning_rate_n_to_compare=6 \
---do_reset_learning_rate=false \
---keep_prob=0.95 \
---con_dim=128 \
---gen_dim=200 \
---ci_enc_dim=128 \
---ic_dim=64 \
---ic_enc_dim=128 \
---ic_prior_var_min=0.1 \
---gen_cell_input_weight_scale=1.0 \
---cell_weight_scale=1.0 \
---do_feed_factors_to_controller=true \
---kl_start_step=0 \
---kl_increase_steps=2000 \
---kl_ic_weight=1.0 \
---l2_con_scale=0.0 \
---l2_gen_scale=2000.0 \
---l2_start_step=0 \
---l2_increase_steps=2000 \
---ic_prior_var_scale=0.1 \
---ic_post_var_min=0.0001 \
---kl_co_weight=1.0 \
---prior_ar_nvar=0.1 \
---cell_clip_value=5.0 \
---max_ckpt_to_keep_lve=5 \
---do_train_prior_ar_atau=true \
---co_prior_var_scale=0.1 \
---csv_log=fitlog \
---feedback_factors_or_rates=factors \
---do_train_prior_ar_nvar=true \
---max_grad_norm=200.0 \
---device=gpu:0 \
---num_steps_for_gen_ic=100000000 \
---ps_nexamples_to_process=100000000 \
---checkpoint_name=lfads_vae \
---temporal_spike_jitter_width=0 \
---checkpoint_pb_load_name=checkpoint \
---inject_ext_input_to_gen=false \
---co_mean_corr_scale=0.0 \
---gen_cell_rec_weight_scale=1.0 \
---max_ckpt_to_keep=5 \
---output_filename_stem="" \
---ic_prior_var_max=0.1 \
---prior_ar_atau=10.0 \
---do_train_io_only=false \
---do_train_encoder_only=false
-
-# Run LFADS on chaotic rnn data with no input pulses (g = 1.5) with Gaussian noise
-$ python run_lfads.py --kind=train \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=gaussian_chaotic_rnn_no_inputs \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
---co_dim=1 \
---factors_dim=20 \
---output_dist=gaussian
-
-
-# Run LFADS on chaotic rnn data with input pulses (g = 2.5)
-$ python run_lfads.py --kind=train \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnn_inputs_g2p5 \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
---co_dim=1 \
---factors_dim=20 \
---output_dist=poisson
-
-# Run LFADS on multi-session RNN data
-$ python run_lfads.py --kind=train \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnn_multisession \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_multisession \
---factors_dim=10 \
---output_dist=poisson
-
-# Run LFADS on integration to bound model data
-$ python run_lfads.py --kind=train \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=itb_rnn \
---lfads_save_dir=/tmp/lfads_itb_rnn \
---co_dim=1 \
---factors_dim=20 \
---controller_input_lag=0 \
---output_dist=poisson
-
-# Run LFADS on chaotic RNN data with labels
-$ python run_lfads.py --kind=train \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnns_labeled \
---lfads_save_dir=/tmp/lfads_chaotic_rnns_labeled \
---co_dim=0 \
---factors_dim=20 \
---controller_input_lag=0 \
---ext_input_dim=1 \
---output_dist=poisson
-
-# Run LFADS on chaotic rnn data with no input pulses (g = 1.5) with Gaussian noise
-$ python run_lfads.py --kind=train \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnn_no_inputs \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_no_inputs \
---co_dim=0 \
---factors_dim=20 \
---ext_input_dim=0 \
---controller_input_lag=1 \
---output_dist=gaussian
-
-
-```
-
-**Tip**: If you are running LFADS on GPU and would like to run more than one model concurrently, set the `--allow_gpu_growth=True` flag on each job; otherwise one model will claim the entire GPU by default (for performance reasons). Note that you will also need to install the TensorFlow libraries with GPU support.
-
-
-## Visualize a training model
-
-To visualize training curves and various other metrics while training an LFADS model, run TensorBoard on your model directory. For example, to launch TensorBoard for the model trained on the chaotic RNN data with input pulses:
-
-```sh
-tensorboard --logdir=/tmp/lfads_chaotic_rnn_inputs_g2p5
-```
-
-## Evaluate a trained model
-
-Once your model is finished training, there are multiple ways you can evaluate
-it. Below are some sample commands to evaluate an LFADS model trained on the
-chaotic rnn data with input pulses (g = 2.5). The key differences here are
-setting the `--kind` flag to the appropriate mode, as well as the
-`--checkpoint_pb_load_name` flag to `checkpoint_lve` and the `--batch_size` flag
-(if you'd like to make it larger or smaller). All other flags should be the
-same as used in training, so that the same model architecture is built.
-
-```sh
-# Take samples from posterior then average (denoising operation)
-$ python run_lfads.py --kind=posterior_sample_and_average \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnn_inputs_g2p5 \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
---co_dim=1 \
---factors_dim=20 \
---batch_size=1024 \
---checkpoint_pb_load_name=checkpoint_lve
-
-# Sample from prior (generation of completely new samples)
-$ python run_lfads.py --kind=prior_sample \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnn_inputs_g2p5 \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
---co_dim=1 \
---factors_dim=20 \
---batch_size=50 \
---checkpoint_pb_load_name=checkpoint_lve
-
-# Write down model parameters
-$ python run_lfads.py --kind=write_model_params \
---data_dir=/tmp/rnn_synth_data_v1.0/ \
---data_filename_stem=chaotic_rnn_inputs_g2p5 \
---lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
---co_dim=1 \
---factors_dim=20 \
---checkpoint_pb_load_name=checkpoint_lve
-```
-
-## Contact
-
-File any issues with the [issue tracker](https://github.com/tensorflow/models/issues). For any questions or problems, this code is maintained by [@sussillo](https://github.com/sussillo) and [@jazcollins](https://github.com/jazcollins).
-
diff --git a/research/lfads/distributions.py b/research/lfads/distributions.py
deleted file mode 100644
index 351d019af2b16117eb329b6ef1812aa006834b62..0000000000000000000000000000000000000000
--- a/research/lfads/distributions.py
+++ /dev/null
@@ -1,493 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-import numpy as np
-import tensorflow as tf
-from utils import linear, log_sum_exp
-
-class Poisson(object):
- """Poisson distributon
-
- Computes the log probability under the model.
-
- """
- def __init__(self, log_rates):
- """ Create Poisson distributions with log_rates parameters.
-
- Args:
- log_rates: a tensor-like list of log rates underlying the Poisson dist.
- """
- self.logr = log_rates
-
- def logp(self, bin_counts):
- """Compute the log probability for the counts in the bin, under the model.
-
- Args:
- bin_counts: array-like integer counts
-
- Returns:
- The log-probability under the Poisson models for each element of
- bin_counts.
- """
- k = tf.to_float(bin_counts)
- # log poisson(k, r) = log(r^k * e^(-r) / k!) = k log(r) - r - log k!
- # log poisson(k, r=exp(x)) = k * x - exp(x) - lgamma(k + 1)
- return k * self.logr - tf.exp(self.logr) - tf.lgamma(k + 1)
-
-
-def diag_gaussian_log_likelihood(z, mu=0.0, logvar=0.0):
- """Log-likelihood under a Gaussian distribution with diagonal covariance.
- Returns the log-likelihood for each dimension. One should sum the
- results for the log-likelihood under the full multidimensional model.
-
- Args:
- z: The value to compute the log-likelihood of.
- mu: The mean of the Gaussian.
- logvar: The log variance of the Gaussian.
-
- Returns:
- The log-likelihood under the Gaussian model.
- """
-
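- # ln N(z; mu, var) = -0.5 * (ln(var) + ln(2pi) + (z - mu)^2 / var)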
- return -0.5 * (logvar + np.log(2*np.pi) + \
- tf.square((z-mu)/tf.exp(0.5*logvar)))
-
-
-def gaussian_pos_log_likelihood(unused_mean, logvar, noise):
- """Gaussian log-likelihood function for a posterior in VAE
-
- Note: This function is specialized for a posterior distribution, that has the
- form of z = mean + sigma * noise.
-
- Args:
- unused_mean: ignore
- logvar: The log variance of the distribution
- noise: The noise used in the sampling of the posterior.
-
- Returns:
- The log-likelihood under the Gaussian model.
- """
- # ln N(z; mean, sigma) = - ln(sigma) - 0.5 ln 2pi - noise^2 / 2
- return - 0.5 * (logvar + np.log(2 * np.pi) + tf.square(noise))
-
-
-class Gaussian(object):
- """Base class for Gaussian distribution classes."""
- pass
-
-
-class DiagonalGaussian(Gaussian):
- """Diagonal Gaussian with different constant mean and variances in each
- dimension.
- """
-
- def __init__(self, batch_size, z_size, mean, logvar):
- """Create a diagonal gaussian distribution.
-
- Args:
- batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
- z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
- mean: The N-D mean of the distribution.
- logvar: The N-D log variance of the diagonal distribution.
- """
- size__xz = [None, z_size]
- self.mean = mean # bxn already
- self.logvar = logvar # bxn already
- self.noise = noise = tf.random_normal(tf.shape(logvar))
- self.sample = mean + tf.exp(0.5 * logvar) * noise
- mean.set_shape(size__xz)
- logvar.set_shape(size__xz)
- self.sample.set_shape(size__xz)
-
- def logp(self, z=None):
- """Compute the log-likelihood under the distribution.
-
- Args:
- z (optional): value to compute likelihood for, if None, use sample.
-
- Returns:
- The log-likelihood of z under the model.
- """
- if z is None:
- z = self.sample
-
- # This is needed to make sure that the gradients are simple.
- # The value of the function shouldn't change.
- if z == self.sample:
- return gaussian_pos_log_likelihood(self.mean, self.logvar, self.noise)
-
- return diag_gaussian_log_likelihood(z, self.mean, self.logvar)
-
-
-class LearnableDiagonalGaussian(Gaussian):
- """Diagonal Gaussian whose mean and variance are learned parameters."""
-
- def __init__(self, batch_size, z_size, name, mean_init=0.0,
- var_init=1.0, var_min=0.0, var_max=1000000.0):
- """Create a learnable diagonal gaussian distribution.
-
- Args:
- batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
- z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
- name: prefix name for the mean and log TF variables.
- mean_init (optional): The N-D mean initialization of the distribution.
- var_init (optional): The N-D variance initialization of the diagonal
- distribution.
- var_min (optional): The minimum value the learned variance can take in any
- dimension.
- var_max (optional): The maximum value the learned variance can take in any
- dimension.
- """
-
- size_1xn = [1, z_size]
- size__xn = [None, z_size]
- size_bx1 = tf.stack([batch_size, 1])
- assert var_init > 0.0, "Problems"
- assert var_max >= var_min, "Problems"
- assert var_init >= var_min, "Problems"
- assert var_max >= var_init, "Problems"
-
-
- z_mean_1xn = tf.get_variable(name=name+"/mean", shape=size_1xn,
- initializer=tf.constant_initializer(mean_init))
- self.mean_bxn = mean_bxn = tf.tile(z_mean_1xn, size_bx1)
- mean_bxn.set_shape(size__xn) # tile loses shape
-
- log_var_init = np.log(var_init)
- if var_max > var_min:
- var_is_trainable = True
- else:
- var_is_trainable = False
-
- z_logvar_1xn = \
- tf.get_variable(name=(name+"/logvar"), shape=size_1xn,
- initializer=tf.constant_initializer(log_var_init),
- trainable=var_is_trainable)
-
- if var_is_trainable:
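- # Map the learned parameter through exp and a sigmoid so the effective
- # variance always lies between var_min and var_max, then convert back
- # to log-variance for the rest of the graph.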
- z_logit_var_1xn = tf.exp(z_logvar_1xn)
- z_var_1xn = tf.nn.sigmoid(z_logit_var_1xn)*(var_max-var_min) + var_min
- z_logvar_1xn = tf.log(z_var_1xn)
-
- logvar_bxn = tf.tile(z_logvar_1xn, size_bx1)
- self.logvar_bxn = logvar_bxn
- self.noise_bxn = noise_bxn = tf.random_normal(tf.shape(logvar_bxn))
- self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn
-
- def logp(self, z=None):
- """Compute the log-likelihood under the distribution.
-
- Args:
- z (optional): value to compute likelihood for, if None, use sample.
-
- Returns:
- The log-likelihood of z under the model.
- """
- if z is None:
- z = self.sample
-
- # This is needed to make sure that the gradients are simple.
- # The value of the function shouldn't change.
- if z == self.sample_bxn:
- return gaussian_pos_log_likelihood(self.mean_bxn, self.logvar_bxn,
- self.noise_bxn)
-
- return diag_gaussian_log_likelihood(z, self.mean_bxn, self.logvar_bxn)
-
- @property
- def mean(self):
- return self.mean_bxn
-
- @property
- def logvar(self):
- return self.logvar_bxn
-
- @property
- def sample(self):
- return self.sample_bxn
-
-
-class DiagonalGaussianFromInput(Gaussian):
- """Diagonal Gaussian whose mean and variance are conditioned on other
- variables.
-
- Note: the parameters to convert from input to the learned mean and log
- variance are held in this class.
- """
-
- def __init__(self, x_bxu, z_size, name, var_min=0.0):
- """Create an input dependent diagonal Gaussian distribution.
-
- Args:
- x_bxu: The input tensor from which the mean and variance are computed,
- via a linear transformation of x. I.e.
- mu = Wx + b, log(var) = Mx + c
- z_size: The size of the distribution.
- name: The name to prefix to learned variables.
- var_min (optional): Minimal variance allowed. This is an additional
- way to control the amount of information getting through the stochastic
- layer.
- """
- size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size])
- self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name+"/mean"))
- logvar_bxn = linear(x_bxu, z_size, name=(name+"/logvar"))
- if var_min > 0.0:
- logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min)
- self.logvar_bxn = logvar_bxn
-
- self.noise_bxn = noise_bxn = tf.random_normal(size_bxn)
- self.noise_bxn.set_shape([None, z_size])
- self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn
-
- def logp(self, z=None):
- """Compute the log-likelihood under the distribution.
-
- Args:
- z (optional): value to compute likelihood for, if None, use sample.
-
- Returns:
- The log-likelihood of z under the model.
- """
-
- if z is None:
- z = self.sample
-
- # This is needed to make sure that the gradients are simple.
- # The value of the function shouldn't change.
- if z == self.sample_bxn:
- return gaussian_pos_log_likelihood(self.mean_bxn,
- self.logvar_bxn, self.noise_bxn)
-
- return diag_gaussian_log_likelihood(z, self.mean_bxn, self.logvar_bxn)
-
- @property
- def mean(self):
- return self.mean_bxn
-
- @property
- def logvar(self):
- return self.logvar_bxn
-
- @property
- def sample(self):
- return self.sample_bxn
-
-
-class GaussianProcess(object):
- """Base class for Gaussian processes."""
- pass
-
-
-class LearnableAutoRegressive1Prior(GaussianProcess):
- """AR(1) model where autocorrelation and process variance are learned
- parameters. Assumed zero mean.
-
- """
-
- def __init__(self, batch_size, z_size,
- autocorrelation_taus, noise_variances,
- do_train_prior_ar_atau, do_train_prior_ar_nvar,
- num_steps, name):
- """Create a learnable autoregressive (1) process.
-
- Args:
- batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
- z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
- autocorrelation_taus: The autocorrelation time constant of the AR(1)
- process. A value of 0 is uncorrelated gaussian noise.
- noise_variances: The variance of the additive noise, *not* the process
- variance.
- do_train_prior_ar_atau: Train or leave as constant, the autocorrelation?
- do_train_prior_ar_nvar: Train or leave as constant, the noise variance?
- num_steps: Number of steps to run the process.
- name: The name to prefix to learned TF variables.
- """
-
- # Note the use of the plural in all of these quantities. This is intended
- # to mark that even though a sample z_t from the posterior is thought of as a
- # single sample of a multidimensional gaussian, the prior is actually
- # thought of as U AR(1) processes, where U is the dimension of the inferred
- # input.
- size_bx1 = tf.stack([batch_size, 1])
- size__xu = [None, z_size]
- # process variance, the variance at time t over all instantiations of AR(1)
- # with these parameters.
- log_evar_inits_1xu = tf.expand_dims(tf.log(noise_variances), 0)
- self.logevars_1xu = logevars_1xu = \
- tf.Variable(log_evar_inits_1xu, name=name+"/logevars", dtype=tf.float32,
- trainable=do_train_prior_ar_nvar)
- self.logevars_bxu = logevars_bxu = tf.tile(logevars_1xu, size_bx1)
- logevars_bxu.set_shape(size__xu) # tile loses shape
-
- # \tau, which is the autocorrelation time constant of the AR(1) process
- log_atau_inits_1xu = tf.expand_dims(tf.log(autocorrelation_taus), 0)
- self.logataus_1xu = logataus_1xu = \
- tf.Variable(log_atau_inits_1xu, name=name+"/logatau", dtype=tf.float32,
- trainable=do_train_prior_ar_atau)
-
- # phi in x_t = \mu + phi x_tm1 + \eps
- # phi = exp(-1/tau)
- # phi = exp(-1/exp(logtau))
- # phi = exp(-exp(-logtau))
- phis_1xu = tf.exp(-tf.exp(-logataus_1xu))
- self.phis_bxu = phis_bxu = tf.tile(phis_1xu, size_bx1)
- phis_bxu.set_shape(size__xu)
-
- # process noise
- # pvar = evar / (1- phi^2)
- # logpvar = log ( exp(logevar) / (1 - phi^2) )
- # logpvar = logevar - log(1-phi^2)
- # logpvar = logevar - (log(1-phi) + log(1+phi))
- self.logpvars_1xu = \
- logevars_1xu - tf.log(1.0-phis_1xu) - tf.log(1.0+phis_1xu)
- self.logpvars_bxu = logpvars_bxu = tf.tile(self.logpvars_1xu, size_bx1)
- logpvars_bxu.set_shape(size__xu)
-
- # process mean (zero, but included for completeness)
- self.pmeans_bxu = pmeans_bxu = tf.zeros_like(phis_bxu)
-
- # For sampling from the prior during de-novo generation.
- self.means_t = means_t = [None] * num_steps
- self.logvars_t = logvars_t = [None] * num_steps
- self.samples_t = samples_t = [None] * num_steps
- self.gaussians_t = gaussians_t = [None] * num_steps
- sample_bxu = tf.zeros_like(phis_bxu)
- for t in range(num_steps):
- # process variance used here to make process completely stationary
- if t == 0:
- logvar_pt_bxu = self.logpvars_bxu
- else:
- logvar_pt_bxu = self.logevars_bxu
-
- z_mean_pt_bxu = pmeans_bxu + phis_bxu * sample_bxu
- gaussians_t[t] = DiagonalGaussian(batch_size, z_size,
- mean=z_mean_pt_bxu,
- logvar=logvar_pt_bxu)
- sample_bxu = gaussians_t[t].sample
- samples_t[t] = sample_bxu
- logvars_t[t] = logvar_pt_bxu
- means_t[t] = z_mean_pt_bxu
-
- def logp_t(self, z_t_bxu, z_tm1_bxu=None):
- """Compute the log-likelihood under the distribution for a given time t,
- not the whole sequence.
-
- Args:
- z_t_bxu: sample to compute likelihood for at time t.
- z_tm1_bxu (optional): sample condition probability of z_t upon.
-
- Returns:
- The log-likelihood of z_t under the model at time t, i.e.
- log p(z_t|z_tm1_bxu), with p(z_t|z_tm1_bxu) = N(z_tm1_bxu * phis, evar)
-
- """
- if z_tm1_bxu is None:
- return diag_gaussian_log_likelihood(z_t_bxu, self.pmeans_bxu,
- self.logpvars_bxu)
- else:
- means_t_bxu = self.pmeans_bxu + self.phis_bxu * z_tm1_bxu
- logp_tgtm1_bxu = diag_gaussian_log_likelihood(z_t_bxu,
- means_t_bxu,
- self.logevars_bxu)
- return logp_tgtm1_bxu
-
-
-class KLCost_GaussianGaussian(object):
- """log p(x|z) + KL(q||p) terms for Gaussian posterior and Gaussian prior. See
- eqn 10 and Appendix B in the VAE paper for the latter term,
- http://arxiv.org/abs/1312.6114
-
- The log p(x|z) term is the reconstruction error under the model.
- The KL term represents the penalty for passing information from the encoder
- to the decoder.
- To sample KL(q||p), we simply sample
- ln q - ln p
- by drawing samples from q and averaging.
- """
-
- def __init__(self, zs, prior_zs):
- """Create a lower bound in three parts, normalized reconstruction
- cost, normalized KL divergence cost, and their sum.
-
- E_q[ln p(z_i | z_{i+1}) / q(z_i | x)]
- \int q(z) ln p(z) dz = - 0.5 ln(2pi) - 0.5 \sum (ln(sigma_p^2) + \
- sigma_q^2 / sigma_p^2 + (mean_p - mean_q)^2 / sigma_p^2)
-
- \int q(z) ln q(z) dz = - 0.5 ln(2pi) - 0.5 \sum (ln(sigma_q^2) + 1)
-
- Args:
- zs: posterior z ~ q(z|x)
- prior_zs: prior zs
- """
- # L = -KL + log p(x|z), to maximize bound on likelihood
- # -L = KL - log p(x|z), to minimize bound on NLL
- # so 'KL cost' is positive KL divergence
- kl_b = 0.0
- for z, prior_z in zip(zs, prior_zs):
- assert isinstance(z, Gaussian)
- assert isinstance(prior_z, Gaussian)
- # ln(2pi) terms cancel
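- # Closed form: KL(q||p) = 0.5 * sum( ln(var_p) - ln(var_q)
- # + var_q/var_p + (mu_q - mu_p)^2/var_p - 1 )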
- kl_b += 0.5 * tf.reduce_sum(
- prior_z.logvar - z.logvar
- + tf.exp(z.logvar - prior_z.logvar)
- + tf.square((z.mean - prior_z.mean) / tf.exp(0.5 * prior_z.logvar))
- - 1.0, [1])
-
- self.kl_cost_b = kl_b
- self.kl_cost = tf.reduce_mean(kl_b)
-
-
-class KLCost_GaussianGaussianProcessSampled(object):
- """ log p(x|z) + KL(q||p) terms for Gaussian posterior and Gaussian process
- prior via sampling.
-
- The log p(x|z) term is the reconstruction error under the model.
- The KL term represents the penalty for passing information from the encoder
- to the decoder.
- To sample KL(q||p), we simply sample
- ln q - ln p
- by drawing samples from q and averaging.
- """
-
- def __init__(self, post_zs, prior_z_process):
- """Create a lower bound in three parts, normalized reconstruction
- cost, normalized KL divergence cost, and their sum.
-
- Args:
- post_zs: posterior z ~ q(z|x)
- prior_z_process: prior AR(1) process
- """
- assert len(post_zs) > 1, "GP is for time, need more than 1 time step."
- assert isinstance(prior_z_process, GaussianProcess), "Must use GP."
-
- # L = -KL + log p(x|z), to maximize bound on likelihood
- # -L = KL - log p(x|z), to minimize bound on NLL
- # so 'KL cost' is positive KL divergence
- z0_bxu = post_zs[0].sample
- logq_bxu = post_zs[0].logp(z0_bxu)
- logp_bxu = prior_z_process.logp_t(z0_bxu)
- z_tm1_bxu = z0_bxu
- for z_t in post_zs[1:]:
- # posterior is independent in time, prior is not
- z_t_bxu = z_t.sample
- logq_bxu += z_t.logp(z_t_bxu)
- logp_bxu += prior_z_process.logp_t(z_t_bxu, z_tm1_bxu)
- z_tm1_bxu = z_t_bxu
-
- kl_bxu = logq_bxu - logp_bxu
- kl_b = tf.reduce_sum(kl_bxu, [1])
- self.kl_cost_b = kl_b
- self.kl_cost = tf.reduce_mean(kl_b)
diff --git a/research/lfads/lfads.py b/research/lfads/lfads.py
deleted file mode 100644
index 308ebabe90fbbb90701ac0585e7c1eaeaf6e3649..0000000000000000000000000000000000000000
--- a/research/lfads/lfads.py
+++ /dev/null
@@ -1,2170 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-"""
-LFADS - Latent Factor Analysis via Dynamical Systems.
-
-LFADS is an unsupervised method to decompose time series data into
-various factors, such as an initial condition, a generative
-dynamical system, control inputs to that generator, and a low
-dimensional description of the observed data, called the factors.
-Additionally, the observations have a noise model (in this case
-Poisson), so a denoised version of the observations is also created
-(e.g. underlying rates of a Poisson distribution given the observed
-event counts).
-
-The main data structure being passed around is a dataset. This is a dictionary
-of data dictionaries.
-
-DATASET: The top level dictionary is simply name (string -> dictionary).
-The nested dictionary is the DATA DICTIONARY, which has the following keys:
- 'train_data' and 'valid_data', whose values are the corresponding training
- and validation data with shape
- ExTxD, E - # examples, T - # time steps, D - # dimensions in data.
- The data dictionary also has a few more keys:
- 'train_ext_input' and 'valid_ext_input', if there are known external inputs
- to the system being modeled, these take on dimensions:
- ExTxI, E - # examples, T - # time steps, I = # dimensions in input.
- 'alignment_matrix_cxf' - If you are using multiple days of data, it's possible
- that one can align the channels (see manuscript). If so, each dataset will
- contain this matrix, which will be used for both the input adapter and the
- output adapter for each dataset. These matrices, if provided, must be of
- size [data_dim x factors] where data_dim is the number of neurons recorded
- on that day, and factors is chosen and set through the '--factors_dim' flag.
- 'alignment_bias_c' - See alignment_matrix_cxf. This bias will be used as
- the offset for the alignment transformation. It will *subtract* off the
- bias from the data, so PCA-style inits can align factors across sessions.
-
-
- If one runs LFADS on data where the true rates are known for some trials
- (say simulated, testing data, as in the example shipped with the paper), then
- one can add three more fields for plotting purposes. These are 'train_truth'
- and 'valid_truth', and 'conversion_factor'. These have the same dimensions as
- 'train_data', and 'valid_data' but represent the underlying rates of the
- observations. Finally, if one needs to convert scale for plotting the true
- underlying firing rates, there is the 'conversion_factor' key.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-import numpy as np
-import os
-import tensorflow as tf
-from distributions import LearnableDiagonalGaussian, DiagonalGaussianFromInput
-from distributions import diag_gaussian_log_likelihood
-from distributions import KLCost_GaussianGaussian, Poisson
-from distributions import LearnableAutoRegressive1Prior
-from distributions import KLCost_GaussianGaussianProcessSampled
-
-from utils import init_linear, linear, list_t_bxn_to_tensor_bxtxn, write_data
-from utils import log_sum_exp, flatten
-from plot_lfads import plot_lfads
-
-
-class GRU(object):
- """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
-
- """
- def __init__(self, num_units, forget_bias=1.0, weight_scale=1.0,
- clip_value=np.inf, collections=None):
- """Create a GRU object.
-
- Args:
- num_units: Number of units in the GRU
- forget_bias (optional): Hack to help learning.
- weight_scale (optional): weights are scaled by ws/sqrt(#inputs), with
- ws being the weight scale.
- clip_value (optional): if the recurrent values grow above this value,
- clip them.
- collections (optional): List of additional collections the variables
- should belong to.
- """
- self._num_units = num_units
- self._forget_bias = forget_bias
- self._weight_scale = weight_scale
- self._clip_value = clip_value
- self._collections = collections
-
- @property
- def state_size(self):
- return self._num_units
-
- @property
- def output_size(self):
- return self._num_units
-
- @property
- def state_multiplier(self):
- return 1
-
- def output_from_state(self, state):
- """Return the output portion of the state."""
- return state
-
- def __call__(self, inputs, state, scope=None):
- """Gated recurrent unit (GRU) function.
-
- Args:
- inputs: A 2D batch x input_dim tensor of inputs.
- state: The previous state from the last time step.
- scope (optional): TF variable scope for defined GRU variables.
-
- Returns:
- A tuple (state, state), where state is the newly computed state at time t.
- It is returned twice to respect an interface that works for LSTMs.
- """
-
- x = inputs
- h = state
- if inputs is not None:
- xh = tf.concat(axis=1, values=[x, h])
- else:
- xh = h
-
- with tf.variable_scope(scope or type(self).__name__): # "GRU"
- with tf.variable_scope("Gates"): # Reset gate and update gate.
- # We start with bias of 1.0 to not reset and not update.
- r, u = tf.split(axis=1, num_or_size_splits=2, value=linear(xh,
- 2 * self._num_units,
- alpha=self._weight_scale,
- name="xh_2_ru",
- collections=self._collections))
- r, u = tf.sigmoid(r), tf.sigmoid(u + self._forget_bias)
- with tf.variable_scope("Candidate"):
- xrh = tf.concat(axis=1, values=[x, r * h])
- c = tf.tanh(linear(xrh, self._num_units, name="xrh_2_c",
- collections=self._collections))
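- # Update gate u interpolates between the previous state h and the
- # candidate c; the result is clipped for stability.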
- new_h = u * h + (1 - u) * c
- new_h = tf.clip_by_value(new_h, -self._clip_value, self._clip_value)
-
- return new_h, new_h
-
-
-class GenGRU(object):
- """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
-
- This version is specialized for the generator, but isn't as fast, so
- we have two. Note this allows for l2 regularization on the recurrent
- weights, but it also implicitly rescales the inputs to be large in
- magnitude (via the ws/sqrt(#inputs) scaling in the linear helper routine)
- if there are fewer inputs than recurrent state dimensions.
-
- """
- def __init__(self, num_units, forget_bias=1.0,
- input_weight_scale=1.0, rec_weight_scale=1.0, clip_value=np.inf,
- input_collections=None, recurrent_collections=None):
- """Create a GRU object.
-
- Args:
- num_units: Number of units in the GRU
- forget_bias (optional): Hack to help learning.
- input_weight_scale (optional): weights are scaled ws/sqrt(#inputs), with
- ws being the weight scale.
- rec_weight_scale (optional): weights are scaled ws/sqrt(#inputs),
- with ws being the weight scale.
- clip_value (optional): if the recurrent values grow above this value,
- clip them.
- input_collections (optional): List of additional collections the
- input->rec weights should belong to.
- recurrent_collections (optional): List of additional collections the
- rec->rec weights should belong to.
- """
- self._num_units = num_units
- self._forget_bias = forget_bias
- self._input_weight_scale = input_weight_scale
- self._rec_weight_scale = rec_weight_scale
- self._clip_value = clip_value
- self._input_collections = input_collections
- self._rec_collections = recurrent_collections
-
- @property
- def state_size(self):
- return self._num_units
-
- @property
- def output_size(self):
- return self._num_units
-
- @property
- def state_multiplier(self):
- return 1
-
- def output_from_state(self, state):
- """Return the output portion of the state."""
- return state
-
- def __call__(self, inputs, state, scope=None):
- """Gated recurrent unit (GRU) function.
-
- Args:
- inputs: A 2D batch x input_dim tensor of inputs.
- state: The previous state from the last time step.
- scope (optional): TF variable scope for defined GRU variables.
-
- Returns:
- A tuple (state, state), where state is the newly computed state at time t.
- It is returned twice to respect an interface that works for LSTMs.
- """
-
- x = inputs
- h = state
- with tf.variable_scope(scope or type(self).__name__): # "GRU"
- with tf.variable_scope("Gates"): # Reset gate and update gate.
- # We start with bias of 1.0 to not reset and not update.
- r_x = u_x = 0.0
- if x is not None:
- r_x, u_x = tf.split(axis=1, num_or_size_splits=2, value=linear(x,
- 2 * self._num_units,
- alpha=self._input_weight_scale,
- do_bias=False,
- name="x_2_ru",
- normalized=False,
- collections=self._input_collections))
-
- r_h, u_h = tf.split(axis=1, num_or_size_splits=2, value=linear(h,
- 2 * self._num_units,
- do_bias=True,
- alpha=self._rec_weight_scale,
- name="h_2_ru",
- collections=self._rec_collections))
- r = r_x + r_h
- u = u_x + u_h
- r, u = tf.sigmoid(r), tf.sigmoid(u + self._forget_bias)
-
- with tf.variable_scope("Candidate"):
- c_x = 0.0
- if x is not None:
- c_x = linear(x, self._num_units, name="x_2_c", do_bias=False,
- alpha=self._input_weight_scale,
- normalized=False,
- collections=self._input_collections)
- c_rh = linear(r*h, self._num_units, name="rh_2_c", do_bias=True,
- alpha=self._rec_weight_scale,
- collections=self._rec_collections)
- c = tf.tanh(c_x + c_rh)
-
- new_h = u * h + (1 - u) * c
- new_h = tf.clip_by_value(new_h, -self._clip_value, self._clip_value)
-
- return new_h, new_h
-
-
-class LFADS(object):
- """LFADS - Latent Factor Analysis via Dynamical Systems.
-
- LFADS is an unsupervised method to decompose time series data into
- various factors, such as an initial condition, a generative
- dynamical system, inferred inputs to that generator, and a low
- dimensional description of the observed data, called the factors.
- Additionally, the observations have a noise model (in this case
- Poisson), so a denoised version of the observations is also created
- (e.g. underlying rates of a Poisson distribution given the observed
- event counts).
- """
-
- def __init__(self, hps, kind="train", datasets=None):
- """Create an LFADS model.
-
- train - a model for training, sampling of posteriors is used
- posterior_sample_and_average - sample from the posterior, this is used
- for evaluating the expected value of the outputs of LFADS, given a
- specific input, by averaging over multiple samples from the approx
- posterior. Also used for the lower bound on the negative
- log-likelihood using IWAE error (Importance Weighted Auto-encoder).
- This is the denoising operation.
- prior_sample - a model for generation - sampling from priors is used
-
- Args:
- hps: The dictionary of hyper parameters.
- kind: the type of model to build (see above).
- datasets: a dictionary of named data_dictionaries, see top of lfads.py
- """
- print("Building graph...")
- all_kinds = ['train', 'posterior_sample_and_average', 'posterior_push_mean',
- 'prior_sample']
- assert kind in all_kinds, 'Wrong kind'
- if hps.feedback_factors_or_rates == "rates":
- assert len(hps.dataset_names) == 1, \
- "Multiple datasets not supported for rate feedback."
- num_steps = hps.num_steps
- ic_dim = hps.ic_dim
- co_dim = hps.co_dim
- ext_input_dim = hps.ext_input_dim
- cell_class = GRU
- gen_cell_class = GenGRU
-
- def makelambda(v): # Used with tf.case
- return lambda: v
-
- # Define the data placeholder, and deal with all parts of the graph
- # that are dataset dependent.
- self.dataName = tf.placeholder(tf.string, shape=())
- # The batch_size to be inferred from data, as normal.
- # Additionally, the data_dim will be inferred as well, allowing for a
- # single placeholder for all datasets, regardless of data dimension.
- if hps.output_dist == 'poisson':
- # Enforce correct dtype
- assert np.issubdtype(
- datasets[hps.dataset_names[0]]['train_data'].dtype, int), \
- "Data dtype must be int for poisson output distribution"
- data_dtype = tf.int32
- elif hps.output_dist == 'gaussian':
- assert np.issubdtype(
- datasets[hps.dataset_names[0]]['train_data'].dtype, float), \
- "Data dtype must be float for gaussian output dsitribution"
- data_dtype = tf.float32
- else:
- assert False, "NIY"
- self.dataset_ph = dataset_ph = tf.placeholder(data_dtype,
- [None, num_steps, None],
- name="data")
- self.train_step = tf.get_variable("global_step", [], tf.int64,
- tf.zeros_initializer(),
- trainable=False)
- self.hps = hps
- ndatasets = hps.ndatasets
- factors_dim = hps.factors_dim
- self.preds = preds = [None] * ndatasets
- self.fns_in_fac_Ws = fns_in_fac_Ws = [None] * ndatasets
- self.fns_in_fac_bs = fns_in_fac_bs = [None] * ndatasets
- self.fns_out_fac_Ws = fns_out_fac_Ws = [None] * ndatasets
- self.fns_out_fac_bs = fns_out_fac_bs = [None] * ndatasets
- self.datasetNames = dataset_names = hps.dataset_names
- self.ext_inputs = ext_inputs = None
-
- if len(dataset_names) == 1: # single session
- if 'alignment_matrix_cxf' in datasets[dataset_names[0]].keys():
- used_in_factors_dim = factors_dim
- in_identity_if_poss = False
- else:
- used_in_factors_dim = hps.dataset_dims[dataset_names[0]]
- in_identity_if_poss = True
- else: # multisession
- used_in_factors_dim = factors_dim
- in_identity_if_poss = False
-
- for d, name in enumerate(dataset_names):
- data_dim = hps.dataset_dims[name]
- in_mat_cxf = None
- in_bias_1xf = None
- align_bias_1xc = None
-
- if datasets and 'alignment_matrix_cxf' in datasets[name].keys():
- dataset = datasets[name]
- if hps.do_train_readin:
- print("Initializing trainable readin matrix with alignment matrix" \
- " provided for dataset:", name)
- else:
- print("Setting non-trainable readin matrix to alignment matrix" \
- " provided for dataset:", name)
- in_mat_cxf = dataset['alignment_matrix_cxf'].astype(np.float32)
- if in_mat_cxf.shape != (data_dim, factors_dim):
- raise ValueError("""Alignment matrix must have dimensions %d x %d
- (data_dim x factors_dim), but currently has %d x %d."""%
- (data_dim, factors_dim, in_mat_cxf.shape[0],
- in_mat_cxf.shape[1]))
- if datasets and 'alignment_bias_c' in datasets[name].keys():
- dataset = datasets[name]
- if hps.do_train_readin:
- print("Initializing trainable readin bias with alignment bias " \
- "provided for dataset:", name)
- else:
- print("Setting non-trainable readin bias to alignment bias " \
- "provided for dataset:", name)
- align_bias_c = dataset['alignment_bias_c'].astype(np.float32)
- align_bias_1xc = np.expand_dims(align_bias_c, axis=0)
- if align_bias_1xc.shape[1] != data_dim:
- raise ValueError("""Alignment bias must have dimensions %d
- (data_dim), but currently has %d."""%
- (data_dim, in_mat_cxf.shape[0]))
- if in_mat_cxf is not None and align_bias_1xc is not None:
- # (data - alignment_bias) * W_in
- # data * W_in - alignment_bias * W_in
- # So b = -alignment_bias * W_in to accommodate PCA style offset.
- in_bias_1xf = -np.dot(align_bias_1xc, in_mat_cxf)
-
- if hps.do_train_readin:
- # Only add to the IO_transformations collection if we want it to be
- # learnable, because the IO_transformations collection will be trained
- # when do_train_io_only is set.
- collections_readin=['IO_transformations']
- else:
- collections_readin=None
-
- in_fac_lin = init_linear(data_dim, used_in_factors_dim,
- do_bias=True,
- mat_init_value=in_mat_cxf,
- bias_init_value=in_bias_1xf,
- identity_if_possible=in_identity_if_poss,
- normalized=False, name="x_2_infac_"+name,
- collections=collections_readin,
- trainable=hps.do_train_readin)
- in_fac_W, in_fac_b = in_fac_lin
- fns_in_fac_Ws[d] = makelambda(in_fac_W)
- fns_in_fac_bs[d] = makelambda(in_fac_b)
-
- with tf.variable_scope("glm"):
- out_identity_if_poss = False
- if len(dataset_names) == 1 and \
- factors_dim == hps.dataset_dims[dataset_names[0]]:
- out_identity_if_poss = True
- for d, name in enumerate(dataset_names):
- data_dim = hps.dataset_dims[name]
- in_mat_cxf = None
- if datasets and 'alignment_matrix_cxf' in datasets[name].keys():
- dataset = datasets[name]
- in_mat_cxf = dataset['alignment_matrix_cxf'].astype(np.float32)
-
- if datasets and 'alignment_bias_c' in datasets[name].keys():
- dataset = datasets[name]
- align_bias_c = dataset['alignment_bias_c'].astype(np.float32)
- align_bias_1xc = np.expand_dims(align_bias_c, axis=0)
-
- out_mat_fxc = None
- out_bias_1xc = None
- if in_mat_cxf is not None:
- out_mat_fxc = in_mat_cxf.T
- if align_bias_1xc is not None:
- out_bias_1xc = align_bias_1xc
-
- if hps.output_dist == 'poisson':
- out_fac_lin = init_linear(factors_dim, data_dim, do_bias=True,
- mat_init_value=out_mat_fxc,
- bias_init_value=out_bias_1xc,
- identity_if_possible=out_identity_if_poss,
- normalized=False,
- name="fac_2_logrates_"+name,
- collections=['IO_transformations'])
- out_fac_W, out_fac_b = out_fac_lin
-
- elif hps.output_dist == 'gaussian':
- out_fac_lin_mean = \
- init_linear(factors_dim, data_dim, do_bias=True,
- mat_init_value=out_mat_fxc,
- bias_init_value=out_bias_1xc,
- normalized=False,
- name="fac_2_means_"+name,
- collections=['IO_transformations'])
- out_fac_W_mean, out_fac_b_mean = out_fac_lin_mean
-
- mat_init_value = np.zeros([factors_dim, data_dim]).astype(np.float32)
- bias_init_value = np.ones([1, data_dim]).astype(np.float32)
- out_fac_lin_logvar = \
- init_linear(factors_dim, data_dim, do_bias=True,
- mat_init_value=mat_init_value,
- bias_init_value=bias_init_value,
- normalized=False,
- name="fac_2_logvars_"+name,
- collections=['IO_transformations'])
- out_fac_W_logvar, out_fac_b_logvar = out_fac_lin_logvar
- out_fac_W = tf.concat(
- axis=1, values=[out_fac_W_mean, out_fac_W_logvar])
- out_fac_b = tf.concat(
- axis=1, values=[out_fac_b_mean, out_fac_b_logvar])
- else:
- assert False, "NIY"
-
- preds[d] = tf.equal(tf.constant(name), self.dataName)
- data_dim = hps.dataset_dims[name]
- fns_out_fac_Ws[d] = makelambda(out_fac_W)
- fns_out_fac_bs[d] = makelambda(out_fac_b)
-
- pf_pairs_in_fac_Ws = zip(preds, fns_in_fac_Ws)
- pf_pairs_in_fac_bs = zip(preds, fns_in_fac_bs)
- pf_pairs_out_fac_Ws = zip(preds, fns_out_fac_Ws)
- pf_pairs_out_fac_bs = zip(preds, fns_out_fac_bs)
-
- this_in_fac_W = tf.case(pf_pairs_in_fac_Ws, exclusive=True)
- this_in_fac_b = tf.case(pf_pairs_in_fac_bs, exclusive=True)
- this_out_fac_W = tf.case(pf_pairs_out_fac_Ws, exclusive=True)
- this_out_fac_b = tf.case(pf_pairs_out_fac_bs, exclusive=True)
-
- # External inputs (not changing by dataset, by definition).
- if hps.ext_input_dim > 0:
- self.ext_input = tf.placeholder(tf.float32,
- [None, num_steps, ext_input_dim],
- name="ext_input")
- else:
- self.ext_input = None
- ext_input_bxtxi = self.ext_input
-
- self.keep_prob = keep_prob = tf.placeholder(tf.float32, [], "keep_prob")
- self.batch_size = batch_size = int(hps.batch_size)
- self.learning_rate = tf.Variable(float(hps.learning_rate_init),
- trainable=False, name="learning_rate")
- self.learning_rate_decay_op = self.learning_rate.assign(
- self.learning_rate * hps.learning_rate_decay_factor)
-
- # Dropout the data.
- dataset_do_bxtxd = tf.nn.dropout(tf.to_float(dataset_ph), keep_prob)
- if hps.ext_input_dim > 0:
- ext_input_do_bxtxi = tf.nn.dropout(ext_input_bxtxi, keep_prob)
- else:
- ext_input_do_bxtxi = None
-
- # ENCODERS
- def encode_data(dataset_bxtxd, enc_cell, name, forward_or_reverse,
- num_steps_to_encode):
- """Encode data for LFADS
- Args:
- dataset_bxtxd - the data to encode, as a 3-tensor with dims
- batch x time x data dims.
- enc_cell: encoder cell
- name: name of encoder
- forward_or_reverse: string, encode in forward or reverse direction
- num_steps_to_encode: number of steps to encode, 0:num_steps_to_encode
- Returns:
- encoded data as a list with num_steps_to_encode items, in order
- """
- if forward_or_reverse == "forward":
- dstr = "_fwd"
- time_fwd_or_rev = range(num_steps_to_encode)
- else:
- dstr = "_rev"
- time_fwd_or_rev = reversed(range(num_steps_to_encode))
-
- with tf.variable_scope(name+"_enc"+dstr, reuse=False):
- enc_state = tf.tile(
- tf.Variable(tf.zeros([1, enc_cell.state_size]),
- name=name+"_enc_t0"+dstr), tf.stack([batch_size, 1]))
- enc_state.set_shape([None, enc_cell.state_size]) # tile loses shape
-
- enc_outs = [None] * num_steps_to_encode
- for i, t in enumerate(time_fwd_or_rev):
- with tf.variable_scope(name+"_enc"+dstr, reuse=True if i > 0 else None):
- dataset_t_bxd = dataset_bxtxd[:,t,:]
- in_fac_t_bxf = tf.matmul(dataset_t_bxd, this_in_fac_W) + this_in_fac_b
- in_fac_t_bxf.set_shape([None, used_in_factors_dim])
- if ext_input_dim > 0 and not hps.inject_ext_input_to_gen:
- ext_input_t_bxi = ext_input_do_bxtxi[:,t,:]
- enc_input_t_bxfpe = tf.concat(
- axis=1, values=[in_fac_t_bxf, ext_input_t_bxi])
- else:
- enc_input_t_bxfpe = in_fac_t_bxf
- enc_out, enc_state = enc_cell(enc_input_t_bxfpe, enc_state)
- enc_outs[t] = enc_out
-
- return enc_outs
-
- # Encode initial condition means and variances
- # ([x_T, x_T-1, ... x_0] and [x_0, x_1, ... x_T] -> g0/c0)
- self.ic_enc_fwd = [None] * num_steps
- self.ic_enc_rev = [None] * num_steps
- if ic_dim > 0:
- enc_ic_cell = cell_class(hps.ic_enc_dim,
- weight_scale=hps.cell_weight_scale,
- clip_value=hps.cell_clip_value)
- ic_enc_fwd = encode_data(dataset_do_bxtxd, enc_ic_cell,
- "ic", "forward",
- hps.num_steps_for_gen_ic)
- ic_enc_rev = encode_data(dataset_do_bxtxd, enc_ic_cell,
- "ic", "reverse",
- hps.num_steps_for_gen_ic)
- self.ic_enc_fwd = ic_enc_fwd
- self.ic_enc_rev = ic_enc_rev
-
- # Encoder control input means and variances, bi-directional encoding so:
- # ([x_T, x_T-1, ..., x_0] and [x_0, x_1 ... x_T] -> u_t)
- self.ci_enc_fwd = [None] * num_steps
- self.ci_enc_rev = [None] * num_steps
- if co_dim > 0:
- enc_ci_cell = cell_class(hps.ci_enc_dim,
- weight_scale=hps.cell_weight_scale,
- clip_value=hps.cell_clip_value)
- ci_enc_fwd = encode_data(dataset_do_bxtxd, enc_ci_cell,
- "ci", "forward",
- hps.num_steps)
- if hps.do_causal_controller:
- ci_enc_rev = None
- else:
- ci_enc_rev = encode_data(dataset_do_bxtxd, enc_ci_cell,
- "ci", "reverse",
- hps.num_steps)
- self.ci_enc_fwd = ci_enc_fwd
- self.ci_enc_rev = ci_enc_rev
-
- # STOCHASTIC LATENT VARIABLES, priors and posteriors
- # (initial conditions g0, and control inputs, u_t)
- # Note that zs represent all the stochastic latent variables.
- with tf.variable_scope("z", reuse=False):
- self.prior_zs_g0 = None
- self.posterior_zs_g0 = None
- self.g0s_val = None
- if ic_dim > 0:
- self.prior_zs_g0 = \
- LearnableDiagonalGaussian(batch_size, ic_dim, name="prior_g0",
- mean_init=0.0,
- var_min=hps.ic_prior_var_min,
- var_init=hps.ic_prior_var_scale,
- var_max=hps.ic_prior_var_max)
- ic_enc = tf.concat(axis=1, values=[ic_enc_fwd[-1], ic_enc_rev[0]])
- ic_enc = tf.nn.dropout(ic_enc, keep_prob)
- self.posterior_zs_g0 = \
- DiagonalGaussianFromInput(ic_enc, ic_dim, "ic_enc_2_post_g0",
- var_min=hps.ic_post_var_min)
- if kind in ["train", "posterior_sample_and_average",
- "posterior_push_mean"]:
- zs_g0 = self.posterior_zs_g0
- else:
- zs_g0 = self.prior_zs_g0
- if kind in ["train", "posterior_sample_and_average", "prior_sample"]:
- self.g0s_val = zs_g0.sample
- else:
- self.g0s_val = zs_g0.mean
-
- # Priors for controller, 'co' for controller output
- self.prior_zs_co = prior_zs_co = [None] * num_steps
- self.posterior_zs_co = posterior_zs_co = [None] * num_steps
- self.zs_co = zs_co = [None] * num_steps
- self.prior_zs_ar_con = None
- if co_dim > 0:
- # Controller outputs
- autocorrelation_taus = [hps.prior_ar_atau for x in range(hps.co_dim)]
- noise_variances = [hps.prior_ar_nvar for x in range(hps.co_dim)]
- self.prior_zs_ar_con = prior_zs_ar_con = \
- LearnableAutoRegressive1Prior(batch_size, hps.co_dim,
- autocorrelation_taus,
- noise_variances,
- hps.do_train_prior_ar_atau,
- hps.do_train_prior_ar_nvar,
- num_steps, "u_prior_ar1")
-
- # CONTROLLER -> GENERATOR -> RATES
- # (u(t) -> gen(t) -> factors(t) -> rates(t) -> p(x_t|z_t) )
- self.controller_outputs = u_t = [None] * num_steps
- self.con_ics = con_state = None
- self.con_states = con_states = [None] * num_steps
- self.con_outs = con_outs = [None] * num_steps
- self.gen_inputs = gen_inputs = [None] * num_steps
- if co_dim > 0:
- # gen_cell_class here for l2 penalty recurrent weights
- # didn't split the cell_weight scale here, because I doubt it matters
- con_cell = gen_cell_class(hps.con_dim,
- input_weight_scale=hps.cell_weight_scale,
- rec_weight_scale=hps.cell_weight_scale,
- clip_value=hps.cell_clip_value,
- recurrent_collections=['l2_con_reg'])
- with tf.variable_scope("con", reuse=False):
- self.con_ics = tf.tile(
- tf.Variable(tf.zeros([1, hps.con_dim*con_cell.state_multiplier]),
- name="c0"),
- tf.stack([batch_size, 1]))
- self.con_ics.set_shape([None, con_cell.state_size]) # tile loses shape
- con_states[-1] = self.con_ics
-
- gen_cell = gen_cell_class(hps.gen_dim,
- input_weight_scale=hps.gen_cell_input_weight_scale,
- rec_weight_scale=hps.gen_cell_rec_weight_scale,
- clip_value=hps.cell_clip_value,
- recurrent_collections=['l2_gen_reg'])
- with tf.variable_scope("gen", reuse=False):
- if ic_dim == 0:
- self.gen_ics = tf.tile(
- tf.Variable(tf.zeros([1, gen_cell.state_size]), name="g0"),
- tf.stack([batch_size, 1]))
- else:
- self.gen_ics = linear(self.g0s_val, gen_cell.state_size,
- identity_if_possible=True,
- name="g0_2_gen_ic")
-
- self.gen_states = gen_states = [None] * num_steps
- self.gen_outs = gen_outs = [None] * num_steps
- gen_states[-1] = self.gen_ics
- gen_outs[-1] = gen_cell.output_from_state(gen_states[-1])
- self.factors = factors = [None] * num_steps
- factors[-1] = linear(gen_outs[-1], factors_dim, do_bias=False,
- normalized=True, name="gen_2_fac")
-
- self.rates = rates = [None] * num_steps
- # rates[-1] is collected to potentially feed back to controller
- with tf.variable_scope("glm", reuse=False):
- if hps.output_dist == 'poisson':
- log_rates_t0 = tf.matmul(factors[-1], this_out_fac_W) + this_out_fac_b
- log_rates_t0.set_shape([None, None])
- rates[-1] = tf.exp(log_rates_t0) # rate
- rates[-1].set_shape([None, hps.dataset_dims[hps.dataset_names[0]]])
- elif hps.output_dist == 'gaussian':
- mean_n_logvars = tf.matmul(factors[-1],this_out_fac_W) + this_out_fac_b
- mean_n_logvars.set_shape([None, None])
- means_t_bxd, logvars_t_bxd = tf.split(axis=1, num_or_size_splits=2,
- value=mean_n_logvars)
- rates[-1] = means_t_bxd
- else:
- assert False, "NIY"
-
-    # We support multiple output distributions, e.g. Poisson and Gaussian.
-    # These have one and two parameters per data dimension, respectively
-    # (rates vs. means and variances), so the output_dist_params tensor has a
-    # variable size along the 1st dimension, managed via tf.concat and
-    # tf.split. In the gaussian case, for example, it is batch x (D+D),
-    # where the first D dims are the means and the next D dims the variances.
-    # A distribution with 3 parameters would be batch x (D+D+D).
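-    # For example (illustrative only): with D = 3 and a gaussian output, one
-    # row of output_dist_params is [m1, m2, m3, v1, v2, v3], and
-    # tf.split(axis=1, num_or_size_splits=2, value=row) recovers the means
-    # and the variances.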
- self.output_dist_params = dist_params = [None] * num_steps
- self.log_p_xgz_b = log_p_xgz_b = 0.0 # log P(x|z)
- for t in range(num_steps):
- # Controller
- if co_dim > 0:
- # Build inputs for controller
- tlag = t - hps.controller_input_lag
- if tlag < 0:
- con_in_f_t = tf.zeros_like(ci_enc_fwd[0])
- else:
- con_in_f_t = ci_enc_fwd[tlag]
- if hps.do_causal_controller:
-          # If the controller is causal (w.r.t. the data generation process), it
- # cannot see future data. Thus, excluding ci_enc_rev[t] is obvious.
- # Less obvious is the need to exclude factors[t-1]. This arises
- # because information flows from g0 through factors to the controller
- # input. The g0 encoding is backwards, so we must necessarily exclude
- # the factors in order to keep the controller input purely from a
- # forward encoding (however unlikely it is that
- # g0->factors->controller channel might actually be used in this way).
- con_in_list_t = [con_in_f_t]
- else:
- tlag_rev = t + hps.controller_input_lag
- if tlag_rev >= num_steps:
- # better than zeros
- con_in_r_t = tf.zeros_like(ci_enc_rev[0])
- else:
- con_in_r_t = ci_enc_rev[tlag_rev]
- con_in_list_t = [con_in_f_t, con_in_r_t]
-
- if hps.do_feed_factors_to_controller:
- if hps.feedback_factors_or_rates == "factors":
- con_in_list_t.append(factors[t-1])
- elif hps.feedback_factors_or_rates == "rates":
- con_in_list_t.append(rates[t-1])
- else:
- assert False, "NIY"
-
- con_in_t = tf.concat(axis=1, values=con_in_list_t)
- con_in_t = tf.nn.dropout(con_in_t, keep_prob)
- with tf.variable_scope("con", reuse=True if t > 0 else None):
- con_outs[t], con_states[t] = con_cell(con_in_t, con_states[t-1])
- posterior_zs_co[t] = \
- DiagonalGaussianFromInput(con_outs[t], co_dim,
- name="con_to_post_co")
- if kind == "train":
- u_t[t] = posterior_zs_co[t].sample
- elif kind == "posterior_sample_and_average":
- u_t[t] = posterior_zs_co[t].sample
- elif kind == "posterior_push_mean":
- u_t[t] = posterior_zs_co[t].mean
- else:
- u_t[t] = prior_zs_ar_con.samples_t[t]
-
- # Inputs to the generator (controller output + external input)
- if ext_input_dim > 0 and hps.inject_ext_input_to_gen:
- ext_input_t_bxi = ext_input_do_bxtxi[:,t,:]
- if co_dim > 0:
- gen_inputs[t] = tf.concat(axis=1, values=[u_t[t], ext_input_t_bxi])
- else:
- gen_inputs[t] = ext_input_t_bxi
- else:
- gen_inputs[t] = u_t[t]
-
- # Generator
- data_t_bxd = dataset_ph[:,t,:]
- with tf.variable_scope("gen", reuse=True if t > 0 else None):
- gen_outs[t], gen_states[t] = gen_cell(gen_inputs[t], gen_states[t-1])
- gen_outs[t] = tf.nn.dropout(gen_outs[t], keep_prob)
-      with tf.variable_scope("gen", reuse=True):  # the ic step above defined these vars
- factors[t] = linear(gen_outs[t], factors_dim, do_bias=False,
- normalized=True, name="gen_2_fac")
- with tf.variable_scope("glm", reuse=True if t > 0 else None):
- if hps.output_dist == 'poisson':
- log_rates_t = tf.matmul(factors[t], this_out_fac_W) + this_out_fac_b
- log_rates_t.set_shape([None, None])
-          rates[t] = dist_params[t] = tf.exp(
-              tf.clip_by_value(log_rates_t, -hps._clip_value,
-                               hps._clip_value))  # rates feed back
- rates[t].set_shape([None, hps.dataset_dims[hps.dataset_names[0]]])
- loglikelihood_t = Poisson(log_rates_t).logp(data_t_bxd)
-
- elif hps.output_dist == 'gaussian':
- mean_n_logvars = tf.matmul(factors[t],this_out_fac_W) + this_out_fac_b
- mean_n_logvars.set_shape([None, None])
- means_t_bxd, logvars_t_bxd = tf.split(axis=1, num_or_size_splits=2,
- value=mean_n_logvars)
- rates[t] = means_t_bxd # rates feed back to controller
-          dist_params[t] = tf.concat(
-              axis=1,
-              values=[means_t_bxd,
-                      tf.exp(tf.clip_by_value(logvars_t_bxd, -hps._clip_value,
-                                              hps._clip_value))])
- loglikelihood_t = \
- diag_gaussian_log_likelihood(data_t_bxd,
- means_t_bxd, logvars_t_bxd)
- else:
- assert False, "NIY"
-
- log_p_xgz_b += tf.reduce_sum(loglikelihood_t, [1])
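-
-      # At this point log_p_xgz_b accumulates the sum over time and data dims
-      # of log p(x_td | z), the per-example reconstruction log likelihood. For
-      # reference, each poisson term has the standard form:
-      #   log p(x | rate) = x * log(rate) - rate - log(x!)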
-
- # Correlation of inferred inputs cost.
- self.corr_cost = tf.constant(0.0)
- if hps.co_mean_corr_scale > 0.0:
- all_sum_corr = []
- for i in range(hps.co_dim):
- for j in range(i+1, hps.co_dim):
- sum_corr_ij = tf.constant(0.0)
- for t in range(num_steps):
- u_mean_t = posterior_zs_co[t].mean
- sum_corr_ij += u_mean_t[:,i]*u_mean_t[:,j]
- all_sum_corr.append(0.5 * tf.square(sum_corr_ij))
-      # divide by batch size and by the n*(n-1)/2 pairs
-      self.corr_cost = tf.reduce_mean(all_sum_corr)
-
- # Variational Lower Bound on posterior, p(z|x), plus reconstruction cost.
- # KL and reconstruction costs are normalized only by batch size, not by
- # dimension, or by time steps.
- kl_cost_g0_b = tf.zeros_like(batch_size, dtype=tf.float32)
- kl_cost_co_b = tf.zeros_like(batch_size, dtype=tf.float32)
- self.kl_cost = tf.constant(0.0) # VAE KL cost
- self.recon_cost = tf.constant(0.0) # VAE reconstruction cost
- self.nll_bound_vae = tf.constant(0.0)
- self.nll_bound_iwae = tf.constant(0.0) # for eval with IWAE cost.
- if kind in ["train", "posterior_sample_and_average", "posterior_push_mean"]:
- kl_cost_g0_b = 0.0
- kl_cost_co_b = 0.0
- if ic_dim > 0:
- g0_priors = [self.prior_zs_g0]
- g0_posts = [self.posterior_zs_g0]
- kl_cost_g0_b = KLCost_GaussianGaussian(g0_posts, g0_priors).kl_cost_b
- kl_cost_g0_b = hps.kl_ic_weight * kl_cost_g0_b
- if co_dim > 0:
- kl_cost_co_b = \
- KLCost_GaussianGaussianProcessSampled(
- posterior_zs_co, prior_zs_ar_con).kl_cost_b
- kl_cost_co_b = hps.kl_co_weight * kl_cost_co_b
-
- # L = -KL + log p(x|z), to maximize bound on likelihood
- # -L = KL - log p(x|z), to minimize bound on NLL
- # so 'reconstruction cost' is negative log likelihood
- self.recon_cost = - tf.reduce_mean(log_p_xgz_b)
- self.kl_cost = tf.reduce_mean(kl_cost_g0_b + kl_cost_co_b)
-
- lb_on_ll_b = log_p_xgz_b - kl_cost_g0_b - kl_cost_co_b
-
- # VAE error averages outside the log
- self.nll_bound_vae = -tf.reduce_mean(lb_on_ll_b)
-
- # IWAE error averages inside the log
- k = tf.cast(tf.shape(log_p_xgz_b)[0], tf.float32)
- iwae_lb_on_ll = -tf.log(k) + log_sum_exp(lb_on_ll_b)
- self.nll_bound_iwae = -iwae_lb_on_ll
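-
-      # Illustrative form of the two bounds, with L_i the per-example lower
-      # bound on the log likelihood (log p(x|z) - KL):
-      #   VAE:  nll_bound_vae  = -mean_i(L_i)
-      #   IWAE: nll_bound_iwae = -log((1/k) * sum_i(exp(L_i)))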
-
- # L2 regularization on the generator, normalized by number of parameters.
- self.l2_cost = tf.constant(0.0)
- if self.hps.l2_gen_scale > 0.0 or self.hps.l2_con_scale > 0.0:
- l2_costs = []
- l2_numels = []
- l2_reg_var_lists = [tf.get_collection('l2_gen_reg'),
- tf.get_collection('l2_con_reg')]
- l2_reg_scales = [self.hps.l2_gen_scale, self.hps.l2_con_scale]
- for l2_reg_vars, l2_scale in zip(l2_reg_var_lists, l2_reg_scales):
- for v in l2_reg_vars:
- numel = tf.reduce_prod(tf.concat(axis=0, values=tf.shape(v)))
- numel_f = tf.cast(numel, tf.float32)
- l2_numels.append(numel_f)
- v_l2 = tf.reduce_sum(v*v)
- l2_costs.append(0.5 * l2_scale * v_l2)
- self.l2_cost = tf.add_n(l2_costs) / tf.add_n(l2_numels)
-
- # Compute the cost for training, part of the graph regardless.
- # The KL cost can be problematic at the beginning of optimization,
-    # so we allow a linear ramp of the KL weight from 0
-    # to 1 over kl_increase_steps (and similarly for the L2 weight).
- self.kl_decay_step = tf.maximum(self.train_step - hps.kl_start_step, 0)
- self.l2_decay_step = tf.maximum(self.train_step - hps.l2_start_step, 0)
- kl_decay_step_f = tf.cast(self.kl_decay_step, tf.float32)
- l2_decay_step_f = tf.cast(self.l2_decay_step, tf.float32)
- kl_increase_steps_f = tf.cast(hps.kl_increase_steps, tf.float32)
- l2_increase_steps_f = tf.cast(hps.l2_increase_steps, tf.float32)
- self.kl_weight = kl_weight = \
- tf.minimum(kl_decay_step_f / kl_increase_steps_f, 1.0)
- self.l2_weight = l2_weight = \
- tf.minimum(l2_decay_step_f / l2_increase_steps_f, 1.0)
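-
-    # For example (illustrative): with kl_start_step=0 and
-    # kl_increase_steps=2000, the KL weight is 0.25 at step 500 and
-    # saturates at 1.0 from step 2000 onward.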
-
- self.timed_kl_cost = kl_weight * self.kl_cost
- self.timed_l2_cost = l2_weight * self.l2_cost
- self.weight_corr_cost = hps.co_mean_corr_scale * self.corr_cost
- self.cost = self.recon_cost + self.timed_kl_cost + \
- self.timed_l2_cost + self.weight_corr_cost
-
- if kind != "train":
- # save every so often
- self.seso_saver = tf.train.Saver(tf.global_variables(),
- max_to_keep=hps.max_ckpt_to_keep)
- # lowest validation error
- self.lve_saver = tf.train.Saver(tf.global_variables(),
- max_to_keep=hps.max_ckpt_to_keep_lve)
-
- return
-
- # OPTIMIZATION
- # train the io matrices only
- if self.hps.do_train_io_only:
- self.train_vars = tvars = \
- tf.get_collection('IO_transformations',
- scope=tf.get_variable_scope().name)
- # train the encoder only
- elif self.hps.do_train_encoder_only:
- tvars1 = \
- tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
- scope='LFADS/ic_enc_*')
- tvars2 = \
- tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
- scope='LFADS/z/ic_enc_*')
-
- self.train_vars = tvars = tvars1 + tvars2
- # train all variables
- else:
- self.train_vars = tvars = \
- tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
- scope=tf.get_variable_scope().name)
- print("done.")
- print("Model Variables (to be optimized): ")
- total_params = 0
-    for i, tvar in enumerate(tvars):
-      shape = tvar.get_shape().as_list()
-      print("    ", i, tvar.name, shape)
- total_params += np.prod(shape)
- print("Total model parameters: ", total_params)
-
- grads = tf.gradients(self.cost, tvars)
- grads, grad_global_norm = tf.clip_by_global_norm(grads, hps.max_grad_norm)
- opt = tf.train.AdamOptimizer(self.learning_rate, beta1=0.9, beta2=0.999,
- epsilon=1e-01)
- self.grads = grads
- self.grad_global_norm = grad_global_norm
- self.train_op = opt.apply_gradients(
- zip(grads, tvars), global_step=self.train_step)
-
- self.seso_saver = tf.train.Saver(tf.global_variables(),
- max_to_keep=hps.max_ckpt_to_keep)
-
-    # lowest validation error
-    self.lve_saver = tf.train.Saver(tf.global_variables(),
-                                    max_to_keep=hps.max_ckpt_to_keep_lve)
-
- # SUMMARIES, used only during training.
- # example summary
- self.example_image = tf.placeholder(tf.float32, shape=[1,None,None,3],
- name='image_tensor')
- self.example_summ = tf.summary.image("LFADS example", self.example_image,
- collections=["example_summaries"])
-
- # general training summaries
- self.lr_summ = tf.summary.scalar("Learning rate", self.learning_rate)
- self.kl_weight_summ = tf.summary.scalar("KL weight", self.kl_weight)
- self.l2_weight_summ = tf.summary.scalar("L2 weight", self.l2_weight)
- self.corr_cost_summ = tf.summary.scalar("Corr cost", self.weight_corr_cost)
- self.grad_global_norm_summ = tf.summary.scalar("Gradient global norm",
- self.grad_global_norm)
- if hps.co_dim > 0:
- self.atau_summ = [None] * hps.co_dim
- self.pvar_summ = [None] * hps.co_dim
- for c in range(hps.co_dim):
- self.atau_summ[c] = \
- tf.summary.scalar("AR Autocorrelation taus " + str(c),
- tf.exp(self.prior_zs_ar_con.logataus_1xu[0,c]))
- self.pvar_summ[c] = \
- tf.summary.scalar("AR Variances " + str(c),
- tf.exp(self.prior_zs_ar_con.logpvars_1xu[0,c]))
-
- # cost summaries, separated into different collections for
- # training vs validation. We make placeholders for these, because
- # even though the graph computes these costs on a per-batch basis,
- # we want to report the more reliable metric of per-epoch cost.
- kl_cost_ph = tf.placeholder(tf.float32, shape=[], name='kl_cost_ph')
- self.kl_t_cost_summ = tf.summary.scalar("KL cost (train)", kl_cost_ph,
- collections=["train_summaries"])
- self.kl_v_cost_summ = tf.summary.scalar("KL cost (valid)", kl_cost_ph,
- collections=["valid_summaries"])
- l2_cost_ph = tf.placeholder(tf.float32, shape=[], name='l2_cost_ph')
- self.l2_cost_summ = tf.summary.scalar("L2 cost", l2_cost_ph,
- collections=["train_summaries"])
-
- recon_cost_ph = tf.placeholder(tf.float32, shape=[], name='recon_cost_ph')
- self.recon_t_cost_summ = tf.summary.scalar("Reconstruction cost (train)",
- recon_cost_ph,
- collections=["train_summaries"])
- self.recon_v_cost_summ = tf.summary.scalar("Reconstruction cost (valid)",
- recon_cost_ph,
- collections=["valid_summaries"])
-
- total_cost_ph = tf.placeholder(tf.float32, shape=[], name='total_cost_ph')
- self.cost_t_summ = tf.summary.scalar("Total cost (train)", total_cost_ph,
- collections=["train_summaries"])
- self.cost_v_summ = tf.summary.scalar("Total cost (valid)", total_cost_ph,
- collections=["valid_summaries"])
-
- self.kl_cost_ph = kl_cost_ph
- self.l2_cost_ph = l2_cost_ph
- self.recon_cost_ph = recon_cost_ph
- self.total_cost_ph = total_cost_ph
-
- # Merged summaries, for easy coding later.
- self.merged_examples = tf.summary.merge_all(key="example_summaries")
- self.merged_generic = tf.summary.merge_all() # default key is 'summaries'
- self.merged_train = tf.summary.merge_all(key="train_summaries")
- self.merged_valid = tf.summary.merge_all(key="valid_summaries")
-
- session = tf.get_default_session()
- self.logfile = os.path.join(hps.lfads_save_dir, "lfads_log")
- self.writer = tf.summary.FileWriter(self.logfile)
-
- def build_feed_dict(self, train_name, data_bxtxd, ext_input_bxtxi=None,
- keep_prob=None):
-    """Build the feed dictionary, handling cases where no value is defined.
-
- Args:
- train_name: The key into the datasets, to set the tf.case statement for
- the proper readin / readout matrices.
- data_bxtxd: The data tensor
- ext_input_bxtxi (optional): The external input tensor
-      keep_prob: The dropout keep probability.
-
- Returns:
- The feed dictionary with TF tensors as keys and data as values, for use
- with tf.Session.run()
-
- """
- feed_dict = {}
- B, T, _ = data_bxtxd.shape
- feed_dict[self.dataName] = train_name
- feed_dict[self.dataset_ph] = data_bxtxd
-
- if self.ext_input is not None and ext_input_bxtxi is not None:
- feed_dict[self.ext_input] = ext_input_bxtxi
-
- if keep_prob is None:
- feed_dict[self.keep_prob] = self.hps.keep_prob
- else:
- feed_dict[self.keep_prob] = keep_prob
-
- return feed_dict
-
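-  # Example usage of build_feed_dict (illustrative; the names are
-  # hypothetical):
-  #   fd = model.build_feed_dict('dataset0', data_bxtxd, keep_prob=1.0)
-  #   session.run(ops_to_eval, feed_dict=fd)
-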
- @staticmethod
- def get_batch(data_extxd, ext_input_extxi=None, batch_size=None,
- example_idxs=None):
- """Get a batch of data, either randomly chosen, or specified directly.
-
- Args:
- data_extxd: The data to model, numpy tensors with shape:
- # examples x # time steps x # dimensions
- ext_input_extxi (optional): The external inputs, numpy tensor with shape:
- # examples x # time steps x # external input dimensions
- batch_size: The size of the batch to return
- example_idxs (optional): The example indices used to select examples.
-
- Returns:
- A tuple with two parts:
- 1. Batched data numpy tensor with shape:
- batch_size x # time steps x # dimensions
- 2. Batched external input numpy tensor with shape:
- batch_size x # time steps x # external input dims
- """
-    assert batch_size is not None or example_idxs is not None, \
-      "Must provide batch_size or example_idxs"
- E, T, D = data_extxd.shape
- if example_idxs is None:
- example_idxs = np.random.choice(E, batch_size)
-
- ext_input_bxtxi = None
- if ext_input_extxi is not None:
- ext_input_bxtxi = ext_input_extxi[example_idxs,:,:]
-
- return data_extxd[example_idxs,:,:], ext_input_bxtxi
-
- @staticmethod
- def example_idxs_mod_batch_size(nexamples, batch_size):
- """Given a number of examples, E, and a batch_size, B, generate indices
- [0, 1, 2, ... B-1;
- [B, B+1, ... 2*B-1;
- ...
- ]
- returning those indices as a 2-dim tensor shaped like E/B x B. Note that
- shape is only correct if E % B == 0. If not, then an extra row is generated
- so that the remainder of examples is included. The extra examples are
- explicitly to to the zero index (see randomize_example_idxs_mod_batch_size)
- for randomized behavior.
-
- Args:
- nexamples: The number of examples to batch up.
- batch_size: The size of the batch.
- Returns:
- 2-dim tensor as described above.
- """
- bmrem = batch_size - (nexamples % batch_size)
- bmrem_examples = []
- if bmrem < batch_size:
- ridxs = np.random.permutation(nexamples)[0:bmrem].astype(np.int32)
- bmrem_examples = np.sort(ridxs)
- example_idxs = range(nexamples) + list(bmrem_examples)
- example_idxs_e_x_edivb = np.reshape(example_idxs, [-1, batch_size])
- return example_idxs_e_x_edivb, bmrem
-
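-  # Worked example (illustrative): nexamples=5 and batch_size=2 give
-  # bmrem=1, so one randomly chosen index r is appended, and
-  #   [[0, 1], [2, 3], [4, r]]
-  # is returned together with bmrem=1.
-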
- @staticmethod
- def randomize_example_idxs_mod_batch_size(nexamples, batch_size):
-    """Indices 0:nexamples-1, randomized, in 2D form of
-    shape = (nexamples / batch_size) x batch_size. The remainder
-    is managed by drawing randomly from 0:nexamples-1.
-
- Args:
- nexamples: number of examples to randomize
- batch_size: number of elements in batch
-
- Returns:
-      The randomized, properly shaped indices.
- """
- assert nexamples > batch_size, "Problems"
- bmrem = batch_size - nexamples % batch_size
- bmrem_examples = []
- if bmrem < batch_size:
- bmrem_examples = np.random.choice(range(nexamples),
- size=bmrem, replace=False)
- example_idxs = range(nexamples) + list(bmrem_examples)
- mixed_example_idxs = np.random.permutation(example_idxs)
- example_idxs_e_x_edivb = np.reshape(mixed_example_idxs, [-1, batch_size])
- return example_idxs_e_x_edivb, bmrem
-
- def shuffle_spikes_in_time(self, data_bxtxd):
- """Shuffle the spikes in the temporal dimension. This is useful to
- help the LFADS system avoid overfitting to individual spikes or fast
- oscillations found in the data that are irrelevant to behavior. A
- pure 'tabula rasa' approach would avoid this, but LFADS is sensitive
- enough to pick up dynamics that you may not want.
-
- Args:
- data_bxtxd: numpy array of spike count data to be shuffled.
- Returns:
- S_bxtxd, a numpy array with the same dimensions and contents as
- data_bxtxd, but shuffled appropriately.
-
- """
-
- B, T, N = data_bxtxd.shape
- w = self.hps.temporal_spike_jitter_width
-
- if w == 0:
- return data_bxtxd
-
- max_counts = np.max(data_bxtxd)
- S_bxtxd = np.zeros([B,T,N])
-
-    # Intuitively, shuffle spike occurrences (0 or 1), but since we have
-    # counts, do it repeatedly, up to the max count.
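-    # For example (illustrative): a bin with count 3 at time t contributes
-    # three unit spikes, each independently jittered by an integer drawn from
-    # [-w, w) and reflected at the array boundaries.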
- for mc in range(1,max_counts+1):
- idxs = np.nonzero(data_bxtxd >= mc)
-
- nfound = len(idxs[0])
- shuffles_incrs_in_time = np.random.randint(-w, w, size=nfound)
-
- shuffle_tidxs = idxs[1].copy()
- shuffle_tidxs += shuffles_incrs_in_time
-
- # Reflect on the boundaries to not lose mass.
- shuffle_tidxs[shuffle_tidxs < 0] = -shuffle_tidxs[shuffle_tidxs < 0]
- shuffle_tidxs[shuffle_tidxs > T-1] = \
- (T-1)-(shuffle_tidxs[shuffle_tidxs > T-1] -(T-1))
-
- for iii in zip(idxs[0], shuffle_tidxs, idxs[2]):
- S_bxtxd[iii] += 1
-
- return S_bxtxd
-
- def shuffle_and_flatten_datasets(self, datasets, kind='train'):
- """Since LFADS supports multiple datasets in the same dynamical model,
- we have to be careful to use all the data in a single training epoch. But
-    since the datasets may have different data dimensionality, we cannot batch
- examples from data dictionaries together. Instead, we generate random
- batches within each data dictionary, and then randomize these batches
- while holding onto the dataname, so that when it's time to feed
- the graph, the correct in/out matrices can be selected, per batch.
-
- Args:
- datasets: A dict of data dicts. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- kind: 'train' or 'valid'
-
- Returns:
- A flat list, in which each element is a pair ('name', indices).
- """
- batch_size = self.hps.batch_size
- ndatasets = len(datasets)
- random_example_idxs = {}
- epoch_idxs = {}
- all_name_example_idx_pairs = []
- kind_data = kind + '_data'
- for name, data_dict in datasets.items():
- nexamples, ntime, data_dim = data_dict[kind_data].shape
- epoch_idxs[name] = 0
- random_example_idxs, _ = \
- self.randomize_example_idxs_mod_batch_size(nexamples, batch_size)
-
- epoch_size = random_example_idxs.shape[0]
- names = [name] * epoch_size
- all_name_example_idx_pairs += zip(names, random_example_idxs)
-
- np.random.shuffle(all_name_example_idx_pairs) # shuffle in place
-
- return all_name_example_idx_pairs
-
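-  # Example return value of shuffle_and_flatten_datasets (illustrative):
-  #   [('dataset1', array([4, 0, 9])), ('dataset0', array([2, 7, 5])), ...]
-  # where each array holds batch_size example indices for its dataset.
-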
- def train_epoch(self, datasets, batch_size=None, do_save_ckpt=True):
- """Train the model through the entire dataset once.
-
- Args:
- datasets: A dict of data dicts. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- batch_size (optional): The batch_size to use
- do_save_ckpt (optional): Should the routine save a checkpoint on this
- training epoch?
-
- Returns:
- A tuple with 6 float values:
- (total cost of the epoch, epoch reconstruction cost,
- epoch kl cost, KL weight used this training epoch,
- total l2 cost on generator, and the corresponding weight).
- """
- ops_to_eval = [self.cost, self.recon_cost,
- self.kl_cost, self.kl_weight,
- self.l2_cost, self.l2_weight,
- self.train_op]
- collected_op_values = self.run_epoch(datasets, ops_to_eval, kind="train")
-
- total_cost = total_recon_cost = total_kl_cost = 0.0
- # normalizing by batch done in distributions.py
- epoch_size = len(collected_op_values)
- for op_values in collected_op_values:
- total_cost += op_values[0]
- total_recon_cost += op_values[1]
- total_kl_cost += op_values[2]
-
- kl_weight = collected_op_values[-1][3]
- l2_cost = collected_op_values[-1][4]
- l2_weight = collected_op_values[-1][5]
-
- epoch_total_cost = total_cost / epoch_size
- epoch_recon_cost = total_recon_cost / epoch_size
- epoch_kl_cost = total_kl_cost / epoch_size
-
- if do_save_ckpt:
- session = tf.get_default_session()
- checkpoint_path = os.path.join(self.hps.lfads_save_dir,
- self.hps.checkpoint_name + '.ckpt')
- self.seso_saver.save(session, checkpoint_path,
- global_step=self.train_step)
-
- return epoch_total_cost, epoch_recon_cost, epoch_kl_cost, \
- kl_weight, l2_cost, l2_weight
-
-
- def run_epoch(self, datasets, ops_to_eval, kind="train", batch_size=None,
- do_collect=True, keep_prob=None):
- """Run the model through the entire dataset once.
-
- Args:
- datasets: A dict of data dicts. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- ops_to_eval: A list of tensorflow operations that will be evaluated in
- the tf.session.run() call.
- batch_size (optional): The batch_size to use
- do_collect (optional): Should the routine collect all session.run
- output as a list, and return it?
- keep_prob (optional): The dropout keep probability.
-
- Returns:
- A list of lists, the internal list is the return for the ops for each
- session.run() call. The outer list collects over the epoch.
- """
- hps = self.hps
- all_name_example_idx_pairs = \
- self.shuffle_and_flatten_datasets(datasets, kind)
-
- kind_data = kind + '_data'
- kind_ext_input = kind + '_ext_input'
-
- session = tf.get_default_session()
- epoch_size = len(all_name_example_idx_pairs)
- evaled_ops_list = []
- for name, example_idxs in all_name_example_idx_pairs:
- data_dict = datasets[name]
- data_extxd = data_dict[kind_data]
- if hps.output_dist == 'poisson' and hps.temporal_spike_jitter_width > 0:
- data_extxd = self.shuffle_spikes_in_time(data_extxd)
-
- ext_input_extxi = data_dict[kind_ext_input]
- data_bxtxd, ext_input_bxtxi = self.get_batch(data_extxd, ext_input_extxi,
- example_idxs=example_idxs)
-
- feed_dict = self.build_feed_dict(name, data_bxtxd, ext_input_bxtxi,
- keep_prob=keep_prob)
- evaled_ops_np = session.run(ops_to_eval, feed_dict=feed_dict)
- if do_collect:
- evaled_ops_list.append(evaled_ops_np)
-
- return evaled_ops_list
-
- def summarize_all(self, datasets, summary_values):
- """Plot and summarize stuff in tensorboard.
-
- Note that everything done in the current function is otherwise done on
- a single, randomly selected dataset (except for summary_values, which are
- passed in.)
-
- Args:
-      datasets: The dictionary of datasets used in the study.
- summary_values: These summary values are created from the training loop,
- and so summarize the entire set of datasets.
- """
- hps = self.hps
- tr_kl_cost = summary_values['tr_kl_cost']
- tr_recon_cost = summary_values['tr_recon_cost']
- tr_total_cost = summary_values['tr_total_cost']
- kl_weight = summary_values['kl_weight']
- l2_weight = summary_values['l2_weight']
- l2_cost = summary_values['l2_cost']
- has_any_valid_set = summary_values['has_any_valid_set']
- i = summary_values['nepochs']
-
- session = tf.get_default_session()
- train_summ, train_step = session.run([self.merged_train,
- self.train_step],
- feed_dict={self.l2_cost_ph:l2_cost,
- self.kl_cost_ph:tr_kl_cost,
- self.recon_cost_ph:tr_recon_cost,
- self.total_cost_ph:tr_total_cost})
- self.writer.add_summary(train_summ, train_step)
- if has_any_valid_set:
- ev_kl_cost = summary_values['ev_kl_cost']
- ev_recon_cost = summary_values['ev_recon_cost']
- ev_total_cost = summary_values['ev_total_cost']
- eval_summ = session.run(self.merged_valid,
- feed_dict={self.kl_cost_ph:ev_kl_cost,
- self.recon_cost_ph:ev_recon_cost,
- self.total_cost_ph:ev_total_cost})
- self.writer.add_summary(eval_summ, train_step)
- print("Epoch:%d, step:%d (TRAIN, VALID): total: %.2f, %.2f\
- recon: %.2f, %.2f, kl: %.2f, %.2f, l2: %.5f,\
- kl weight: %.2f, l2 weight: %.2f" % \
- (i, train_step, tr_total_cost, ev_total_cost,
- tr_recon_cost, ev_recon_cost, tr_kl_cost, ev_kl_cost,
- l2_cost, kl_weight, l2_weight))
-
- csv_outstr = "epoch,%d, step,%d, total,%.2f,%.2f, \
- recon,%.2f,%.2f, kl,%.2f,%.2f, l2,%.5f, \
- klweight,%.2f, l2weight,%.2f\n"% \
- (i, train_step, tr_total_cost, ev_total_cost,
- tr_recon_cost, ev_recon_cost, tr_kl_cost, ev_kl_cost,
- l2_cost, kl_weight, l2_weight)
-
- else:
- print("Epoch:%d, step:%d TRAIN: total: %.2f recon: %.2f, kl: %.2f,\
- l2: %.5f, kl weight: %.2f, l2 weight: %.2f" % \
- (i, train_step, tr_total_cost, tr_recon_cost, tr_kl_cost,
- l2_cost, kl_weight, l2_weight))
- csv_outstr = "epoch,%d, step,%d, total,%.2f, recon,%.2f, kl,%.2f, \
- l2,%.5f, klweight,%.2f, l2weight,%.2f\n"% \
- (i, train_step, tr_total_cost, tr_recon_cost,
- tr_kl_cost, l2_cost, kl_weight, l2_weight)
-
- if self.hps.csv_log:
- csv_file = os.path.join(self.hps.lfads_save_dir, self.hps.csv_log+'.csv')
- with open(csv_file, "a") as myfile:
- myfile.write(csv_outstr)
-
-
- def plot_single_example(self, datasets):
-    """Plot an image relating to a randomly chosen, specific example. We
-    perform posterior-sample-and-average by taking one example, filling a
-    whole batch with that example, sampling from the posterior, and then
-    averaging the quantities.
-
- """
- hps = self.hps
- all_data_names = datasets.keys()
- data_name = np.random.permutation(all_data_names)[0]
- data_dict = datasets[data_name]
-    has_valid_set = data_dict['valid_data'] is not None
-    cf = 1.0  # conversion factor, used when plotting against ground truth
-
- # posterior sample and average here
- E, _, _ = data_dict['train_data'].shape
- eidx = np.random.choice(E)
- example_idxs = eidx * np.ones(hps.batch_size, dtype=np.int32)
-
- train_data_bxtxd, train_ext_input_bxtxi = \
- self.get_batch(data_dict['train_data'], data_dict['train_ext_input'],
- example_idxs=example_idxs)
-
- truth_train_data_bxtxd = None
- if 'train_truth' in data_dict and data_dict['train_truth'] is not None:
- truth_train_data_bxtxd, _ = self.get_batch(data_dict['train_truth'],
- example_idxs=example_idxs)
- cf = data_dict['conversion_factor']
-
- # plotter does averaging
- train_model_values = self.eval_model_runs_batch(data_name,
- train_data_bxtxd,
- train_ext_input_bxtxi,
- do_average_batch=False)
-
- train_step = train_model_values['train_steps']
- feed_dict = self.build_feed_dict(data_name, train_data_bxtxd,
- train_ext_input_bxtxi, keep_prob=1.0)
-
- session = tf.get_default_session()
- generic_summ = session.run(self.merged_generic, feed_dict=feed_dict)
- self.writer.add_summary(generic_summ, train_step)
-
- valid_data_bxtxd = valid_model_values = valid_ext_input_bxtxi = None
- truth_valid_data_bxtxd = None
- if has_valid_set:
- E, _, _ = data_dict['valid_data'].shape
- eidx = np.random.choice(E)
- example_idxs = eidx * np.ones(hps.batch_size, dtype=np.int32)
- valid_data_bxtxd, valid_ext_input_bxtxi = \
- self.get_batch(data_dict['valid_data'],
- data_dict['valid_ext_input'],
- example_idxs=example_idxs)
- if 'valid_truth' in data_dict and data_dict['valid_truth'] is not None:
- truth_valid_data_bxtxd, _ = self.get_batch(data_dict['valid_truth'],
- example_idxs=example_idxs)
- else:
- truth_valid_data_bxtxd = None
-
- # plotter does averaging
- valid_model_values = self.eval_model_runs_batch(data_name,
- valid_data_bxtxd,
- valid_ext_input_bxtxi,
- do_average_batch=False)
-
- example_image = plot_lfads(train_bxtxd=train_data_bxtxd,
- train_model_vals=train_model_values,
- train_ext_input_bxtxi=train_ext_input_bxtxi,
- train_truth_bxtxd=truth_train_data_bxtxd,
- valid_bxtxd=valid_data_bxtxd,
- valid_model_vals=valid_model_values,
- valid_ext_input_bxtxi=valid_ext_input_bxtxi,
- valid_truth_bxtxd=truth_valid_data_bxtxd,
- bidx=None, cf=cf, output_dist=hps.output_dist)
- example_image = np.expand_dims(example_image, axis=0)
- example_summ = session.run(self.merged_examples,
- feed_dict={self.example_image : example_image})
- self.writer.add_summary(example_summ)
-
- def train_model(self, datasets):
- """Train the model, print per-epoch information, and save checkpoints.
-
- Loop over training epochs. The function that actually does the
- training is train_epoch. This function iterates over the training
- data, one epoch at a time. The learning rate schedule is such
- that it will stay the same until the cost goes up in comparison to
- the last few values, then it will drop.
-
- Args:
- datasets: A dict of data dicts. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- """
- hps = self.hps
- has_any_valid_set = False
- for data_dict in datasets.values():
- if data_dict['valid_data'] is not None:
- has_any_valid_set = True
- break
-
- session = tf.get_default_session()
- lr = session.run(self.learning_rate)
- lr_stop = hps.learning_rate_stop
- i = -1
- train_costs = []
- valid_costs = []
- ev_total_cost = ev_recon_cost = ev_kl_cost = 0.0
- lowest_ev_cost = np.Inf
- while True:
- i += 1
-      do_save_ckpt = (i % 10 == 0)
- tr_total_cost, tr_recon_cost, tr_kl_cost, kl_weight, l2_cost, l2_weight = \
- self.train_epoch(datasets, do_save_ckpt=do_save_ckpt)
-
- # Evaluate the validation cost, and potentially save. Note that this
- # routine will not save a validation checkpoint until the kl weight and
- # l2 weights are equal to 1.0.
- if has_any_valid_set:
- ev_total_cost, ev_recon_cost, ev_kl_cost = \
- self.eval_cost_epoch(datasets, kind='valid')
- valid_costs.append(ev_total_cost)
-
- # > 1 may give more consistent results, but not the actual lowest vae.
- # == 1 gives the lowest vae seen so far.
- n_lve = 1
- run_avg_lve = np.mean(valid_costs[-n_lve:])
-
- # conditions for saving checkpoints:
- # KL weight must have finished stepping (>=1.0), AND
- # L2 weight must have finished stepping OR L2 is not being used, AND
-        #    the current run has a lower LVE than previous runs, AND
-        #    more than n_lve validation costs have been recorded, so that
-        #    the running average over the last n_lve epochs is well defined.
- if kl_weight >= 1.0 and \
- (l2_weight >= 1.0 or \
- (self.hps.l2_gen_scale == 0.0 and self.hps.l2_con_scale == 0.0)) \
- and (len(valid_costs) > n_lve and run_avg_lve < lowest_ev_cost):
-
- lowest_ev_cost = run_avg_lve
- checkpoint_path = os.path.join(self.hps.lfads_save_dir,
- self.hps.checkpoint_name + '_lve.ckpt')
- self.lve_saver.save(session, checkpoint_path,
- global_step=self.train_step,
- latest_filename='checkpoint_lve')
-
- # Plot and summarize.
- values = {'nepochs':i, 'has_any_valid_set': has_any_valid_set,
- 'tr_total_cost':tr_total_cost, 'ev_total_cost':ev_total_cost,
- 'tr_recon_cost':tr_recon_cost, 'ev_recon_cost':ev_recon_cost,
- 'tr_kl_cost':tr_kl_cost, 'ev_kl_cost':ev_kl_cost,
- 'l2_weight':l2_weight, 'kl_weight':kl_weight,
- 'l2_cost':l2_cost}
- self.summarize_all(datasets, values)
- self.plot_single_example(datasets)
-
- # Manage learning rate.
- train_res = tr_total_cost
- n_lr = hps.learning_rate_n_to_compare
- if len(train_costs) > n_lr and train_res > np.max(train_costs[-n_lr:]):
- _ = session.run(self.learning_rate_decay_op)
- lr = session.run(self.learning_rate)
- print(" Decreasing learning rate to %f." % lr)
- # Force the system to run n_lr times while at this lr.
- train_costs.append(np.inf)
- else:
- train_costs.append(train_res)
-
- if lr < lr_stop:
- print("Stopping optimization based on learning rate criteria.")
- break
-
- def eval_cost_epoch(self, datasets, kind='train', ext_input_extxi=None,
- batch_size=None):
- """Evaluate the cost of the epoch.
-
- Args:
-      datasets: A dict of data dicts. The dataset dict is simply a
-        name(string)-> data dictionary mapping (See top of lfads.py).
-      kind: 'train' or 'valid'.
-
- Returns:
- a 3 tuple of costs:
- (epoch total cost, epoch reconstruction cost, epoch KL cost)
- """
- ops_to_eval = [self.cost, self.recon_cost, self.kl_cost]
- collected_op_values = self.run_epoch(datasets, ops_to_eval, kind=kind,
- keep_prob=1.0)
-
- total_cost = total_recon_cost = total_kl_cost = 0.0
- # normalizing by batch done in distributions.py
- epoch_size = len(collected_op_values)
- for op_values in collected_op_values:
- total_cost += op_values[0]
- total_recon_cost += op_values[1]
- total_kl_cost += op_values[2]
-
- epoch_total_cost = total_cost / epoch_size
- epoch_recon_cost = total_recon_cost / epoch_size
- epoch_kl_cost = total_kl_cost / epoch_size
-
- return epoch_total_cost, epoch_recon_cost, epoch_kl_cost
-
- def eval_model_runs_batch(self, data_name, data_bxtxd, ext_input_bxtxi=None,
- do_eval_cost=False, do_average_batch=False):
- """Returns all the goodies for the entire model, per batch.
-
-    data_bxtxd and ext_input_bxtxi may have fewer than batch_size examples
-    along the first dimension; padding and truncation are handled
-    automatically.
-
- Args:
- data_name: The name of the data dict, to select which in/out matrices
- to use.
- data_bxtxd: Numpy array training data with shape:
- batch_size x # time steps x # dimensions
- ext_input_bxtxi: Numpy array training external input with shape:
- batch_size x # time steps x # external input dims
-      do_eval_cost (optional): If true, evaluate the IWAE (Importance Weighted
-        Autoencoder) log likelihood bound, instead of the VAE version.
- do_average_batch (optional): average over the batch, useful for getting
- good IWAE costs, and model outputs for a single data point.
-
- Returns:
- A dictionary with the outputs of the model decoder, namely:
-        prior g0 mean, prior g0 variance, approx. posterior mean, approx.
-        posterior variance, the generator initial conditions, the control
-        inputs (if enabled), the state of the generator, the factors, and
-        the rates.
- """
- session = tf.get_default_session()
-
- # if fewer than batch_size provided, pad to batch_size
- hps = self.hps
- batch_size = hps.batch_size
- E, _, _ = data_bxtxd.shape
- if E < hps.batch_size:
- data_bxtxd = np.pad(data_bxtxd, ((0, hps.batch_size-E), (0, 0), (0, 0)),
- mode='constant', constant_values=0)
- if ext_input_bxtxi is not None:
- ext_input_bxtxi = np.pad(ext_input_bxtxi,
- ((0, hps.batch_size-E), (0, 0), (0, 0)),
- mode='constant', constant_values=0)
-
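-    # e.g. (illustrative): with E=3 and batch_size=8, five all-zero trials
-    # are appended above and sliced back off below once the graph has run.
-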
- feed_dict = self.build_feed_dict(data_name, data_bxtxd,
- ext_input_bxtxi, keep_prob=1.0)
-
- # Non-temporal signals will be batch x dim.
- # Temporal signals are list length T with elements batch x dim.
- tf_vals = [self.gen_ics, self.gen_states, self.factors,
- self.output_dist_params]
- tf_vals.append(self.cost)
- tf_vals.append(self.nll_bound_vae)
- tf_vals.append(self.nll_bound_iwae)
- tf_vals.append(self.train_step) # not train_op!
- if self.hps.ic_dim > 0:
- tf_vals += [self.prior_zs_g0.mean, self.prior_zs_g0.logvar,
- self.posterior_zs_g0.mean, self.posterior_zs_g0.logvar]
- if self.hps.co_dim > 0:
- tf_vals.append(self.controller_outputs)
- tf_vals_flat, fidxs = flatten(tf_vals)
-
- np_vals_flat = session.run(tf_vals_flat, feed_dict=feed_dict)
-
- ff = 0
- gen_ics = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- gen_states = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- factors = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- out_dist_params = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- costs = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- nll_bound_vaes = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- nll_bound_iwaes = [np_vals_flat[f] for f in fidxs[ff]]; ff +=1
- train_steps = [np_vals_flat[f] for f in fidxs[ff]]; ff +=1
- if self.hps.ic_dim > 0:
- prior_g0_mean = [np_vals_flat[f] for f in fidxs[ff]]; ff +=1
- prior_g0_logvar = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- post_g0_mean = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- post_g0_logvar = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- if self.hps.co_dim > 0:
- controller_outputs = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
-
- # [0] are to take out the non-temporal items from lists
- gen_ics = gen_ics[0]
- costs = costs[0]
- nll_bound_vaes = nll_bound_vaes[0]
- nll_bound_iwaes = nll_bound_iwaes[0]
- train_steps = train_steps[0]
-
- # Convert to full tensors, not lists of tensors in time dim.
- gen_states = list_t_bxn_to_tensor_bxtxn(gen_states)
- factors = list_t_bxn_to_tensor_bxtxn(factors)
- out_dist_params = list_t_bxn_to_tensor_bxtxn(out_dist_params)
- if self.hps.ic_dim > 0:
- # select first time point
- prior_g0_mean = prior_g0_mean[0]
- prior_g0_logvar = prior_g0_logvar[0]
- post_g0_mean = post_g0_mean[0]
- post_g0_logvar = post_g0_logvar[0]
- if self.hps.co_dim > 0:
- controller_outputs = list_t_bxn_to_tensor_bxtxn(controller_outputs)
-
- # slice out the trials in case < batch_size provided
- if E < hps.batch_size:
- idx = np.arange(E)
- gen_ics = gen_ics[idx, :]
- gen_states = gen_states[idx, :]
- factors = factors[idx, :, :]
- out_dist_params = out_dist_params[idx, :, :]
- if self.hps.ic_dim > 0:
- prior_g0_mean = prior_g0_mean[idx, :]
- prior_g0_logvar = prior_g0_logvar[idx, :]
- post_g0_mean = post_g0_mean[idx, :]
- post_g0_logvar = post_g0_logvar[idx, :]
- if self.hps.co_dim > 0:
- controller_outputs = controller_outputs[idx, :, :]
-
- if do_average_batch:
- gen_ics = np.mean(gen_ics, axis=0)
- gen_states = np.mean(gen_states, axis=0)
- factors = np.mean(factors, axis=0)
- out_dist_params = np.mean(out_dist_params, axis=0)
- if self.hps.ic_dim > 0:
- prior_g0_mean = np.mean(prior_g0_mean, axis=0)
- prior_g0_logvar = np.mean(prior_g0_logvar, axis=0)
- post_g0_mean = np.mean(post_g0_mean, axis=0)
- post_g0_logvar = np.mean(post_g0_logvar, axis=0)
- if self.hps.co_dim > 0:
- controller_outputs = np.mean(controller_outputs, axis=0)
-
- model_vals = {}
- model_vals['gen_ics'] = gen_ics
- model_vals['gen_states'] = gen_states
- model_vals['factors'] = factors
- model_vals['output_dist_params'] = out_dist_params
- model_vals['costs'] = costs
- model_vals['nll_bound_vaes'] = nll_bound_vaes
- model_vals['nll_bound_iwaes'] = nll_bound_iwaes
- model_vals['train_steps'] = train_steps
- if self.hps.ic_dim > 0:
- model_vals['prior_g0_mean'] = prior_g0_mean
- model_vals['prior_g0_logvar'] = prior_g0_logvar
- model_vals['post_g0_mean'] = post_g0_mean
- model_vals['post_g0_logvar'] = post_g0_logvar
- if self.hps.co_dim > 0:
- model_vals['controller_outputs'] = controller_outputs
-
- return model_vals
-
- def eval_model_runs_avg_epoch(self, data_name, data_extxd,
- ext_input_extxi=None):
-    """Returns expected values of the model outputs for the entire dataset.
-
- The expected value is taken over hidden (z) variables, namely the initial
-    conditions and the control inputs. The expected value is approximate, and
-    is computed by drawing batch_size samples for every example.
-
- Args:
- data_name: The name of the data dict, to select which in/out matrices
- to use.
- data_extxd: Numpy array training data with shape:
- # examples x # time steps x # dimensions
- ext_input_extxi (optional): Numpy array training external input with
- shape: # examples x # time steps x # external input dims
-
- Returns:
- A dictionary with the averaged outputs of the model decoder, namely:
-        prior g0 mean, prior g0 variance, approx. posterior mean, approx.
-        posterior variance, the generator initial conditions, the control inputs (if
- enabled), the state of the generator, the factors, and the output
- distribution parameters, e.g. (rates or mean and variances).
- """
- hps = self.hps
- batch_size = hps.batch_size
- E, T, D = data_extxd.shape
- E_to_process = hps.ps_nexamples_to_process
- if E_to_process > E:
- E_to_process = E
-
- if hps.ic_dim > 0:
- prior_g0_mean = np.zeros([E_to_process, hps.ic_dim])
- prior_g0_logvar = np.zeros([E_to_process, hps.ic_dim])
- post_g0_mean = np.zeros([E_to_process, hps.ic_dim])
- post_g0_logvar = np.zeros([E_to_process, hps.ic_dim])
-
- if hps.co_dim > 0:
- controller_outputs = np.zeros([E_to_process, T, hps.co_dim])
- gen_ics = np.zeros([E_to_process, hps.gen_dim])
- gen_states = np.zeros([E_to_process, T, hps.gen_dim])
- factors = np.zeros([E_to_process, T, hps.factors_dim])
-
- if hps.output_dist == 'poisson':
- out_dist_params = np.zeros([E_to_process, T, D])
- elif hps.output_dist == 'gaussian':
- out_dist_params = np.zeros([E_to_process, T, D+D])
- else:
- assert False, "NIY"
-
- costs = np.zeros(E_to_process)
- nll_bound_vaes = np.zeros(E_to_process)
- nll_bound_iwaes = np.zeros(E_to_process)
- train_steps = np.zeros(E_to_process)
- for es_idx in range(E_to_process):
- print("Running %d of %d." % (es_idx+1, E_to_process))
- example_idxs = es_idx * np.ones(batch_size, dtype=np.int32)
- data_bxtxd, ext_input_bxtxi = self.get_batch(data_extxd,
- ext_input_extxi,
- batch_size=batch_size,
- example_idxs=example_idxs)
- model_values = self.eval_model_runs_batch(data_name, data_bxtxd,
- ext_input_bxtxi,
- do_eval_cost=True,
- do_average_batch=True)
-
- if self.hps.ic_dim > 0:
- prior_g0_mean[es_idx,:] = model_values['prior_g0_mean']
- prior_g0_logvar[es_idx,:] = model_values['prior_g0_logvar']
- post_g0_mean[es_idx,:] = model_values['post_g0_mean']
- post_g0_logvar[es_idx,:] = model_values['post_g0_logvar']
- gen_ics[es_idx,:] = model_values['gen_ics']
-
- if self.hps.co_dim > 0:
- controller_outputs[es_idx,:,:] = model_values['controller_outputs']
- gen_states[es_idx,:,:] = model_values['gen_states']
- factors[es_idx,:,:] = model_values['factors']
- out_dist_params[es_idx,:,:] = model_values['output_dist_params']
- costs[es_idx] = model_values['costs']
- nll_bound_vaes[es_idx] = model_values['nll_bound_vaes']
- nll_bound_iwaes[es_idx] = model_values['nll_bound_iwaes']
- train_steps[es_idx] = model_values['train_steps']
- print('bound nll(vae): %.3f, bound nll(iwae): %.3f' \
- % (nll_bound_vaes[es_idx], nll_bound_iwaes[es_idx]))
-
- model_runs = {}
- if self.hps.ic_dim > 0:
- model_runs['prior_g0_mean'] = prior_g0_mean
- model_runs['prior_g0_logvar'] = prior_g0_logvar
- model_runs['post_g0_mean'] = post_g0_mean
- model_runs['post_g0_logvar'] = post_g0_logvar
- model_runs['gen_ics'] = gen_ics
-
- if self.hps.co_dim > 0:
- model_runs['controller_outputs'] = controller_outputs
- model_runs['gen_states'] = gen_states
- model_runs['factors'] = factors
- model_runs['output_dist_params'] = out_dist_params
- model_runs['costs'] = costs
- model_runs['nll_bound_vaes'] = nll_bound_vaes
- model_runs['nll_bound_iwaes'] = nll_bound_iwaes
- model_runs['train_steps'] = train_steps
- return model_runs
-
- def eval_model_runs_push_mean(self, data_name, data_extxd,
- ext_input_extxi=None):
- """Returns values of interest for the model by pushing the means through
-
- The mean values for both initial conditions and the control inputs are
- pushed through the model instead of sampling (as is done in
- eval_model_runs_avg_epoch).
-    This is a quick and approximate way of estimating these values, compared
-    to sampling from the posterior many times and then averaging those values
-    of interest.
-
- Internally, a total of batch_size trials are run through the model at once.
-
- Args:
- data_name: The name of the data dict, to select which in/out matrices
- to use.
- data_extxd: Numpy array training data with shape:
- # examples x # time steps x # dimensions
- ext_input_extxi (optional): Numpy array training external input with
- shape: # examples x # time steps x # external input dims
-
- Returns:
- A dictionary with the estimated outputs of the model decoder, namely:
-        prior g0 mean, prior g0 variance, approx. posterior mean, approx.
-        posterior variance, the generator initial conditions, the control inputs (if
- enabled), the state of the generator, the factors, and the output
- distribution parameters, e.g. (rates or mean and variances).
- """
- hps = self.hps
- batch_size = hps.batch_size
- E, T, D = data_extxd.shape
- E_to_process = hps.ps_nexamples_to_process
- if E_to_process > E:
-      print("Setting number of posterior samples to process to:", E)
- E_to_process = E
-
- if hps.ic_dim > 0:
- prior_g0_mean = np.zeros([E_to_process, hps.ic_dim])
- prior_g0_logvar = np.zeros([E_to_process, hps.ic_dim])
- post_g0_mean = np.zeros([E_to_process, hps.ic_dim])
- post_g0_logvar = np.zeros([E_to_process, hps.ic_dim])
-
- if hps.co_dim > 0:
- controller_outputs = np.zeros([E_to_process, T, hps.co_dim])
- gen_ics = np.zeros([E_to_process, hps.gen_dim])
- gen_states = np.zeros([E_to_process, T, hps.gen_dim])
- factors = np.zeros([E_to_process, T, hps.factors_dim])
-
- if hps.output_dist == 'poisson':
- out_dist_params = np.zeros([E_to_process, T, D])
- elif hps.output_dist == 'gaussian':
- out_dist_params = np.zeros([E_to_process, T, D+D])
- else:
- assert False, "NIY"
-
- costs = np.zeros(E_to_process)
- nll_bound_vaes = np.zeros(E_to_process)
- nll_bound_iwaes = np.zeros(E_to_process)
- train_steps = np.zeros(E_to_process)
-
- # generator that will yield 0:N in groups of per items, e.g.
- # (0:per-1), (per:2*per-1), ..., with the last group containing <= per items
- # this will be used to feed per=batch_size trials into the model at a time
- def trial_batches(N, per):
- for i in range(0, N, per):
- yield np.arange(i, min(i+per, N), dtype=np.int32)
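-
-    # e.g. (illustrative): list(trial_batches(7, 3)) yields
-    #   [array([0, 1, 2]), array([3, 4, 5]), array([6])]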
-
- for batch_idx, es_idx in enumerate(trial_batches(E_to_process,
- hps.batch_size)):
- print("Running trial batch %d with %d trials" % (batch_idx+1,
- len(es_idx)))
- data_bxtxd, ext_input_bxtxi = self.get_batch(data_extxd,
- ext_input_extxi,
- batch_size=batch_size,
- example_idxs=es_idx)
- model_values = self.eval_model_runs_batch(data_name, data_bxtxd,
- ext_input_bxtxi,
- do_eval_cost=True,
- do_average_batch=False)
-
- if self.hps.ic_dim > 0:
- prior_g0_mean[es_idx,:] = model_values['prior_g0_mean']
- prior_g0_logvar[es_idx,:] = model_values['prior_g0_logvar']
- post_g0_mean[es_idx,:] = model_values['post_g0_mean']
- post_g0_logvar[es_idx,:] = model_values['post_g0_logvar']
- gen_ics[es_idx,:] = model_values['gen_ics']
-
- if self.hps.co_dim > 0:
- controller_outputs[es_idx,:,:] = model_values['controller_outputs']
- gen_states[es_idx,:,:] = model_values['gen_states']
- factors[es_idx,:,:] = model_values['factors']
- out_dist_params[es_idx,:,:] = model_values['output_dist_params']
-
-      # TODO: model_values['costs'] and the other costs come out as scalars,
-      # summed over all the trials in the batch; what we want here is the
-      # per-trial costs.
- costs[es_idx] = model_values['costs']
- nll_bound_vaes[es_idx] = model_values['nll_bound_vaes']
- nll_bound_iwaes[es_idx] = model_values['nll_bound_iwaes']
-
- train_steps[es_idx] = model_values['train_steps']
-
- model_runs = {}
- if self.hps.ic_dim > 0:
- model_runs['prior_g0_mean'] = prior_g0_mean
- model_runs['prior_g0_logvar'] = prior_g0_logvar
- model_runs['post_g0_mean'] = post_g0_mean
- model_runs['post_g0_logvar'] = post_g0_logvar
- model_runs['gen_ics'] = gen_ics
-
- if self.hps.co_dim > 0:
- model_runs['controller_outputs'] = controller_outputs
- model_runs['gen_states'] = gen_states
- model_runs['factors'] = factors
- model_runs['output_dist_params'] = out_dist_params
-
- # You probably do not want the LL associated values when pushing the mean
- # instead of sampling.
- model_runs['costs'] = costs
- model_runs['nll_bound_vaes'] = nll_bound_vaes
- model_runs['nll_bound_iwaes'] = nll_bound_iwaes
- model_runs['train_steps'] = train_steps
- return model_runs
-
- def write_model_runs(self, datasets, output_fname=None, push_mean=False):
- """Run the model on the data in data_dict, and save the computed values.
-
-    LFADS generates a number of outputs for each example, and these are all
- saved. They are:
- The mean and variance of the prior of g0.
- The mean and variance of approximate posterior of g0.
- The control inputs (if enabled)
- The initial conditions, g0, for all examples.
- The generator states for all time.
- The factors for all time.
- The output distribution parameters (e.g. rates) for all time.
-
- Args:
- datasets: a dictionary of named data_dictionaries, see top of lfads.py
- output_fname: a file name stem for the output files.
-      push_mean: If False (default), generates batch_size samples for each
-        trial and averages the results. If True, runs each trial once without
-        noise, pushing the posterior mean initial conditions and control
-        inputs through the trained model. False is used for
-        posterior_sample_and_average, True is used for posterior_push_mean.
- """
- hps = self.hps
- kind = hps.kind
-
- for data_name, data_dict in datasets.items():
- data_tuple = [('train', data_dict['train_data'],
- data_dict['train_ext_input']),
- ('valid', data_dict['valid_data'],
- data_dict['valid_ext_input'])]
- for data_kind, data_extxd, ext_input_extxi in data_tuple:
- if not output_fname:
- fname = "model_runs_" + data_name + '_' + data_kind + '_' + kind
- else:
- fname = output_fname + data_name + '_' + data_kind + '_' + kind
-
- print("Writing data for %s data and kind %s." % (data_name, data_kind))
- if push_mean:
- model_runs = self.eval_model_runs_push_mean(data_name, data_extxd,
- ext_input_extxi)
- else:
- model_runs = self.eval_model_runs_avg_epoch(data_name, data_extxd,
- ext_input_extxi)
- full_fname = os.path.join(hps.lfads_save_dir, fname)
- write_data(full_fname, model_runs, compression='gzip')
- print("Done.")
-
- def write_model_samples(self, dataset_name, output_fname=None):
-    """Use the prior distribution to generate batch_size samples
- from the model.
-
- LFADS generates a number of outputs for each sample, and these are all
- saved. They are:
- The mean and variance of the prior of g0.
- The control inputs (if enabled)
- The initial conditions, g0, for all examples.
- The generator states for all time.
- The factors for all time.
- The output distribution parameters (e.g. rates) for all time.
-
- Args:
- dataset_name: The name of the dataset to grab the factors -> rates
- alignment matrices from.
- output_fname: The name of the file in which to save the generated
- samples.
- """
- hps = self.hps
- batch_size = hps.batch_size
-
- print("Generating %d samples" % (batch_size))
- tf_vals = [self.factors, self.gen_states, self.gen_ics,
- self.cost, self.output_dist_params]
- if hps.ic_dim > 0:
- tf_vals += [self.prior_zs_g0.mean, self.prior_zs_g0.logvar]
- if hps.co_dim > 0:
- tf_vals += [self.prior_zs_ar_con.samples_t]
- tf_vals_flat, fidxs = flatten(tf_vals)
-
- session = tf.get_default_session()
- feed_dict = {}
- feed_dict[self.dataName] = dataset_name
- feed_dict[self.keep_prob] = 1.0
-
- np_vals_flat = session.run(tf_vals_flat, feed_dict=feed_dict)
-
- ff = 0
- factors = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- gen_states = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- gen_ics = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- costs = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- output_dist_params = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- if hps.ic_dim > 0:
- prior_g0_mean = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- prior_g0_logvar = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
- if hps.co_dim > 0:
- prior_zs_ar_con = [np_vals_flat[f] for f in fidxs[ff]]; ff += 1
-
- # [0] are to take out the non-temporal items from lists
- gen_ics = gen_ics[0]
- costs = costs[0]
-
- # Convert to full tensors, not lists of tensors in time dim.
- gen_states = list_t_bxn_to_tensor_bxtxn(gen_states)
- factors = list_t_bxn_to_tensor_bxtxn(factors)
- output_dist_params = list_t_bxn_to_tensor_bxtxn(output_dist_params)
- if hps.ic_dim > 0:
- prior_g0_mean = prior_g0_mean[0]
- prior_g0_logvar = prior_g0_logvar[0]
- if hps.co_dim > 0:
- prior_zs_ar_con = list_t_bxn_to_tensor_bxtxn(prior_zs_ar_con)
-
- model_vals = {}
- model_vals['gen_ics'] = gen_ics
- model_vals['gen_states'] = gen_states
- model_vals['factors'] = factors
- model_vals['output_dist_params'] = output_dist_params
- model_vals['costs'] = costs.reshape(1)
- if hps.ic_dim > 0:
- model_vals['prior_g0_mean'] = prior_g0_mean
- model_vals['prior_g0_logvar'] = prior_g0_logvar
- if hps.co_dim > 0:
- model_vals['prior_zs_ar_con'] = prior_zs_ar_con
-
- full_fname = os.path.join(hps.lfads_save_dir, output_fname)
- write_data(full_fname, model_vals, compression='gzip')
- print("Done.")
-
- @staticmethod
- def eval_model_parameters(use_nested=True, include_strs=None):
- """Evaluate and return all of the TF variables in the model.
-
- Args:
-      use_nested (optional): For returning values, use a nested dictionary,
-        based on variable scoping, or return all variables in a flat dictionary.
- include_strs (optional): A list of strings to use as a filter, to reduce the
- number of variables returned. A variable name must contain at least one
- string in include_strs as a sub-string in order to be returned.
-
- Returns:
- The parameters of the model. This can be in a flat
- dictionary, or a nested dictionary, where the nesting is by variable
- scope.
- """
- all_tf_vars = tf.global_variables()
- session = tf.get_default_session()
- all_tf_vars_eval = session.run(all_tf_vars)
- vars_dict = {}
- strs = ["LFADS"]
- if include_strs:
- strs += include_strs
-
-    for var, var_eval in zip(all_tf_vars, all_tf_vars_eval):
-      if any(s in var.name for s in strs):
- if not isinstance(var_eval, np.ndarray): # for H5PY
-          print(var.name, "is not a numpy array; saving as a numpy array",
-                "with value:", var_eval, type(var_eval))
- e = np.array(var_eval)
- print(e, type(e))
- else:
- e = var_eval
- vars_dict[var.name] = e
-
- if not use_nested:
- return vars_dict
-
- var_names = vars_dict.keys()
- nested_vars_dict = {}
- current_dict = nested_vars_dict
-    for var_name in var_names:
- var_split_name_list = var_name.split('/')
- split_name_list_len = len(var_split_name_list)
- current_dict = nested_vars_dict
- for p, part in enumerate(var_split_name_list):
- if p < split_name_list_len - 1:
- if part in current_dict:
- current_dict = current_dict[part]
- else:
- current_dict[part] = {}
- current_dict = current_dict[part]
- else:
- current_dict[part] = vars_dict[var_name]
-
- return nested_vars_dict
-
- @staticmethod
- def spikify_rates(rates_bxtxd):
-    """Randomly spikify underlying rates according to a Poisson distribution.
-
- Args:
-      rates_bxtxd: a numpy tensor of underlying rates with shape:
-        batch_size x # time steps x # dimensions
-
- Returns:
- A numpy array with the same shape as rates_bxtxd, but with the event
- counts.
- """
-
- B,T,N = rates_bxtxd.shape
-    assert all([B > 0, N > 0]), "rates_bxtxd must be non-empty"
-
-    # Because the rate differs per time bin, draw each count individually.
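-    # A vectorized sketch that should be statistically equivalent (note that
-    # np.random.poisson broadcasts over an array of rates; the dtype differs):
-    #   spikes_bxtxd = np.random.poisson(rates_bxtxd)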
- spikes_bxtxd = np.zeros([B,T,N], dtype=np.int32)
- for b in range(B):
- for t in range(T):
- for n in range(N):
- rate = rates_bxtxd[b,t,n]
- count = np.random.poisson(rate)
- spikes_bxtxd[b,t,n] = count
-
- return spikes_bxtxd
diff --git a/research/lfads/plot_lfads.py b/research/lfads/plot_lfads.py
deleted file mode 100644
index c4e1a0332ef2affeae147edda4779cc4a7e9a0ef..0000000000000000000000000000000000000000
--- a/research/lfads/plot_lfads.py
+++ /dev/null
@@ -1,181 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import matplotlib
-matplotlib.use('Agg')
-from matplotlib import pyplot as plt
-import numpy as np
-import tensorflow as tf
-
-def _plot_item(W, name, full_name, nspaces):
- plt.figure()
- if W.shape == ():
- print(name, ": ", W)
- elif W.shape[0] == 1:
- plt.stem(W.T)
- plt.title(full_name)
- elif W.shape[1] == 1:
- plt.stem(W)
- plt.title(full_name)
- else:
-    plt.imshow(np.abs(W), interpolation='nearest', cmap='jet')
- plt.colorbar()
- plt.title(full_name)
-
-
-def all_plot(d, full_name="", exclude="", nspaces=0):
- """Recursively plot all the LFADS model parameters in the nested
- dictionary."""
-  for k, v in d.items():
- this_name = full_name+"/"+k
- if isinstance(v, dict):
- all_plot(v, full_name=this_name, exclude=exclude, nspaces=nspaces+4)
- else:
- if exclude == "" or exclude not in this_name:
- _plot_item(v, name=k, full_name=full_name+"/"+k, nspaces=nspaces+4)
-
-
-
-def plot_time_series(vals_bxtxn, bidx=None, n_to_plot=np.inf, scale=1.0,
- color='r', title=None):
-
- if bidx is None:
- vals_txn = np.mean(vals_bxtxn, axis=0)
- else:
- vals_txn = vals_bxtxn[bidx,:,:]
-
- T, N = vals_txn.shape
- if n_to_plot > N:
- n_to_plot = N
-
- plt.plot(vals_txn[:,0:n_to_plot] + scale*np.array(range(n_to_plot)),
- color=color, lw=1.0)
- plt.axis('tight')
- if title:
- plt.title(title)
-
-
-def plot_lfads_timeseries(data_bxtxn, model_vals, ext_input_bxtxi=None,
- truth_bxtxn=None, bidx=None, output_dist="poisson",
- conversion_factor=1.0, subplot_cidx=0,
- col_title=None):
-
- n_to_plot = 10
- scale = 1.0
- nrows = 7
- plt.subplot(nrows,2,1+subplot_cidx)
-
- if output_dist == 'poisson':
- rates = means = conversion_factor * model_vals['output_dist_params']
- plot_time_series(rates, bidx, n_to_plot=n_to_plot, scale=scale,
- title=col_title + " rates (LFADS - red, Truth - black)")
- elif output_dist == 'gaussian':
-    means_vars = model_vals['output_dist_params']
-    means, variances = np.split(means_vars, 2, axis=2) # bxtxn
-    stds = np.sqrt(variances)
- plot_time_series(means, bidx, n_to_plot=n_to_plot, scale=scale,
- title=col_title + " means (LFADS - red, Truth - black)")
- plot_time_series(means+stds, bidx, n_to_plot=n_to_plot, scale=scale,
- color='c')
- plot_time_series(means-stds, bidx, n_to_plot=n_to_plot, scale=scale,
- color='c')
- else:
-    assert False, 'NIY'
-
-
- if truth_bxtxn is not None:
- plot_time_series(truth_bxtxn, bidx, n_to_plot=n_to_plot, color='k',
- scale=scale)
-
- input_title = ""
- if "controller_outputs" in model_vals.keys():
- input_title += " Controller Output"
- plt.subplot(nrows,2,3+subplot_cidx)
- u_t = model_vals['controller_outputs'][0:-1]
- plot_time_series(u_t, bidx, n_to_plot=n_to_plot, color='c', scale=1.0,
- title=col_title + input_title)
-
- if ext_input_bxtxi is not None:
- input_title += " External Input"
- plot_time_series(ext_input_bxtxi, n_to_plot=n_to_plot, color='b',
- scale=scale, title=col_title + input_title)
-
- plt.subplot(nrows,2,5+subplot_cidx)
- plot_time_series(means, bidx,
- n_to_plot=n_to_plot, scale=1.0,
- title=col_title + " Spikes (LFADS - red, Spikes - black)")
- plot_time_series(data_bxtxn, bidx, n_to_plot=n_to_plot, color='k', scale=1.0)
-
- plt.subplot(nrows,2,7+subplot_cidx)
- plot_time_series(model_vals['factors'], bidx, n_to_plot=n_to_plot, color='b',
- scale=2.0, title=col_title + " Factors")
-
- plt.subplot(nrows,2,9+subplot_cidx)
- plot_time_series(model_vals['gen_states'], bidx, n_to_plot=n_to_plot,
- color='g', scale=1.0, title=col_title + " Generator State")
-
- if bidx is not None:
- data_nxt = data_bxtxn[bidx,:,:].T
- params_nxt = model_vals['output_dist_params'][bidx,:,:].T
- else:
- data_nxt = np.mean(data_bxtxn, axis=0).T
- params_nxt = np.mean(model_vals['output_dist_params'], axis=0).T
- if output_dist == 'poisson':
- means_nxt = params_nxt
- elif output_dist == 'gaussian': # (means+vars) x time
- means_nxt = np.vsplit(params_nxt,2)[0] # get means
- else:
- assert "NIY"
-
- plt.subplot(nrows,2,11+subplot_cidx)
- plt.imshow(data_nxt, aspect='auto', interpolation='nearest')
- plt.title(col_title + ' Data')
-
- plt.subplot(nrows,2,13+subplot_cidx)
- plt.imshow(means_nxt, aspect='auto', interpolation='nearest')
- plt.title(col_title + ' Means')
-
-
-def plot_lfads(train_bxtxd, train_model_vals,
- train_ext_input_bxtxi=None, train_truth_bxtxd=None,
- valid_bxtxd=None, valid_model_vals=None,
- valid_ext_input_bxtxi=None, valid_truth_bxtxd=None,
- bidx=None, cf=1.0, output_dist='poisson'):
-
- # Plotting
- f = plt.figure(figsize=(18,20), tight_layout=True)
- plot_lfads_timeseries(train_bxtxd, train_model_vals,
- train_ext_input_bxtxi,
- truth_bxtxn=train_truth_bxtxd,
- conversion_factor=cf, bidx=bidx,
- output_dist=output_dist, col_title='Train')
- plot_lfads_timeseries(valid_bxtxd, valid_model_vals,
- valid_ext_input_bxtxi,
- truth_bxtxn=valid_truth_bxtxd,
- conversion_factor=cf, bidx=bidx,
- output_dist=output_dist,
- subplot_cidx=1, col_title='Valid')
-
-  # Convert the figure to a numpy array, width x height x 3 (last for RGB).
-  f.canvas.draw()
-  data = np.frombuffer(f.canvas.tostring_rgb(), dtype=np.uint8)
- data_wxhx3 = data.reshape(f.canvas.get_width_height()[::-1] + (3,))
- plt.close()
-
- return data_wxhx3
diff --git a/research/lfads/run_lfads.py b/research/lfads/run_lfads.py
deleted file mode 100755
index bd1c0d5e4deab50481cd32efdd044c61707204cc..0000000000000000000000000000000000000000
--- a/research/lfads/run_lfads.py
+++ /dev/null
@@ -1,815 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from lfads import LFADS
-import numpy as np
-import os
-import tensorflow as tf
-import re
-import utils
-import sys
-MAX_INT = sys.maxsize
-
-# Lots of hyperparameters, but most are pretty insensitive. The
-# explanation of these hyperparameters is found below, in the flags
-# section.
-
-CHECKPOINT_PB_LOAD_NAME = "checkpoint"
-CHECKPOINT_NAME = "lfads_vae"
-CSV_LOG = "fitlog"
-OUTPUT_FILENAME_STEM = ""
-DEVICE = "gpu:0" # "cpu:0", or other gpus, e.g. "gpu:1"
-MAX_CKPT_TO_KEEP = 5
-MAX_CKPT_TO_KEEP_LVE = 5
-PS_NEXAMPLES_TO_PROCESS = MAX_INT # if larger than number of examples, process all
-EXT_INPUT_DIM = 0
-IC_DIM = 64
-FACTORS_DIM = 50
-IC_ENC_DIM = 128
-GEN_DIM = 200
-GEN_CELL_INPUT_WEIGHT_SCALE = 1.0
-GEN_CELL_REC_WEIGHT_SCALE = 1.0
-CELL_WEIGHT_SCALE = 1.0
-BATCH_SIZE = 128
-LEARNING_RATE_INIT = 0.01
-LEARNING_RATE_DECAY_FACTOR = 0.95
-LEARNING_RATE_STOP = 0.00001
-LEARNING_RATE_N_TO_COMPARE = 6
-INJECT_EXT_INPUT_TO_GEN = False
-DO_TRAIN_IO_ONLY = False
-DO_TRAIN_ENCODER_ONLY = False
-DO_RESET_LEARNING_RATE = False
-FEEDBACK_FACTORS_OR_RATES = "factors"
-DO_TRAIN_READIN = True
-
-# Calibrated just above the average value for the rnn synthetic data.
-MAX_GRAD_NORM = 200.0
-CELL_CLIP_VALUE = 5.0
-KEEP_PROB = 0.95
-TEMPORAL_SPIKE_JITTER_WIDTH = 0
-OUTPUT_DISTRIBUTION = 'poisson' # 'poisson' or 'gaussian'
-NUM_STEPS_FOR_GEN_IC = MAX_INT # set to num_steps if greater than num_steps
-
-DATA_DIR = "/tmp/rnn_synth_data_v1.0/"
-DATA_FILENAME_STEM = "chaotic_rnn_inputs_g1p5"
-LFADS_SAVE_DIR = "/tmp/lfads_chaotic_rnn_inputs_g1p5/"
-CO_DIM = 1
-DO_CAUSAL_CONTROLLER = False
-DO_FEED_FACTORS_TO_CONTROLLER = True
-CONTROLLER_INPUT_LAG = 1
-PRIOR_AR_AUTOCORRELATION = 10.0
-PRIOR_AR_PROCESS_VAR = 0.1
-DO_TRAIN_PRIOR_AR_ATAU = True
-DO_TRAIN_PRIOR_AR_NVAR = True
-CI_ENC_DIM = 128
-CON_DIM = 128
-CO_PRIOR_VAR_SCALE = 0.1
-KL_INCREASE_STEPS = 2000
-L2_INCREASE_STEPS = 2000
-L2_GEN_SCALE = 2000.0
-L2_CON_SCALE = 0.0
-# scale of regularizer on time correlation of inferred inputs
-CO_MEAN_CORR_SCALE = 0.0
-KL_IC_WEIGHT = 1.0
-KL_CO_WEIGHT = 1.0
-KL_START_STEP = 0
-L2_START_STEP = 0
-IC_PRIOR_VAR_MIN = 0.1
-IC_PRIOR_VAR_SCALE = 0.1
-IC_PRIOR_VAR_MAX = 0.1
-IC_POST_VAR_MIN = 0.0001 # protection from KL blowing up
-
-flags = tf.app.flags
-flags.DEFINE_string("kind", "train",
- "Type of model to build {train, \
- posterior_sample_and_average, \
- posterior_push_mean, \
-                    prior_sample, write_model_params}")
-flags.DEFINE_string("output_dist", OUTPUT_DISTRIBUTION,
- "Type of output distribution, 'poisson' or 'gaussian'")
-flags.DEFINE_boolean("allow_gpu_growth", False,
- "If true, only allocate amount of memory needed for \
- Session. Otherwise, use full GPU memory.")
-
-# DATA
-flags.DEFINE_string("data_dir", DATA_DIR, "Data for training")
-flags.DEFINE_string("data_filename_stem", DATA_FILENAME_STEM,
- "Filename stem for data dictionaries.")
-flags.DEFINE_string("lfads_save_dir", LFADS_SAVE_DIR, "model save dir")
-flags.DEFINE_string("checkpoint_pb_load_name", CHECKPOINT_PB_LOAD_NAME,
- "Name of checkpoint files, use 'checkpoint_lve' for best \
- error")
-flags.DEFINE_string("checkpoint_name", CHECKPOINT_NAME,
- "Name of checkpoint files (.ckpt appended)")
-flags.DEFINE_string("output_filename_stem", OUTPUT_FILENAME_STEM,
- "Name of output file (postfix will be added)")
-flags.DEFINE_string("device", DEVICE,
- "Which device to use (default: \"gpu:0\", can also be \
- \"cpu:0\", \"gpu:1\", etc)")
-flags.DEFINE_string("csv_log", CSV_LOG,
- "Name of file to keep running log of fit likelihoods, \
- etc (.csv appended)")
-flags.DEFINE_integer("max_ckpt_to_keep", MAX_CKPT_TO_KEEP,
- "Max # of checkpoints to keep (rolling)")
-flags.DEFINE_integer("ps_nexamples_to_process", PS_NEXAMPLES_TO_PROCESS,
- "Number of examples to process for posterior sample and \
- average (not number of samples to average over).")
-flags.DEFINE_integer("max_ckpt_to_keep_lve", MAX_CKPT_TO_KEEP_LVE,
- "Max # of checkpoints to keep for lowest validation error \
- models (rolling)")
-flags.DEFINE_integer("ext_input_dim", EXT_INPUT_DIM, "Dimension of external \
-inputs")
-flags.DEFINE_integer("num_steps_for_gen_ic", NUM_STEPS_FOR_GEN_IC,
- "Number of steps to train the generator initial conditon.")
-
-
-# If there are observed inputs, there are two ways to add that observed
-# input to the model. The first is to treat it as something to be
-# inferred: encode the observed input via the encoders, and then feed it
-# to the generator via the "inferred inputs" channel. Second, one can
-# inject the input directly into the generator. This has the downside of
-# making the generation process strictly dependent on knowing the
-# observed input for any generated trial.
-flags.DEFINE_boolean("inject_ext_input_to_gen",
- INJECT_EXT_INPUT_TO_GEN,
- "Should observed inputs be input to model via encoders, \
- or injected directly into generator?")
-
-# CELL
-
-# The combined recurrent and input weights of the encoder and
-# controller cells are by default set to scale at ws/sqrt(#inputs),
-# with ws=1.0. You can change this scaling with this parameter.
-flags.DEFINE_float("cell_weight_scale", CELL_WEIGHT_SCALE,
- "Input scaling for input weights in generator.")
-
-
-# GENERATION
-
-# Note that the dimension of the initial condition code is decoupled from
-# the dimension of the generator state (and a linear matrix will adapt
-# the shapes if necessary). This is just another way to control
-# complexity. In all likelihood, setting the ic dims to the size of the
-# generator hidden state is just fine.
-flags.DEFINE_integer("ic_dim", IC_DIM, "Dimension of h0")
-# Setting the dimensions of the factors to something smaller than the data
-# dimension is a way to get a reduced dimensionality representation of your
-# data.
-flags.DEFINE_integer("factors_dim", FACTORS_DIM,
- "Number of factors from generator")
-flags.DEFINE_integer("ic_enc_dim", IC_ENC_DIM,
- "Cell hidden size, encoder of h0")
-
-# Controlling the size of the generator is one way to control complexity of
-# the dynamics (there is also l2, which will squeeze out unnecessary
-# dynamics also). The modern deep learning approach is to make these cells
-# as large as tolerable (from a waiting perspective), and then regularize
-# them to death with drop out or whatever. I don't know if this is correct
-# for the LFADS application or not.
-flags.DEFINE_integer("gen_dim", GEN_DIM,
- "Cell hidden size, generator.")
-# The weights of the generator cell by default set to scale at
-# ws/sqrt(#inputs), with ws=1.0. You can change ws for
-# the input weights or the recurrent weights with these hyperparameters.
-flags.DEFINE_float("gen_cell_input_weight_scale", GEN_CELL_INPUT_WEIGHT_SCALE,
- "Input scaling for input weights in generator.")
-flags.DEFINE_float("gen_cell_rec_weight_scale", GEN_CELL_REC_WEIGHT_SCALE,
- "Input scaling for rec weights in generator.")
-
-# KL DISTRIBUTIONS
-# If you don't know what you are doing here, please leave these alone; the
-# defaults should be fine for most cases, regardless of other parameters.
-#
-# If you don't want the prior variance to be learned, set the
-# following values to the same thing: ic_prior_var_min,
-# ic_prior_var_scale, ic_prior_var_max. The prior mean will be
-# learned regardless.
-flags.DEFINE_float("ic_prior_var_min", IC_PRIOR_VAR_MIN,
- "Minimum variance in posterior h0 codes.")
-flags.DEFINE_float("ic_prior_var_scale", IC_PRIOR_VAR_SCALE,
- "Variance of ic prior distribution")
-flags.DEFINE_float("ic_prior_var_max", IC_PRIOR_VAR_MAX,
- "Maximum variance of IC prior distribution.")
-# If you really want to limit the information from encoder to decoder,
-# increase ic_post_var_min above 0.0.
-flags.DEFINE_float("ic_post_var_min", IC_POST_VAR_MIN,
- "Minimum variance of IC posterior distribution.")
-flags.DEFINE_float("co_prior_var_scale", CO_PRIOR_VAR_SCALE,
- "Variance of control input prior distribution.")
-
-
-flags.DEFINE_float("prior_ar_atau", PRIOR_AR_AUTOCORRELATION,
- "Initial autocorrelation of AR(1) priors.")
-flags.DEFINE_float("prior_ar_nvar", PRIOR_AR_PROCESS_VAR,
- "Initial noise variance for AR(1) priors.")
-flags.DEFINE_boolean("do_train_prior_ar_atau", DO_TRAIN_PRIOR_AR_ATAU,
- "Is the value for atau an init, or the constant value?")
-flags.DEFINE_boolean("do_train_prior_ar_nvar", DO_TRAIN_PRIOR_AR_NVAR,
- "Is the value for noise variance an init, or the constant \
- value?")
-
-# CONTROLLER
-# This parameter critically controls whether or not there is a controller
-# (along with controller encoders) placed into the LFADS graph. If CO_DIM
-# > 0, the controller graph is built, with CO_DIM-dimensional controller
-# outputs; if it is equal to 0, there is no controller.
-flags.DEFINE_integer("co_dim", CO_DIM,
- "Number of control net outputs (>0 builds that graph).")
-
-# The controller will be more powerful if it can see the encoding of the entire
-# trial. However, this allows the controller to create inferred inputs that are
-# acausal with respect to the actual data generation process. E.g. the data
-# generator could have an input at time t, but the controller, after seeing the
-# entirety of the trial could infer that the input is coming a little before
-# time t, because there are no restrictions on the data the controller sees.
-# One can force the controller to be causal (with respect to perturbations in
-# the data generator) so that it only sees forward encodings of the data at time
-# t that originate at times before or at time t. One can also control the data
-# the controller sees by using an input lag (forward encoding at time [t-tlag]
-# for controller input at time t. The same can be done in the reverse direction
-# (controller input at time t from reverse encoding at time [t+tlag], in the
-# case of an acausal controller). Setting this lag > 0 (even lag=1) can be a
-# powerful way of avoiding very spiky decodes. Finally, one can manually control
-# whether the factors at time t-1 are fed to the controller at time t.
-#
-# If you don't care about any of this, and just want to smooth your data, set
-# do_causal_controller = False
-# do_feed_factors_to_controller = True
-# causal_input_lag = 0
-flags.DEFINE_boolean("do_causal_controller",
- DO_CAUSAL_CONTROLLER,
- "Restrict the controller create only causal inferred \
- inputs?")
-# Strictly speaking, feeding either the factors or the rates to the controller
-# violates causality, since the g0 gets to see all the data. This may or may not
-# be only a theoretical concern.
-flags.DEFINE_boolean("do_feed_factors_to_controller",
- DO_FEED_FACTORS_TO_CONTROLLER,
- "Should factors[t-1] be input to controller at time t?")
-flags.DEFINE_string("feedback_factors_or_rates", FEEDBACK_FACTORS_OR_RATES,
- "Feedback the factors or the rates to the controller? \
- Acceptable values: 'factors' or 'rates'.")
-flags.DEFINE_integer("controller_input_lag", CONTROLLER_INPUT_LAG,
- "Time lag on the encoding to controller t-lag for \
- forward, t+lag for reverse.")
-
-flags.DEFINE_integer("ci_enc_dim", CI_ENC_DIM,
- "Cell hidden size, encoder of control inputs")
-flags.DEFINE_integer("con_dim", CON_DIM,
- "Cell hidden size, controller")
-
-
-# OPTIMIZATION
-flags.DEFINE_integer("batch_size", BATCH_SIZE,
- "Batch size to use during training.")
-flags.DEFINE_float("learning_rate_init", LEARNING_RATE_INIT,
- "Learning rate initial value")
-flags.DEFINE_float("learning_rate_decay_factor", LEARNING_RATE_DECAY_FACTOR,
- "Learning rate decay, decay by this fraction every so \
- often.")
-flags.DEFINE_float("learning_rate_stop", LEARNING_RATE_STOP,
- "The lr is adaptively reduced, stop training at this value.")
-# Rather than putting the learning rate on an exponentially decreasing
-# schedule, the current algorithm pays attention to the training cost, and
-# if the cost isn't regularly decreasing, it will decrease the learning
-# rate. So far, it works fine, though it is not perfect.
-flags.DEFINE_integer("learning_rate_n_to_compare", LEARNING_RATE_N_TO_COMPARE,
- "Number of previous costs current cost has to be worse \
- than, to lower learning rate.")
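-
-# A sketch of the adaptive rule described above (illustrative only; the
-# actual logic lives in the training loop): lower the learning rate when
-# the current cost is worse than each of the last n costs.
-def _should_lower_lr_sketch(recent_costs, current_cost, n_to_compare):
-  tail = recent_costs[-n_to_compare:]
-  return len(tail) == n_to_compare and all(current_cost > c for c in tail)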
-
-# This sets a value above which the gradients will be clipped. This hp
-# is extremely useful to avoid an infrequent, but highly pathological
-# problem whereby the gradient is so large that it destroys the
-# optimization by setting parameters too large, leading to a vicious cycle
-# that ends in NaNs. If it's too large, it's useless, if it's too small,
-# it essentially becomes the learning rate. It's pretty insensitive, though.
-flags.DEFINE_float("max_grad_norm", MAX_GRAD_NORM,
- "Max norm of gradient before clipping.")
-
-# If your optimizations start "NaN-ing out", reduce this value so that
-# the values of the network don't grow out of control. Typically, once
-# this parameter is set to a reasonable value, one stops having numerical
-# problems.
-flags.DEFINE_float("cell_clip_value", CELL_CLIP_VALUE,
- "Max value recurrent cell can take before being clipped.")
-
-# This flag is used for an experiment where one sees if training a model with
-# many days' data can be used to learn the dynamics from a held-out day's data.
-# If you don't care about that particular experiment, this flag should always be
-# false.
-flags.DEFINE_boolean("do_train_io_only", DO_TRAIN_IO_ONLY,
- "Train only the input (readin) and output (readout) \
- affine functions.")
-
-# This flag is used for an experiment where one wants to know if the dynamics
-# learned by the generator generalize across conditions. In that case, you might
-# train up a model on one set of data, and then only further train the encoder
-# on another set of data (the conditions to be tested) so that the model is
-# forced to use the same dynamics to describe that data. If you don't care about
-# that particular experiment, this flag should always be false.
-flags.DEFINE_boolean("do_train_encoder_only", DO_TRAIN_ENCODER_ONLY,
- "Train only the encoder weights.")
-
-flags.DEFINE_boolean("do_reset_learning_rate", DO_RESET_LEARNING_RATE,
- "Reset the learning rate to initial value.")
-
-
-# For multi-session "stitching" models, the per-session readin matrices map from
-# neurons to input factors which are fed into the shared encoder. These are
-# initialized by alignment_matrix_cxf and alignment_bias_c in the input .h5
-# files. They can be fixed or made trainable.
-flags.DEFINE_boolean("do_train_readin", DO_TRAIN_READIN, "Whether to train the \
- readin matrices and bias vectors. False leaves them fixed \
- at their initial values specified by the alignment \
- matrices and vectors.")
-
-
-# OVERFITTING
-# Dropout is done on the input data, on controller inputs (from the
-# encoder), and on outputs from the generator to the factors.
-flags.DEFINE_float("keep_prob", KEEP_PROB, "Dropout keep probability.")
-# It appears that the system will happily fit spikes (blessing or
-# curse, depending). You may not want this. Jittering the spikes a
-# bit will help (-/+ bin size, as specified here).
-flags.DEFINE_integer("temporal_spike_jitter_width",
- TEMPORAL_SPIKE_JITTER_WIDTH,
- "Shuffle spikes around this window.")
-
-# General note about helping ascribe controller inputs vs dynamics:
-#
-# If controller is heavily penalized, then it won't have any output.
-# If dynamics are heavily penalized, then generator won't make
-# dynamics. Note this l2 penalty is only on the recurrent portion of
-# the RNNs, as dropout is also available, penalizing the feed-forward
-# connections.
-flags.DEFINE_float("l2_gen_scale", L2_GEN_SCALE,
- "L2 regularization cost for the generator only.")
-flags.DEFINE_float("l2_con_scale", L2_CON_SCALE,
- "L2 regularization cost for the controller only.")
-flags.DEFINE_float("co_mean_corr_scale", CO_MEAN_CORR_SCALE,
- "Cost of correlation (thru time)in the means of \
- controller output.")
-
-# UNDERFITTING
-# If the primary task of LFADS is "filtering" of data and not
-# generation, then it is possible that the KL penalty is too strong.
-# Empirically, we have found this to be the case. So we add a
-# hyperparameter in front of the two KL terms (one for the initial
-# conditions to the generator, the other for the controller outputs).
-# You should always think of the default values as 1.0, and that
-# leads to a standard VAE formulation whereby the numbers that are
-# optimized are a lower-bound on the log-likelihood of the data. When
-# these 2 HPs deviate from 1.0, one cannot make any statement about
-# what those LL lower bounds mean anymore, and they cannot be compared
-# (AFAIK).
-flags.DEFINE_float("kl_ic_weight", KL_IC_WEIGHT,
- "Strength of KL weight on initial conditions KL penatly.")
-flags.DEFINE_float("kl_co_weight", KL_CO_WEIGHT,
- "Strength of KL weight on controller output KL penalty.")
-
-# Sometimes the task can be sufficiently hard to learn that the
-# optimizer takes the 'easy route', and simply minimizes the KL
-# divergence, setting it to near zero, and the optimization gets
-# stuck. These two parameters will help avoid that by getting the
-# optimization to 'latch' on to the main objective, and only
-# turning on the regularizers later.
-flags.DEFINE_integer("kl_start_step", KL_START_STEP,
- "Start increasing weight after this many steps.")
-# Measured in training passes, not epochs: increase by 0.5 every kl_increase_steps.
-flags.DEFINE_integer("kl_increase_steps", KL_INCREASE_STEPS,
- "Increase weight of kl cost to avoid local minimum.")
-# Same story for l2 regularizer. One wants a simple generator, for scientific
-# reasons, but not at the expense of hosing the optimization.
-flags.DEFINE_integer("l2_start_step", L2_START_STEP,
- "Start increasing l2 weight after this many steps.")
-flags.DEFINE_integer("l2_increase_steps", L2_INCREASE_STEPS,
- "Increase weight of l2 cost to avoid local minimum.")
-
-FLAGS = flags.FLAGS
-
-
-def build_model(hps, kind="train", datasets=None):
- """Builds a model from either random initialization, or saved parameters.
-
- Args:
- hps: The hyper parameters for the model.
- kind: (optional) The kind of model to build. Training vs inference require
- different graphs.
- datasets: The datasets structure (see top of lfads.py).
-
- Returns:
- an LFADS model.
- """
-
- build_kind = kind
- if build_kind == "write_model_params":
- build_kind = "train"
- with tf.variable_scope("LFADS", reuse=None):
- model = LFADS(hps, kind=build_kind, datasets=datasets)
-
- if not os.path.exists(hps.lfads_save_dir):
- print("Save directory %s does not exist, creating it." % hps.lfads_save_dir)
- os.makedirs(hps.lfads_save_dir)
-
- cp_pb_ln = hps.checkpoint_pb_load_name
- cp_pb_ln = 'checkpoint' if cp_pb_ln == "" else cp_pb_ln
- if cp_pb_ln == 'checkpoint':
- print("Loading latest training checkpoint in: ", hps.lfads_save_dir)
- saver = model.seso_saver
- elif cp_pb_ln == 'checkpoint_lve':
- print("Loading lowest validation checkpoint in: ", hps.lfads_save_dir)
- saver = model.lve_saver
- else:
- print("Loading checkpoint: ", cp_pb_ln, ", in: ", hps.lfads_save_dir)
- saver = model.seso_saver
-
- ckpt = tf.train.get_checkpoint_state(hps.lfads_save_dir,
- latest_filename=cp_pb_ln)
-
- session = tf.get_default_session()
- print("ckpt: ", ckpt)
- if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
- print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
- saver.restore(session, ckpt.model_checkpoint_path)
- else:
- print("Created model with fresh parameters.")
- if kind in ["posterior_sample_and_average", "posterior_push_mean",
- "prior_sample", "write_model_params"]:
- print("Possible error!!! You are running ", kind, " on a newly \
- initialized model!")
-      # cannot print ckpt.model_checkpoint_path if there is no ckpt
-      print("Are you sure a checkpoint in ", hps.lfads_save_dir,
-            " exists?")
-
- tf.global_variables_initializer().run()
-
- if ckpt:
- train_step_str = re.search('-[0-9]+$', ckpt.model_checkpoint_path).group()
- else:
- train_step_str = '-0'
-
- fname = 'hyperparameters' + train_step_str + '.txt'
- hp_fname = os.path.join(hps.lfads_save_dir, fname)
- hps_for_saving = jsonify_dict(hps)
- utils.write_data(hp_fname, hps_for_saving, use_json=True)
-
- return model
-
-
-def jsonify_dict(d):
- """Turns python booleans into strings so hps dict can be written in json.
- Creates a shallow-copied dictionary first, then accomplishes string
- conversion.
-
- Args:
- d: hyperparameter dictionary
-
-  Returns: hyperparameter dictionary with bools as strings
- """
-
- d2 = d.copy() # shallow copy is fine by assumption of d being shallow
- def jsonify_bool(boolean_value):
- if boolean_value:
- return "true"
- else:
- return "false"
-
- for key in d2.keys():
- if isinstance(d2[key], bool):
- d2[key] = jsonify_bool(d2[key])
- return d2
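-
-# For example, jsonify_dict({'do_train_readin': True, 'co_dim': 1})
-# returns {'do_train_readin': 'true', 'co_dim': 1}.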
-
-
-def build_hyperparameter_dict(flags):
- """Simple script for saving hyper parameters. Under the hood the
- flags structure isn't a dictionary, so it has to be simplified since we
- want to be able to view file as text.
-
- Args:
- flags: From tf.app.flags
-
- Returns:
- dictionary of hyper parameters (ignoring other flag types).
- """
- d = {}
- # Data
- d['output_dist'] = flags.output_dist
- d['data_dir'] = flags.data_dir
- d['lfads_save_dir'] = flags.lfads_save_dir
- d['checkpoint_pb_load_name'] = flags.checkpoint_pb_load_name
- d['checkpoint_name'] = flags.checkpoint_name
- d['output_filename_stem'] = flags.output_filename_stem
- d['max_ckpt_to_keep'] = flags.max_ckpt_to_keep
- d['max_ckpt_to_keep_lve'] = flags.max_ckpt_to_keep_lve
- d['ps_nexamples_to_process'] = flags.ps_nexamples_to_process
- d['ext_input_dim'] = flags.ext_input_dim
- d['data_filename_stem'] = flags.data_filename_stem
- d['device'] = flags.device
- d['csv_log'] = flags.csv_log
- d['num_steps_for_gen_ic'] = flags.num_steps_for_gen_ic
- d['inject_ext_input_to_gen'] = flags.inject_ext_input_to_gen
- # Cell
- d['cell_weight_scale'] = flags.cell_weight_scale
- # Generation
- d['ic_dim'] = flags.ic_dim
- d['factors_dim'] = flags.factors_dim
- d['ic_enc_dim'] = flags.ic_enc_dim
- d['gen_dim'] = flags.gen_dim
- d['gen_cell_input_weight_scale'] = flags.gen_cell_input_weight_scale
- d['gen_cell_rec_weight_scale'] = flags.gen_cell_rec_weight_scale
- # KL distributions
- d['ic_prior_var_min'] = flags.ic_prior_var_min
- d['ic_prior_var_scale'] = flags.ic_prior_var_scale
- d['ic_prior_var_max'] = flags.ic_prior_var_max
- d['ic_post_var_min'] = flags.ic_post_var_min
- d['co_prior_var_scale'] = flags.co_prior_var_scale
- d['prior_ar_atau'] = flags.prior_ar_atau
- d['prior_ar_nvar'] = flags.prior_ar_nvar
- d['do_train_prior_ar_atau'] = flags.do_train_prior_ar_atau
- d['do_train_prior_ar_nvar'] = flags.do_train_prior_ar_nvar
- # Controller
- d['do_causal_controller'] = flags.do_causal_controller
- d['controller_input_lag'] = flags.controller_input_lag
- d['do_feed_factors_to_controller'] = flags.do_feed_factors_to_controller
- d['feedback_factors_or_rates'] = flags.feedback_factors_or_rates
- d['co_dim'] = flags.co_dim
- d['ci_enc_dim'] = flags.ci_enc_dim
- d['con_dim'] = flags.con_dim
- d['co_mean_corr_scale'] = flags.co_mean_corr_scale
- # Optimization
- d['batch_size'] = flags.batch_size
- d['learning_rate_init'] = flags.learning_rate_init
- d['learning_rate_decay_factor'] = flags.learning_rate_decay_factor
- d['learning_rate_stop'] = flags.learning_rate_stop
- d['learning_rate_n_to_compare'] = flags.learning_rate_n_to_compare
- d['max_grad_norm'] = flags.max_grad_norm
- d['cell_clip_value'] = flags.cell_clip_value
- d['do_train_io_only'] = flags.do_train_io_only
- d['do_train_encoder_only'] = flags.do_train_encoder_only
- d['do_reset_learning_rate'] = flags.do_reset_learning_rate
- d['do_train_readin'] = flags.do_train_readin
-
- # Overfitting
- d['keep_prob'] = flags.keep_prob
- d['temporal_spike_jitter_width'] = flags.temporal_spike_jitter_width
- d['l2_gen_scale'] = flags.l2_gen_scale
- d['l2_con_scale'] = flags.l2_con_scale
- # Underfitting
- d['kl_ic_weight'] = flags.kl_ic_weight
- d['kl_co_weight'] = flags.kl_co_weight
- d['kl_start_step'] = flags.kl_start_step
- d['kl_increase_steps'] = flags.kl_increase_steps
- d['l2_start_step'] = flags.l2_start_step
- d['l2_increase_steps'] = flags.l2_increase_steps
- d['_clip_value'] = 80 # bounds the tf.exp to avoid INF
-
- return d
-
-
-class hps_dict_to_obj(dict):
- """Helper class allowing us to access hps dictionary more easily."""
-
- def __getattr__(self, key):
- if key in self:
- return self[key]
- else:
- assert False, ("%s does not exist." % key)
- def __setattr__(self, key, value):
- self[key] = value
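-
-# For example, hps = hps_dict_to_obj({'co_dim': 1}) lets both hps.co_dim
-# and hps['co_dim'] read the same value, and hps.co_dim = 2 writes back
-# into the underlying dict.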
-
-
-def train(hps, datasets):
- """Train the LFADS model.
-
- Args:
- hps: The dictionary of hyperparameters.
- datasets: A dictionary of data dictionaries. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- """
- model = build_model(hps, kind="train", datasets=datasets)
- if hps.do_reset_learning_rate:
- sess = tf.get_default_session()
- sess.run(model.learning_rate.initializer)
-
- model.train_model(datasets)
-
-
-def write_model_runs(hps, datasets, output_fname=None, push_mean=False):
- """Run the model on the data in data_dict, and save the computed values.
-
-  LFADS generates a number of outputs for each example, and these are all
- saved. They are:
- The mean and variance of the prior of g0.
- The mean and variance of approximate posterior of g0.
- The control inputs (if enabled)
- The initial conditions, g0, for all examples.
- The generator states for all time.
- The factors for all time.
- The rates for all time.
-
- Args:
- hps: The dictionary of hyperparameters.
- datasets: A dictionary of data dictionaries. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- output_fname (optional): output filename stem to write the model runs.
- push_mean: if False (default), generates batch_size samples for each trial
- and averages the results. if True, runs each trial once without noise,
- pushing the posterior mean initial conditions and control inputs through
- the trained model. False is used for posterior_sample_and_average, True
- is used for posterior_push_mean.
- """
- model = build_model(hps, kind=hps.kind, datasets=datasets)
- model.write_model_runs(datasets, output_fname, push_mean)
-
-
-def write_model_samples(hps, datasets, dataset_name=None, output_fname=None):
- """Use the prior distribution to generate samples from the model.
- Generates batch_size number of samples (set through FLAGS).
-
-  LFADS generates a number of outputs for each example, and these are all
- saved. They are:
- The mean and variance of the prior of g0.
- The control inputs (if enabled)
- The initial conditions, g0, for all examples.
- The generator states for all time.
- The factors for all time.
- The output distribution parameters (e.g. rates) for all time.
-
- Args:
- hps: The dictionary of hyperparameters.
- datasets: A dictionary of data dictionaries. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- dataset_name: The name of the dataset to grab the factors -> rates
- alignment matrices from. Only a concern with models trained on
- multi-session data. By default, uses the first dataset in the data dict.
- output_fname: The name prefix of the file in which to save the generated
- samples.
- """
- if not output_fname:
- output_fname = "model_runs_" + hps.kind
- else:
- output_fname = output_fname + "model_runs_" + hps.kind
-  if not dataset_name:
-    dataset_name = list(datasets.keys())[0]
-  else:
-    if dataset_name not in datasets:
-      raise ValueError("Invalid dataset name '%s'." % (dataset_name))
- model = build_model(hps, kind=hps.kind, datasets=datasets)
- model.write_model_samples(dataset_name, output_fname)
-
-
-def write_model_parameters(hps, output_fname=None, datasets=None):
- """Save all the model parameters
-
- Save all the parameters to hps.lfads_save_dir.
-
- Args:
- hps: The dictionary of hyperparameters.
- output_fname: The prefix of the file in which to save the generated
- samples.
- datasets: A dictionary of data dictionaries. The dataset dict is simply a
- name(string)-> data dictionary mapping (See top of lfads.py).
- """
- if not output_fname:
- output_fname = "model_params"
- else:
- output_fname = output_fname + "_model_params"
- fname = os.path.join(hps.lfads_save_dir, output_fname)
- print("Writing model parameters to: ", fname)
- # save the optimizer params as well
- model = build_model(hps, kind="write_model_params", datasets=datasets)
-  model_params = model.eval_model_parameters(use_nested=False,
-                                             include_strs=["LFADS"])
- utils.write_data(fname, model_params, compression=None)
- print("Done.")
-
-
-def clean_data_dict(data_dict):
- """Add some key/value pairs to the data dict, if they are missing.
- Args:
- data_dict - dictionary containing data for LFADS
- Returns:
- data_dict with some keys filled in, if they are absent.
- """
-
- keys = ['train_truth', 'train_ext_input', 'valid_data',
- 'valid_truth', 'valid_ext_input', 'valid_train']
- for k in keys:
- if k not in data_dict:
- data_dict[k] = None
-
- return data_dict
-
-
-def load_datasets(data_dir, data_filename_stem):
- """Load the datasets from a specified directory.
-
- Example files look like
- >data_dir/my_dataset_first_day
- >data_dir/my_dataset_second_day
-
-  If my_dataset (filename) stem is in the directory, the read routine will try
-  to load it. The datasets dictionary will then look like
-  dataset['first_day'] -> (first day data dictionary)
-  dataset['second_day'] -> (second day data dictionary)
-
- Args:
- data_dir: The directory from which to load the datasets.
- data_filename_stem: The stem of the filename for the datasets.
-
- Returns:
- datasets: a dataset dictionary, with one name->data dictionary pair for
- each dataset file.
- """
- print("Reading data from ", data_dir)
- datasets = utils.read_datasets(data_dir, data_filename_stem)
- for k, data_dict in datasets.items():
- datasets[k] = clean_data_dict(data_dict)
-
- train_total_size = len(data_dict['train_data'])
- if train_total_size == 0:
- print("Did not load training set.")
- else:
- print("Found training set with number examples: ", train_total_size)
-
- valid_total_size = len(data_dict['valid_data'])
- if valid_total_size == 0:
- print("Did not load validation set.")
- else:
- print("Found validation set with number examples: ", valid_total_size)
-
- return datasets
-
-
-def main(_):
- """Get this whole shindig off the ground."""
- d = build_hyperparameter_dict(FLAGS)
- hps = hps_dict_to_obj(d) # hyper parameters
- kind = FLAGS.kind
-
- # Read the data, if necessary.
- train_set = valid_set = None
- if kind in ["train", "posterior_sample_and_average", "posterior_push_mean",
- "prior_sample", "write_model_params"]:
- datasets = load_datasets(hps.data_dir, hps.data_filename_stem)
- else:
- raise ValueError('Kind {} is not supported.'.format(kind))
-
- # infer the dataset names and dataset dimensions from the loaded files
-  hps.kind = kind # needs to be added here because it is not saved as a hyperparam
- hps.dataset_names = []
- hps.dataset_dims = {}
- for key in datasets:
- hps.dataset_names.append(key)
- hps.dataset_dims[key] = datasets[key]['data_dim']
-
- # also store down the dimensionality of the data
- # - just pull from one set, required to be same for all sets
-  hps.num_steps = list(datasets.values())[0]['num_steps']
- hps.ndatasets = len(hps.dataset_names)
-
- if hps.num_steps_for_gen_ic > hps.num_steps:
- hps.num_steps_for_gen_ic = hps.num_steps
-
- # Build and run the model, for varying purposes.
- config = tf.ConfigProto(allow_soft_placement=True,
- log_device_placement=False)
- if FLAGS.allow_gpu_growth:
- config.gpu_options.allow_growth = True
- sess = tf.Session(config=config)
- with sess.as_default():
- with tf.device(hps.device):
- if kind == "train":
- train(hps, datasets)
- elif kind == "posterior_sample_and_average":
- write_model_runs(hps, datasets, hps.output_filename_stem,
- push_mean=False)
- elif kind == "posterior_push_mean":
- write_model_runs(hps, datasets, hps.output_filename_stem,
- push_mean=True)
- elif kind == "prior_sample":
- write_model_samples(hps, datasets, hps.output_filename_stem)
- elif kind == "write_model_params":
- write_model_parameters(hps, hps.output_filename_stem, datasets)
- else:
- assert False, ("Kind %s is not implemented. " % kind)
-
-
-if __name__ == "__main__":
- tf.app.run()
diff --git a/research/lfads/synth_data/generate_chaotic_rnn_data.py b/research/lfads/synth_data/generate_chaotic_rnn_data.py
deleted file mode 100644
index 3de72e58b2208eacf508e6048d3fb6d66bf2e167..0000000000000000000000000000000000000000
--- a/research/lfads/synth_data/generate_chaotic_rnn_data.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-from __future__ import print_function
-
-import h5py
-import numpy as np
-import os
-import tensorflow as tf # used for flags here
-
-from utils import write_datasets
-from synthetic_data_utils import add_alignment_projections, generate_data
-from synthetic_data_utils import generate_rnn, get_train_n_valid_inds
-from synthetic_data_utils import nparray_and_transpose
-from synthetic_data_utils import spikify_data, gaussify_data, split_list_by_inds
-import matplotlib
-import matplotlib.pyplot as plt
-import scipy.signal
-
-matplotlib.rcParams['image.interpolation'] = 'nearest'
-DATA_DIR = "rnn_synth_data_v1.0"
-
-flags = tf.app.flags
-flags.DEFINE_string("save_dir", "/tmp/" + DATA_DIR + "/",
- "Directory for saving data.")
-flags.DEFINE_string("datafile_name", "thits_data",
- "Name of data file for input case.")
-flags.DEFINE_string("noise_type", "poisson", "Noise type for data.")
-flags.DEFINE_integer("synth_data_seed", 5, "Random seed for RNN generation.")
-flags.DEFINE_float("T", 1.0, "Time in seconds to generate.")
-flags.DEFINE_integer("C", 100, "Number of conditions")
-flags.DEFINE_integer("N", 50, "Number of units for the RNN")
-flags.DEFINE_integer("S", 50, "Number of sampled units from RNN")
-flags.DEFINE_integer("npcs", 10, "Number of PCS for multi-session case.")
-flags.DEFINE_float("train_percentage", 4.0/5.0,
- "Percentage of train vs validation trials")
-flags.DEFINE_integer("nreplications", 40,
- "Number of noise replications of the same underlying rates.")
-flags.DEFINE_float("g", 1.5, "Complexity of dynamics")
-flags.DEFINE_float("x0_std", 1.0,
- "Volume from which to pull initial conditions (affects diversity of dynamics.")
-flags.DEFINE_float("tau", 0.025, "Time constant of RNN")
-flags.DEFINE_float("dt", 0.010, "Time bin")
-flags.DEFINE_float("input_magnitude", 20.0,
- "For the input case, what is the value of the input?")
-flags.DEFINE_float("max_firing_rate", 30.0, "Map 1.0 of RNN to a spikes per second")
-FLAGS = flags.FLAGS
-
-
-# Note that with N small (as it is 50 above), the finite size effects
-# will have pretty dramatic effects on the dynamics of the random RNN.
-# If you want more complex dynamics, you'll have to run the script a
-# lot, or increase N (or g).
-
-# Getting hard vs. easy data can be a little stochastic, so we set the seed.
-
-# Pull out some commonly used parameters.
-# These are user parameters (configuration)
-rng = np.random.RandomState(seed=FLAGS.synth_data_seed)
-T = FLAGS.T
-C = FLAGS.C
-N = FLAGS.N
-S = FLAGS.S
-input_magnitude = FLAGS.input_magnitude
-nreplications = FLAGS.nreplications
-E = nreplications * C # total number of trials
-# S is the number of measurements in each dataset, with each
-# dataset having a different set of observations.
-ndatasets = N // S  # integer division: ok if rounded down
-train_percentage = FLAGS.train_percentage
-ntime_steps = int(T / FLAGS.dt)
-# End of user parameters
-
-rnn = generate_rnn(rng, N, FLAGS.g, FLAGS.tau, FLAGS.dt, FLAGS.max_firing_rate)
-
-# Check to make sure the RNN is the one we used in the paper.
-if N == 50:
- assert abs(rnn['W'][0,0] - 0.06239899) < 1e-8, 'Error in random seed?'
- rem_check = nreplications * train_percentage
- assert abs(rem_check - int(rem_check)) < 1e-8, \
- 'Train percentage * nreplications should be integral number.'
-
-
-# Initial condition generation, and condition label generation. This
-# happens outside of the dataset loop, so that all datasets have the
-# same conditions, which is similar to a neurophys setup.
-condition_number = 0
-x0s = []
-condition_labels = []
-for c in range(C):
- x0 = FLAGS.x0_std * rng.randn(N, 1)
- x0s.append(np.tile(x0, nreplications)) # replicate x0 nreplications times
- # replicate the condition label nreplications times
- for ns in range(nreplications):
- condition_labels.append(condition_number)
- condition_number += 1
-x0s = np.concatenate(x0s, axis=1)
-
-# Containers for storing data across datasets.
-datasets = {}
-for n in range(ndatasets):
- print(n+1, " of ", ndatasets)
-
-  # First generate all firing rates. In the next loop, generate all
-  # replications; this allows the random state for rate generation to be
-  # independent of n_replications.
- dataset_name = 'dataset_N' + str(N) + '_S' + str(S)
- if S < N:
- dataset_name += '_n' + str(n+1)
-
- # Sample neuron subsets. The assumption is the PC axes of the RNN
- # are not unit aligned, so sampling units is adequate to sample all
- # the high-variance PCs.
- P_sxn = np.eye(S,N)
- for m in range(n):
- P_sxn = np.roll(P_sxn, S, axis=1)
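-  # With S < N this selects a different block of units per dataset: the
-  # n-th dataset observes units n*S .. (n+1)*S - 1 (modulo N).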
-
- if input_magnitude > 0.0:
- # time of "hits" randomly chosen between [1/4 and 3/4] of total time
- input_times = rng.choice(int(ntime_steps/2), size=[E]) + int(ntime_steps/4)
- else:
- input_times = None
-
- rates, x0s, inputs = \
- generate_data(rnn, T=T, E=E, x0s=x0s, P_sxn=P_sxn,
- input_magnitude=input_magnitude,
- input_times=input_times)
-
- if FLAGS.noise_type == "poisson":
- noisy_data = spikify_data(rates, rng, rnn['dt'], rnn['max_firing_rate'])
- elif FLAGS.noise_type == "gaussian":
- noisy_data = gaussify_data(rates, rng, rnn['dt'], rnn['max_firing_rate'])
- else:
- raise ValueError("Only noise types supported are poisson or gaussian")
-
- # split into train and validation sets
- train_inds, valid_inds = get_train_n_valid_inds(E, train_percentage,
- nreplications)
-
- # Split the data, inputs, labels and times into train vs. validation.
- rates_train, rates_valid = \
- split_list_by_inds(rates, train_inds, valid_inds)
- noisy_data_train, noisy_data_valid = \
- split_list_by_inds(noisy_data, train_inds, valid_inds)
- input_train, inputs_valid = \
- split_list_by_inds(inputs, train_inds, valid_inds)
- condition_labels_train, condition_labels_valid = \
- split_list_by_inds(condition_labels, train_inds, valid_inds)
- input_times_train, input_times_valid = \
- split_list_by_inds(input_times, train_inds, valid_inds)
-
- # Turn rates, noisy_data, and input into numpy arrays.
- rates_train = nparray_and_transpose(rates_train)
- rates_valid = nparray_and_transpose(rates_valid)
- noisy_data_train = nparray_and_transpose(noisy_data_train)
- noisy_data_valid = nparray_and_transpose(noisy_data_valid)
- input_train = nparray_and_transpose(input_train)
- inputs_valid = nparray_and_transpose(inputs_valid)
-
-  # Note that we put these 'truth' rates and inputs into this
-  # structure, but the only data used by LFADS are the noisy
-  # data, e.g. spike trains. The rest is either for printing or posterity.
- data = {'train_truth': rates_train,
- 'valid_truth': rates_valid,
- 'input_train_truth' : input_train,
- 'input_valid_truth' : inputs_valid,
- 'train_data' : noisy_data_train,
- 'valid_data' : noisy_data_valid,
- 'train_percentage' : train_percentage,
- 'nreplications' : nreplications,
- 'dt' : rnn['dt'],
- 'input_magnitude' : input_magnitude,
- 'input_times_train' : input_times_train,
- 'input_times_valid' : input_times_valid,
- 'P_sxn' : P_sxn,
- 'condition_labels_train' : condition_labels_train,
- 'condition_labels_valid' : condition_labels_valid,
- 'conversion_factor': 1.0 / rnn['conversion_factor']}
- datasets[dataset_name] = data
-
-if S < N:
- # Note that this isn't necessary for this synthetic example, but
- # it's useful to see how the input factor matrices were initialized
- # for actual neurophysiology data.
- datasets = add_alignment_projections(datasets, npcs=FLAGS.npcs)
-
-# Write out the datasets.
-write_datasets(FLAGS.save_dir, FLAGS.datafile_name, datasets)
diff --git a/research/lfads/synth_data/generate_itb_data.py b/research/lfads/synth_data/generate_itb_data.py
deleted file mode 100644
index 66bc45d02e962915eb4be09d41da3162763ad40c..0000000000000000000000000000000000000000
--- a/research/lfads/synth_data/generate_itb_data.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-from __future__ import print_function
-
-import h5py
-import numpy as np
-import os
-from six.moves import xrange
-import tensorflow as tf
-
-from utils import write_datasets
-from synthetic_data_utils import normalize_rates
-from synthetic_data_utils import get_train_n_valid_inds, nparray_and_transpose
-from synthetic_data_utils import spikify_data, split_list_by_inds
-
-DATA_DIR = "rnn_synth_data_v1.0"
-
-flags = tf.app.flags
-flags.DEFINE_string("save_dir", "/tmp/" + DATA_DIR + "/",
- "Directory for saving data.")
-flags.DEFINE_string("datafile_name", "itb_rnn",
- "Name of data file for input case.")
-flags.DEFINE_integer("synth_data_seed", 5, "Random seed for RNN generation.")
-flags.DEFINE_float("T", 1.0, "Time in seconds to generate.")
-flags.DEFINE_integer("C", 800, "Number of conditions")
-flags.DEFINE_integer("N", 50, "Number of units for the RNN")
-flags.DEFINE_float("train_percentage", 4.0/5.0,
- "Percentage of train vs validation trials")
-flags.DEFINE_integer("nreplications", 5,
- "Number of spikifications of the same underlying rates.")
-flags.DEFINE_float("tau", 0.025, "Time constant of RNN")
-flags.DEFINE_float("dt", 0.010, "Time bin")
-flags.DEFINE_float("max_firing_rate", 30.0,
- "Map 1.0 of RNN to a spikes per second")
-flags.DEFINE_float("u_std", 0.25,
- "Std dev of input to integration to bound model")
-flags.DEFINE_string("checkpoint_path", "SAMPLE_CHECKPOINT",
- """Path to directory with checkpoints of model
- trained on integration to bound task. Currently this
- is a placeholder which tells the code to grab the
- checkpoint that is provided with the code
- (in /trained_itb/..). If you have your own checkpoint
- you would like to restore, you would point it to
- that path.""")
-FLAGS = flags.FLAGS
-
-
-class IntegrationToBoundModel:
- def __init__(self, N):
- scale = 0.8 / float(N**0.5)
- self.N = N
- self.Wh_nxn = tf.Variable(tf.random_normal([N, N], stddev=scale))
- self.b_1xn = tf.Variable(tf.zeros([1, N]))
- self.Bu_1xn = tf.Variable(tf.zeros([1, N]))
- self.Wro_nxo = tf.Variable(tf.random_normal([N, 1], stddev=scale))
- self.bro_o = tf.Variable(tf.zeros([1]))
-
- def call(self, h_tm1_bxn, u_bx1):
- act_t_bxn = tf.matmul(h_tm1_bxn, self.Wh_nxn) + self.b_1xn + u_bx1 * self.Bu_1xn
- h_t_bxn = tf.nn.tanh(act_t_bxn)
- z_t = tf.nn.xw_plus_b(h_t_bxn, self.Wro_nxo, self.bro_o)
- return z_t, h_t_bxn
-
-def get_data_batch(batch_size, T, rng, u_std):
- u_bxt = rng.randn(batch_size, T) * u_std
- running_sum_b = np.zeros([batch_size])
- labels_bxt = np.zeros([batch_size, T])
- for t in xrange(T):
- running_sum_b += u_bxt[:, t]
- labels_bxt[:, t] += running_sum_b
- labels_bxt = np.clip(labels_bxt, -1, 1)
- return u_bxt, labels_bxt
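-
-# For example (a sketch): u, y = get_data_batch(2, 100, rng, 0.25) yields
-# inputs u of shape (2, 100) and targets y, the running sum of u along
-# time, clipped to [-1, 1].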
-
-
-rng = np.random.RandomState(seed=FLAGS.synth_data_seed)
-u_rng = np.random.RandomState(seed=FLAGS.synth_data_seed+1)
-T = FLAGS.T
-C = FLAGS.C
-N = FLAGS.N # must be same N as in trained model (provided example is N = 50)
-nreplications = FLAGS.nreplications
-E = nreplications * C # total number of trials
-train_percentage = FLAGS.train_percentage
-ntimesteps = int(T / FLAGS.dt)
-batch_size = 1 # gives one example per trial
-
-model = IntegrationToBoundModel(N)
-inputs_ph_t = [tf.placeholder(tf.float32,
- shape=[None, 1]) for _ in range(ntimesteps)]
-state = tf.zeros([batch_size, N])
-saver = tf.train.Saver()
-
-P_nxn = rng.randn(N,N) / np.sqrt(N) # random projections
-
-# unroll RNN for T timesteps
-outputs_t = []
-states_t = []
-
-for inp in inputs_ph_t:
- output, state = model.call(state, inp)
- outputs_t.append(output)
- states_t.append(state)
-
-with tf.Session() as sess:
- # restore the latest model ckpt
- if FLAGS.checkpoint_path == "SAMPLE_CHECKPOINT":
- dir_path = os.path.dirname(os.path.realpath(__file__))
- model_checkpoint_path = os.path.join(dir_path, "trained_itb/model-65000")
- else:
- model_checkpoint_path = FLAGS.checkpoint_path
-  try:
-    saver.restore(sess, model_checkpoint_path)
-    print('Model restored from', model_checkpoint_path)
-  except Exception:
-    assert False, ("No checkpoints to restore from, is the path %s correct?"
-                   % model_checkpoint_path)
-
- # generate data for trials
- data_e = []
- u_e = []
- outs_e = []
- for c in range(C):
- u_1xt, outs_1xt = get_data_batch(batch_size, ntimesteps, u_rng, FLAGS.u_std)
-
- feed_dict = {}
- for t in xrange(ntimesteps):
- feed_dict[inputs_ph_t[t]] = np.reshape(u_1xt[:,t], (batch_size,-1))
-
- states_t_bxn, outputs_t_bxn = sess.run([states_t, outputs_t],
- feed_dict=feed_dict)
- states_nxt = np.transpose(np.squeeze(np.asarray(states_t_bxn)))
- outputs_t_bxn = np.squeeze(np.asarray(outputs_t_bxn))
- r_sxt = np.dot(P_nxn, states_nxt)
-
- for s in xrange(nreplications):
- data_e.append(r_sxt)
- u_e.append(u_1xt)
- outs_e.append(outputs_t_bxn)
-
- truth_data_e = normalize_rates(data_e, E, N)
-
-spiking_data_e = spikify_data(truth_data_e, rng, dt=FLAGS.dt,
- max_firing_rate=FLAGS.max_firing_rate)
-train_inds, valid_inds = get_train_n_valid_inds(E, train_percentage,
- nreplications)
-
-data_train_truth, data_valid_truth = split_list_by_inds(truth_data_e,
- train_inds,
- valid_inds)
-data_train_spiking, data_valid_spiking = split_list_by_inds(spiking_data_e,
- train_inds,
- valid_inds)
-
-data_train_truth = nparray_and_transpose(data_train_truth)
-data_valid_truth = nparray_and_transpose(data_valid_truth)
-data_train_spiking = nparray_and_transpose(data_train_spiking)
-data_valid_spiking = nparray_and_transpose(data_valid_spiking)
-
-# save down the inputs used to generate this data
-train_inputs_u, valid_inputs_u = split_list_by_inds(u_e,
- train_inds,
- valid_inds)
-train_inputs_u = nparray_and_transpose(train_inputs_u)
-valid_inputs_u = nparray_and_transpose(valid_inputs_u)
-
-# save down the network outputs (may be useful later)
-train_outputs_u, valid_outputs_u = split_list_by_inds(outs_e,
- train_inds,
- valid_inds)
-train_outputs_u = np.array(train_outputs_u)
-valid_outputs_u = np.array(valid_outputs_u)
-
-
-data = { 'train_truth': data_train_truth,
- 'valid_truth': data_valid_truth,
- 'train_data' : data_train_spiking,
- 'valid_data' : data_valid_spiking,
- 'train_percentage' : train_percentage,
- 'nreplications' : nreplications,
- 'dt' : FLAGS.dt,
- 'u_std' : FLAGS.u_std,
- 'max_firing_rate': FLAGS.max_firing_rate,
- 'train_inputs_u': train_inputs_u,
- 'valid_inputs_u': valid_inputs_u,
- 'train_outputs_u': train_outputs_u,
- 'valid_outputs_u': valid_outputs_u,
- 'conversion_factor' : FLAGS.max_firing_rate/(1.0/FLAGS.dt) }
-
-# just one dataset here
-datasets = {}
-dataset_name = 'dataset_N' + str(N)
-datasets[dataset_name] = data
-
-# write out the dataset
-write_datasets(FLAGS.save_dir, FLAGS.datafile_name, datasets)
-print('Saved to ', os.path.join(FLAGS.save_dir,
-                                FLAGS.datafile_name + '_' + dataset_name))
diff --git a/research/lfads/synth_data/generate_labeled_rnn_data.py b/research/lfads/synth_data/generate_labeled_rnn_data.py
deleted file mode 100644
index 0695585486534428c77e328e7ee1de755292d6c0..0000000000000000000000000000000000000000
--- a/research/lfads/synth_data/generate_labeled_rnn_data.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-from __future__ import print_function
-
-import os
-import h5py
-import numpy as np
-from six.moves import xrange
-
-from synthetic_data_utils import generate_data, generate_rnn
-from synthetic_data_utils import get_train_n_valid_inds
-from synthetic_data_utils import nparray_and_transpose
-from synthetic_data_utils import spikify_data, split_list_by_inds
-import tensorflow as tf
-from utils import write_datasets
-
-DATA_DIR = "rnn_synth_data_v1.0"
-
-flags = tf.app.flags
-flags.DEFINE_string("save_dir", "/tmp/" + DATA_DIR + "/",
- "Directory for saving data.")
-flags.DEFINE_string("datafile_name", "conditioned_rnn_data",
- "Name of data file for input case.")
-flags.DEFINE_integer("synth_data_seed", 5, "Random seed for RNN generation.")
-flags.DEFINE_float("T", 1.0, "Time in seconds to generate.")
-flags.DEFINE_integer("C", 400, "Number of conditions")
-flags.DEFINE_integer("N", 50, "Number of units for the RNN")
-flags.DEFINE_float("train_percentage", 4.0/5.0,
- "Percentage of train vs validation trials")
-flags.DEFINE_integer("nreplications", 10,
- "Number of spikifications of the same underlying rates.")
-flags.DEFINE_float("g", 1.5, "Complexity of dynamics")
-flags.DEFINE_float("x0_std", 1.0,
-                   "Volume from which to pull initial conditions (affects diversity of dynamics).")
-flags.DEFINE_float("tau", 0.025, "Time constant of RNN")
-flags.DEFINE_float("dt", 0.010, "Time bin")
-flags.DEFINE_float("max_firing_rate", 30.0, "Map 1.0 of RNN to a spikes per second")
-FLAGS = flags.FLAGS
-
-rng = np.random.RandomState(seed=FLAGS.synth_data_seed)
-rnn_rngs = [np.random.RandomState(seed=FLAGS.synth_data_seed+1),
- np.random.RandomState(seed=FLAGS.synth_data_seed+2)]
-T = FLAGS.T
-C = FLAGS.C
-N = FLAGS.N
-nreplications = FLAGS.nreplications
-E = nreplications * C
-train_percentage = FLAGS.train_percentage
-ntimesteps = int(T / FLAGS.dt)
-
-rnn_a = generate_rnn(rnn_rngs[0], N, FLAGS.g, FLAGS.tau, FLAGS.dt,
- FLAGS.max_firing_rate)
-rnn_b = generate_rnn(rnn_rngs[1], N, FLAGS.g, FLAGS.tau, FLAGS.dt,
- FLAGS.max_firing_rate)
-rnns = [rnn_a, rnn_b]
-
-# pick which RNN is used on each trial
-rnn_to_use = rng.randint(2, size=E)
-ext_input = np.repeat(np.expand_dims(rnn_to_use, axis=1), ntimesteps, axis=1)
-ext_input = np.expand_dims(ext_input, axis=2) # these are "a's" in the paper
-
-x0s = []
-condition_labels = []
-condition_number = 0
-for c in range(C):
- x0 = FLAGS.x0_std * rng.randn(N, 1)
- x0s.append(np.tile(x0, nreplications))
- for ns in range(nreplications):
- condition_labels.append(condition_number)
- condition_number += 1
-x0s = np.concatenate(x0s, axis=1)
-
-P_nxn = rng.randn(N, N) / np.sqrt(N)
-
-# generate trials for both RNNs
-rates_a, x0s_a, _ = generate_data(rnn_a, T=T, E=E, x0s=x0s, P_sxn=P_nxn,
- input_magnitude=0.0, input_times=None)
-spikes_a = spikify_data(rates_a, rng, rnn_a['dt'], rnn_a['max_firing_rate'])
-
-rates_b, x0s_b, _ = generate_data(rnn_b, T=T, E=E, x0s=x0s, P_sxn=P_nxn,
- input_magnitude=0.0, input_times=None)
-spikes_b = spikify_data(rates_b, rng, rnn_b['dt'], rnn_b['max_firing_rate'])
-
-# not the best way to do this but E is small enough
-rates = []
-spikes = []
-for trial in xrange(E):
- if rnn_to_use[trial] == 0:
- rates.append(rates_a[trial])
- spikes.append(spikes_a[trial])
- else:
- rates.append(rates_b[trial])
- spikes.append(spikes_b[trial])
-
-# split into train and validation sets
-train_inds, valid_inds = get_train_n_valid_inds(E, train_percentage,
- nreplications)
-
-rates_train, rates_valid = split_list_by_inds(rates, train_inds, valid_inds)
-spikes_train, spikes_valid = split_list_by_inds(spikes, train_inds, valid_inds)
-condition_labels_train, condition_labels_valid = split_list_by_inds(
- condition_labels, train_inds, valid_inds)
-ext_input_train, ext_input_valid = split_list_by_inds(
- ext_input, train_inds, valid_inds)
-
-rates_train = nparray_and_transpose(rates_train)
-rates_valid = nparray_and_transpose(rates_valid)
-spikes_train = nparray_and_transpose(spikes_train)
-spikes_valid = nparray_and_transpose(spikes_valid)
-
-# add train_ext_input and valid_ext input
-data = {'train_truth': rates_train,
- 'valid_truth': rates_valid,
- 'train_data' : spikes_train,
- 'valid_data' : spikes_valid,
- 'train_ext_input' : np.array(ext_input_train),
- 'valid_ext_input': np.array(ext_input_valid),
- 'train_percentage' : train_percentage,
- 'nreplications' : nreplications,
- 'dt' : FLAGS.dt,
- 'P_sxn' : P_nxn,
- 'condition_labels_train' : condition_labels_train,
- 'condition_labels_valid' : condition_labels_valid,
- 'conversion_factor': 1.0 / rnn_a['conversion_factor']}
-
-# just one dataset here
-datasets = {}
-dataset_name = 'dataset_N' + str(N)
-datasets[dataset_name] = data
-
-# write out the dataset
-write_datasets(FLAGS.save_dir, FLAGS.datafile_name, datasets)
-print ('Saved to ', os.path.join(FLAGS.save_dir,
- FLAGS.datafile_name + '_' + dataset_name))
diff --git a/research/lfads/synth_data/run_generate_synth_data.sh b/research/lfads/synth_data/run_generate_synth_data.sh
deleted file mode 100755
index 9ebc8ce2e5eec1e21fd839db18f247b38ebfde38..0000000000000000000000000000000000000000
--- a/research/lfads/synth_data/run_generate_synth_data.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-
-SYNTH_PATH=/tmp/rnn_synth_data_v1.0/
-
-echo "Generating chaotic rnn data with no input pulses (g=1.5) with spiking noise"
-python generate_chaotic_rnn_data.py --save_dir=$SYNTH_PATH --datafile_name=chaotic_rnn_no_inputs --synth_data_seed=5 --T=1.0 --C=400 --N=50 --S=50 --train_percentage=0.8 --nreplications=10 --g=1.5 --x0_std=1.0 --tau=0.025 --dt=0.01 --input_magnitude=0.0 --max_firing_rate=30.0 --noise_type='poisson'
-
-echo "Generating chaotic rnn data with no input pulses (g=1.5) with Gaussian noise"
-python generate_chaotic_rnn_data.py --save_dir=$SYNTH_PATH --datafile_name=gaussian_chaotic_rnn_no_inputs --synth_data_seed=5 --T=1.0 --C=400 --N=50 --S=50 --train_percentage=0.8 --nreplications=10 --g=1.5 --x0_std=1.0 --tau=0.025 --dt=0.01 --input_magnitude=0.0 --max_firing_rate=30.0 --noise_type='gaussian'
-
-echo "Generating chaotic rnn data with input pulses (g=1.5)"
-python generate_chaotic_rnn_data.py --save_dir=$SYNTH_PATH --datafile_name=chaotic_rnn_inputs_g1p5 --synth_data_seed=5 --T=1.0 --C=400 --N=50 --S=50 --train_percentage=0.8 --nreplications=10 --g=1.5 --x0_std=1.0 --tau=0.025 --dt=0.01 --input_magnitude=20.0 --max_firing_rate=30.0 --noise_type='poisson'
-
-echo "Generating chaotic rnn data with input pulses (g=2.5)"
-python generate_chaotic_rnn_data.py --save_dir=$SYNTH_PATH --datafile_name=chaotic_rnn_inputs_g2p5 --synth_data_seed=5 --T=1.0 --C=400 --N=50 --S=50 --train_percentage=0.8 --nreplications=10 --g=2.5 --x0_std=1.0 --tau=0.025 --dt=0.01 --input_magnitude=20.0 --max_firing_rate=30.0 --noise_type='poisson'
-
-echo "Generate the multi-session RNN data (no multi-session synth example in paper)"
-python generate_chaotic_rnn_data.py --save_dir=$SYNTH_PATH --datafile_name=chaotic_rnn_multisession --synth_data_seed=5 --T=1.0 --C=150 --N=100 --S=20 --npcs=10 --train_percentage=0.8 --nreplications=40 --g=1.5 --x0_std=1.0 --tau=0.025 --dt=0.01 --input_magnitude=0.0 --max_firing_rate=30.0 --noise_type='poisson'
-
-echo "Generating Integration-to-bound RNN data"
-python generate_itb_data.py --save_dir=$SYNTH_PATH --datafile_name=itb_rnn --u_std=0.25 --checkpoint_path=SAMPLE_CHECKPOINT --synth_data_seed=5 --T=1.0 --C=800 --N=50 --train_percentage=0.8 --nreplications=5 --tau=0.025 --dt=0.01 --max_firing_rate=30.0
-
-echo "Generating chaotic rnn data with external input labels (no external input labels example in paper)"
-python generate_labeled_rnn_data.py --save_dir=$SYNTH_PATH --datafile_name=chaotic_rnns_labeled --synth_data_seed=5 --T=1.0 --C=400 --N=50 --train_percentage=0.8 --nreplications=10 --g=1.5 --x0_std=1.0 --tau=0.025 --dt=0.01 --max_firing_rate=30.0
diff --git a/research/lfads/synth_data/synthetic_data_utils.py b/research/lfads/synth_data/synthetic_data_utils.py
deleted file mode 100644
index cc264ee49fdc7fbb53f17d52ca4ced64addefb27..0000000000000000000000000000000000000000
--- a/research/lfads/synth_data/synthetic_data_utils.py
+++ /dev/null
@@ -1,348 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-from __future__ import print_function
-
-import h5py
-import numpy as np
-import os
-
-from utils import write_datasets
-import matplotlib
-import matplotlib.pyplot as plt
-import scipy.signal
-
-
-def generate_rnn(rng, N, g, tau, dt, max_firing_rate):
-  """Create a (vanilla) RNN with a bunch of hyperparameters for generating
-  chaotic data.
- Args:
- rng: numpy random number generator
- N: number of hidden units
- g: scaling of recurrent weight matrix in g W, with W ~ N(0,1/N)
- tau: time scale of individual unit dynamics
- dt: time step for equation updates
-    max_firing_rate: how to rescale the (-1,1) firing rates
- Returns:
- the dictionary of these parameters, plus some others.
-"""
- rnn = {}
- rnn['N'] = N
- rnn['W'] = rng.randn(N,N)/np.sqrt(N)
- rnn['Bin'] = rng.randn(N)/np.sqrt(1.0)
- rnn['Bin2'] = rng.randn(N)/np.sqrt(1.0)
- rnn['b'] = np.zeros(N)
- rnn['g'] = g
- rnn['tau'] = tau
- rnn['dt'] = dt
- rnn['max_firing_rate'] = max_firing_rate
- mfr = rnn['max_firing_rate'] # spikes / sec
- nbins_per_sec = 1.0/rnn['dt'] # bins / sec
- # Used for plotting in LFADS
- rnn['conversion_factor'] = mfr / nbins_per_sec # spikes / bin
- return rnn
-
-
-def generate_data(rnn, T, E, x0s=None, P_sxn=None, input_magnitude=0.0,
- input_times=None):
-  """Generates data from a randomly initialized RNN.
-  Args:
-    rnn: the rnn
-    T: Time in seconds to run (divided by rnn['dt'] to get steps, rounded down).
-    E: total number of examples
-    x0s: an N x E matrix of initial conditions, one column per example
-    P_sxn: an S x N projection matrix that subsamples the N units to S channels
-  Returns:
-    A 3-tuple of (data_e, x0s, inputs_e): a list of length E of SxT rate
-    matrices, the initial conditions, and the 1xT input used on each trial.
-  """
- N = rnn['N']
- def run_rnn(rnn, x0, ntime_steps, input_time=None):
- rs = np.zeros([N,ntime_steps])
- x_tm1 = x0
- r_tm1 = np.tanh(x0)
- tau = rnn['tau']
- dt = rnn['dt']
- alpha = (1.0-dt/tau)
- W = dt/tau*rnn['W']*rnn['g']
- Bin = dt/tau*rnn['Bin']
- Bin2 = dt/tau*rnn['Bin2']
- b = dt/tau*rnn['b']
-
- us = np.zeros([1, ntime_steps])
- for t in range(ntime_steps):
- x_t = alpha*x_tm1 + np.dot(W,r_tm1) + b
- if input_time is not None and t == input_time:
- us[0,t] = input_magnitude
-        x_t += Bin * us[0,t]  # inject the input pulse through the weights Bin
- r_t = np.tanh(x_t)
- x_tm1 = x_t
- r_tm1 = r_t
- rs[:,t] = r_t
- return rs, us
-
- if P_sxn is None:
- P_sxn = np.eye(N)
- ntime_steps = int(T / rnn['dt'])
- data_e = []
- inputs_e = []
- for e in range(E):
- input_time = input_times[e] if input_times is not None else None
- r_nxt, u_uxt = run_rnn(rnn, x0s[:,e], ntime_steps, input_time)
- r_sxt = np.dot(P_sxn, r_nxt)
- inputs_e.append(u_uxt)
- data_e.append(r_sxt)
-
- S = P_sxn.shape[0]
- data_e = normalize_rates(data_e, E, S)
-
- return data_e, x0s, inputs_e
-
-
-def normalize_rates(data_e, E, S):
- # Normalization, made more complex because of the P matrices.
- # Normalize by min and max in each channel. This normalization will
- # cause offset differences between identical rnn runs, but different
-  # input hit times.
- for e in range(E):
- r_sxt = data_e[e]
- for i in range(S):
- rmin = np.min(r_sxt[i,:])
- rmax = np.max(r_sxt[i,:])
-      assert rmax - rmin != 0, 'Cannot normalize a constant (flat) channel.'
- r_sxt[i,:] = (r_sxt[i,:] - rmin)/(rmax-rmin)
- data_e[e] = r_sxt
- return data_e
-
-
-def spikify_data(data_e, rng, dt=1.0, max_firing_rate=100):
- """ Apply spikes to a continuous dataset whose values are between 0.0 and 1.0
- Args:
- data_e: nexamples length list of NxT trials
- dt: how often the data are sampled
- max_firing_rate: the firing rate that is associated with a value of 1.0
- Returns:
-    spikified_e: a list of length E of the data represented as spike counts,
-      sampled from the underlying Poisson process.
- """
-
- E = len(data_e)
- spikes_e = []
- for e in range(E):
- data = data_e[e]
- N,T = data.shape
- data_s = np.zeros([N,T]).astype(np.int)
- for n in range(N):
- f = data[n,:]
- s = rng.poisson(f*max_firing_rate*dt, size=T)
- data_s[n,:] = s
- spikes_e.append(data_s)
-
- return spikes_e
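-
-# Scale check (illustrative, with assumed parameter values): with dt=0.01 and
-# max_firing_rate=30.0, a channel whose normalized rate is 1.0 yields Poisson
-# counts with mean 30.0 * 0.01 = 0.3 spikes per bin.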
-
-
-def gaussify_data(data_e, rng, dt=1.0, max_firing_rate=100):
- """ Apply gaussian noise to a continuous dataset whose values are between
- 0.0 and 1.0
-
- Args:
- data_e: nexamples length list of NxT trials
- dt: how often the data are sampled
- max_firing_rate: the firing rate that is associated with a value of 1.0
- Returns:
-    gauss_e: a list of length E of the data with noise.
- """
-
- E = len(data_e)
- mfr = max_firing_rate
- gauss_e = []
- for e in range(E):
- data = data_e[e]
- N,T = data.shape
-    noisy_data = data * mfr + rng.randn(N,T) * (5.0*mfr) * np.sqrt(dt)
- gauss_e.append(noisy_data)
-
- return gauss_e
-
-
-
-def get_train_n_valid_inds(num_trials, train_fraction, nreplications):
- """Split the numbers between 0 and num_trials-1 into two portions for
- training and validation, based on the train fraction.
- Args:
- num_trials: the number of trials
- train_fraction: (e.g. .80)
- nreplications: the number of spiking trials per initial condition
- Returns:
- a 2-tuple of two lists: the training indices and validation indices
- """
- train_inds = []
- valid_inds = []
- for i in range(num_trials):
- # This line divides up the trials so that within one initial condition,
- # the randomness of spikifying the condition is shared among both
- # training and validation data splits.
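-    # E.g., with nreplications=10 and train_fraction=0.8, replications 0-7 of
-    # each initial condition land in train_inds and replications 8-9 in
-    # valid_inds.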
- if (i % nreplications)+1 > train_fraction * nreplications:
- valid_inds.append(i)
- else:
- train_inds.append(i)
-
- return train_inds, valid_inds
-
-
-def split_list_by_inds(data, inds1, inds2):
- """Take the data, a list, and split it up based on the indices in inds1 and
- inds2.
- Args:
- data: the list of data to split
-    inds1: the first list of indices
-    inds2: the second list of indices
- Returns: a 2-tuple of two lists.
- """
- if data is None or len(data) == 0:
- return [], []
- else:
- dout1 = [data[i] for i in inds1]
- dout2 = [data[i] for i in inds2]
- return dout1, dout2
-
-
-def nparray_and_transpose(data_a_b_c):
- """Convert the list of items in data to a numpy array, and transpose it
- Args:
-    data_a_b_c: a nested list of length a, with sublists of length b, each
-      holding items of length c.
- Returns:
- a numpy 3-tensor with dimensions a x c x b
-"""
- data_axbxc = np.array([datum_b_c for datum_b_c in data_a_b_c])
- data_axcxb = np.transpose(data_axbxc, axes=[0,2,1])
- return data_axcxb
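-
-# E.g., a list of E trials, each an N x T array, becomes an E x T x N tensor,
-# the batch x time x channel layout used when these datasets are saved.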
-
-
-def add_alignment_projections(datasets, npcs, ntime=None, nsamples=None):
- """Create a matrix that aligns the datasets a bit, under
- the assumption that each dataset is observing the same underlying dynamical
- system.
-
- Args:
- datasets: The dictionary of dataset structures.
- npcs: The number of pcs for each, basically like lfads factors.
- nsamples (optional): Number of samples to take for each dataset.
- ntime (optional): Number of time steps to take in each sample.
-
- Returns:
- The dataset structures, with the field alignment_matrix_cxf added.
-    This is a (num channels) x npcs matrix.
-"""
- nchannels_all = 0
- channel_idxs = {}
- conditions_all = {}
- nconditions_all = 0
- for name, dataset in datasets.items():
- cidxs = np.where(dataset['P_sxn'])[1] # non-zero entries in columns
- channel_idxs[name] = [cidxs[0], cidxs[-1]+1]
- nchannels_all += cidxs[-1]+1 - cidxs[0]
- conditions_all[name] = np.unique(dataset['condition_labels_train'])
-
- all_conditions_list = \
- np.unique(np.ndarray.flatten(np.array(conditions_all.values())))
- nconditions_all = all_conditions_list.shape[0]
-
- if ntime is None:
- ntime = dataset['train_data'].shape[1]
- if nsamples is None:
- nsamples = dataset['train_data'].shape[0]
-
- # In the data workup in the paper, Chethan did intra condition
- # averaging, so let's do that here.
- avg_data_all = {}
- for name, conditions in conditions_all.items():
- dataset = datasets[name]
- avg_data_all[name] = {}
- for cname in conditions:
- td_idxs = np.argwhere(np.array(dataset['condition_labels_train'])==cname)
- data = np.squeeze(dataset['train_data'][td_idxs,:,:], axis=1)
- avg_data = np.mean(data, axis=0)
- avg_data_all[name][cname] = avg_data
-
-  # Stack the per-condition averages into one channels x (ntime * nconditions) matrix.
- all_data_nxtc = np.zeros([nchannels_all, ntime * nconditions_all])
- for name, dataset in datasets.items():
- cidx_s = channel_idxs[name][0]
- cidx_f = channel_idxs[name][1]
- for cname in conditions_all[name]:
- cidxs = np.argwhere(all_conditions_list == cname)
- if cidxs.shape[0] > 0:
- cidx = cidxs[0][0]
- all_tidxs = np.arange(0, ntime+1) + cidx*ntime
- all_data_nxtc[cidx_s:cidx_f, all_tidxs[0]:all_tidxs[-1]] = \
- avg_data_all[name][cname].T
-
- # A bit of filtering. We don't care about spectral properties, or
- # filtering artifacts, simply correlate time steps a bit.
- filt_len = 6
- bc_filt = np.ones([filt_len])/float(filt_len)
- for c in range(nchannels_all):
- all_data_nxtc[c,:] = scipy.signal.filtfilt(bc_filt, [1.0], all_data_nxtc[c,:])
-
- # Compute the PCs.
- all_data_mean_nx1 = np.mean(all_data_nxtc, axis=1, keepdims=True)
- all_data_zm_nxtc = all_data_nxtc - all_data_mean_nx1
- corr_mat_nxn = np.dot(all_data_zm_nxtc, all_data_zm_nxtc.T)
- evals_n, evecs_nxn = np.linalg.eigh(corr_mat_nxn)
- sidxs = np.flipud(np.argsort(evals_n)) # sort such that 0th is highest
- evals_n = evals_n[sidxs]
- evecs_nxn = evecs_nxn[:,sidxs]
-
- # Project all the channels data onto the low-D PCA basis, where
- # low-d is the npcs parameter.
- all_data_pca_pxtc = np.dot(evecs_nxn[:, 0:npcs].T, all_data_zm_nxtc)
-
- # Now for each dataset, we regress the channel data onto the top
- # pcs, and this will be our alignment matrix for that dataset.
- # |B - A*W|^2
- for name, dataset in datasets.items():
- cidx_s = channel_idxs[name][0]
- cidx_f = channel_idxs[name][1]
- all_data_zm_chxtc = all_data_zm_nxtc[cidx_s:cidx_f,:] # ch for channel
- W_chxp, _, _, _ = \
- np.linalg.lstsq(all_data_zm_chxtc.T, all_data_pca_pxtc.T)
- dataset['alignment_matrix_cxf'] = W_chxp
- alignment_bias_cx1 = all_data_mean_nx1[cidx_s:cidx_f]
- dataset['alignment_bias_c'] = np.squeeze(alignment_bias_cx1, axis=1)
-
- do_debug_plot = False
- if do_debug_plot:
- pc_vecs = evecs_nxn[:,0:npcs]
- ntoplot = 400
-
- plt.figure()
- plt.plot(np.log10(evals_n), '-x')
- plt.figure()
- plt.subplot(311)
- plt.imshow(all_data_pca_pxtc)
- plt.colorbar()
-
- plt.subplot(312)
- plt.imshow(np.dot(W_chxp.T, all_data_zm_chxtc))
- plt.colorbar()
-
- plt.subplot(313)
- plt.imshow(np.dot(all_data_zm_chxtc.T, W_chxp).T - all_data_pca_pxtc)
- plt.colorbar()
-
- import pdb
- pdb.set_trace()
-
- return datasets
diff --git a/research/lfads/synth_data/trained_itb/model-65000.data-00000-of-00001 b/research/lfads/synth_data/trained_itb/model-65000.data-00000-of-00001
deleted file mode 100644
index 9459a2a1b72f56dc16b3eca210911f14081e7fd5..0000000000000000000000000000000000000000
Binary files a/research/lfads/synth_data/trained_itb/model-65000.data-00000-of-00001 and /dev/null differ
diff --git a/research/lfads/synth_data/trained_itb/model-65000.index b/research/lfads/synth_data/trained_itb/model-65000.index
deleted file mode 100644
index dd9c793acf8dc79e07833d1c0edc8a2fa86d806a..0000000000000000000000000000000000000000
Binary files a/research/lfads/synth_data/trained_itb/model-65000.index and /dev/null differ
diff --git a/research/lfads/synth_data/trained_itb/model-65000.meta b/research/lfads/synth_data/trained_itb/model-65000.meta
deleted file mode 100644
index 07bd2b9688eda16e329e7b08492151a65a88fb8a..0000000000000000000000000000000000000000
Binary files a/research/lfads/synth_data/trained_itb/model-65000.meta and /dev/null differ
diff --git a/research/lfads/utils.py b/research/lfads/utils.py
deleted file mode 100644
index e64825ffc1d423de1d9fe85bc1c00a19e5f4ad7e..0000000000000000000000000000000000000000
--- a/research/lfads/utils.py
+++ /dev/null
@@ -1,367 +0,0 @@
-# Copyright 2017 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# ==============================================================================
-from __future__ import print_function
-
-import os
-import h5py
-import json
-
-import numpy as np
-import tensorflow as tf
-
-
-def log_sum_exp(x_k):
- """Computes log \sum exp in a numerically stable way.
-  log ( sum_i exp(x_i) )
-    = log ( sum_i exp(x_i - m + m) ), with m = max(x_i)
-    = log ( sum_i exp(x_i - m) * exp(m) )
-    = log ( sum_i exp(x_i - m) ) + m
-
-  Args:
-    x_k: k-dimensional list of arguments to log_sum_exp.
-
- Returns:
- log_sum_exp of the arguments.
- """
- m = tf.reduce_max(x_k)
- x1_k = x_k - m
- u_k = tf.exp(x1_k)
- z = tf.reduce_sum(u_k)
- return tf.log(z) + m
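-
-# Sanity check (illustrative): for x_k = [1000., 1000.], log_sum_exp yields
-# 1000 + log(2), where the naive tf.log(tf.reduce_sum(tf.exp(x_k))) would
-# overflow to inf in float32.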
-
-
-def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
- normalized=False, name=None, collections=None):
- """Linear (affine) transformation, y = x W + b, for a variety of
- configurations.
-
- Args:
-    x: The input tensor to transform.
- out_size: The integer size of non-batch output dimension.
- do_bias (optional): Add a learnable bias vector to the operation.
- alpha (optional): A multiplicative scaling for the weight initialization
- of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
- identity_if_possible (optional): just return identity,
- if x.shape[1] == out_size.
- normalized (optional): Option to divide out by the norms of the rows of W.
- name (optional): The name prefix to add to variables.
- collections (optional): List of additional collections. (Placed in
- tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
-
- Returns:
- In the equation, y = x W + b, returns the tensorflow op that yields y.
- """
- in_size = int(x.get_shape()[1]) # from Dimension(10) -> 10
- wname = (name + "/W") if name else "/W"
-
- if identity_if_possible and in_size == out_size:
- # Sometimes linear layers are nothing more than size adapters.
- return tf.identity(x, name=(wname+'_ident'))
-
- W,b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
- normalized=normalized, name=name, collections=collections)
-
- if do_bias:
- return tf.matmul(x, W) + b
- else:
- return tf.matmul(x, W)
-
-
-def init_linear(in_size, out_size, do_bias=True, mat_init_value=None,
- bias_init_value=None, alpha=1.0, identity_if_possible=False,
- normalized=False, name=None, collections=None, trainable=True):
- """Linear (affine) transformation, y = x W + b, for a variety of
- configurations.
-
- Args:
-    in_size: The integer size of the non-batch input dimension. [(x),y]
- out_size: The integer size of non-batch output dimension. [x,(y)]
- do_bias (optional): Add a (learnable) bias vector to the operation,
- if false, b will be None
-    mat_init_value (optional): numpy constant for matrix initialization; if
-      None, initialize randomly using the parameters below.
- alpha (optional): A multiplicative scaling for the weight initialization
- of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
- identity_if_possible (optional): just return identity,
- if x.shape[1] == out_size.
- normalized (optional): Option to divide out by the norms of the rows of W.
- name (optional): The name prefix to add to variables.
- collections (optional): List of additional collections. (Placed in
- tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
-
- Returns:
- In the equation, y = x W + b, returns the pair (W, b).
- """
-
- if mat_init_value is not None and mat_init_value.shape != (in_size, out_size):
- raise ValueError(
- 'Provided mat_init_value must have shape [%d, %d].'%(in_size, out_size))
- if bias_init_value is not None and bias_init_value.shape != (1,out_size):
- raise ValueError(
- 'Provided bias_init_value must have shape [1,%d].'%(out_size,))
-
- if mat_init_value is None:
- stddev = alpha/np.sqrt(float(in_size))
- mat_init = tf.random_normal_initializer(0.0, stddev)
-
- wname = (name + "/W") if name else "/W"
-
- if identity_if_possible and in_size == out_size:
- return (tf.constant(np.eye(in_size).astype(np.float32)),
- tf.zeros(in_size))
-
- # Note the use of get_variable vs. tf.Variable. this is because get_variable
- # does not allow the initialization of the variable with a value.
- if normalized:
- w_collections = [tf.GraphKeys.GLOBAL_VARIABLES, "norm-variables"]
- if collections:
- w_collections += collections
- if mat_init_value is not None:
- w = tf.Variable(mat_init_value, name=wname, collections=w_collections,
- trainable=trainable)
- else:
- w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
- collections=w_collections, trainable=trainable)
- w = tf.nn.l2_normalize(w, dim=0) # x W, so xW_j = \sum_i x_bi W_ij
- else:
- w_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
- if collections:
- w_collections += collections
- if mat_init_value is not None:
- w = tf.Variable(mat_init_value, name=wname, collections=w_collections,
- trainable=trainable)
- else:
- w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
- collections=w_collections, trainable=trainable)
- b = None
- if do_bias:
- b_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
- if collections:
- b_collections += collections
- bname = (name + "/b") if name else "/b"
- if bias_init_value is None:
- b = tf.get_variable(bname, [1, out_size],
- initializer=tf.zeros_initializer(),
- collections=b_collections,
- trainable=trainable)
- else:
- b = tf.Variable(bias_init_value, name=bname,
- collections=b_collections,
- trainable=trainable)
-
- return (w, b)
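-
-# Minimal usage sketch (names and shapes are assumed, not from this file):
-#   W, b = init_linear(128, 64, name='readout')
-#   y_bx64 = tf.matmul(x_bx128, W) + b   # the same affine map linear() builds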
-
-
-def write_data(data_fname, data_dict, use_json=False, compression=None):
-  """Write data in HDF5 format.
-
-  Args:
-    data_fname: The filename of the file in which to write the data.
- data_dict: The dictionary of data to write. The keys are strings
- and the values are numpy arrays.
- use_json (optional): human readable format for simple items
- compression (optional): The compression to use for h5py (disabled by
- default because the library borks on scalars, otherwise try 'gzip').
- """
-
- dir_name = os.path.dirname(data_fname)
- if not os.path.exists(dir_name):
- os.makedirs(dir_name)
-
- if use_json:
- the_file = open(data_fname,'wb')
- json.dump(data_dict, the_file)
- the_file.close()
- else:
- try:
- with h5py.File(data_fname, 'w') as hf:
- for k, v in data_dict.items():
- clean_k = k.replace('/', '_')
-        if clean_k != k:
- print('Warning: saving variable with name: ', k, ' as ', clean_k)
- else:
- print('Saving variable with name: ', clean_k)
- hf.create_dataset(clean_k, data=v, compression=compression)
- except IOError:
-    print("Cannot open %s for writing." % data_fname)
- raise
-
-
-def read_data(data_fname):
- """ Read saved data in HDF5 format.
-
- Args:
- data_fname: The filename of the file from which to read the data.
- Returns:
- A dictionary whose keys will vary depending on dataset (but should
- always contain the keys 'train_data' and 'valid_data') and whose
- values are numpy arrays.
- """
-
- try:
- with h5py.File(data_fname, 'r') as hf:
- data_dict = {k: np.array(v) for k, v in hf.items()}
- return data_dict
- except IOError:
- print("Cannot open %s for reading." % data_fname)
- raise
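-
-# Round-trip sketch (hypothetical filename):
-#   write_data('/tmp/example.h5', {'train_data': np.zeros([4, 10, 3])})
-#   d = read_data('/tmp/example.h5')   # d['train_data'].shape == (4, 10, 3)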
-
-
-def write_datasets(data_path, data_fname_stem, dataset_dict, compression=None):
-  """Write datasets in HDF5 format.
-
-  This function assumes dataset_dict is a mapping (string -> data_dict).
-  It calls write_data for each data dictionary, post-fixing the data
-  filename with the key of the dataset.
-
- Args:
- data_path: The path to the save directory.
- data_fname_stem: The filename stem of the file in which to write the data.
- dataset_dict: The dictionary of datasets. The keys are strings
- and the values data dictionaries (str -> numpy arrays) associations.
- compression (optional): The compression to use for h5py (disabled by
- default because the library borks on scalars, otherwise try 'gzip').
- """
-
- full_name_stem = os.path.join(data_path, data_fname_stem)
- for s, data_dict in dataset_dict.items():
- write_data(full_name_stem + "_" + s, data_dict, compression=compression)
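-
-# E.g. (hypothetical names): write_datasets('/tmp/out', 'rnn', {'a': d1,
-# 'b': d2}) writes the files /tmp/out/rnn_a and /tmp/out/rnn_b.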
-
-
-def read_datasets(data_path, data_fname_stem):
-  """Read datasets in HDF5 format.
-
-  This function assumes the datasets were written by write_datasets: one
-  file per dataset, with the dataset key post-fixed to the data filename
-  stem. It calls read_data on each matching file.
-
- Args:
- data_path: The path to the save directory.
- data_fname_stem: The filename stem of the file in which to write the data.
- """
-
- dataset_dict = {}
- fnames = os.listdir(data_path)
-
- print ('loading data from ' + data_path + ' with stem ' + data_fname_stem)
- for fname in fnames:
- if fname.startswith(data_fname_stem):
- data_dict = read_data(os.path.join(data_path,fname))
- idx = len(data_fname_stem) + 1
- key = fname[idx:]
- data_dict['data_dim'] = data_dict['train_data'].shape[2]
- data_dict['num_steps'] = data_dict['train_data'].shape[1]
- dataset_dict[key] = data_dict
-
- if len(dataset_dict) == 0:
- raise ValueError("Failed to load any datasets, are you sure that the "
- "'--data_dir' and '--data_filename_stem' flag values "
- "are correct?")
-
- print (str(len(dataset_dict)) + ' datasets loaded')
- return dataset_dict
-
-
-# NUMPY utility functions
-def list_t_bxn_to_list_b_txn(values_t_bxn):
-  """Convert a length T list of BxN numpy tensors to a length B list of TxN
-  numpy tensors.
-
- Args:
- values_t_bxn: The length T list of BxN numpy tensors.
-
- Returns:
- The length B list of TxN numpy tensors.
- """
- T = len(values_t_bxn)
- B, N = values_t_bxn[0].shape
- values_b_txn = []
- for b in range(B):
- values_pb_txn = np.zeros([T,N])
- for t in range(T):
- values_pb_txn[t,:] = values_t_bxn[t][b,:]
- values_b_txn.append(values_pb_txn)
-
- return values_b_txn
-
-
-def list_t_bxn_to_tensor_bxtxn(values_t_bxn):
- """Convert a length T list of BxN numpy tensors to single numpy tensor with
- shape BxTxN.
-
- Args:
- values_t_bxn: The length T list of BxN numpy tensors.
-
- Returns:
- values_bxtxn: The BxTxN numpy tensor.
- """
-
- T = len(values_t_bxn)
- B, N = values_t_bxn[0].shape
- values_bxtxn = np.zeros([B,T,N])
- for t in range(T):
- values_bxtxn[:,t,:] = values_t_bxn[t]
-
- return values_bxtxn
-
-
-def tensor_bxtxn_to_list_t_bxn(tensor_bxtxn):
-  """Convert a numpy tensor with shape BxTxN to a length T list of numpy
-  tensors with shape BxN.
-
- Args:
- tensor_bxtxn: The BxTxN numpy tensor.
-
- Returns:
-    A length T list of numpy tensors with shape BxN.
- """
-
- values_t_bxn = []
- B, T, N = tensor_bxtxn.shape
- for t in range(T):
- values_t_bxn.append(np.squeeze(tensor_bxtxn[:,t,:]))
-
- return values_t_bxn
-
-
-def flatten(list_of_lists):
- """Takes a list of lists and returns a list of the elements.
-
- Args:
- list_of_lists: List of lists.
-
- Returns:
- flat_list: Flattened list.
- flat_list_idxs: Flattened list indices.
- """
- flat_list = []
- flat_list_idxs = []
- start_idx = 0
- for item in list_of_lists:
- if isinstance(item, list):
- flat_list += item
- l = len(item)
- idxs = range(start_idx, start_idx+l)
- start_idx = start_idx+l
- else: # a value
- flat_list.append(item)
- idxs = [start_idx]
- start_idx += 1
- flat_list_idxs.append(idxs)
-
- return flat_list, flat_list_idxs
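-
-# Example (illustrative): flatten([[1, 2], 3]) returns the flat list
-# [1, 2, 3] with index groups [0, 1] (for the sublist) and [2] (for the
-# scalar).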
diff --git a/research/lm_1b/BUILD b/research/lm_1b/BUILD
deleted file mode 100644
index ca5bc1f6ce4347a3b5f18d1bb59284aa9d07a567..0000000000000000000000000000000000000000
--- a/research/lm_1b/BUILD
+++ /dev/null
@@ -1,27 +0,0 @@
-package(default_visibility = [":internal"])
-
-licenses(["notice"]) # Apache 2.0
-
-exports_files(["LICENSE"])
-
-package_group(
- name = "internal",
- packages = [
- "//lm_1b/...",
- ],
-)
-
-py_library(
- name = "data_utils",
- srcs = ["data_utils.py"],
-)
-
-py_binary(
- name = "lm_1b_eval",
- srcs = [
- "lm_1b_eval.py",
- ],
- deps = [
- ":data_utils",
- ],
-)
diff --git a/research/lm_1b/README.md b/research/lm_1b/README.md
deleted file mode 100644
index f48afbfe23aff6681e641296e73b2c6b0e5a9b48..0000000000000000000000000000000000000000
--- a/research/lm_1b/README.md
+++ /dev/null
@@ -1,198 +0,0 @@
-
-
-
-
-Language Model on One Billion Word Benchmark
-
-Authors:
-
-Oriol Vinyals (vinyals@google.com, github: OriolVinyals),
-Xin Pan
-
-Paper Authors:
-
-Rafal Jozefowicz, Oriol Vinyals, Mike Schuster, Noam Shazeer, Yonghui Wu
-
-TL;DR
-
-This is a pretrained model on One Billion Word Benchmark.
-If you use this model in your publication, please cite the original paper:
-
-@article{jozefowicz2016exploring,
- title={Exploring the Limits of Language Modeling},
- author={Jozefowicz, Rafal and Vinyals, Oriol and Schuster, Mike
- and Shazeer, Noam and Wu, Yonghui},
- journal={arXiv preprint arXiv:1602.02410},
- year={2016}
-}
-
-Introduction
-
-In this release, we open source a model trained on the One Billion Word
-Benchmark (http://arxiv.org/abs/1312.3005), a large language corpus in English
-which was released in 2013. This dataset contains about one billion words, and
-has a vocabulary size of about 800K words. It contains mostly news data. Since
-sentences in the training set are shuffled, models can ignore the context and
-focus on sentence level language modeling.
-
-In the original release and subsequent work, people have used the same test set
-to train models on this dataset as a standard benchmark for language modeling.
-Recently, we wrote an article (http://arxiv.org/abs/1602.02410) describing a
-model hybrid between character CNN, a large and deep LSTM, and a specific
-Softmax architecture which allowed us to train the best model on this dataset
-thus far, almost halving the best perplexity previously obtained by others.
-
-Code Release
-
-The open-sourced components include:
-
-* TensorFlow GraphDef proto buffer text file.
-* TensorFlow pre-trained checkpoint shards.
-* Code used to evaluate the pre-trained model.
-* Vocabulary file.
-* Test set from LM-1B evaluation.
-
-The code supports 4 evaluation modes:
-
-* Given a provided dataset, calculate the model's perplexity.
-* Given a prefix sentence, predict the next words.
-* Dump the softmax embedding and the character-level CNN word embeddings.
-* Given a sentence, dump the embedding from the LSTM state.
-
-Results
-
-Model | Test Perplexity | Number of Params [billions]
-------|-----------------|----------------------------
-Sigmoid-RNN-2048 [Blackout] | 68.3 | 4.1
-Interpolated KN 5-gram, 1.1B n-grams [chelba2013one] | 67.6 | 1.76
-Sparse Non-Negative Matrix LM [shazeer2015sparse] | 52.9 | 33
-RNN-1024 + MaxEnt 9-gram features [chelba2013one] | 51.3 | 20
-LSTM-512-512 | 54.1 | 0.82
-LSTM-1024-512 | 48.2 | 0.82
-LSTM-2048-512 | 43.7 | 0.83
-LSTM-8192-2048 (No Dropout) | 37.9 | 3.3
-LSTM-8192-2048 (50\% Dropout) | 32.2 | 3.3
-2-Layer LSTM-8192-1024 (BIG LSTM) | 30.6 | 1.8
-(THIS RELEASE) BIG LSTM+CNN Inputs | 30.0 | 1.04
-
-How To Run
-
-Prerequisites:
-
-* Install TensorFlow.
-* Install Bazel.
-* Download the data files:
- * Model GraphDef file:
- [link](http://download.tensorflow.org/models/LM_LSTM_CNN/graph-2016-09-10.pbtxt)
- * Model Checkpoint sharded file:
- [1](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-base)
- [2](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-char-embedding)
- [3](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-lstm)
- [4](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax0)
- [5](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax1)
- [6](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax2)
- [7](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax3)
- [8](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax4)
- [9](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax5)
- [10](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax6)
- [11](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax7)
- [12](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax8)
- * Vocabulary file:
- [link](http://download.tensorflow.org/models/LM_LSTM_CNN/vocab-2016-09-10.txt)
-  * Test dataset:
- [link](http://download.tensorflow.org/models/LM_LSTM_CNN/test/news.en.heldout-00000-of-00050)
-* It is recommended to run on a modern desktop instead of a laptop.
-
-```shell
-# 1. Clone the code to your workspace.
-# 2. Download the data to your workspace.
-# 3. Create an empty WORKSPACE file in your workspace.
-# 4. Create an empty output directory in your workspace.
-# Example directory structure below:
-$ ls -R
-.:
-data lm_1b output WORKSPACE
-
-./data:
-ckpt-base ckpt-lstm ckpt-softmax1 ckpt-softmax3 ckpt-softmax5
-ckpt-softmax7 graph-2016-09-10.pbtxt vocab-2016-09-10.txt
-ckpt-char-embedding ckpt-softmax0 ckpt-softmax2 ckpt-softmax4 ckpt-softmax6
-ckpt-softmax8 news.en.heldout-00000-of-00050
-
-./lm_1b:
-BUILD data_utils.py lm_1b_eval.py README.md
-
-./output:
-
-# Build the codes.
-$ bazel build -c opt lm_1b/...
-# Run sample mode:
-$ bazel-bin/lm_1b/lm_1b_eval --mode sample \
- --prefix "I love that I" \
- --pbtxt data/graph-2016-09-10.pbtxt \
- --vocab_file data/vocab-2016-09-10.txt \
- --ckpt 'data/ckpt-*'
-...(omitted some TensorFlow output)
-I love
-I love that
-I love that I
-I love that I find
-I love that I find that
-I love that I find that amazing
-...(omitted)
-
-# Run eval mode:
-$ bazel-bin/lm_1b/lm_1b_eval --mode eval \
- --pbtxt data/graph-2016-09-10.pbtxt \
- --vocab_file data/vocab-2016-09-10.txt \
- --input_data data/news.en.heldout-00000-of-00050 \
- --ckpt 'data/ckpt-*'
-...(omitted some TensorFlow output)
-Loaded step 14108582.
-# perplexity is high initially because words without context are harder to
-# predict.
-Eval Step: 0, Average Perplexity: 2045.512297.
-Eval Step: 1, Average Perplexity: 229.478699.
-Eval Step: 2, Average Perplexity: 208.116787.
-Eval Step: 3, Average Perplexity: 338.870601.
-Eval Step: 4, Average Perplexity: 228.950107.
-Eval Step: 5, Average Perplexity: 197.685857.
-Eval Step: 6, Average Perplexity: 156.287063.
-Eval Step: 7, Average Perplexity: 124.866189.
-Eval Step: 8, Average Perplexity: 147.204975.
-Eval Step: 9, Average Perplexity: 90.124864.
-Eval Step: 10, Average Perplexity: 59.897914.
-Eval Step: 11, Average Perplexity: 42.591137.
-...(omitted)
-Eval Step: 4529, Average Perplexity: 29.243668.
-Eval Step: 4530, Average Perplexity: 29.302362.
-Eval Step: 4531, Average Perplexity: 29.285674.
-...(omitted. At convergence, it should be around 30.)
-
-# Run dump_emb mode:
-$ bazel-bin/lm_1b/lm_1b_eval --mode dump_emb \
- --pbtxt data/graph-2016-09-10.pbtxt \
- --vocab_file data/vocab-2016-09-10.txt \
- --ckpt 'data/ckpt-*' \
- --save_dir output
-...(omitted some TensorFlow output)
-Finished softmax weights
-Finished word embedding 0/793471
-Finished word embedding 1/793471
-Finished word embedding 2/793471
-...(omitted)
-$ ls output/
-embeddings_softmax.npy ...
-
-# Run dump_lstm_emb mode:
-$ bazel-bin/lm_1b/lm_1b_eval --mode dump_lstm_emb \
- --pbtxt data/graph-2016-09-10.pbtxt \
- --vocab_file data/vocab-2016-09-10.txt \
- --ckpt 'data/ckpt-*' \
- --sentence "I love who I am ." \
- --save_dir output
-$ ls output/
-lstm_emb_step_0.npy lstm_emb_step_2.npy lstm_emb_step_4.npy
-lstm_emb_step_6.npy lstm_emb_step_1.npy lstm_emb_step_3.npy
-lstm_emb_step_5.npy
-```
diff --git a/research/lm_1b/data_utils.py b/research/lm_1b/data_utils.py
deleted file mode 100644
index ad8d3391ef6db07c1d6c234450a6d23a8e19a178..0000000000000000000000000000000000000000
--- a/research/lm_1b/data_utils.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A library for loading 1B word benchmark dataset."""
-
-import random
-
-import numpy as np
-import tensorflow as tf
-
-
-class Vocabulary(object):
- """Class that holds a vocabulary for the dataset."""
-
- def __init__(self, filename):
- """Initialize vocabulary.
-
- Args:
- filename: Vocabulary file name.
- """
-
- self._id_to_word = []
- self._word_to_id = {}
- self._unk = -1
- self._bos = -1
- self._eos = -1
-
- with tf.gfile.Open(filename) as f:
- idx = 0
- for line in f:
- word_name = line.strip()
-        if word_name == '<S>':
-          self._bos = idx
-        elif word_name == '</S>':
-          self._eos = idx
-        elif word_name == '<UNK>':
-          self._unk = idx
- if word_name == '!!!MAXTERMID':
- continue
-
- self._id_to_word.append(word_name)
- self._word_to_id[word_name] = idx
- idx += 1
-
- @property
- def bos(self):
- return self._bos
-
- @property
- def eos(self):
- return self._eos
-
- @property
- def unk(self):
- return self._unk
-
- @property
- def size(self):
- return len(self._id_to_word)
-
- def word_to_id(self, word):
- if word in self._word_to_id:
- return self._word_to_id[word]
- return self.unk
-
- def id_to_word(self, cur_id):
- if cur_id < self.size:
- return self._id_to_word[cur_id]
- return 'ERROR'
-
- def decode(self, cur_ids):
- """Convert a list of ids to a sentence, with space inserted."""
- return ' '.join([self.id_to_word(cur_id) for cur_id in cur_ids])
-
- def encode(self, sentence):
- """Convert a sentence to a list of ids, with special tokens added."""
- word_ids = [self.word_to_id(cur_word) for cur_word in sentence.split()]
- return np.array([self.bos] + word_ids + [self.eos], dtype=np.int32)
-
-
-class CharsVocabulary(Vocabulary):
- """Vocabulary containing character-level information."""
-
- def __init__(self, filename, max_word_length):
- super(CharsVocabulary, self).__init__(filename)
- self._max_word_length = max_word_length
- chars_set = set()
-
- for word in self._id_to_word:
- chars_set |= set(word)
-
- free_ids = []
- for i in range(256):
- if chr(i) in chars_set:
- continue
- free_ids.append(chr(i))
-
- if len(free_ids) < 5:
- raise ValueError('Not enough free char ids: %d' % len(free_ids))
-
-    self.bos_char = free_ids[0]  # <begin sentence>
-    self.eos_char = free_ids[1]  # <end sentence>
-    self.bow_char = free_ids[2]  # <begin word>
-    self.eow_char = free_ids[3]  # <end word>
-    self.pad_char = free_ids[4]  # <padding>
-
- chars_set |= {self.bos_char, self.eos_char, self.bow_char, self.eow_char,
- self.pad_char}
-
- self._char_set = chars_set
- num_words = len(self._id_to_word)
-
- self._word_char_ids = np.zeros([num_words, max_word_length], dtype=np.int32)
-
- self.bos_chars = self._convert_word_to_char_ids(self.bos_char)
- self.eos_chars = self._convert_word_to_char_ids(self.eos_char)
-
- for i, word in enumerate(self._id_to_word):
- self._word_char_ids[i] = self._convert_word_to_char_ids(word)
-
- @property
- def word_char_ids(self):
- return self._word_char_ids
-
- @property
- def max_word_length(self):
- return self._max_word_length
-
- def _convert_word_to_char_ids(self, word):
- code = np.zeros([self.max_word_length], dtype=np.int32)
- code[:] = ord(self.pad_char)
-
- if len(word) > self.max_word_length - 2:
- word = word[:self.max_word_length-2]
- cur_word = self.bow_char + word + self.eow_char
- for j in range(len(cur_word)):
- code[j] = ord(cur_word[j])
- return code
-
- def word_to_char_ids(self, word):
- if word in self._word_to_id:
- return self._word_char_ids[self._word_to_id[word]]
- else:
- return self._convert_word_to_char_ids(word)
-
- def encode_chars(self, sentence):
- chars_ids = [self.word_to_char_ids(cur_word)
- for cur_word in sentence.split()]
- return np.vstack([self.bos_chars] + chars_ids + [self.eos_chars])
-
-
-def get_batch(generator, batch_size, num_steps, max_word_length, pad=False):
- """Read batches of input."""
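-  # Note: each of the batch_size streams is refilled independently from
-  # `generator`, so a num_steps window may span sentence boundaries;
-  # `weights` marks the positions that carry real targets.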
- cur_stream = [None] * batch_size
-
- inputs = np.zeros([batch_size, num_steps], np.int32)
- char_inputs = np.zeros([batch_size, num_steps, max_word_length], np.int32)
- global_word_ids = np.zeros([batch_size, num_steps], np.int32)
- targets = np.zeros([batch_size, num_steps], np.int32)
- weights = np.ones([batch_size, num_steps], np.float32)
-
- no_more_data = False
- while True:
- inputs[:] = 0
- char_inputs[:] = 0
- global_word_ids[:] = 0
- targets[:] = 0
- weights[:] = 0.0
-
- for i in range(batch_size):
- cur_pos = 0
-
- while cur_pos < num_steps:
- if cur_stream[i] is None or len(cur_stream[i][0]) <= 1:
- try:
-            cur_stream[i] = list(next(generator))
- except StopIteration:
- # No more data, exhaust current streams and quit
- no_more_data = True
- break
-
- how_many = min(len(cur_stream[i][0]) - 1, num_steps - cur_pos)
- next_pos = cur_pos + how_many
-
- inputs[i, cur_pos:next_pos] = cur_stream[i][0][:how_many]
- char_inputs[i, cur_pos:next_pos] = cur_stream[i][1][:how_many]
- global_word_ids[i, cur_pos:next_pos] = cur_stream[i][2][:how_many]
- targets[i, cur_pos:next_pos] = cur_stream[i][0][1:how_many+1]
- weights[i, cur_pos:next_pos] = 1.0
-
- cur_pos = next_pos
- cur_stream[i][0] = cur_stream[i][0][how_many:]
- cur_stream[i][1] = cur_stream[i][1][how_many:]
- cur_stream[i][2] = cur_stream[i][2][how_many:]
-
- if pad:
- break
-
- if no_more_data and np.sum(weights) == 0:
- # There is no more data and this is an empty batch. Done!
- break
- yield inputs, char_inputs, global_word_ids, targets, weights
-
-
-class LM1BDataset(object):
- """Utility class for 1B word benchmark dataset.
-
- The current implementation reads the data from the tokenized text files.
- """
-
- def __init__(self, filepattern, vocab):
- """Initialize LM1BDataset reader.
-
- Args:
- filepattern: Dataset file pattern.
- vocab: Vocabulary.
- """
- self._vocab = vocab
- self._all_shards = tf.gfile.Glob(filepattern)
- tf.logging.info('Found %d shards at %s', len(self._all_shards), filepattern)
-
- def _load_random_shard(self):
- """Randomly select a file and read it."""
- return self._load_shard(random.choice(self._all_shards))
-
- def _load_shard(self, shard_name):
- """Read one file and convert to ids.
-
- Args:
- shard_name: file path.
-
- Returns:
- list of (id, char_id, global_word_id) tuples.
- """
- tf.logging.info('Loading data from: %s', shard_name)
- with tf.gfile.Open(shard_name) as f:
- sentences = f.readlines()
- chars_ids = [self.vocab.encode_chars(sentence) for sentence in sentences]
- ids = [self.vocab.encode(sentence) for sentence in sentences]
-
- global_word_ids = []
- current_idx = 0
- for word_ids in ids:
-      current_size = len(word_ids) - 1  # without the sentence-start symbol
- cur_ids = np.arange(current_idx, current_idx + current_size)
- global_word_ids.append(cur_ids)
- current_idx += current_size
-
- tf.logging.info('Loaded %d words.', current_idx)
- tf.logging.info('Finished loading')
- return zip(ids, chars_ids, global_word_ids)
-
- def _get_sentence(self, forever=True):
- while True:
- ids = self._load_random_shard()
- for current_ids in ids:
- yield current_ids
- if not forever:
- break
-
- def get_batch(self, batch_size, num_steps, pad=False, forever=True):
- return get_batch(self._get_sentence(forever), batch_size, num_steps,
- self.vocab.max_word_length, pad=pad)
-
- @property
- def vocab(self):
- return self._vocab
diff --git a/research/lm_1b/lm_1b_eval.py b/research/lm_1b/lm_1b_eval.py
deleted file mode 100644
index ce8634757558c135ba137a9b9e09a733977adc3a..0000000000000000000000000000000000000000
--- a/research/lm_1b/lm_1b_eval.py
+++ /dev/null
@@ -1,308 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Eval pre-trained 1 billion word language model.
-"""
-import os
-import sys
-
-import numpy as np
-from six.moves import xrange
-import tensorflow as tf
-
-from google.protobuf import text_format
-import data_utils
-
-FLAGS = tf.flags.FLAGS
-# General flags.
-tf.flags.DEFINE_string('mode', 'eval',
- 'One of [sample, eval, dump_emb, dump_lstm_emb]. '
- '"sample" mode samples future word predictions, using '
- 'FLAGS.prefix as prefix (prefix could be left empty). '
- '"eval" mode calculates perplexity of the '
- 'FLAGS.input_data. '
- '"dump_emb" mode dumps word and softmax embeddings to '
- 'FLAGS.save_dir. embeddings are dumped in the same '
- 'order as words in vocabulary. All words in vocabulary '
-                       'are dumped. '
-                       '"dump_lstm_emb" mode dumps lstm embeddings of '
-                       'FLAGS.sentence '
- 'to FLAGS.save_dir.')
-tf.flags.DEFINE_string('pbtxt', '',
- 'GraphDef proto text file used to construct model '
- 'structure.')
-tf.flags.DEFINE_string('ckpt', '',
- 'Checkpoint directory used to fill model values.')
-tf.flags.DEFINE_string('vocab_file', '', 'Vocabulary file.')
-tf.flags.DEFINE_string('save_dir', '',
- 'Used for "dump_emb" mode to save word embeddings.')
-# sample mode flags.
-tf.flags.DEFINE_string('prefix', '',
- 'Used for "sample" mode to predict next words.')
-tf.flags.DEFINE_integer('max_sample_words', 100,
-                        'Sampling stops either when </S> is met or this number '
- 'of steps has passed.')
-tf.flags.DEFINE_integer('num_samples', 3,
- 'Number of samples to generate for the prefix.')
-# dump_lstm_emb mode flags.
-tf.flags.DEFINE_string('sentence', '',
- 'Used as input for "dump_lstm_emb" mode.')
-# eval mode flags.
-tf.flags.DEFINE_string('input_data', '',
- 'Input data files for eval model.')
-tf.flags.DEFINE_integer('max_eval_steps', 1000000,
-                        'Maximum number of steps to run "eval" mode.')
-
-
-# For saving demo resources, use batch size 1 and step 1.
-BATCH_SIZE = 1
-NUM_TIMESTEPS = 1
-MAX_WORD_LEN = 50
-
-
-def _LoadModel(gd_file, ckpt_file):
- """Load the model from GraphDef and Checkpoint.
-
- Args:
- gd_file: GraphDef proto text file.
- ckpt_file: TensorFlow Checkpoint file.
-
- Returns:
- TensorFlow session and tensors dict.
- """
- with tf.Graph().as_default():
- sys.stderr.write('Recovering graph.\n')
- with tf.gfile.FastGFile(gd_file, 'r') as f:
- s = f.read().decode()
- gd = tf.GraphDef()
- text_format.Merge(s, gd)
-
- tf.logging.info('Recovering Graph %s', gd_file)
- t = {}
- [t['states_init'], t['lstm/lstm_0/control_dependency'],
- t['lstm/lstm_1/control_dependency'], t['softmax_out'], t['class_ids_out'],
- t['class_weights_out'], t['log_perplexity_out'], t['inputs_in'],
- t['targets_in'], t['target_weights_in'], t['char_inputs_in'],
- t['all_embs'], t['softmax_weights'], t['global_step']
- ] = tf.import_graph_def(gd, {}, ['states_init',
- 'lstm/lstm_0/control_dependency:0',
- 'lstm/lstm_1/control_dependency:0',
- 'softmax_out:0',
- 'class_ids_out:0',
- 'class_weights_out:0',
- 'log_perplexity_out:0',
- 'inputs_in:0',
- 'targets_in:0',
- 'target_weights_in:0',
- 'char_inputs_in:0',
- 'all_embs_out:0',
- 'Reshape_3:0',
- 'global_step:0'], name='')
-
- sys.stderr.write('Recovering checkpoint %s\n' % ckpt_file)
- sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
- sess.run('save/restore_all', {'save/Const:0': ckpt_file})
- sess.run(t['states_init'])
-
- return sess, t
-
-
-def _EvalModel(dataset):
- """Evaluate model perplexity using provided dataset.
-
- Args:
- dataset: LM1BDataset object.
- """
- sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt)
-
- current_step = t['global_step'].eval(session=sess)
- sys.stderr.write('Loaded step %d.\n' % current_step)
-
- data_gen = dataset.get_batch(BATCH_SIZE, NUM_TIMESTEPS, forever=False)
- sum_num = 0.0
- sum_den = 0.0
- perplexity = 0.0
- for i, (inputs, char_inputs, _, targets, weights) in enumerate(data_gen):
- input_dict = {t['inputs_in']: inputs,
- t['targets_in']: targets,
- t['target_weights_in']: weights}
- if 'char_inputs_in' in t:
- input_dict[t['char_inputs_in']] = char_inputs
- log_perp = sess.run(t['log_perplexity_out'], feed_dict=input_dict)
-
- if np.isnan(log_perp):
-      sys.stderr.write('log_perplexity is NaN.\n')
- else:
- sum_num += log_perp * weights.mean()
- sum_den += weights.mean()
- if sum_den > 0:
- perplexity = np.exp(sum_num / sum_den)
-
- sys.stderr.write('Eval Step: %d, Average Perplexity: %f.\n' %
- (i, perplexity))
-
- if i > FLAGS.max_eval_steps:
- break
-
-
-def _SampleSoftmax(softmax):
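-  # Inverse-CDF sampling: count the cumulative-probability bins that fall
-  # below a uniform draw; min() clamps any floating-point overshoot to the
-  # last valid word id.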
- return min(np.sum(np.cumsum(softmax) < np.random.rand()), len(softmax) - 1)
-
-
-def _SampleModel(prefix_words, vocab):
- """Predict next words using the given prefix words.
-
- Args:
- prefix_words: Prefix words.
-    vocab: Vocabulary. Contains the max word length in char ids and converts
-      between words and ids.
- """
- targets = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32)
- weights = np.ones([BATCH_SIZE, NUM_TIMESTEPS], np.float32)
-
- sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt)
-
-  if prefix_words.find('<S>') != 0:
-    prefix_words = '<S> ' + prefix_words
-
- prefix = [vocab.word_to_id(w) for w in prefix_words.split()]
- prefix_char_ids = [vocab.word_to_char_ids(w) for w in prefix_words.split()]
- for _ in xrange(FLAGS.num_samples):
- inputs = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32)
- char_ids_inputs = np.zeros(
- [BATCH_SIZE, NUM_TIMESTEPS, vocab.max_word_length], np.int32)
- samples = prefix[:]
- char_ids_samples = prefix_char_ids[:]
- sent = ''
- while True:
- inputs[0, 0] = samples[0]
- char_ids_inputs[0, 0, :] = char_ids_samples[0]
- samples = samples[1:]
- char_ids_samples = char_ids_samples[1:]
-
- softmax = sess.run(t['softmax_out'],
- feed_dict={t['char_inputs_in']: char_ids_inputs,
- t['inputs_in']: inputs,
- t['targets_in']: targets,
- t['target_weights_in']: weights})
-
- sample = _SampleSoftmax(softmax[0])
- sample_char_ids = vocab.word_to_char_ids(vocab.id_to_word(sample))
-
- if not samples:
- samples = [sample]
- char_ids_samples = [sample_char_ids]
- sent += vocab.id_to_word(samples[0]) + ' '
- sys.stderr.write('%s\n' % sent)
-
-      if (vocab.id_to_word(samples[0]) == '</S>' or
- len(sent) > FLAGS.max_sample_words):
- break
-
-
-def _DumpEmb(vocab):
- """Dump the softmax weights and word embeddings to files.
-
- Args:
- vocab: Vocabulary. Contains vocabulary size and converts word to ids.
- """
- assert FLAGS.save_dir, 'Must specify FLAGS.save_dir for dump_emb.'
- inputs = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32)
- targets = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32)
- weights = np.ones([BATCH_SIZE, NUM_TIMESTEPS], np.float32)
-
- sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt)
-
- softmax_weights = sess.run(t['softmax_weights'])
- fname = FLAGS.save_dir + '/embeddings_softmax.npy'
- with tf.gfile.Open(fname, mode='w') as f:
- np.save(f, softmax_weights)
- sys.stderr.write('Finished softmax weights\n')
-
- all_embs = np.zeros([vocab.size, 1024])
- for i in xrange(vocab.size):
- input_dict = {t['inputs_in']: inputs,
- t['targets_in']: targets,
- t['target_weights_in']: weights}
- if 'char_inputs_in' in t:
- input_dict[t['char_inputs_in']] = (
- vocab.word_char_ids[i].reshape([-1, 1, MAX_WORD_LEN]))
- embs = sess.run(t['all_embs'], input_dict)
- all_embs[i, :] = embs
- sys.stderr.write('Finished word embedding %d/%d\n' % (i, vocab.size))
-
- fname = FLAGS.save_dir + '/embeddings_char_cnn.npy'
- with tf.gfile.Open(fname, mode='w') as f:
- np.save(f, all_embs)
- sys.stderr.write('Embedding file saved\n')
-
-
-def _DumpSentenceEmbedding(sentence, vocab):
- """Predict next words using the given prefix words.
-
- Args:
- sentence: Sentence words.
-    vocab: Vocabulary. Contains max word char id length and converts between
- words and ids.
- """
- targets = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32)
- weights = np.ones([BATCH_SIZE, NUM_TIMESTEPS], np.float32)
-
- sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt)
-
-  if sentence.find('<S>') != 0:
-    sentence = '<S> ' + sentence
-
- word_ids = [vocab.word_to_id(w) for w in sentence.split()]
- char_ids = [vocab.word_to_char_ids(w) for w in sentence.split()]
-
- inputs = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32)
- char_ids_inputs = np.zeros(
- [BATCH_SIZE, NUM_TIMESTEPS, vocab.max_word_length], np.int32)
- for i in xrange(len(word_ids)):
- inputs[0, 0] = word_ids[i]
- char_ids_inputs[0, 0, :] = char_ids[i]
-
- # Add 'lstm/lstm_0/control_dependency' if you want to dump previous layer
- # LSTM.
- lstm_emb = sess.run(t['lstm/lstm_1/control_dependency'],
- feed_dict={t['char_inputs_in']: char_ids_inputs,
- t['inputs_in']: inputs,
- t['targets_in']: targets,
- t['target_weights_in']: weights})
-
- fname = os.path.join(FLAGS.save_dir, 'lstm_emb_step_%d.npy' % i)
- with tf.gfile.Open(fname, mode='w') as f:
- np.save(f, lstm_emb)
- sys.stderr.write('LSTM embedding step %d file saved\n' % i)
-
-
-def main(unused_argv):
- vocab = data_utils.CharsVocabulary(FLAGS.vocab_file, MAX_WORD_LEN)
-
- if FLAGS.mode == 'eval':
- dataset = data_utils.LM1BDataset(FLAGS.input_data, vocab)
- _EvalModel(dataset)
- elif FLAGS.mode == 'sample':
- _SampleModel(FLAGS.prefix, vocab)
- elif FLAGS.mode == 'dump_emb':
- _DumpEmb(vocab)
- elif FLAGS.mode == 'dump_lstm_emb':
- _DumpSentenceEmbedding(FLAGS.sentence, vocab)
- else:
- raise Exception('Mode not supported.')
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/research/lm_commonsense/README.md b/research/lm_commonsense/README.md
deleted file mode 100644
index 78c8f53ca226f09c4b185490d6966f98bf584889..0000000000000000000000000000000000000000
--- a/research/lm_commonsense/README.md
+++ /dev/null
@@ -1,170 +0,0 @@
-
-
-
-
-# A Simple Method for Commonsense Reasoning
-
-This repository contains code to reproduce results from [*A Simple Method for Commonsense Reasoning*](https://arxiv.org/abs/1806.02847).
-
-Authors and contact:
-
-* Trieu H. Trinh (thtrieu@google.com, github: thtrieu)
-* Quoc V. Le (qvl@google.com)
-
-## TL;DR
-
-Commonsense reasoning is a long-standing challenge for deep learning. For example,
-it is difficult to use neural networks to tackle the Winograd Schema dataset, a challenging subset of Pronoun Disambiguation Problems. In this work, we use language models to score substituted sentences to decide the correct reference of the ambiguous pronoun (see Figure below for an example).
-
-![Illustration of the method](method.jpg)
-
-This simple unsupervised method achieves new state-of-the-art results (*as of June 1st, 2018*) on both the PDP-60 and WSC-273 benchmarks (see Table below), without rule-based reasoning or expensive annotated knowledge bases.
-
-| Commonsense-reasoning test | Previous best result | Ours |
-| ----------------------------|:----------------------:|:-----:|
-| Pronoun Disambiguation | 66.7% | 70% |
-| Winograd Schema Challenge | 52.8% | 63.7% |
-
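-A minimal sketch of the scoring rule (a hypothetical illustration, not part of this release; `lm_word_probs` stands in for any language model that returns per-word probabilities):
-
-```python
-import numpy as np
-
-
-def lm_word_probs(tokens):
-  """Placeholder: return per-word probabilities from a trained LM."""
-  raise NotImplementedError
-
-
-def pick_substitution(substitution_a, substitution_b):
-  """Choose the substituted sentence the LM finds more probable."""
-  joint_a = np.prod(lm_word_probs(substitution_a), dtype=np.float64)
-  joint_b = np.prod(lm_word_probs(substitution_b), dtype=np.float64)
-  return substitution_a if joint_a > joint_b else substitution_b
-```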
-
-
-## Citation
-
-If you use our released models below in your publication, please cite the original paper:
-
-@article{TBD}
-
-
-## Requirements
-* Python >= 2.6
-* TensorFlow >= 1.4
-* NumPy >= 1.12.1
-
-## Details of this release
-
-The open-sourced components include:
-
-* Test sets for the Pronoun Disambiguation Problem (PDP-60) and the Winograd Schema Challenge (WSC-273).
-* Tensorflow metagraph and checkpoints of 14 language models (See Appendix A in the paper).
-* A vocabulary file.
-* Code to reproduce results from the original paper.
-
-## How to run
-
-### 1. Download data files
-
-Download all files from the [Google Cloud Storage of this project](https://console.cloud.google.com/storage/browser/commonsense-reasoning/). The easiest way is to install and use the `gsutil` command-line tool (see [install gsutil](https://cloud.google.com/storage/docs/gsutil_install)).
-
-
-```shell
-# Download everything from the project gs://commonsense-reasoning
-$ gsutil cp -R gs://commonsense-reasoning/* .
-Copying gs://commonsense-reasoning/reproduce/vocab.txt...
-Copying gs://commonsense-reasoning/reproduce/commonsense_test/pdp60.json...
-Copying gs://commonsense-reasoning/reproduce/commonsense_test/wsc273.json...
-
-...(omitted)
-```
-
-All downloaded content should be in `./reproduce/`. This includes the two test sets `pdp60.json` and `wsc273.json`, a vocabulary file `vocab.txt`, and checkpoints for all 14 language models, each consisting of three files (`.data`, `.index` and `.meta`). All checkpoint names start with `ckpt-best` since each model was saved at its best perplexity on a held-out text corpus.
-
-```shell
-# Check for the content
-$ ls reproduce/*
-reproduce/vocab.txt
-
-reproduce/commonsense_test:
-pdp60.json wsc273.json
-
-reproduce/lm01:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm02:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm03:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm04:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm05:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm06:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm07:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm08:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm09:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm10:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm11:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm12:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm13:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-
-reproduce/lm14:
-ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta
-```
-
-### 2. Run evaluation code
-
-To reproduce the results from the paper, simply run the `eval.py` script.
-
-```shell
-$ python eval.py --data_dir=reproduce
-
-Restored from ./reproduce/lm01
-Reset RNN states.
-Processing patch (1, 1) / (2, 4)
-Probs for
-[['Then' 'Dad' 'figured' ..., 'man' "'s" 'board-bill']
- ['Then' 'Dad' 'figured' ..., 'man' "'s" 'board-bill']
- ['Always' 'before' ',' ..., 'now' ',' 'for']
- ...,
- ['Mark' 'was' 'close' ..., 'promising' 'him' ',']
- ['Mark' 'was' 'close' ..., 'promising' 'him' ',']
- ['Mark' 'was' 'close' ..., 'promising' 'him' ',']]
-=
-[[ 1.64250596e-05 1.77780055e-06 4.14267970e-06 ..., 1.87315454e-03
- 1.57723188e-01 6.31845817e-02]
- [ 1.64250596e-05 1.77780055e-06 4.14267970e-06 ..., 1.87315454e-03
- 1.57723188e-01 6.31845817e-02]
- [ 1.28243030e-07 3.80435935e-03 1.12383246e-01 ..., 9.67682712e-03
- 2.17407525e-01 1.08243264e-01]
- ...,
- [ 1.15557734e-04 2.92792241e-03 3.46455898e-04 ..., 2.72328052e-05
- 3.37066874e-02 7.89367408e-02]
- [ 1.15557734e-04 2.92792241e-03 3.46455898e-04 ..., 2.72328052e-05
- 3.37066874e-02 7.89367408e-02]
- [ 1.15557734e-04 2.92792241e-03 3.46455898e-04 ..., 2.72328052e-05
- 3.37066874e-02 7.89367408e-02]]
-Processing patch (1, 2) / (2, 4)
-
-...(omitted)
-
-Accuracy of 1 LM(s) on pdp60 = 0.6
-
-...(omitted)
-
-Accuracy of 5 LM(s) on pdp60 = 0.7
-
-...(omitted)
-
-Accuracy of 10 LM(s) on wsc273 = 0.615
-
-...(omitted)
-
-Accuracy of 14 LM(s) on wsc273 = 0.637
-```
diff --git a/research/lm_commonsense/eval.py b/research/lm_commonsense/eval.py
deleted file mode 100644
index e5b7ff98b50a5af4e066d3d9f82c1acae81c3e93..0000000000000000000000000000000000000000
--- a/research/lm_commonsense/eval.py
+++ /dev/null
@@ -1,190 +0,0 @@
-# Copyright 2017 The TensorFlow Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import pickle as pkl
-import numpy as np
-import tensorflow as tf
-import utils
-
-tf.app.flags.DEFINE_string(
- 'data_dir', 'reproduce',
- 'Path to directory containing data and model checkpoints.')
-
-
-FLAGS = tf.app.flags.FLAGS
-
-
-class EnsembleLM(object):
- """Ensemble of language models."""
-
- def __init__(self, test_data_name='wsc273'):
- vocab_file = os.path.join(FLAGS.data_dir, 'vocab.txt')
- self.vocab = utils.CharsVocabulary(vocab_file, 50)
- assert test_data_name in ['pdp60', 'wsc273'], (
- 'Test data must be pdp60 or wsc273, got {}'.format(test_data_name))
- self.test_data_name = test_data_name
-
- test_data = utils.parse_commonsense_reasoning_test(test_data_name)
- self.question_ids, self.sentences, self.labels = test_data
- self.all_probs = [] # aggregate single-model prediction here.
-
-  def add_single_model(self, model_name='lm01'):
- """Add a single model into the current ensemble."""
- # Create single LM
- single_lm = SingleRecurrentLanguageModel(self.vocab, model_name)
-
- # Add the single LM prediction.
- probs = single_lm.assign_probs(self.sentences, self.test_data_name)
- self.all_probs.append(probs)
- print('Done adding {}'.format(model_name))
-
- def evaluate(self):
- """Evaluate the current ensemble."""
- # Attach word probabilities and correctness label to each substitution
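-    # Ensemble by averaging per-word probabilities across all added models.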
- ensembled_probs = sum(self.all_probs) / len(self.all_probs)
- scorings = []
- for i, sentence in enumerate(self.sentences):
- correctness = self.labels[i]
- word_probs = ensembled_probs[i, :len(sentence)]
- joint_prob = np.prod(word_probs, dtype=np.float64)
-
- scorings.append(dict(
- correctness=correctness,
- sentence=sentence,
- joint_prob=joint_prob,
- word_probs=word_probs))
- scoring_mode = 'full' if self.test_data_name == 'pdp60' else 'partial'
- return utils.compare_substitutions(
- self.question_ids, scorings, scoring_mode)
-
-
-class SingleRecurrentLanguageModel(object):
- """Single Recurrent Language Model."""
-
- def __init__(self, vocab, model_name='lm01'):
- self.vocab = vocab
- self.log_dir = os.path.join(FLAGS.data_dir, model_name)
-
- def reset(self):
- self.sess.run(self.tensors['states_init'])
-
- def _score(self, word_patch):
- """Score a matrix of shape (batch_size, num_timesteps+1) str tokens."""
- word_ids = np.array(
- [[self.vocab.word_to_id(word) for word in row]
- for row in word_patch])
- char_ids = np.array(
- [[self.vocab.word_to_char_ids(word) for word in row]
- for row in word_patch])
- print('Probs for \n{}\n='.format(np.array(word_patch)[:, 1:]))
-
- input_ids, target_ids = word_ids[:, :-1], word_ids[:, 1:]
- input_char_ids = char_ids[:, :-1, :]
-
- softmax = self.sess.run(self.tensors['softmax_out'], feed_dict={
- self.tensors['inputs_in']: input_ids,
- self.tensors['char_inputs_in']: input_char_ids
- })
-
- batch_size, num_timesteps = self.shape
- softmax = softmax.reshape((num_timesteps, batch_size, -1))
- softmax = np.transpose(softmax, [1, 0, 2])
- probs = np.array([[softmax[row, col, target_ids[row, col]]
- for col in range(num_timesteps)]
- for row in range(batch_size)])
- print(probs)
- return probs
-
- def _score_patches(self, word_patches):
- """Score a 2D matrix of word_patches and stitch results together."""
- batch_size, num_timesteps = self.shape
- nrow, ncol = len(word_patches), len(word_patches[0])
- max_len = num_timesteps * ncol
- probs = np.zeros([0, max_len]) # accumulate results into this.
-
- # Loop through the 2D matrix of word_patches and score each.
- for i, row in enumerate(word_patches):
- print('Reset RNN states.')
- self.reset() # reset states before processing each row.
- row_probs = np.zeros([batch_size, 0])
- for j, word_patch in enumerate(row):
- print('Processing patch '
- '({}, {}) / ({}, {})'.format(i+1, j+1, nrow, ncol))
- patch_probs = (self._score(word_patch) if word_patch else
- np.zeros([batch_size, num_timesteps]))
- row_probs = np.concatenate([row_probs, patch_probs], 1)
- probs = np.concatenate([probs, row_probs], 0)
- return probs
-
- def assign_probs(self, sentences, test_data_name='wsc273'):
- """Return prediction accuracy using this LM for a test."""
-
- probs_cache = os.path.join(self.log_dir, '{}.probs'.format(test_data_name))
- if os.path.exists(probs_cache):
- print('Reading cached result from {}'.format(probs_cache))
- with tf.gfile.Open(probs_cache, 'r') as f:
- probs = pkl.load(f)
- else:
- tf.reset_default_graph()
- self.sess = tf.Session()
- # Build the graph.
- saver = tf.train.import_meta_graph(
- os.path.join(self.log_dir, 'ckpt-best.meta'))
- saver.restore(self.sess, os.path.join(self.log_dir, 'ckpt-best'))
- print('Restored from {}'.format(self.log_dir))
- graph = tf.get_default_graph()
- self.tensors = dict(
- inputs_in=graph.get_tensor_by_name('test_inputs_in:0'),
- char_inputs_in=graph.get_tensor_by_name('test_char_inputs_in:0'),
- softmax_out=graph.get_tensor_by_name('SotaRNN_1/softmax_out:0'),
- states_init=graph.get_operation_by_name('SotaRNN_1/states_init'))
- self.shape = self.tensors['inputs_in'].shape.as_list()
-
- # Cut sentences into patches of shape processable by the LM.
- batch_size, num_timesteps = self.shape
- word_patches = utils.cut_to_patches(sentences, batch_size, num_timesteps)
- probs = self._score_patches(word_patches)
-
- # Cache the probs since they are expensive to evaluate
- with tf.gfile.Open(probs_cache, 'w') as f:
- pkl.dump(probs, f)
- return probs
-
-
-def evaluate_ensemble(test_data_name, number_of_lms):
- ensemble = EnsembleLM(test_data_name)
- model_list = ['lm{:02d}'.format(i+1) for i in range(number_of_lms)]
- for model_name in model_list:
- ensemble.add_single_model(model_name)
- accuracy = ensemble.evaluate()
- print('Accuracy of {} LM(s) on {} = {}'.format(
- number_of_lms, test_data_name, accuracy))
-
-
-def main(_):
- evaluate_ensemble('pdp60', 1) # 60%
- evaluate_ensemble('pdp60', 5) # 70%
- evaluate_ensemble('wsc273', 10) # 61.5%
- evaluate_ensemble('wsc273', 14) # 63.7%
-
-
-if __name__ == '__main__':
- tf.app.run(main)
diff --git a/research/lm_commonsense/method.jpg b/research/lm_commonsense/method.jpg
deleted file mode 100644
index ee8a5506fccca3cbb67f7bda0ccef78303cb228b..0000000000000000000000000000000000000000
Binary files a/research/lm_commonsense/method.jpg and /dev/null differ
diff --git a/research/lm_commonsense/utils.py b/research/lm_commonsense/utils.py
deleted file mode 100644
index d75f2b0fb72716860ea6d438e6b8ca2732d13c84..0000000000000000000000000000000000000000
--- a/research/lm_commonsense/utils.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# Copyright 2017 The TensorFlow Authors All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import json
-import os
-import numpy as np
-import tensorflow as tf
-
-FLAGS = tf.flags.FLAGS
-
-
-class Vocabulary(object):
- """Class that holds a vocabulary for the dataset."""
-
- def __init__(self, filename):
-
- self._id_to_word = []
- self._word_to_id = {}
- self._unk = -1
- self._bos = -1
- self._eos = -1
-
- with tf.gfile.Open(filename) as f:
- idx = 0
- for line in f:
- word_name = line.strip()
-        if word_name == '<S>':
-          self._bos = idx
-        elif word_name == '</S>':
-          self._eos = idx
-        elif word_name == '<UNK>':
-          self._unk = idx
- if word_name == '!!!MAXTERMID':
- continue
-
- self._id_to_word.append(word_name)
- self._word_to_id[word_name] = idx
- idx += 1
-
- @property
- def bos(self):
- return self._bos
-
- @property
- def eos(self):
- return self._eos
-
- @property
- def unk(self):
- return self._unk
-
- @property
- def size(self):
- return len(self._id_to_word)
-
- def word_to_id(self, word):
- if word in self._word_to_id:
- return self._word_to_id[word]
- else:
- if word.lower() in self._word_to_id:
- return self._word_to_id[word.lower()]
- return self.unk
-
- def id_to_word(self, cur_id):
- if cur_id < self.size:
- return self._id_to_word[int(cur_id)]
- return ''
-
- def decode(self, cur_ids):
- return ' '.join([self.id_to_word(cur_id) for cur_id in cur_ids])
-
- def encode(self, sentence):
- word_ids = [self.word_to_id(cur_word) for cur_word in sentence.split()]
- return np.array([self.bos] + word_ids + [self.eos], dtype=np.int32)
-
-
-class CharsVocabulary(Vocabulary):
- """Vocabulary containing character-level information."""
-
- def __init__(self, filename, max_word_length):
- super(CharsVocabulary, self).__init__(filename)
-
- self._max_word_length = max_word_length
- chars_set = set()
-
- for word in self._id_to_word:
- chars_set |= set(word)
-
- free_ids = []
- for i in range(256):
- if chr(i) in chars_set:
- continue
- free_ids.append(chr(i))
-
- if len(free_ids) < 5:
- raise ValueError('Not enough free char ids: %d' % len(free_ids))
-
-    self.bos_char = free_ids[0]  # <begin sentence>
-    self.eos_char = free_ids[1]  # <end sentence>
-    self.bow_char = free_ids[2]  # <begin word>
-    self.eow_char = free_ids[3]  # <end word>
-    self.pad_char = free_ids[4]  # <padding>
-
- chars_set |= {self.bos_char, self.eos_char, self.bow_char, self.eow_char,
- self.pad_char}
-
- self._char_set = chars_set
- num_words = len(self._id_to_word)
-
- self._word_char_ids = np.zeros([num_words, max_word_length], dtype=np.int32)
-
- self.bos_chars = self._convert_word_to_char_ids(self.bos_char)
- self.eos_chars = self._convert_word_to_char_ids(self.eos_char)
-
- for i, word in enumerate(self._id_to_word):
- if i == self.bos:
- self._word_char_ids[i] = self.bos_chars
- elif i == self.eos:
- self._word_char_ids[i] = self.eos_chars
- else:
- self._word_char_ids[i] = self._convert_word_to_char_ids(word)
-
- @property
- def max_word_length(self):
- return self._max_word_length
-
- def _convert_word_to_char_ids(self, word):
- code = np.zeros([self.max_word_length], dtype=np.int32)
- code[:] = ord(self.pad_char)
-
- if len(word) > self.max_word_length - 2:
- word = word[:self.max_word_length-2]
- cur_word = self.bow_char + word + self.eow_char
- for j in range(len(cur_word)):
- code[j] = ord(cur_word[j])
- return code
-
- def word_to_char_ids(self, word):
- if word in self._word_to_id:
- return self._word_char_ids[self._word_to_id[word]]
- else:
- return self._convert_word_to_char_ids(word)
-
- def encode_chars(self, sentence):
- chars_ids = [self.word_to_char_ids(cur_word)
- for cur_word in sentence.split()]
- return np.vstack([self.bos_chars] + chars_ids + [self.eos_chars])
-
-
-_SPECIAL_CHAR_MAP = {
- '\xe2\x80\x98': '\'',
- '\xe2\x80\x99': '\'',
- '\xe2\x80\x9c': '"',
- '\xe2\x80\x9d': '"',
- '\xe2\x80\x93': '-',
- '\xe2\x80\x94': '-',
- '\xe2\x88\x92': '-',
- '\xce\x84': '\'',
- '\xc2\xb4': '\'',
- '`': '\''
-}
-
-_START_SPECIAL_CHARS = ['.', ',', '?', '!', ';', ':', '[', ']', '\'', '+', '/',
- '\xc2\xa3', '$', '~', '*', '%', '{', '}', '#', '&', '-',
- '"', '(', ')', '='] + list(_SPECIAL_CHAR_MAP.keys())
-_SPECIAL_CHARS = _START_SPECIAL_CHARS + [
- '\'s', '\'m', '\'t', '\'re', '\'d', '\'ve', '\'ll']
-
-
-def tokenize(sentence):
- """Tokenize a sentence."""
- sentence = str(sentence)
- words = sentence.strip().split()
- tokenized = [] # return this
-
- for word in words:
- if word.lower() in ['mr.', 'ms.']:
- tokenized.append(word)
- continue
-
- # Split special chars at the start of word
- will_split = True
- while will_split:
- will_split = False
- for char in _START_SPECIAL_CHARS:
- if word.startswith(char):
- tokenized.append(char)
- word = word[len(char):]
- will_split = True
-
- # Split special chars at the end of word
- special_end_tokens = []
- will_split = True
- while will_split:
- will_split = False
- for char in _SPECIAL_CHARS:
- if word.endswith(char):
- special_end_tokens = [char] + special_end_tokens
- word = word[:-len(char)]
- will_split = True
-
- if word:
- tokenized.append(word)
- tokenized += special_end_tokens
-
- # Add necessary end of sentence token.
- if tokenized[-1] not in ['.', '!', '?']:
- tokenized += ['.']
- return tokenized
-
-
-def parse_commonsense_reasoning_test(test_data_name):
- """Read JSON test data."""
- with tf.gfile.Open(os.path.join(
- FLAGS.data_dir, 'commonsense_test',
- '{}.json'.format(test_data_name)), 'r') as f:
- data = json.load(f)
-
- question_ids = [d['question_id'] for d in data]
- sentences = [tokenize(d['substitution']) for d in data]
- labels = [d['correctness'] for d in data]
-
- return question_ids, sentences, labels
-
-
-PAD = '<padding>'
-
-
-def cut_to_patches(sentences, batch_size, num_timesteps):
- """Cut sentences into patches of shape (batch_size, num_timesteps).
-
- Args:
- sentences: a list of sentences, each sentence is a list of str token.
- batch_size: batch size
- num_timesteps: number of backprop step
-
- Returns:
- patches: A 2D matrix,
- each entry is a matrix of shape (batch_size, num_timesteps).
- """
-  preprocessed = [['<S>']+sentence+['</S>'] for sentence in sentences]
- max_len = max([len(sent) for sent in preprocessed])
-
-  # Pad to shape [height, width],
-  # where height is a multiple of batch_size
-  # and width is a multiple of num_timesteps plus one.
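-  # The extra "+ 1" column gives each patch the prediction target for its
-  # last timestep; consecutive patches overlap by one token.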
- nrow = int(np.ceil(len(preprocessed) * 1.0 / batch_size))
- ncol = int(np.ceil(max_len * 1.0 / num_timesteps))
- height, width = nrow * batch_size, ncol * num_timesteps + 1
- preprocessed = [sent + [PAD] * (width - len(sent)) for sent in preprocessed]
- preprocessed += [[PAD] * width] * (height - len(preprocessed))
-
- # Cut preprocessed into patches of shape [batch_size, num_timesteps]
- patches = []
- for row in range(nrow):
- patches.append([])
- for col in range(ncol):
- patch = [sent[col * num_timesteps:
- (col+1) * num_timesteps + 1]
- for sent in preprocessed[row * batch_size:
- (row+1) * batch_size]]
- if np.all(np.array(patch)[:, 1:] == PAD):
- patch = None # no need to process this patch.
- patches[-1].append(patch)
- return patches
-
-
-def _substitution_mask(sent1, sent2):
- """Binary mask identifying substituted part in two sentences.
-
-  Example sentences and their masks:
-    First sentence  = "I like the cat 's color"
-                       0 0    0   1   0  0
-    Second sentence = "I like the yellow dog 's color"
-                       0 0    0   1      1   0  0
-
- Args:
- sent1: first sentence
- sent2: second sentence
-
- Returns:
- mask1: mask for first sentence
- mask2: mask for second sentence
- """
- mask1_start, mask2_start = [], []
- while sent1[0] == sent2[0]:
- sent1 = sent1[1:]
- sent2 = sent2[1:]
- mask1_start.append(0.)
- mask2_start.append(0.)
-
- mask1_end, mask2_end = [], []
- while sent1[-1] == sent2[-1]:
- if (len(sent1) == 1) or (len(sent2) == 1):
- break
- sent1 = sent1[:-1]
- sent2 = sent2[:-1]
- mask1_end = [0.] + mask1_end
- mask2_end = [0.] + mask2_end
-
- assert sent1 or sent2, 'Two sentences are identical.'
- return (mask1_start + [1.] * len(sent1) + mask1_end,
- mask2_start + [1.] * len(sent2) + mask2_end)
-
-
-def _convert_to_partial(scoring1, scoring2):
- """Convert full scoring into partial scoring."""
- mask1, mask2 = _substitution_mask(
- scoring1['sentence'], scoring2['sentence'])
-
- def _partial_score(scoring, mask):
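-    # Where mask == 1 (the substituted words), the probability is replaced by
-    # 1.0, so the joint probability only scores how well the surrounding
-    # context fits the substitution.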
- word_probs = [max(_) for _ in zip(scoring['word_probs'], mask)]
- scoring.update(word_probs=word_probs,
- joint_prob=np.prod(word_probs))
-
- _partial_score(scoring1, mask1)
- _partial_score(scoring2, mask2)
-
-
-def compare_substitutions(question_ids, scorings, mode='full'):
- """Return accuracy by comparing two consecutive scorings."""
- prediction_correctness = []
- # Compare two consecutive substitutions
- for i in range(len(scorings) // 2):
- scoring1, scoring2 = scorings[2*i: 2*i+2]
- if mode == 'partial': # fix joint prob into partial prob
- _convert_to_partial(scoring1, scoring2)
-
- prediction_correctness.append(
- (scoring2['joint_prob'] > scoring1['joint_prob']) ==
- scoring2['correctness'])
-
- # Two consecutive substitutions always belong to the same question
- question_ids = [qid for i, qid in enumerate(question_ids) if i % 2 == 0]
- assert len(question_ids) == len(prediction_correctness)
- num_questions = len(set(question_ids))
-
-  # A question is correctly answered only if all predictions with the same
-  # question_id are correct.
- num_correct_answer = 0
- previous_qid = None
- correctly_answered = False
- for predict, qid in zip(prediction_correctness, question_ids):
- if qid != previous_qid:
- previous_qid = qid
- num_correct_answer += int(correctly_answered)
- correctly_answered = True
- correctly_answered = correctly_answered and predict
- num_correct_answer += int(correctly_answered)
-
- return num_correct_answer / num_questions
diff --git a/research/lstm_object_detection/README.md b/research/lstm_object_detection/README.md
deleted file mode 100644
index a696ba3df306768cfa28223ad957ef564667c7dd..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/README.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Tensorflow Mobile Video Object Detection
-
-This directory contains a TensorFlow implementation of mobile video object
-detection, as proposed in the following papers:
-
-
-
-
-
-```
-"Mobile Video Object Detection with Temporally-Aware Feature Maps",
-Liu, Mason and Zhu, Menglong, CVPR 2018.
-```
-\[[link](http://openaccess.thecvf.com/content_cvpr_2018/papers/Liu_Mobile_Video_Object_CVPR_2018_paper.pdf)\]\[[bibtex](
-https://scholar.googleusercontent.com/scholar.bib?q=info:hq5rcMUUXysJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAXLdwXcU5g_wiMQ40EvbHQ9kTyvfUxffh&scisf=4&ct=citation&cd=-1&hl=en)\]
-
-
-
-
-
-
-```
-"Looking Fast and Slow: Memory-Guided Mobile Video Object Detection",
-Liu, Mason and Zhu, Menglong and White, Marie and Li, Yinxiao and Kalenichenko, Dmitry
-```
-\[[link](https://arxiv.org/abs/1903.10172)\]\[[bibtex](
-https://scholar.googleusercontent.com/scholar.bib?q=info:rLqvkztmWYgJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAXLdwNf-LJlm2M1ymQHbq2wYA995MHpJu&scisf=4&ct=citation&cd=-1&hl=en)\]
-
-
-## Maintainers
-* masonliuw@gmail.com
-* yinxiao@google.com
-* menglong@google.com
-* yongzhe@google.com
-* lzyuan@google.com
-
-
-## Table of Contents
-
- * Exporting a trained model
diff --git a/research/lstm_object_detection/__init__.py b/research/lstm_object_detection/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/builders/__init__.py b/research/lstm_object_detection/builders/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/builders/graph_rewriter_builder.py b/research/lstm_object_detection/builders/graph_rewriter_builder.py
deleted file mode 100644
index accced2f0fccec190894348d5518bd991332fc71..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/builders/graph_rewriter_builder.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Custom version for quantized training and evaluation functions.
-
-The main difference between this and the third_party graph_rewriter_builder.py
-is that this version uses experimental_create_training_graph which allows the
-customization of freeze_bn_delay.
-"""
-
-import re
-import tensorflow.compat.v1 as tf
-from tensorflow.contrib import layers as contrib_layers
-from tensorflow.contrib import quantize as contrib_quantize
-from tensorflow.contrib.quantize.python import common
-from tensorflow.contrib.quantize.python import input_to_ops
-from tensorflow.contrib.quantize.python import quant_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-
-
-def build(graph_rewriter_config,
- quant_overrides_config=None,
- is_training=True,
- is_export=False):
- """Returns a function that modifies default graph based on options.
-
- Args:
- graph_rewriter_config: graph_rewriter_pb2.GraphRewriter proto.
- quant_overrides_config: quant_overrides_pb2.QuantOverrides proto.
- is_training: whether in training or eval mode.
- is_export: whether exporting the graph.
- """
- def graph_rewrite_fn():
- """Function to quantize weights and activation of the default graph."""
- if (graph_rewriter_config.quantization.weight_bits != 8 or
- graph_rewriter_config.quantization.activation_bits != 8):
- raise ValueError('Only 8bit quantization is supported')
-
- graph = tf.get_default_graph()
-
- # Insert custom quant ops.
- if quant_overrides_config is not None:
- input_to_ops_map = input_to_ops.InputToOps(graph)
- for q in quant_overrides_config.quant_configs:
- producer = graph.get_operation_by_name(q.op_name)
- if producer is None:
- raise ValueError('Op name does not exist in graph.')
- context = _get_context_from_op(producer)
- consumers = input_to_ops_map.ConsumerOperations(producer)
- if q.fixed_range:
- _insert_fixed_quant_op(
- context,
- q.quant_op_name,
- producer,
- consumers,
- init_min=q.min,
- init_max=q.max,
- quant_delay=q.delay if is_training else 0)
- else:
- raise ValueError('Learned ranges are not yet supported.')
-
- # Quantize the graph by inserting quantize ops for weights and activations
- if is_training:
- contrib_quantize.experimental_create_training_graph(
- input_graph=graph,
- quant_delay=graph_rewriter_config.quantization.delay,
- freeze_bn_delay=graph_rewriter_config.quantization.delay)
- else:
- contrib_quantize.experimental_create_eval_graph(
- input_graph=graph,
- quant_delay=graph_rewriter_config.quantization.delay
- if not is_export else 0)
-
- contrib_layers.summarize_collection('quant_vars')
-
- return graph_rewrite_fn
-
-
-def _get_context_from_op(op):
- """Gets the root context name from the op name."""
- context_re = re.search(r'^(.*)/([^/]+)', op.name)
- if context_re:
- return context_re.group(1)
- return ''
-
-
-def _insert_fixed_quant_op(context,
- name,
- producer,
- consumers,
- init_min=-6.0,
- init_max=6.0,
- quant_delay=None):
- """Adds a fake quant op with fixed ranges.
-
- Args:
- context: The parent scope of the op to be quantized.
- name: The name of the fake quant op.
- producer: The producer op to be quantized.
- consumers: The consumer ops to the producer op.
- init_min: The minimum range for the fake quant op.
- init_max: The maximum range for the fake quant op.
- quant_delay: Number of steps to wait before activating the fake quant op.
-
- Raises:
- ValueError: When producer operation is not directly connected to the
- consumer operation.
- """
- name_prefix = name if not context else context + '/' + name
- inputs = producer.outputs[0]
- quant = quant_ops.FixedQuantize(
- inputs, init_min=init_min, init_max=init_max, scope=name_prefix)
-
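-  # Until `quant_delay` global steps have elapsed, pass `inputs` through
-  # unchanged; afterwards, switch to the fixed-range fake-quant output.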
- if quant_delay and quant_delay > 0:
- activate_quant = math_ops.greater_equal(
- common.CreateOrGetQuantizationStep(),
- quant_delay,
- name=name_prefix + '/activate_quant')
- quant = control_flow_ops.cond(
- activate_quant,
- lambda: quant,
- lambda: inputs,
- name=name_prefix + '/delayed_quant')
-
- if consumers:
- tensors_modified_count = common.RerouteTensor(
- quant, inputs, can_modify=consumers)
- # Some operations can have multiple output tensors going to the same
- # consumer. Since consumers is a set, we need to ensure that
- # tensors_modified_count is greater than or equal to the length of the set
- # of consumers.
- if tensors_modified_count < len(consumers):
- raise ValueError('No inputs quantized for ops: [%s]' % ', '.join(
- [consumer.name for consumer in consumers]))
diff --git a/research/lstm_object_detection/builders/graph_rewriter_builder_test.py b/research/lstm_object_detection/builders/graph_rewriter_builder_test.py
deleted file mode 100644
index e06a9f5a3d729fe122bc00e74e2d158b3d06482e..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/builders/graph_rewriter_builder_test.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for graph_rewriter_builder."""
-import mock
-import tensorflow.compat.v1 as tf
-from tensorflow.contrib import layers as contrib_layers
-from tensorflow.contrib import quantize as contrib_quantize
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from lstm_object_detection.builders import graph_rewriter_builder
-from lstm_object_detection.protos import quant_overrides_pb2
-from object_detection.protos import graph_rewriter_pb2
-
-
-class QuantizationBuilderTest(tf.test.TestCase):
-
- def testQuantizationBuilderSetsUpCorrectTrainArguments(self):
- with mock.patch.object(
- contrib_quantize,
- 'experimental_create_training_graph') as mock_quant_fn:
- with mock.patch.object(contrib_layers,
- 'summarize_collection') as mock_summarize_col:
- graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
- graph_rewriter_proto.quantization.delay = 10
- graph_rewriter_proto.quantization.weight_bits = 8
- graph_rewriter_proto.quantization.activation_bits = 8
- graph_rewrite_fn = graph_rewriter_builder.build(
- graph_rewriter_proto, is_training=True)
- graph_rewrite_fn()
- _, kwargs = mock_quant_fn.call_args
- self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
- self.assertEqual(kwargs['quant_delay'], 10)
- mock_summarize_col.assert_called_with('quant_vars')
-
- def testQuantizationBuilderSetsUpCorrectEvalArguments(self):
- with mock.patch.object(contrib_quantize,
- 'experimental_create_eval_graph') as mock_quant_fn:
- with mock.patch.object(contrib_layers,
- 'summarize_collection') as mock_summarize_col:
- graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
- graph_rewriter_proto.quantization.delay = 10
- graph_rewrite_fn = graph_rewriter_builder.build(
- graph_rewriter_proto, is_training=False)
- graph_rewrite_fn()
- _, kwargs = mock_quant_fn.call_args
- self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
- mock_summarize_col.assert_called_with('quant_vars')
-
- def testQuantizationBuilderAddsQuantOverride(self):
- graph = ops.Graph()
- with graph.as_default():
- self._buildGraph()
-
- quant_overrides_proto = quant_overrides_pb2.QuantOverrides()
- quant_config = quant_overrides_proto.quant_configs.add()
- quant_config.op_name = 'test_graph/add_ab'
- quant_config.quant_op_name = 'act_quant'
- quant_config.fixed_range = True
- quant_config.min = 0
- quant_config.max = 6
- quant_config.delay = 100
-
- graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
- graph_rewriter_proto.quantization.delay = 10
- graph_rewriter_proto.quantization.weight_bits = 8
- graph_rewriter_proto.quantization.activation_bits = 8
-
- graph_rewrite_fn = graph_rewriter_builder.build(
- graph_rewriter_proto,
- quant_overrides_config=quant_overrides_proto,
- is_training=True)
- graph_rewrite_fn()
-
- act_quant_found = False
- quant_delay_found = False
- for op in graph.get_operations():
- if (quant_config.quant_op_name in op.name and
- op.type == 'FakeQuantWithMinMaxArgs'):
- act_quant_found = True
- min_val = op.get_attr('min')
- max_val = op.get_attr('max')
- self.assertEqual(min_val, quant_config.min)
- self.assertEqual(max_val, quant_config.max)
- if ('activate_quant' in op.name and
- quant_config.quant_op_name in op.name and op.type == 'Const'):
- tensor = op.get_attr('value')
- if tensor.int64_val[0] == quant_config.delay:
- quant_delay_found = True
-
- self.assertTrue(act_quant_found)
- self.assertTrue(quant_delay_found)
-
- def _buildGraph(self, scope='test_graph'):
- with ops.name_scope(scope):
- a = tf.constant(10, dtype=dtypes.float32, name='input_a')
- b = tf.constant(20, dtype=dtypes.float32, name='input_b')
- ab = tf.add(a, b, name='add_ab')
- c = tf.constant(30, dtype=dtypes.float32, name='input_c')
- abc = tf.multiply(ab, c, name='mul_ab_c')
- return abc
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/configs/lstm_ssd_interleaved_mobilenet_v2_imagenet.config b/research/lstm_object_detection/configs/lstm_ssd_interleaved_mobilenet_v2_imagenet.config
deleted file mode 100644
index 536d7d5327114efa159475433f051c627043e64f..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/configs/lstm_ssd_interleaved_mobilenet_v2_imagenet.config
+++ /dev/null
@@ -1,239 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# For training on Imagenet Video with LSTM Interleaved Mobilenet V2
-
-[lstm_object_detection.protos.lstm_model] {
- train_unroll_length: 4
- eval_unroll_length: 4
- lstm_state_depth: 320
- depth_multipliers: 1.4
- depth_multipliers: 0.35
- pre_bottleneck: true
- low_res: true
- train_interleave_method: 'RANDOM_SKIP_SMALL'
- eval_interleave_method: 'SKIP3'
-}
-model {
- ssd {
-    num_classes: 30 # Number of classes in the ImageNet VID dataset.
- box_coder {
- faster_rcnn_box_coder {
- y_scale: 10.0
- x_scale: 10.0
- height_scale: 5.0
- width_scale: 5.0
- }
- }
- matcher {
- argmax_matcher {
- matched_threshold: 0.5
- unmatched_threshold: 0.5
- ignore_thresholds: false
- negatives_lower_than_unmatched: true
- force_match_for_each_row: true
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- num_layers: 5
- min_scale: 0.2
- max_scale: 0.95
- aspect_ratios: 1.0
- aspect_ratios: 2.0
- aspect_ratios: 0.5
- aspect_ratios: 3.0
- aspect_ratios: 0.3333
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- min_depth: 0
- max_depth: 0
- num_layers_before_predictor: 3
- use_dropout: false
- dropout_keep_probability: 0.8
- kernel_size: 3
- box_code_size: 4
- apply_sigmoid_to_scores: false
- use_depthwise: true
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- }
- feature_extractor {
- type: 'lstm_ssd_interleaved_mobilenet_v2'
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- loss {
- classification_loss {
- weighted_sigmoid {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- hard_example_miner {
- num_hard_examples: 3000
- iou_threshold: 0.99
- loss_type: CLASSIFICATION
- max_negatives_per_positive: 3
- min_negatives_per_image: 0
- }
- classification_weight: 1.0
- localization_weight: 4.0
- }
- normalize_loss_by_num_matches: true
- post_processing {
- batch_non_max_suppression {
- score_threshold: -20.0
- iou_threshold: 0.5
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SIGMOID
- }
- }
-}
-
-train_config: {
- batch_size: 8
- optimizer {
- use_moving_average: false
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.002
- decay_steps: 200000
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- }
- gradient_clipping_by_norm: 10.0
- batch_queue_capacity: 12
- prefetch_queue_capacity: 4
-}
-
-train_input_reader: {
- shuffle_buffer_size: 32
- queue_capacity: 12
- prefetch_size: 12
- min_after_dequeue: 4
- label_map_path: "path/to/label_map"
- external_input_reader {
- [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
- tf_record_video_input_reader: {
- input_path: '/data/lstm_detection/tfrecords/test.tfrecord'
- data_type: TF_SEQUENCE_EXAMPLE
- video_length: 4
- }
- }
- }
-}
-
-eval_config: {
- metrics_set: "coco_evaluation_all_frames"
- use_moving_averages: true
- min_score_threshold: 0.5
- max_num_boxes_to_visualize: 300
- visualize_groundtruth_boxes: true
- groundtruth_box_visualization_color: "red"
-}
-
-eval_input_reader {
- label_map_path: "path/to/label_map"
- shuffle: true
- num_epochs: 1
- num_parallel_batches: 1
- num_readers: 1
- external_input_reader {
- [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
- tf_record_video_input_reader: {
- input_path: "path/to/sequence_example/data"
- data_type: TF_SEQUENCE_EXAMPLE
- video_length: 10
- }
- }
- }
-}
-
-eval_input_reader: {
- label_map_path: "path/to/label_map"
- external_input_reader {
- [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
- tf_record_video_input_reader: {
- input_path: "path/to/sequence_example/data"
- data_type: TF_SEQUENCE_EXAMPLE
- video_length: 4
- }
- }
- }
- shuffle: true
- num_readers: 1
-}
diff --git a/research/lstm_object_detection/configs/lstm_ssd_mobilenet_v1_imagenet.config b/research/lstm_object_detection/configs/lstm_ssd_mobilenet_v1_imagenet.config
deleted file mode 100644
index cb357ec17eeb80795d48a5aea50f98f3934ff1ad..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/configs/lstm_ssd_mobilenet_v1_imagenet.config
+++ /dev/null
@@ -1,232 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# For training on Imagenet Video with LSTM Mobilenet V1
-
-[lstm_object_detection.protos.lstm_model] {
- train_unroll_length: 4
- eval_unroll_length: 4
-}
-
-model {
- ssd {
-    num_classes: 30 # Number of classes in the ImageNet VID dataset.
- box_coder {
- faster_rcnn_box_coder {
- y_scale: 10.0
- x_scale: 10.0
- height_scale: 5.0
- width_scale: 5.0
- }
- }
- matcher {
- argmax_matcher {
- matched_threshold: 0.5
- unmatched_threshold: 0.5
- ignore_thresholds: false
- negatives_lower_than_unmatched: true
- force_match_for_each_row: true
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- num_layers: 5
- min_scale: 0.2
- max_scale: 0.95
- aspect_ratios: 1.0
- aspect_ratios: 2.0
- aspect_ratios: 0.5
- aspect_ratios: 3.0
- aspect_ratios: 0.3333
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 256
- width: 256
- }
- }
- box_predictor {
- convolutional_box_predictor {
- min_depth: 0
- max_depth: 0
- num_layers_before_predictor: 3
- use_dropout: false
- dropout_keep_probability: 0.8
- kernel_size: 3
- box_code_size: 4
- apply_sigmoid_to_scores: false
- use_depthwise: true
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- }
- feature_extractor {
- type: 'lstm_mobilenet_v1'
- min_depth: 16
- depth_multiplier: 1.0
- use_depthwise: true
- conv_hyperparams {
- activation: RELU_6,
- regularizer {
- l2_regularizer {
- weight: 0.00004
- }
- }
- initializer {
- truncated_normal_initializer {
- stddev: 0.03
- mean: 0.0
- }
- }
- batch_norm {
- train: true,
- scale: true,
- center: true,
- decay: 0.9997,
- epsilon: 0.001,
- }
- }
- }
- loss {
- classification_loss {
- weighted_sigmoid {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- hard_example_miner {
- num_hard_examples: 3000
- iou_threshold: 0.99
- loss_type: CLASSIFICATION
- max_negatives_per_positive: 3
- min_negatives_per_image: 0
- }
- classification_weight: 1.0
- localization_weight: 4.0
- }
- normalize_loss_by_num_matches: true
- post_processing {
- batch_non_max_suppression {
- score_threshold: -20.0
- iou_threshold: 0.5
- max_detections_per_class: 100
- max_total_detections: 100
- }
- score_converter: SIGMOID
- }
- }
-}
-
-train_config: {
- batch_size: 8
- data_augmentation_options {
- random_horizontal_flip {
- }
- }
- data_augmentation_options {
- ssd_random_crop {
- }
- }
- optimizer {
- use_moving_average: false
- rms_prop_optimizer: {
- learning_rate: {
- exponential_decay_learning_rate {
- initial_learning_rate: 0.002
- decay_steps: 200000
- decay_factor: 0.95
- }
- }
- momentum_optimizer_value: 0.9
- decay: 0.9
- epsilon: 1.0
- }
- }
-
- from_detection_checkpoint: true
- gradient_clipping_by_norm: 10.0
- batch_queue_capacity: 12
- prefetch_queue_capacity: 4
- fine_tune_checkpoint: "/path/to/checkpoint/"
- fine_tune_checkpoint_type: "detection"
-}
-
-
-train_input_reader: {
- shuffle_buffer_size: 32
- queue_capacity: 12
- prefetch_size: 12
- min_after_dequeue: 4
- label_map_path: "path/to/label_map"
- external_input_reader {
- [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
- tf_record_video_input_reader: {
- input_path: "path/to/sequence_example/data"
- data_type: TF_SEQUENCE_EXAMPLE
- video_length: 4
- }
- }
- }
-}
-
-eval_config: {
- metrics_set: "coco_evaluation_all_frames"
- use_moving_averages: true
- min_score_threshold: 0.5
- max_num_boxes_to_visualize: 300
- visualize_groundtruth_boxes: true
- groundtruth_box_visualization_color: "red"
-}
-
-eval_input_reader: {
- label_map_path: "path/to/label_map"
- external_input_reader {
- [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
- tf_record_video_input_reader: {
- input_path: "path/to/sequence_example/data"
- data_type: TF_SEQUENCE_EXAMPLE
- video_length: 4
- }
- }
- }
- shuffle: true
- num_readers: 1
-}
diff --git a/research/lstm_object_detection/eval.py b/research/lstm_object_detection/eval.py
deleted file mode 100644
index aac25c1182bd354b710a7bb83c7bd68365f14fed..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/eval.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""Evaluation executable for detection models.
-
-This executable is used to evaluate DetectionModels. Example usage:
- ./eval \
- --logtostderr \
- --checkpoint_dir=path/to/checkpoint_dir \
- --eval_dir=path/to/eval_dir \
- --pipeline_config_path=pipeline_config.pbtxt
-"""
-
-import functools
-import os
-import tensorflow.compat.v1 as tf
-from google.protobuf import text_format
-from lstm_object_detection import evaluator
-from lstm_object_detection import model_builder
-from lstm_object_detection.inputs import seq_dataset_builder
-from lstm_object_detection.utils import config_util
-from object_detection.utils import label_map_util
-
-tf.logging.set_verbosity(tf.logging.INFO)
-flags = tf.app.flags
-flags.DEFINE_boolean('eval_training_data', False,
- 'If training data should be evaluated for this job.')
-flags.DEFINE_string('checkpoint_dir', '',
- 'Directory containing checkpoints to evaluate, typically '
- 'set to `train_dir` used in the training job.')
-flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
-flags.DEFINE_string('pipeline_config_path', '',
- 'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
- 'file. If provided, other configs are ignored')
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
- 'evaluation. Overrides the `max_evals` parameter in the '
- 'provided config.')
-flags.DEFINE_string('model_config_path', '',
-                    'Path to a model_pb2.DetectionModel config file.')
-flags.DEFINE_string('eval_config_path', '',
-                    'Path to an eval_pb2.EvalConfig config file.')
-flags.DEFINE_string('input_config_path', '',
-                    'Path to an input_reader_pb2.InputReader config file.')
-FLAGS = flags.FLAGS
-
-
-def main(unused_argv):
- assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
- assert FLAGS.eval_dir, '`eval_dir` is missing.'
- if FLAGS.pipeline_config_path:
- configs = config_util.get_configs_from_pipeline_file(
- FLAGS.pipeline_config_path)
- else:
- configs = config_util.get_configs_from_multiple_files(
- model_config_path=FLAGS.model_config_path,
- eval_config_path=FLAGS.eval_config_path,
- eval_input_config_path=FLAGS.input_config_path)
-
- pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
- config_text = text_format.MessageToString(pipeline_proto)
- tf.gfile.MakeDirs(FLAGS.eval_dir)
- with tf.gfile.Open(os.path.join(FLAGS.eval_dir, 'pipeline.config'),
- 'wb') as f:
- f.write(config_text)
-
- model_config = configs['model']
- lstm_config = configs['lstm_model']
- eval_config = configs['eval_config']
- input_config = configs['eval_input_config']
-
- if FLAGS.eval_training_data:
- input_config.external_input_reader.CopyFrom(
- configs['train_input_config'].external_input_reader)
- lstm_config.eval_unroll_length = lstm_config.train_unroll_length
-
- model_fn = functools.partial(
- model_builder.build,
- model_config=model_config,
- lstm_config=lstm_config,
- is_training=False)
-
- def get_next(config, model_config, lstm_config, unroll_length):
- return seq_dataset_builder.build(config, model_config, lstm_config,
- unroll_length)
-
- create_input_dict_fn = functools.partial(get_next, input_config, model_config,
- lstm_config,
- lstm_config.eval_unroll_length)
-
- label_map = label_map_util.load_labelmap(input_config.label_map_path)
- max_num_classes = max([item.id for item in label_map.item])
- categories = label_map_util.convert_label_map_to_categories(
- label_map, max_num_classes)
-
- if FLAGS.run_once:
- eval_config.max_evals = 1
-
- evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
- FLAGS.checkpoint_dir, FLAGS.eval_dir)
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/research/lstm_object_detection/evaluator.py b/research/lstm_object_detection/evaluator.py
deleted file mode 100644
index 6ed3e476e8e9bfd9c0d4cfe71925ccb7ff5f6b07..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/evaluator.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Detection model evaluator.
-
-This file provides a generic evaluation method that can be used to evaluate a
-DetectionModel.
-
-"""
-
-import tensorflow.compat.v1 as tf
-from tensorflow.contrib import tfprof as contrib_tfprof
-from lstm_object_detection.metrics import coco_evaluation_all_frames
-from object_detection import eval_util
-from object_detection.core import prefetcher
-from object_detection.core import standard_fields as fields
-from object_detection.metrics import coco_evaluation
-from object_detection.utils import object_detection_evaluation
-
-
-# A dictionary mapping metric names to classes that implement the metric. The
-# classes in the dictionary must implement the
-# utils.object_detection_evaluation.DetectionEvaluator interface.
-EVAL_METRICS_CLASS_DICT = {
- 'pascal_voc_detection_metrics':
- object_detection_evaluation.PascalDetectionEvaluator,
- 'weighted_pascal_voc_detection_metrics':
- object_detection_evaluation.WeightedPascalDetectionEvaluator,
- 'pascal_voc_instance_segmentation_metrics':
- object_detection_evaluation.PascalInstanceSegmentationEvaluator,
- 'weighted_pascal_voc_instance_segmentation_metrics':
- object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator,
- 'open_images_detection_metrics':
- object_detection_evaluation.OpenImagesDetectionEvaluator,
- 'coco_detection_metrics':
- coco_evaluation.CocoDetectionEvaluator,
- 'coco_mask_metrics':
- coco_evaluation.CocoMaskEvaluator,
- 'coco_evaluation_all_frames':
- coco_evaluation_all_frames.CocoEvaluationAllFrames,
-}
-
-EVAL_DEFAULT_METRIC = 'pascal_voc_detection_metrics'
-
-
-def _create_detection_op(model, input_dict, batch):
- """Create detection ops.
-
- Args:
- model: model to perform predictions with.
- input_dict: A dict holds input data.
- batch: batch size for evaluation.
-
- Returns:
- Detection tensor ops.
- """
- video_tensor = tf.stack(list(input_dict[fields.InputDataFields.image]))
- preprocessed_video, true_image_shapes = model.preprocess(
- tf.to_float(video_tensor))
- if batch is not None:
- prediction_dict = model.predict(preprocessed_video, true_image_shapes,
- batch)
- else:
- prediction_dict = model.predict(preprocessed_video, true_image_shapes)
-
- return model.postprocess(prediction_dict, true_image_shapes)
-
-
-def _extract_prediction_tensors(model,
- create_input_dict_fn,
- ignore_groundtruth=False):
- """Restores the model in a tensorflow session.
-
- Args:
- model: model to perform predictions with.
- create_input_dict_fn: function to create input tensor dictionaries.
- ignore_groundtruth: whether groundtruth should be ignored.
-
- Returns:
- tensor_dict: A tensor dictionary with evaluations.
- """
- input_dict = create_input_dict_fn()
- batch = None
- if 'batch' in input_dict:
- batch = input_dict.pop('batch')
- else:
- prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
- input_dict = prefetch_queue.dequeue()
- # consistent format for images and videos
- for key, value in input_dict.items():
- input_dict[key] = (value,)
-
- detections = _create_detection_op(model, input_dict, batch)
-
- # Print out an analysis of the model.
- contrib_tfprof.model_analyzer.print_model_analysis(
- tf.get_default_graph(),
- tfprof_options=contrib_tfprof.model_analyzer
- .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
- contrib_tfprof.model_analyzer.print_model_analysis(
- tf.get_default_graph(),
- tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
-
- num_frames = len(input_dict[fields.InputDataFields.image])
- ret = []
- for i in range(num_frames):
- original_image = tf.expand_dims(input_dict[fields.InputDataFields.image][i],
- 0)
- groundtruth = None
- if not ignore_groundtruth:
- groundtruth = {
- fields.InputDataFields.groundtruth_boxes:
- input_dict[fields.InputDataFields.groundtruth_boxes][i],
- fields.InputDataFields.groundtruth_classes:
- input_dict[fields.InputDataFields.groundtruth_classes][i],
- }
- optional_keys = (
- fields.InputDataFields.groundtruth_area,
- fields.InputDataFields.groundtruth_is_crowd,
- fields.InputDataFields.groundtruth_difficult,
- fields.InputDataFields.groundtruth_group_of,
- )
- for opt_key in optional_keys:
- if opt_key in input_dict:
- groundtruth[opt_key] = input_dict[opt_key][i]
- if fields.DetectionResultFields.detection_masks in detections:
- groundtruth[fields.InputDataFields.groundtruth_instance_masks] = (
- input_dict[fields.InputDataFields.groundtruth_instance_masks][i])
-
- detections_frame = {
- key: tf.expand_dims(value[i], 0)
- for key, value in detections.items()
- }
-
- source_id = (
- batch.key[0] if batch is not None else
- input_dict[fields.InputDataFields.source_id][i])
- ret.append(
- eval_util.result_dict_for_single_example(
- original_image,
- source_id,
- detections_frame,
- groundtruth,
- class_agnostic=(fields.DetectionResultFields.detection_classes
- not in detections),
- scale_to_absolute=True))
- return ret
-
-
-def get_evaluators(eval_config, categories):
- """Returns the evaluator class according to eval_config, valid for categories.
-
- Args:
- eval_config: evaluation configurations.
- categories: a list of categories to evaluate.
- Returns:
- A list of instances of DetectionEvaluator.
-
- Raises:
- ValueError: if metric is not in the metric class dictionary.
- """
- eval_metric_fn_keys = eval_config.metrics_set
- if not eval_metric_fn_keys:
- eval_metric_fn_keys = [EVAL_DEFAULT_METRIC]
- evaluators_list = []
- for eval_metric_fn_key in eval_metric_fn_keys:
- if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT:
- raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
- else:
- evaluators_list.append(
- EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](categories=categories))
- return evaluators_list
-
-
-def evaluate(create_input_dict_fn,
- create_model_fn,
- eval_config,
- categories,
- checkpoint_dir,
- eval_dir,
- graph_hook_fn=None):
- """Evaluation function for detection models.
-
- Args:
- create_input_dict_fn: a function to create a tensor input dictionary.
- create_model_fn: a function that creates a DetectionModel.
- eval_config: an eval_pb2.EvalConfig protobuf.
- categories: a list of category dictionaries. Each dict in the list should
- have an integer 'id' field and string 'name' field.
- checkpoint_dir: directory to load the checkpoints to evaluate from.
- eval_dir: directory to write evaluation metrics summary to.
- graph_hook_fn: Optional function that is called after the training graph is
- completely built. This is helpful to perform additional changes to the
- training graph such as optimizing batchnorm. The function should modify
- the default graph.
-
- Returns:
- metrics: A dictionary containing metric names and values from the latest
- run.
- """
-
- model = create_model_fn()
-
- if eval_config.ignore_groundtruth and not eval_config.export_path:
- tf.logging.fatal('If ignore_groundtruth=True then an export_path is '
- 'required. Aborting!!!')
-
- tensor_dicts = _extract_prediction_tensors(
- model=model,
- create_input_dict_fn=create_input_dict_fn,
- ignore_groundtruth=eval_config.ignore_groundtruth)
-
- def _process_batch(tensor_dicts,
- sess,
- batch_index,
- counters,
- losses_dict=None):
- """Evaluates tensors in tensor_dicts, visualizing the first K examples.
-
- This function calls sess.run on tensor_dicts, evaluating the original_image
- tensor only on the first K examples and visualizing detections overlaid
- on this original_image.
-
- Args:
- tensor_dicts: a dictionary of tensors
- sess: tensorflow session
- batch_index: the index of the batch amongst all batches in the run.
- counters: a dictionary holding 'success' and 'skipped' fields which can
- be updated to keep track of number of successful and failed runs,
- respectively. If these fields are not updated, then the success/skipped
- counter values shown at the end of evaluation will be incorrect.
- losses_dict: Optional dictionary of scalar loss tensors. Necessary only
- for matching the function signature in third_party eval_util.py.
-
- Returns:
- result_dict: a dictionary of numpy arrays
- result_losses_dict: a dictionary of scalar losses. This is empty if the
- input losses_dict is None. Necessary only for matching the function
- signature in third_party eval_util.py.
- """
- if batch_index % 10 == 0:
- tf.logging.info('Running eval ops batch %d', batch_index)
- if not losses_dict:
- losses_dict = {}
- try:
- result_dicts, result_losses_dict = sess.run([tensor_dicts, losses_dict])
- counters['success'] += 1
- except tf.errors.InvalidArgumentError:
- tf.logging.info('Skipping image')
- counters['skipped'] += 1
- return {}
- num_images = len(tensor_dicts)
- for i in range(num_images):
- result_dict = result_dicts[i]
- global_step = tf.train.global_step(sess, tf.train.get_global_step())
- tag = 'image-%d' % (batch_index * num_images + i)
- if batch_index < eval_config.num_visualizations / num_images:
- eval_util.visualize_detection_results(
- result_dict,
- tag,
- global_step,
- categories=categories,
- summary_dir=eval_dir,
- export_dir=eval_config.visualization_export_dir,
- show_groundtruth=eval_config.visualize_groundtruth_boxes,
- groundtruth_box_visualization_color=eval_config.
- groundtruth_box_visualization_color,
- min_score_thresh=eval_config.min_score_threshold,
- max_num_predictions=eval_config.max_num_boxes_to_visualize,
- skip_scores=eval_config.skip_scores,
- skip_labels=eval_config.skip_labels,
- keep_image_id_for_visualization_export=eval_config.
- keep_image_id_for_visualization_export)
- if num_images > 1:
- return result_dicts, result_losses_dict
- else:
- return result_dicts[0], result_losses_dict
-
- variables_to_restore = tf.global_variables()
- global_step = tf.train.get_or_create_global_step()
- variables_to_restore.append(global_step)
-
- if graph_hook_fn:
- graph_hook_fn()
-
- if eval_config.use_moving_averages:
- variable_averages = tf.train.ExponentialMovingAverage(0.0)
- variables_to_restore = variable_averages.variables_to_restore()
- for key in variables_to_restore.keys():
- if 'moving_mean' in key:
- variables_to_restore[key.replace(
- 'moving_mean', 'moving_mean/ExponentialMovingAverage')] = (
- variables_to_restore[key])
- del variables_to_restore[key]
- if 'moving_variance' in key:
- variables_to_restore[key.replace(
- 'moving_variance', 'moving_variance/ExponentialMovingAverage')] = (
- variables_to_restore[key])
- del variables_to_restore[key]
-
- saver = tf.train.Saver(variables_to_restore)
-
- def _restore_latest_checkpoint(sess):
- latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
- saver.restore(sess, latest_checkpoint)
-
- metrics = eval_util.repeated_checkpoint_run(
- tensor_dict=tensor_dicts,
- summary_dir=eval_dir,
- evaluators=get_evaluators(eval_config, categories),
- batch_processor=_process_batch,
- checkpoint_dirs=[checkpoint_dir],
- variables_to_restore=None,
- restore_fn=_restore_latest_checkpoint,
- num_batches=eval_config.num_examples,
- eval_interval_secs=eval_config.eval_interval_secs,
- max_number_of_evaluations=(1 if eval_config.ignore_groundtruth else
- eval_config.max_evals
- if eval_config.max_evals else None),
- master=eval_config.eval_master,
- save_graph=eval_config.save_graph,
- save_graph_dir=(eval_dir if eval_config.save_graph else ''))
-
- return metrics
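
The deleted evaluator resolves metric names through `EVAL_METRICS_CLASS_DICT` with a Pascal VOC default. Below is a minimal, self-contained sketch of that registry-lookup pattern; `_StubEvaluator` is a hypothetical stand-in for the real `DetectionEvaluator` subclasses, not part of the original module.

```python
# Sketch of the metric-registry pattern used by get_evaluators() above.
class _StubEvaluator(object):  # hypothetical stand-in evaluator
  def __init__(self, categories):
    self.categories = categories

_METRICS = {'pascal_voc_detection_metrics': _StubEvaluator}
_DEFAULT_METRIC = 'pascal_voc_detection_metrics'

def select_evaluators(metric_names, categories):
  names = metric_names or [_DEFAULT_METRIC]  # fall back to the default metric
  evaluators = []
  for name in names:
    if name not in _METRICS:
      raise ValueError('Metric not found: {}'.format(name))
    evaluators.append(_METRICS[name](categories=categories))
  return evaluators

print(select_evaluators([], [{'id': 1, 'name': 'person'}]))
```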
diff --git a/research/lstm_object_detection/export_tflite_lstd_graph.py b/research/lstm_object_detection/export_tflite_lstd_graph.py
deleted file mode 100644
index 7e933fb480d04aefa66ec6c4c8ec38f91dee9cb6..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/export_tflite_lstd_graph.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Exports an LSTM detection model to use with tf-lite.
-
-Output file:
-* A tflite compatible frozen graph - $output_directory/tflite_graph.pb
-
-The exported graph has the following input and output nodes.
-
-Inputs:
-'input_video_tensor': a float32 tensor of shape
-[unroll_length, height, width, 3] containing the normalized input image.
-Note that the height and width must be compatible with the height and
-width configured in the fixed_shape_resizer options in the pipeline
-config proto.
-
-Outputs:
-If add_postprocessing_op is true, the frozen graph adds a
- TFLite_Detection_PostProcess custom op node that has four outputs:
- detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
- locations
- detection_classes: a float32 tensor of shape [1, num_boxes]
- with class indices
- detection_scores: a float32 tensor of shape [1, num_boxes]
- with class scores
- num_boxes: a float32 tensor of size 1 containing the number of detected boxes
-else:
- the graph has three outputs:
- 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
- containing the encoded box predictions.
- 'raw_outputs/class_predictions': a float32 tensor of shape
- [1, num_anchors, num_classes] containing the class scores for each anchor
- after applying score conversion.
- 'anchors': a float32 constant tensor of shape [num_anchors, 4]
- containing the anchor boxes.
-
-Example Usage:
---------------
-python lstm_object_detection/export_tflite_lstd_graph.py \
- --pipeline_config_path path/to/lstm_pipeline.config \
- --trained_checkpoint_prefix path/to/model.ckpt \
- --output_directory path/to/exported_model_directory
-
-The expected output would be in the directory
-path/to/exported_model_directory (which is created if it does not exist)
-with contents:
- - tflite_graph.pbtxt
- - tflite_graph.pb
-Config overrides (see the `config_override` flag) are text protobufs
-(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
-certain fields in the provided pipeline_config_path. These are useful for
-making small changes to the inference graph that differ from the training or
-eval config.
-
-Example Usage (in which we change the NMS iou_threshold to be 0.5 and
-NMS score_threshold to be 0.0):
-python lstm_object_detection/export_tflite_lstd_graph.py \
- --pipeline_config_path path/to/lstm_pipeline.config \
- --trained_checkpoint_prefix path/to/model.ckpt \
- --output_directory path/to/exported_model_directory \
- --config_override " \
- model{ \
- ssd{ \
- post_processing { \
- batch_non_max_suppression { \
- score_threshold: 0.0 \
- iou_threshold: 0.5 \
- } \
- } \
- } \
- } \
- "
-"""
-
-import tensorflow.compat.v1 as tf
-
-from lstm_object_detection import export_tflite_lstd_graph_lib
-from lstm_object_detection.utils import config_util
-
-flags = tf.app.flags
-flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
-flags.DEFINE_string(
- 'pipeline_config_path', None,
- 'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
- 'file.')
-flags.DEFINE_string('trained_checkpoint_prefix', None, 'Checkpoint prefix.')
-flags.DEFINE_integer('max_detections', 10,
- 'Maximum number of detections (boxes) to show.')
-flags.DEFINE_integer('max_classes_per_detection', 1,
- 'Maximum number of classes to output per detection box.')
-flags.DEFINE_integer(
- 'detections_per_class', 100,
- 'Number of anchors used per class in Regular Non-Max-Suppression.')
-flags.DEFINE_bool('add_postprocessing_op', True,
- 'Add TFLite custom op for postprocessing to the graph.')
-flags.DEFINE_bool(
- 'use_regular_nms', False,
- 'Flag to set postprocessing op to use Regular NMS instead of Fast NMS.')
-flags.DEFINE_string(
- 'config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig '
- 'text proto to override pipeline_config_path.')
-
-FLAGS = flags.FLAGS
-
-
-def main(argv):
- del argv # Unused.
- flags.mark_flag_as_required('output_directory')
- flags.mark_flag_as_required('pipeline_config_path')
- flags.mark_flag_as_required('trained_checkpoint_prefix')
-
- pipeline_config = config_util.get_configs_from_pipeline_file(
- FLAGS.pipeline_config_path)
-
- export_tflite_lstd_graph_lib.export_tflite_graph(
- pipeline_config,
- FLAGS.trained_checkpoint_prefix,
- FLAGS.output_directory,
- FLAGS.add_postprocessing_op,
- FLAGS.max_detections,
- FLAGS.max_classes_per_detection,
- use_regular_nms=FLAGS.use_regular_nms)
-
-
-if __name__ == '__main__':
- tf.app.run(main)
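
The `config_override` mechanism described in the docstring above boils down to a second `text_format.Merge` over an already-parsed pipeline proto, where scalar fields from the override win. A hedged sketch of that underlying idea (the config path is hypothetical; the script itself goes through `config_util.get_configs_from_pipeline_file` rather than parsing by hand):

```python
from google.protobuf import text_format
from object_detection.protos import pipeline_pb2

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with open('path/to/lstm_pipeline.config') as f:  # hypothetical path
  text_format.Merge(f.read(), pipeline_config)

# Merging a partial proto overwrites scalar fields such as the NMS thresholds.
override = """
model { ssd { post_processing { batch_non_max_suppression {
  score_threshold: 0.0 iou_threshold: 0.5 } } } }
"""
text_format.Merge(override, pipeline_config)
```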
diff --git a/research/lstm_object_detection/export_tflite_lstd_graph_lib.py b/research/lstm_object_detection/export_tflite_lstd_graph_lib.py
deleted file mode 100644
index e066f11b45f2bd4608b08656040abba2632b4aa2..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/export_tflite_lstd_graph_lib.py
+++ /dev/null
@@ -1,327 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Exports detection models to use with tf-lite.
-
-See export_tflite_lstd_graph.py for usage.
-"""
-import os
-import tempfile
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-
-from tensorflow.core.framework import attr_value_pb2
-from tensorflow.core.framework import types_pb2
-from tensorflow.core.protobuf import saver_pb2
-from tensorflow.tools.graph_transforms import TransformGraph
-from lstm_object_detection import model_builder
-from object_detection import exporter
-from object_detection.builders import graph_rewriter_builder
-from object_detection.builders import post_processing_builder
-from object_detection.core import box_list
-
-_DEFAULT_NUM_CHANNELS = 3
-_DEFAULT_NUM_COORD_BOX = 4
-
-
-def get_const_center_size_encoded_anchors(anchors):
- """Exports center-size encoded anchors as a constant tensor.
-
- Args:
- anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor
- boxes
-
- Returns:
- encoded_anchors: a float32 constant tensor of shape [num_anchors, 4]
- containing the anchor boxes.
- """
- anchor_boxlist = box_list.BoxList(anchors)
- y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes()
- num_anchors = y.get_shape().as_list()
-
- with tf.Session() as sess:
- y_out, x_out, h_out, w_out = sess.run([y, x, h, w])
- encoded_anchors = tf.constant(
- np.transpose(np.stack((y_out, x_out, h_out, w_out))),
- dtype=tf.float32,
- shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX],
- name='anchors')
- return encoded_anchors
-
-
-def append_postprocessing_op(frozen_graph_def,
- max_detections,
- max_classes_per_detection,
- nms_score_threshold,
- nms_iou_threshold,
- num_classes,
- scale_values,
- detections_per_class=100,
- use_regular_nms=False):
- """Appends postprocessing custom op.
-
- Args:
- frozen_graph_def: Frozen GraphDef for SSD model after freezing the
- checkpoint
- max_detections: Maximum number of detections (boxes) to show
- max_classes_per_detection: Number of classes to display per detection
- nms_score_threshold: Score threshold used in Non-maximal suppression in
- post-processing
- nms_iou_threshold: Intersection-over-union threshold used in Non-maximal
- suppression in post-processing
- num_classes: number of classes in SSD detector
- scale_values: a dict with the following key-value pairs
- {y_scale: 10, x_scale: 10, h_scale: 5, w_scale: 5} that are used to decode
- center-size boxes
- detections_per_class: In regular NonMaxSuppression, number of anchors used
- for NonMaxSuppression per class
- use_regular_nms: Flag to set postprocessing op to use Regular NMS instead of
- Fast NMS.
-
- Returns:
- transformed_graph_def: Frozen GraphDef with postprocessing custom op
- appended
- TFLite_Detection_PostProcess custom op node has four outputs:
- detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
- locations
- detection_classes: a float32 tensor of shape [1, num_boxes]
- with class indices
- detection_scores: a float32 tensor of shape [1, num_boxes]
- with class scores
- num_boxes: a float32 tensor of size 1 containing the number of detected
- boxes
- """
- new_output = frozen_graph_def.node.add()
- new_output.op = 'TFLite_Detection_PostProcess'
- new_output.name = 'TFLite_Detection_PostProcess'
- new_output.attr['_output_quantized'].CopyFrom(
- attr_value_pb2.AttrValue(b=True))
- new_output.attr['_output_types'].list.type.extend([
- types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, types_pb2.DT_FLOAT,
- types_pb2.DT_FLOAT
- ])
- new_output.attr['_support_output_type_float_in_quantized_op'].CopyFrom(
- attr_value_pb2.AttrValue(b=True))
- new_output.attr['max_detections'].CopyFrom(
- attr_value_pb2.AttrValue(i=max_detections))
- new_output.attr['max_classes_per_detection'].CopyFrom(
- attr_value_pb2.AttrValue(i=max_classes_per_detection))
- new_output.attr['nms_score_threshold'].CopyFrom(
- attr_value_pb2.AttrValue(f=nms_score_threshold.pop()))
- new_output.attr['nms_iou_threshold'].CopyFrom(
- attr_value_pb2.AttrValue(f=nms_iou_threshold.pop()))
- new_output.attr['num_classes'].CopyFrom(
- attr_value_pb2.AttrValue(i=num_classes))
-
- new_output.attr['y_scale'].CopyFrom(
- attr_value_pb2.AttrValue(f=scale_values['y_scale'].pop()))
- new_output.attr['x_scale'].CopyFrom(
- attr_value_pb2.AttrValue(f=scale_values['x_scale'].pop()))
- new_output.attr['h_scale'].CopyFrom(
- attr_value_pb2.AttrValue(f=scale_values['h_scale'].pop()))
- new_output.attr['w_scale'].CopyFrom(
- attr_value_pb2.AttrValue(f=scale_values['w_scale'].pop()))
- new_output.attr['detections_per_class'].CopyFrom(
- attr_value_pb2.AttrValue(i=detections_per_class))
- new_output.attr['use_regular_nms'].CopyFrom(
- attr_value_pb2.AttrValue(b=use_regular_nms))
-
- new_output.input.extend(
- ['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors'])
- # Transform the graph to append new postprocessing op
- input_names = []
- output_names = ['TFLite_Detection_PostProcess']
- transforms = ['strip_unused_nodes']
- transformed_graph_def = TransformGraph(frozen_graph_def, input_names,
- output_names, transforms)
- return transformed_graph_def
-
-
-def export_tflite_graph(pipeline_config,
- trained_checkpoint_prefix,
- output_dir,
- add_postprocessing_op,
- max_detections,
- max_classes_per_detection,
- detections_per_class=100,
- use_regular_nms=False,
- binary_graph_name='tflite_graph.pb',
- txt_graph_name='tflite_graph.pbtxt'):
- """Exports a tflite compatible graph and anchors for ssd detection model.
-
- Anchors are written to a constant tensor and a tflite compatible graph
- is written to output_dir/tflite_graph.pb.
-
- Args:
- pipeline_config: Dictionary of configuration objects. Keys are `model`,
- `train_config`, `train_input_config`, `eval_config`, `eval_input_config`,
- `lstm_model`. Values are the corresponding config objects.
- trained_checkpoint_prefix: a file prefix for the checkpoint containing the
- trained parameters of the SSD model.
- output_dir: A directory to write the tflite graph and anchor file to.
- add_postprocessing_op: If true, the frozen graph adds a
- TFLite_Detection_PostProcess custom op.
- max_detections: Maximum number of detections (boxes) to show
- max_classes_per_detection: Number of classes to display per detection
- detections_per_class: In regular NonMaxSuppression, number of anchors used
- for NonMaxSuppression per class
- use_regular_nms: Flag to set postprocessing op to use Regular NMS instead of
- Fast NMS.
- binary_graph_name: Name of the exported graph file in binary format.
- txt_graph_name: Name of the exported graph file in text format.
-
- Raises:
- ValueError: if the pipeline config contains a model other than ssd, or uses
- an image resizer other than fixed_shape_resizer.
- """
- model_config = pipeline_config['model']
- lstm_config = pipeline_config['lstm_model']
- eval_config = pipeline_config['eval_config']
- tf.gfile.MakeDirs(output_dir)
- if model_config.WhichOneof('model') != 'ssd':
- raise ValueError('Only ssd models are supported in tflite. '
- 'Found {} in config'.format(
- model_config.WhichOneof('model')))
-
- num_classes = model_config.ssd.num_classes
- nms_score_threshold = {
- model_config.ssd.post_processing.batch_non_max_suppression.score_threshold
- }
- nms_iou_threshold = {
- model_config.ssd.post_processing.batch_non_max_suppression.iou_threshold
- }
- scale_values = {}
- scale_values['y_scale'] = {
- model_config.ssd.box_coder.faster_rcnn_box_coder.y_scale
- }
- scale_values['x_scale'] = {
- model_config.ssd.box_coder.faster_rcnn_box_coder.x_scale
- }
- scale_values['h_scale'] = {
- model_config.ssd.box_coder.faster_rcnn_box_coder.height_scale
- }
- scale_values['w_scale'] = {
- model_config.ssd.box_coder.faster_rcnn_box_coder.width_scale
- }
-
- image_resizer_config = model_config.ssd.image_resizer
- image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
- num_channels = _DEFAULT_NUM_CHANNELS
- if image_resizer == 'fixed_shape_resizer':
- height = image_resizer_config.fixed_shape_resizer.height
- width = image_resizer_config.fixed_shape_resizer.width
- if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
- num_channels = 1
-
- shape = [lstm_config.eval_unroll_length, height, width, num_channels]
- else:
- raise ValueError(
- 'Only fixed_shape_resizer'
- 'is supported with tflite. Found {}'.format(
- image_resizer_config.WhichOneof('image_resizer_oneof')))
-
- video_tensor = tf.placeholder(
- tf.float32, shape=shape, name='input_video_tensor')
-
- detection_model = model_builder.build(
- model_config, lstm_config, is_training=False)
- preprocessed_video, true_image_shapes = detection_model.preprocess(
- tf.to_float(video_tensor))
- predicted_tensors = detection_model.predict(preprocessed_video,
- true_image_shapes)
- # predicted_tensors = detection_model.postprocess(predicted_tensors,
- # true_image_shapes)
- # The score conversion occurs before the post-processing custom op
- _, score_conversion_fn = post_processing_builder.build(
- model_config.ssd.post_processing)
- class_predictions = score_conversion_fn(
- predicted_tensors['class_predictions_with_background'])
-
- with tf.name_scope('raw_outputs'):
- # 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
- # containing the encoded box predictions. Note that these are raw
- # predictions and no Non-Max suppression is applied on them and
- # no decode center size boxes is applied to them.
- tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
- # 'raw_outputs/class_predictions': a float32 tensor of shape
- # [1, num_anchors, num_classes] containing the class scores for each anchor
- # after applying score conversion.
- tf.identity(class_predictions, name='class_predictions')
- # 'anchors': a float32 tensor of shape
- # [num_anchors, 4] containing the anchors as a constant node.
- tf.identity(
- get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
- name='anchors')
-
- # Add global step to the graph, so we know the training step number when we
- # evaluate the model.
- tf.train.get_or_create_global_step()
-
- # graph rewriter
- is_quantized = ('graph_rewriter' in pipeline_config)
- if is_quantized:
- graph_rewriter_config = pipeline_config['graph_rewriter']
- graph_rewriter_fn = graph_rewriter_builder.build(
- graph_rewriter_config, is_training=False, is_export=True)
- graph_rewriter_fn()
-
- if model_config.ssd.feature_extractor.HasField('fpn'):
- exporter.rewrite_nn_resize_op(is_quantized)
-
- # freeze the graph
- saver_kwargs = {}
- if eval_config.use_moving_averages:
- saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
- moving_average_checkpoint = tempfile.NamedTemporaryFile()
- exporter.replace_variable_values_with_moving_averages(
- tf.get_default_graph(), trained_checkpoint_prefix,
- moving_average_checkpoint.name)
- checkpoint_to_use = moving_average_checkpoint.name
- else:
- checkpoint_to_use = trained_checkpoint_prefix
-
- saver = tf.train.Saver(**saver_kwargs)
- input_saver_def = saver.as_saver_def()
- frozen_graph_def = exporter.freeze_graph_with_def_protos(
- input_graph_def=tf.get_default_graph().as_graph_def(),
- input_saver_def=input_saver_def,
- input_checkpoint=checkpoint_to_use,
- output_node_names=','.join([
- 'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
- 'anchors'
- ]),
- restore_op_name='save/restore_all',
- filename_tensor_name='save/Const:0',
- clear_devices=True,
- output_graph='',
- initializer_nodes='')
-
- # Add new operation to do post processing in a custom op (TF Lite only)
-
- if add_postprocessing_op:
- transformed_graph_def = append_postprocessing_op(
- frozen_graph_def, max_detections, max_classes_per_detection,
- nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
- detections_per_class, use_regular_nms)
- else:
- # Return frozen without adding post-processing custom op
- transformed_graph_def = frozen_graph_def
-
- binary_graph = os.path.join(output_dir, binary_graph_name)
- with tf.gfile.GFile(binary_graph, 'wb') as f:
- f.write(transformed_graph_def.SerializeToString())
- txt_graph = os.path.join(output_dir, txt_graph_name)
- with tf.gfile.GFile(txt_graph, 'w') as f:
- f.write(str(transformed_graph_def))
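
The NodeDef surgery in `append_postprocessing_op()` above is plain proto manipulation. Here is a minimal runnable sketch on an empty GraphDef using the same `attr_value_pb2` attribute mechanism; the attribute values are illustrative only.

```python
import tensorflow.compat.v1 as tf
from tensorflow.core.framework import attr_value_pb2

graph_def = tf.GraphDef()
node = graph_def.node.add()
node.op = 'TFLite_Detection_PostProcess'
node.name = 'TFLite_Detection_PostProcess'
# Integer, float, and bool attrs use the i=, f=, and b= fields respectively.
node.attr['max_detections'].CopyFrom(attr_value_pb2.AttrValue(i=10))
node.attr['nms_iou_threshold'].CopyFrom(attr_value_pb2.AttrValue(f=0.5))
node.attr['use_regular_nms'].CopyFrom(attr_value_pb2.AttrValue(b=False))
node.input.extend(
    ['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors'])
print(graph_def)
```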
diff --git a/research/lstm_object_detection/export_tflite_lstd_model.py b/research/lstm_object_detection/export_tflite_lstd_model.py
deleted file mode 100644
index 58c674728b5b0e274ae112d66abe3ff72f63b86e..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/export_tflite_lstd_model.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Export a LSTD model in tflite format."""
-
-import os
-from absl import flags
-import tensorflow.compat.v1 as tf
-
-from lstm_object_detection.utils import config_util
-
-flags.DEFINE_string('export_path', None, 'Path to export model.')
-flags.DEFINE_string('frozen_graph_path', None, 'Path to frozen graph.')
-flags.DEFINE_string(
- 'pipeline_config_path', '',
- 'Path to a pipeline_pb2.TrainEvalPipelineConfig config file.')
-
-FLAGS = flags.FLAGS
-
-
-def main(_):
- flags.mark_flag_as_required('export_path')
- flags.mark_flag_as_required('frozen_graph_path')
- flags.mark_flag_as_required('pipeline_config_path')
-
- configs = config_util.get_configs_from_pipeline_file(
- FLAGS.pipeline_config_path)
- lstm_config = configs['lstm_model']
-
- input_arrays = ['input_video_tensor']
- output_arrays = [
- 'TFLite_Detection_PostProcess',
- 'TFLite_Detection_PostProcess:1',
- 'TFLite_Detection_PostProcess:2',
- 'TFLite_Detection_PostProcess:3',
- ]
- input_shapes = {
- 'input_video_tensor': [lstm_config.eval_unroll_length, 320, 320, 3],
- }
-
- converter = tf.lite.TFLiteConverter.from_frozen_graph(
- FLAGS.frozen_graph_path,
- input_arrays,
- output_arrays,
- input_shapes=input_shapes)
- converter.allow_custom_ops = True
- tflite_model = converter.convert()
- ofilename = os.path.join(FLAGS.export_path)
- with open(ofilename, 'wb') as f:
-   f.write(tflite_model)
-
-
-if __name__ == '__main__':
- tf.app.run()
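
A hedged sketch of loading the resulting FlatBuffer with the TF Lite interpreter; `model.tflite` is a hypothetical path, and actually allocating tensors or invoking the model requires a runtime that registers the TFLite_Detection_PostProcess custom op.

```python
import tensorflow.compat.v1 as tf

interpreter = tf.lite.Interpreter(model_path='model.tflite')  # hypothetical
input_details = interpreter.get_input_details()
# Expect a single input of shape [eval_unroll_length, 320, 320, 3].
print(input_details[0]['name'], input_details[0]['shape'])
```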
diff --git a/research/lstm_object_detection/g3doc/Interleaved_Intro.png b/research/lstm_object_detection/g3doc/Interleaved_Intro.png
deleted file mode 100644
index 2b829c997bc75e807c0982b1d71334966452b122..0000000000000000000000000000000000000000
Binary files a/research/lstm_object_detection/g3doc/Interleaved_Intro.png and /dev/null differ
diff --git a/research/lstm_object_detection/g3doc/exporting_models.md b/research/lstm_object_detection/g3doc/exporting_models.md
deleted file mode 100644
index 7d501d97efdfb8d259e867164aa04f275b56a036..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/g3doc/exporting_models.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Exporting a tflite model from a checkpoint
-
-Starting from a trained model checkpoint, creating a tflite model requires 2
-steps:
-
-* exporting a tflite frozen graph from a checkpoint
-* exporting a tflite model from a frozen graph
-
-## Exporting a tflite frozen graph from a checkpoint
-
-With a candidate checkpoint to export, run the following command from
-tensorflow/models/research:
-
-```bash
-# from tensorflow/models/research
-PIPELINE_CONFIG_PATH={path to pipeline config}
-TRAINED_CKPT_PREFIX=/{path to model.ckpt}
-EXPORT_DIR={path to folder that will be used for export}
-python lstm_object_detection/export_tflite_lstd_graph.py \
- --pipeline_config_path ${PIPELINE_CONFIG_PATH} \
- --trained_checkpoint_prefix ${TRAINED_CKPT_PREFIX} \
- --output_directory ${EXPORT_DIR} \
- --add_postprocessing_op
-```
-
-After export, you should see the directory ${EXPORT_DIR} containing the
-following files:
-
-* `tflite_graph.pb`
-* `tflite_graph.pbtxt`
-
-## Exporting a tflite model from a frozen graph
-
-We then take the exported tflite-compatible frozen graph and convert it to a
-TFLite FlatBuffer file by running the following:
-
-```bash
-# from tensorflow/models/research
-FROZEN_GRAPH_PATH={path to exported tflite_graph.pb}
-EXPORT_PATH={path to filename that will be used for export}
-PIPELINE_CONFIG_PATH={path to pipeline config}
-python lstm_object_detection/export_tflite_lstd_model.py \
- --export_path ${EXPORT_PATH} \
- --frozen_graph_path ${FROZEN_GRAPH_PATH} \
- --pipeline_config_path ${PIPELINE_CONFIG_PATH}
-```
-
-After export, you should see the file ${EXPORT_PATH} containing the FlatBuffer
-model to be used by an application.
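
As a sanity check before the second step, one can verify that the exported frozen graph actually contains the postprocess node that the converter's `output_arrays` refer to. A sketch, with the graph path assumed to be the `${FROZEN_GRAPH_PATH}` above:

```python
import tensorflow.compat.v1 as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('tflite_graph.pb', 'rb') as f:  # ${FROZEN_GRAPH_PATH}
  graph_def.ParseFromString(f.read())
node_names = {node.name for node in graph_def.node}
assert 'TFLite_Detection_PostProcess' in node_names
```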
diff --git a/research/lstm_object_detection/g3doc/lstm_ssd_intro.png b/research/lstm_object_detection/g3doc/lstm_ssd_intro.png
deleted file mode 100644
index fa62eb533b9190bcf05094d12781808dc85f1107..0000000000000000000000000000000000000000
Binary files a/research/lstm_object_detection/g3doc/lstm_ssd_intro.png and /dev/null differ
diff --git a/research/lstm_object_detection/inputs/__init__.py b/research/lstm_object_detection/inputs/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/inputs/seq_dataset_builder.py b/research/lstm_object_detection/inputs/seq_dataset_builder.py
deleted file mode 100644
index 55e24820f60d24d14db64f2aea21e462ee278ff2..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/inputs/seq_dataset_builder.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-r"""tf.data.Dataset builder.
-
-Creates data sources for DetectionModels from an InputReader config. See
-input_reader.proto for options.
-
-Note: If users wish to also use their own InputReaders with the Object
-Detection configuration framework, they should define their own builder
-function that wraps the build function.
-"""
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-
-from tensorflow.contrib.training.python.training import sequence_queueing_state_saver as sqss
-from lstm_object_detection.inputs import tf_sequence_example_decoder
-from lstm_object_detection.protos import input_reader_google_pb2
-from object_detection.core import preprocessor
-from object_detection.core import preprocessor_cache
-from object_detection.core import standard_fields as fields
-from object_detection.protos import input_reader_pb2
-from object_detection.utils import ops as util_ops
-
-parallel_reader = slim.parallel_reader
-# TODO(yinxiao): Make the following variable into configurable proto.
-# Padding size for the labeled objects in each frame. Here we assume each
-# frame has a total number of objects less than _PADDING_SIZE.
-_PADDING_SIZE = 30
-
-
-def _build_training_batch_dict(batch_sequences_with_states, unroll_length,
- batch_size):
- """Builds training batch samples.
-
- Args:
- batch_sequences_with_states: A batch_sequences_with_states object.
- unroll_length: Unrolled length for LSTM training.
- batch_size: Batch size for queue outputs.
-
- Returns:
- A dictionary of tensors based on items in input_reader_config.
- """
- seq_tensors_dict = {
- fields.InputDataFields.image: [],
- fields.InputDataFields.groundtruth_boxes: [],
- fields.InputDataFields.groundtruth_classes: [],
- 'batch': batch_sequences_with_states,
- }
- for i in range(unroll_length):
- for j in range(batch_size):
- filtered_dict = util_ops.filter_groundtruth_with_nan_box_coordinates({
- fields.InputDataFields.groundtruth_boxes: (
- batch_sequences_with_states.sequences['groundtruth_boxes'][j][i]),
- fields.InputDataFields.groundtruth_classes: (
- batch_sequences_with_states.sequences['groundtruth_classes'][j][i]
- ),
- })
- filtered_dict = util_ops.retain_groundtruth_with_positive_classes(
- filtered_dict)
- seq_tensors_dict[fields.InputDataFields.image].append(
- batch_sequences_with_states.sequences['image'][j][i])
- seq_tensors_dict[fields.InputDataFields.groundtruth_boxes].append(
- filtered_dict[fields.InputDataFields.groundtruth_boxes])
- seq_tensors_dict[fields.InputDataFields.groundtruth_classes].append(
- filtered_dict[fields.InputDataFields.groundtruth_classes])
- seq_tensors_dict[fields.InputDataFields.image] = tuple(
- seq_tensors_dict[fields.InputDataFields.image])
- seq_tensors_dict[fields.InputDataFields.groundtruth_boxes] = tuple(
- seq_tensors_dict[fields.InputDataFields.groundtruth_boxes])
- seq_tensors_dict[fields.InputDataFields.groundtruth_classes] = tuple(
- seq_tensors_dict[fields.InputDataFields.groundtruth_classes])
-
- return seq_tensors_dict
-
-
-def build(input_reader_config,
- model_config,
- lstm_config,
- unroll_length,
- data_augmentation_options=None,
- batch_size=1):
- """Builds a tensor dictionary based on the InputReader config.
-
- Args:
- input_reader_config: An input_reader_builder.InputReader object.
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
- lstm_config: LSTM specific configs.
- unroll_length: Unrolled length for LSTM training.
- data_augmentation_options: A list of tuples, where each tuple contains a
- data augmentation function and a dictionary containing arguments and their
- values (see preprocessor.py).
- batch_size: Batch size for queue outputs.
-
- Returns:
- A dictionary of tensors based on items in the input_reader_config.
-
- Raises:
- ValueError: On invalid input reader proto.
- ValueError: If no input paths are specified.
- """
- if not isinstance(input_reader_config, input_reader_pb2.InputReader):
- raise ValueError('input_reader_config not of type '
- 'input_reader_pb2.InputReader.')
-
- external_reader_config = input_reader_config.external_input_reader
- external_input_reader_config = external_reader_config.Extensions[
- input_reader_google_pb2.GoogleInputReader.google_input_reader]
- input_reader_type = external_input_reader_config.WhichOneof('input_reader')
-
- if input_reader_type == 'tf_record_video_input_reader':
- config = external_input_reader_config.tf_record_video_input_reader
- reader_type_class = tf.TFRecordReader
- else:
- raise ValueError(
- 'Unsupported reader in input_reader_config: %s' % input_reader_type)
-
- if not config.input_path:
- raise ValueError('At least one input path must be specified in '
- '`input_reader_config`.')
- key, value = parallel_reader.parallel_read(
- config.input_path[:], # Convert `RepeatedScalarContainer` to list.
- reader_class=reader_type_class,
- num_epochs=(input_reader_config.num_epochs
- if input_reader_config.num_epochs else None),
- num_readers=input_reader_config.num_readers,
- shuffle=input_reader_config.shuffle,
- dtypes=[tf.string, tf.string],
- capacity=input_reader_config.queue_capacity,
- min_after_dequeue=input_reader_config.min_after_dequeue)
-
- # TODO(yinxiao): Add loading instance mask option.
- decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()
-
- keys_to_decode = [
- fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes,
- fields.InputDataFields.groundtruth_classes
- ]
- tensor_dict = decoder.decode(value, items=keys_to_decode)
-
- tensor_dict['image'].set_shape([None, None, None, 3])
- tensor_dict['groundtruth_boxes'].set_shape([None, None, 4])
-
- height = model_config.ssd.image_resizer.fixed_shape_resizer.height
- width = model_config.ssd.image_resizer.fixed_shape_resizer.width
-
- # If data augmentation is specified in the config file, the preprocessor
- # will be called here to augment the data as specified. Most common
- # augmentations include horizontal flip and cropping.
- if data_augmentation_options:
- images_pre = tf.split(tensor_dict['image'], config.video_length, axis=0)
- bboxes_pre = tf.split(
- tensor_dict['groundtruth_boxes'], config.video_length, axis=0)
- labels_pre = tf.split(
- tensor_dict['groundtruth_classes'], config.video_length, axis=0)
- images_proc, bboxes_proc, labels_proc = [], [], []
- cache = preprocessor_cache.PreprocessorCache()
-
- for i, _ in enumerate(images_pre):
- image_dict = {
- fields.InputDataFields.image:
- images_pre[i],
- fields.InputDataFields.groundtruth_boxes:
- tf.squeeze(bboxes_pre[i], axis=0),
- fields.InputDataFields.groundtruth_classes:
- tf.squeeze(labels_pre[i], axis=0),
- }
- image_dict = preprocessor.preprocess(
- image_dict,
- data_augmentation_options,
- func_arg_map=preprocessor.get_default_func_arg_map(),
- preprocess_vars_cache=cache)
- # Pads detection count to _PADDING_SIZE.
- image_dict[fields.InputDataFields.groundtruth_boxes] = tf.pad(
- image_dict[fields.InputDataFields.groundtruth_boxes],
- [[0, _PADDING_SIZE], [0, 0]])
- image_dict[fields.InputDataFields.groundtruth_boxes] = tf.slice(
- image_dict[fields.InputDataFields.groundtruth_boxes], [0, 0],
- [_PADDING_SIZE, -1])
- image_dict[fields.InputDataFields.groundtruth_classes] = tf.pad(
- image_dict[fields.InputDataFields.groundtruth_classes],
- [[0, _PADDING_SIZE]])
- image_dict[fields.InputDataFields.groundtruth_classes] = tf.slice(
- image_dict[fields.InputDataFields.groundtruth_classes], [0],
- [_PADDING_SIZE])
- images_proc.append(image_dict[fields.InputDataFields.image])
- bboxes_proc.append(image_dict[fields.InputDataFields.groundtruth_boxes])
- labels_proc.append(image_dict[fields.InputDataFields.groundtruth_classes])
- tensor_dict['image'] = tf.concat(images_proc, axis=0)
- tensor_dict['groundtruth_boxes'] = tf.stack(bboxes_proc, axis=0)
- tensor_dict['groundtruth_classes'] = tf.stack(labels_proc, axis=0)
- else:
- # Pads detection count to _PADDING_SIZE per frame.
- tensor_dict['groundtruth_boxes'] = tf.pad(
- tensor_dict['groundtruth_boxes'], [[0, 0], [0, _PADDING_SIZE], [0, 0]])
- tensor_dict['groundtruth_boxes'] = tf.slice(
- tensor_dict['groundtruth_boxes'], [0, 0, 0], [-1, _PADDING_SIZE, -1])
- tensor_dict['groundtruth_classes'] = tf.pad(
- tensor_dict['groundtruth_classes'], [[0, 0], [0, _PADDING_SIZE]])
- tensor_dict['groundtruth_classes'] = tf.slice(
- tensor_dict['groundtruth_classes'], [0, 0], [-1, _PADDING_SIZE])
-
- tensor_dict['image'], _ = preprocessor.resize_image(
- tensor_dict['image'], new_height=height, new_width=width)
-
- num_steps = config.video_length // unroll_length
-
- init_states = {
- 'lstm_state_c':
- tf.zeros([height // 32, width // 32, lstm_config.lstm_state_depth]),
- 'lstm_state_h':
- tf.zeros([height // 32, width // 32, lstm_config.lstm_state_depth]),
- 'lstm_state_step':
- tf.constant(num_steps, shape=[]),
- }
-
- batch = sqss.batch_sequences_with_states(
- input_key=key,
- input_sequences=tensor_dict,
- input_context={},
- input_length=None,
- initial_states=init_states,
- num_unroll=unroll_length,
- batch_size=batch_size,
- num_threads=batch_size,
- make_keys_unique=True,
- capacity=batch_size * batch_size)
-
- return _build_training_batch_dict(batch, unroll_length, batch_size)
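
The pad-then-slice idiom used above to force exactly `_PADDING_SIZE` boxes per frame, isolated as a runnable graph-construction sketch:

```python
import tensorflow.compat.v1 as tf

PADDING_SIZE = 30
boxes = tf.placeholder(tf.float32, shape=[None, 4])
# Pad with PADDING_SIZE zero rows, then slice back to exactly PADDING_SIZE
# rows: short frames end up zero-padded, overlong frames get truncated.
padded = tf.pad(boxes, [[0, PADDING_SIZE], [0, 0]])
fixed = tf.slice(padded, [0, 0], [PADDING_SIZE, -1])
print(fixed.shape)  # TensorShape [30, 4]
```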
diff --git a/research/lstm_object_detection/inputs/seq_dataset_builder_test.py b/research/lstm_object_detection/inputs/seq_dataset_builder_test.py
deleted file mode 100644
index 4b894d24f71fea1c5c372ec0ead9141af6d5ef6f..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/inputs/seq_dataset_builder_test.py
+++ /dev/null
@@ -1,282 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for dataset_builder."""
-
-import os
-import numpy as np
-import tensorflow.compat.v1 as tf
-
-from google.protobuf import text_format
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from lstm_object_detection.inputs import seq_dataset_builder
-from lstm_object_detection.protos import pipeline_pb2 as internal_pipeline_pb2
-from object_detection.builders import preprocessor_builder
-from object_detection.core import standard_fields as fields
-from object_detection.protos import input_reader_pb2
-from object_detection.protos import pipeline_pb2
-from object_detection.protos import preprocessor_pb2
-
-
-class DatasetBuilderTest(tf.test.TestCase):
-
- def _create_tf_record(self):
- path = os.path.join(self.get_temp_dir(), 'tfrecord')
- writer = tf.python_io.TFRecordWriter(path)
-
- image_tensor = np.random.randint(255, size=(16, 16, 3)).astype(np.uint8)
- with self.test_session():
- encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
-
- sequence_example = example_pb2.SequenceExample(
- context=feature_pb2.Features(
- feature={
- 'image/format':
- feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(
- value=['jpeg'.encode('utf-8')])),
- 'image/height':
- feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[16])),
- 'image/width':
- feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[16])),
- }),
- feature_lists=feature_pb2.FeatureLists(
- feature_list={
- 'image/encoded':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(
- value=[encoded_jpeg])),
- ]),
- 'image/object/bbox/xmin':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- ]),
- 'image/object/bbox/xmax':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0]))
- ]),
- 'image/object/bbox/ymin':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- ]),
- 'image/object/bbox/ymax':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0]))
- ]),
- 'image/object/class/label':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- int64_list=feature_pb2.Int64List(value=[2]))
- ]),
- }))
-
- writer.write(sequence_example.SerializeToString())
- writer.close()
-
- return path
-
- def _get_model_configs_from_proto(self):
- """Creates a model text proto for testing.
-
- Returns:
- A dictionary of model configs.
- """
-
- model_text_proto = """
- [lstm_object_detection.protos.lstm_model] {
- train_unroll_length: 4
- eval_unroll_length: 4
- }
- model {
- ssd {
- feature_extractor {
- type: 'lstm_mobilenet_v1_fpn'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- negative_class_weight: 2.0
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 32
- width: 32
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- normalize_loc_loss_by_codesize: true
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }
- }"""
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- text_format.Merge(model_text_proto, pipeline_config)
- configs = {}
- configs['model'] = pipeline_config.model
- configs['lstm_model'] = pipeline_config.Extensions[
- internal_pipeline_pb2.lstm_model]
-
- return configs
-
- def _get_data_augmentation_preprocessor_proto(self):
- preprocessor_text_proto = """
- random_horizontal_flip {
- }
- """
- preprocessor_proto = preprocessor_pb2.PreprocessingStep()
- text_format.Merge(preprocessor_text_proto, preprocessor_proto)
- return preprocessor_proto
-
- def _create_training_dict(self, tensor_dict):
- image_dict = {}
- all_dict = {}
- all_dict['batch'] = tensor_dict.pop('batch')
- for i, _ in enumerate(tensor_dict[fields.InputDataFields.image]):
- for key, val in tensor_dict.items():
- image_dict[key] = val[i]
-
- image_dict[fields.InputDataFields.image] = tf.to_float(
- tf.expand_dims(image_dict[fields.InputDataFields.image], 0))
- suffix = str(i)
- for key, val in image_dict.items():
- all_dict[key + suffix] = val
- return all_dict
-
- def _get_input_proto(self, input_reader):
- return """
- external_input_reader {
- [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
- %s: {
- input_path: '{0}'
- data_type: TF_SEQUENCE_EXAMPLE
- video_length: 4
- }
- }
- }
- """ % input_reader
-
- def test_video_input_reader(self):
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(
- self._get_input_proto('tf_record_video_input_reader'),
- input_reader_proto)
-
- configs = self._get_model_configs_from_proto()
- tensor_dict = seq_dataset_builder.build(
- input_reader_proto,
- configs['model'],
- configs['lstm_model'],
- unroll_length=1)
-
- all_dict = self._create_training_dict(tensor_dict)
-
- self.assertEqual((1, 32, 32, 3), all_dict['image0'].shape)
- self.assertEqual(4, all_dict['groundtruth_boxes0'].shape[1])
-
- def test_build_with_data_augmentation(self):
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(
- self._get_input_proto('tf_record_video_input_reader'),
- input_reader_proto)
-
- configs = self._get_model_configs_from_proto()
- data_augmentation_options = [
- preprocessor_builder.build(
- self._get_data_augmentation_preprocessor_proto())
- ]
- tensor_dict = seq_dataset_builder.build(
- input_reader_proto,
- configs['model'],
- configs['lstm_model'],
- unroll_length=1,
- data_augmentation_options=data_augmentation_options)
-
- all_dict = self._create_training_dict(tensor_dict)
- self.assertEqual((1, 32, 32, 3), all_dict['image0'].shape)
- self.assertEqual(4, all_dict['groundtruth_boxes0'].shape[1])
-
- def test_raises_error_without_input_paths(self):
- input_reader_text_proto = """
- shuffle: false
- num_readers: 1
- load_instance_masks: true
- """
- input_reader_proto = input_reader_pb2.InputReader()
- text_format.Merge(input_reader_text_proto, input_reader_proto)
-
- configs = self._get_model_configs_from_proto()
- with self.assertRaises(ValueError):
- _ = seq_dataset_builder.build(
- input_reader_proto,
- configs['model'],
- configs['lstm_model'],
- unroll_length=1)
-
-
-if __name__ == '__main__':
- tf.test.main()
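
The BoundingBoxSequence handler in the decoder that follows stitches four sparse coordinate tensors into one dense [time, num_boxes, 4] tensor. A sketch of that reshape-concat-densify step, mirroring `tensors_to_item` below:

```python
import tensorflow.compat.v1 as tf

def stack_box_sides(sides):
  """sides: four SparseTensors of shape [time, num_boxes] (ymin, xmin, ...)."""
  expanded = []
  for value in sides:
    # Append a trailing dimension of size 1 so the sides can be concatenated.
    new_shape = tf.concat(
        [tf.to_int64(tf.shape(value)), tf.constant([1], dtype=tf.int64)], 0)
    expanded.append(tf.sparse_reshape(value, new_shape))
  boxes = tf.sparse_concat(2, expanded)  # [time, num_boxes, 4]
  return tf.sparse_tensor_to_dense(boxes, default_value=-1.0)
```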
diff --git a/research/lstm_object_detection/inputs/tf_sequence_example_decoder.py b/research/lstm_object_detection/inputs/tf_sequence_example_decoder.py
deleted file mode 100644
index def945b3f07d5c0ef35c454c495405971e04574a..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/inputs/tf_sequence_example_decoder.py
+++ /dev/null
@@ -1,263 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tensorflow Sequence Example proto decoder.
-
-A decoder to decode string tensors containing serialized
-tensorflow.SequenceExample protos.
-"""
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-from object_detection.core import data_decoder
-from object_detection.core import standard_fields as fields
-
-tfexample_decoder = slim.tfexample_decoder
-
-
-class BoundingBoxSequence(tfexample_decoder.ItemHandler):
- """An ItemHandler that concatenates SparseTensors to Bounding Boxes.
- """
-
- def __init__(self, keys=None, prefix=None, return_dense=True,
- default_value=-1.0):
- """Initialize the bounding box handler.
-
- Args:
- keys: A list of four key names representing the ymin, xmin, ymax, xmax
- in the Example or SequenceExample.
- prefix: An optional prefix for each of the bounding box keys in the
- Example or SequenceExample. If provided, `prefix` is prepended to each
- key in `keys`.
- return_dense: if True, returns a dense tensor; if False, returns a
- sparse tensor.
- default_value: The value used when the `tensor_key` is not found in a
- particular `TFExample`.
-
- Raises:
- ValueError: if keys is not `None` and also not a list of exactly 4 keys
- """
- if keys is None:
- keys = ['ymin', 'xmin', 'ymax', 'xmax']
- elif len(keys) != 4:
- raise ValueError('BoundingBoxSequence expects 4 keys but got {}'.format(
- len(keys)))
- self._prefix = prefix
- self._keys = keys
- self._full_keys = [prefix + k for k in keys]
- self._return_dense = return_dense
- self._default_value = default_value
- super(BoundingBoxSequence, self).__init__(self._full_keys)
-
- def tensors_to_item(self, keys_to_tensors):
- """Maps the given dictionary of tensors to a concatenated list of bboxes.
-
- Args:
- keys_to_tensors: a mapping of TF-Example keys to parsed tensors.
-
- Returns:
- [time, num_boxes, 4] tensor of bounding box coordinates, in order
- [y_min, x_min, y_max, x_max]. Whether the tensor is a SparseTensor
- or a dense Tensor is determined by the return_dense parameter. Empty
- positions in the sparse tensor are filled with -1.0 values.
- """
- sides = []
- for key in self._full_keys:
- value = keys_to_tensors[key]
- expanded_dims = tf.concat(
- [tf.to_int64(tf.shape(value)),
- tf.constant([1], dtype=tf.int64)], 0)
- side = tf.sparse_reshape(value, expanded_dims)
- sides.append(side)
- bounding_boxes = tf.sparse_concat(2, sides)
- if self._return_dense:
- bounding_boxes = tf.sparse_tensor_to_dense(
- bounding_boxes, default_value=self._default_value)
- return bounding_boxes
-
-
-class TFSequenceExampleDecoder(data_decoder.DataDecoder):
- """Tensorflow Sequence Example proto decoder."""
-
- def __init__(self):
- """Constructor sets keys_to_features and items_to_handlers."""
- self.keys_to_context_features = {
- 'image/format':
- tf.FixedLenFeature((), tf.string, default_value='jpeg'),
- 'image/filename':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/key/sha256':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/source_id':
- tf.FixedLenFeature((), tf.string, default_value=''),
- 'image/height':
- tf.FixedLenFeature((), tf.int64, 1),
- 'image/width':
- tf.FixedLenFeature((), tf.int64, 1),
- }
- self.keys_to_features = {
- 'image/encoded': tf.FixedLenSequenceFeature((), tf.string),
- 'bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
- 'bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
- 'bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
- 'bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
- 'bbox/label/index': tf.VarLenFeature(dtype=tf.int64),
- 'bbox/label/string': tf.VarLenFeature(tf.string),
- 'area': tf.VarLenFeature(tf.float32),
- 'is_crowd': tf.VarLenFeature(tf.int64),
- 'difficult': tf.VarLenFeature(tf.int64),
- 'group_of': tf.VarLenFeature(tf.int64),
- }
- self.items_to_handlers = {
- fields.InputDataFields.image:
- tfexample_decoder.Image(
- image_key='image/encoded',
- format_key='image/format',
- channels=3,
- repeated=True),
- fields.InputDataFields.source_id: (
- tfexample_decoder.Tensor('image/source_id')),
- fields.InputDataFields.key: (
- tfexample_decoder.Tensor('image/key/sha256')),
- fields.InputDataFields.filename: (
- tfexample_decoder.Tensor('image/filename')),
- # Object boxes and classes.
- fields.InputDataFields.groundtruth_boxes:
- BoundingBoxSequence(prefix='bbox/'),
- fields.InputDataFields.groundtruth_classes: (
- tfexample_decoder.Tensor('bbox/label/index')),
- fields.InputDataFields.groundtruth_area:
- tfexample_decoder.Tensor('area'),
- fields.InputDataFields.groundtruth_is_crowd: (
- tfexample_decoder.Tensor('is_crowd')),
- fields.InputDataFields.groundtruth_difficult: (
- tfexample_decoder.Tensor('difficult')),
- fields.InputDataFields.groundtruth_group_of: (
- tfexample_decoder.Tensor('group_of'))
- }
-
- def decode(self, tf_seq_example_string_tensor, items=None):
- """Decodes serialized tf.SequenceExample and returns a tensor dictionary.
-
- Args:
- tf_seq_example_string_tensor: A string tensor holding a serialized
- tensorflow SequenceExample proto.
- items: The list of items to decode. These must be a subset of the item
- keys in self._items_to_handlers. If `items` is left as None, then all
- of the items in self._items_to_handlers are decoded.
-
- Returns:
- A dictionary of the following tensors.
- fields.InputDataFields.image - 4D uint8 tensor of shape
- [seq, None, None, 3] containing the image sequence.
- fields.InputDataFields.source_id - string tensor containing original
- image id.
- fields.InputDataFields.key - string tensor with unique sha256 hash key.
- fields.InputDataFields.filename - string tensor with original dataset
- filename.
- fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
- [None, 4] containing box corners.
- fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
- [None] containing classes for the boxes.
- fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
- [None] containing object mask area in pixels squared.
- fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
- [None] indicating if the boxes enclose a crowd.
- fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
- [None] indicating if the boxes represent `difficult` instances.
- fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
- [None] indicating if the boxes represent a group of instances.
- """
- serialized_example = tf.reshape(tf_seq_example_string_tensor, shape=[])
- decoder = TFSequenceExampleDecoderHelper(self.keys_to_context_features,
- self.keys_to_features,
- self.items_to_handlers)
- if not items:
- items = decoder.list_items()
- tensors = decoder.decode(serialized_example, items=items)
- tensor_dict = dict(zip(items, tensors))
-
- return tensor_dict
-
-
-class TFSequenceExampleDecoderHelper(data_decoder.DataDecoder):
- """A decoder helper class for TensorFlow SequenceExamples.
-
- To perform this decoding operation, a SequenceExampleDecoder is given a list
- of ItemHandlers. Each ItemHandler indicates the set of features it needs and
- how to convert them into the final item Tensor.
- """
-
- def __init__(self, keys_to_context_features, keys_to_sequence_features,
- items_to_handlers):
- """Constructs the decoder.
-
- Args:
- keys_to_context_features: A dictionary from TF-SequenceExample context
- keys to either tf.VarLenFeature or tf.FixedLenFeature instances.
- See tensorflow's parsing_ops.py.
- keys_to_sequence_features: A dictionary from TF-SequenceExample sequence
- keys to either tf.VarLenFeature or tf.FixedLenSequenceFeature instances.
- items_to_handlers: A dictionary from items (strings) to ItemHandler
- instances. Note that the ItemHandler's are provided the keys that they
- use to return the final item Tensors.
- Raises:
- ValueError: If the same key is present for context features and sequence
- features.
- """
- unique_keys = set()
- unique_keys.update(keys_to_context_features)
- unique_keys.update(keys_to_sequence_features)
- if len(unique_keys) != (
- len(keys_to_context_features) + len(keys_to_sequence_features)):
- # This situation is ambiguous in the decoder's keys_to_tensors variable.
- raise ValueError('Context and sequence keys are not unique. \n'
- ' Context keys: %s \n Sequence keys: %s' %
- (list(keys_to_context_features.keys()),
- list(keys_to_sequence_features.keys())))
- self._keys_to_context_features = keys_to_context_features
- self._keys_to_sequence_features = keys_to_sequence_features
- self._items_to_handlers = items_to_handlers
-
- def list_items(self):
- """Returns keys of items."""
- return self._items_to_handlers.keys()
-
- def decode(self, serialized_example, items=None):
- """Decodes the given serialized TF-SequenceExample.
-
- Args:
- serialized_example: A serialized TF-SequenceExample tensor.
- items: The list of items to decode. These must be a subset of the item
- keys in self._items_to_handlers. If `items` is left as None, then all
- of the items in self._items_to_handlers are decoded.
- Returns:
- The decoded items, a list of tensors.
- """
- context, feature_list = tf.parse_single_sequence_example(
- serialized_example, self._keys_to_context_features,
- self._keys_to_sequence_features)
- # Reshape non-sparse elements just once:
- for k in self._keys_to_context_features:
- v = self._keys_to_context_features[k]
- if isinstance(v, tf.FixedLenFeature):
- context[k] = tf.reshape(context[k], v.shape)
- if not items:
- items = self._items_to_handlers.keys()
- outputs = []
- for item in items:
- handler = self._items_to_handlers[item]
- keys_to_tensors = {
- key: context[key] if key in context else feature_list[key]
- for key in handler.keys
- }
- outputs.append(handler.tensors_to_item(keys_to_tensors))
- return outputs
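
The key routing in decode() above (context dict first, then feature_list) can be seen in isolation in this small sketch; the two feature keys are borrowed from the dictionaries above, and a TF1 graph-mode runtime is assumed:

import tensorflow.compat.v1 as tf

serialized = tf.placeholder(tf.string, shape=[])
context, feature_list = tf.parse_single_sequence_example(
    serialized,
    context_features={
        'image/height': tf.FixedLenFeature((), tf.int64, 1),
    },
    sequence_features={
        'bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
    })
# Each handler key resolves to the context dict if present, otherwise to the
# per-timestep feature_list dict -- the same lookup used in decode() above.
keys_to_tensors = {
    key: context[key] if key in context else feature_list[key]
    for key in ('image/height', 'bbox/xmin')
}
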
diff --git a/research/lstm_object_detection/inputs/tf_sequence_example_decoder_test.py b/research/lstm_object_detection/inputs/tf_sequence_example_decoder_test.py
deleted file mode 100644
index dbbb8d3c7443dabcfc0df08638e2a381eca2cc31..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/inputs/tf_sequence_example_decoder_test.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for lstm_object_detection.tf_sequence_example_decoder."""
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from lstm_object_detection.inputs import tf_sequence_example_decoder
-from object_detection.core import standard_fields as fields
-
-
-class TFSequenceExampleDecoderTest(tf.test.TestCase):
- """Tests for sequence example decoder."""
-
- def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
- with self.test_session():
- if encoding_type == 'jpeg':
- image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
- else:
- raise ValueError('Invalid encoding type.')
- return image_encoded
-
- def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
- with self.test_session():
- if encoding_type == 'jpeg':
- image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
- else:
- raise ValueError('Invalid encoding type.')
- return image_decoded
-
- def testDecodeJpegImageAndBoundingBox(self):
- """Test if the decoder can correctly decode the image and bounding box.
-
- A set of random images (represented as an image tensor) is first decoded as
- the groundtrue image. Meanwhile, the image tensor will be encoded and pass
- through the sequence example, and then decoded as images. The groundtruth
- image and the decoded image are expected to be equal. Similar tests are
- also applied to labels such as bounding box.
- """
- image_tensor = np.random.randint(256, size=(256, 256, 3)).astype(np.uint8)
- encoded_jpeg = self._EncodeImage(image_tensor)
- decoded_jpeg = self._DecodeImage(encoded_jpeg)
-
- sequence_example = example_pb2.SequenceExample(
- feature_lists=feature_pb2.FeatureLists(
- feature_list={
- 'image/encoded':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- bytes_list=feature_pb2.BytesList(
- value=[encoded_jpeg])),
- ]),
- 'bbox/xmin':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- ]),
- 'bbox/xmax':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0]))
- ]),
- 'bbox/ymin':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[0.0])),
- ]),
- 'bbox/ymax':
- feature_pb2.FeatureList(feature=[
- feature_pb2.Feature(
- float_list=feature_pb2.FloatList(value=[1.0]))
- ]),
- })).SerializeToString()
-
- example_decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()
- tensor_dict = example_decoder.decode(tf.convert_to_tensor(sequence_example))
-
- # Test tensor dict image dimension.
- self.assertAllEqual(
- (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
- [None, None, None, 3])
- with self.test_session() as sess:
- tensor_dict[fields.InputDataFields.image] = tf.squeeze(
- tensor_dict[fields.InputDataFields.image])
- tensor_dict[fields.InputDataFields.groundtruth_boxes] = tf.squeeze(
- tensor_dict[fields.InputDataFields.groundtruth_boxes])
- tensor_dict = sess.run(tensor_dict)
-
- # Test decoded image.
- self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
- # Test decoded bounding box.
- self.assertAllEqual([0.0, 0.0, 1.0, 1.0],
- tensor_dict[fields.InputDataFields.groundtruth_boxes])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/lstm/__init__.py b/research/lstm_object_detection/lstm/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/lstm/lstm_cells.py b/research/lstm_object_detection/lstm/lstm_cells.py
deleted file mode 100644
index a553073d978b4b61e6f550fa65e2a2ccc7bfe92d..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/lstm/lstm_cells.py
+++ /dev/null
@@ -1,734 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""BottleneckConvLSTMCell implementation."""
-import functools
-
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-
-from tensorflow.contrib import rnn as contrib_rnn
-from tensorflow.contrib.framework.python.ops import variables as contrib_variables
-import lstm_object_detection.lstm.utils as lstm_utils
-
-
-class BottleneckConvLSTMCell(contrib_rnn.RNNCell):
- """Basic LSTM recurrent network cell using separable convolutions.
-
- The implementation is based on:
- Mobile Video Object Detection with Temporally-Aware Feature Maps
- https://arxiv.org/abs/1711.06368.
-
- We add forget_bias (default: 1) to the biases of the forget gate in order to
- reduce the scale of forgetting in the beginning of the training.
-
- This LSTM first projects inputs to the size of the output before doing gate
- computations. This saves params unless the input is less than a third of the
- state size channel-wise.
- """
-
- def __init__(self,
- filter_size,
- output_size,
- num_units,
- forget_bias=1.0,
- activation=tf.tanh,
- flatten_state=False,
- clip_state=False,
- output_bottleneck=False,
- pre_bottleneck=False,
- visualize_gates=False):
- """Initializes the basic LSTM cell.
-
- Args:
- filter_size: collection, conv filter size.
- output_size: collection, the width/height dimensions of the cell/output.
- num_units: int, The number of channels in the LSTM cell.
- forget_bias: float, The bias added to forget gates (see above).
- activation: Activation function of the inner states.
- flatten_state: if True, state tensor will be flattened and stored as a 2-d
- tensor. Use for exporting the model to tfmini.
- clip_state: if True, clip state between [-6, 6].
- output_bottleneck: if True, the cell bottleneck will be concatenated to
- the cell output.
- pre_bottleneck: if True, the cell assumes that bottlenecking was performed
- before the cell was called.
- visualize_gates: if True, add histogram summaries of all gates and outputs
- to tensorboard.
- """
- self._filter_size = list(filter_size)
- self._output_size = list(output_size)
- self._num_units = num_units
- self._forget_bias = forget_bias
- self._activation = activation
- self._viz_gates = visualize_gates
- self._flatten_state = flatten_state
- self._clip_state = clip_state
- self._output_bottleneck = output_bottleneck
- self._pre_bottleneck = pre_bottleneck
- self._param_count = self._num_units
- for dim in self._output_size:
- self._param_count *= dim
-
- @property
- def state_size(self):
- return contrib_rnn.LSTMStateTuple(self._output_size + [self._num_units],
- self._output_size + [self._num_units])
-
- @property
- def state_size_flat(self):
- return contrib_rnn.LSTMStateTuple([self._param_count], [self._param_count])
-
- @property
- def output_size(self):
- return self._output_size + [self._num_units]
-
- def __call__(self, inputs, state, scope=None):
- """Long short-term memory cell (LSTM) with bottlenecking.
-
- Args:
- inputs: Input tensor at the current timestep.
- state: Tuple of tensors, the state and output at the previous timestep.
- scope: Optional scope.
-
- Returns:
- A tuple where the first element is the LSTM output and the second is
- an LSTMStateTuple of the state at the current timestep.
- """
- scope = scope or 'conv_lstm_cell'
- with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
- c, h = state
-
- # unflatten state if necessary
- if self._flatten_state:
- c = tf.reshape(c, [-1] + self.output_size)
- h = tf.reshape(h, [-1] + self.output_size)
-
- # summary of input passed into cell
- if self._viz_gates:
- slim.summaries.add_histogram_summary(inputs, 'cell_input')
- if self._pre_bottleneck:
- bottleneck = inputs
- else:
- bottleneck = slim.separable_conv2d(
- tf.concat([inputs, h], 3),
- self._num_units,
- self._filter_size,
- depth_multiplier=1,
- activation_fn=self._activation,
- normalizer_fn=None,
- scope='bottleneck')
-
- if self._viz_gates:
- slim.summaries.add_histogram_summary(bottleneck, 'bottleneck')
-
- concat = slim.separable_conv2d(
- bottleneck,
- 4 * self._num_units,
- self._filter_size,
- depth_multiplier=1,
- activation_fn=None,
- normalizer_fn=None,
- scope='gates')
-
- i, j, f, o = tf.split(concat, 4, 3)
-
- new_c = (
- c * tf.sigmoid(f + self._forget_bias) +
- tf.sigmoid(i) * self._activation(j))
- if self._clip_state:
- new_c = tf.clip_by_value(new_c, -6, 6)
- new_h = self._activation(new_c) * tf.sigmoid(o)
- # summary of cell output and new state
- if self._viz_gates:
- slim.summaries.add_histogram_summary(new_h, 'cell_output')
- slim.summaries.add_histogram_summary(new_c, 'cell_state')
-
- output = new_h
- if self._output_bottleneck:
- output = tf.concat([new_h, bottleneck], axis=3)
-
- # reflatten state to store it
- if self._flatten_state:
- new_c = tf.reshape(new_c, [-1, self._param_count])
- new_h = tf.reshape(new_h, [-1, self._param_count])
-
- return output, contrib_rnn.LSTMStateTuple(new_c, new_h)
-
- def init_state(self, state_name, batch_size, dtype, learned_state=False):
- """Creates an initial state compatible with this cell.
-
- Args:
- state_name: name of the state tensor
- batch_size: model batch size
- dtype: dtype for the tensor values i.e. tf.float32
- learned_state: whether the initial state should be learnable. If false,
- the initial state is set to all 0's
-
- Returns:
- The created initial state.
- """
- state_size = (
- self.state_size_flat if self._flatten_state else self.state_size)
- # A list of 2 zero tensors or learned variable tensors, depending on
- # whether learned_state is true.
- # pylint: disable=g-long-ternary,g-complex-comprehension
- ret_flat = [(contrib_variables.model_variable(
- state_name + str(i),
- shape=s,
- dtype=dtype,
- initializer=tf.truncated_normal_initializer(stddev=0.03))
- if learned_state else tf.zeros(
- [batch_size] + s, dtype=dtype, name=state_name))
- for i, s in enumerate(state_size)]
-
- # duplicates initial state across the batch axis if it's learned
- if learned_state:
- ret_flat = [
- tf.stack([tensor
- for i in range(int(batch_size))])
- for tensor in ret_flat
- ]
- for s, r in zip(state_size, ret_flat):
- r.set_shape([None] + s)
- return tf.nest.pack_sequence_as(structure=[1, 1], flat_sequence=ret_flat)
-
- def pre_bottleneck(self, inputs, state, input_index):
- """Apply pre-bottleneck projection to inputs.
-
- Pre-bottleneck operation maps features of different channels into the same
- dimension. The purpose of this op is to share the features from both large
- and small models in the same LSTM cell.
-
- Args:
- inputs: 4D Tensor with shape [batch_size x width x height x input_size].
- state: 4D Tensor with shape [batch_size x width x height x state_size].
- input_index: integer index indicating which base features the inputs
- correspond to.
-
- Returns:
- inputs: pre-bottlenecked inputs.
- Raises:
- ValueError: If pre_bottleneck is not set or inputs is not rank 4.
- """
- # Sometimes state is a tuple, in which case it cannot be modified, e.g.
- # during training, tf.contrib.training.SequenceQueueingStateSaver
- # returns the state as a tuple. This should not be an issue since we
- # only need to modify state[1] during export, when state should be a
- # list.
- if not self._pre_bottleneck:
- raise ValueError('Only applied when pre_bottleneck is set to true.')
- if len(inputs.shape) != 4:
- raise ValueError('Expect rank 4 feature tensor.')
- if not self._flatten_state and len(state.shape) != 4:
- raise ValueError('Expect rank 4 state tensor.')
- if self._flatten_state and len(state.shape) != 2:
- raise ValueError('Expect rank 2 state tensor when flatten_state is set.')
-
- with tf.name_scope(None):
- state = tf.identity(state, name='raw_inputs/init_lstm_h')
- if self._flatten_state:
- batch_size = inputs.shape[0]
- height = inputs.shape[1]
- width = inputs.shape[2]
- state = tf.reshape(state, [batch_size, height, width, -1])
- with tf.variable_scope('conv_lstm_cell', reuse=tf.AUTO_REUSE):
- scope_name = 'bottleneck_%d' % input_index
- inputs = slim.separable_conv2d(
- tf.concat([inputs, state], 3),
- self.output_size[-1],
- self._filter_size,
- depth_multiplier=1,
- activation_fn=tf.nn.relu6,
- normalizer_fn=None,
- scope=scope_name)
- # For exporting inference graph, we only mark the first timestep.
- with tf.name_scope(None):
- inputs = tf.identity(
- inputs, name='raw_outputs/base_endpoint_%d' % (input_index + 1))
- return inputs
-
-
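
As a plain-numpy cross-check of the recurrence in BottleneckConvLSTMCell.__call__ above (illustration only: the separable convolutions are replaced by fixed stand-in gate pre-activations):

import numpy as np

def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

forget_bias = 1.0
c = np.zeros((1, 2, 2, 3))                  # previous cell state
i = j = f = o = np.full((1, 2, 2, 3), 0.5)  # stand-ins for the gate convs

# Mirrors new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * activation(j).
new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
new_h = np.tanh(new_c) * sigmoid(o)         # cell output
print(new_h.shape)  # (1, 2, 2, 3)
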
-class GroupedConvLSTMCell(contrib_rnn.RNNCell):
- """Basic LSTM recurrent network cell using separable convolutions.
-
- The implementation is based on: https://arxiv.org/abs/1903.10172.
-
- We add forget_bias (default: 1) to the biases of the forget gate in order to
- reduce the scale of forgetting in the beginning of the training.
-
- This LSTM first projects inputs to the size of the output before doing gate
- computations. This saves params unless the input is less than a third of the
- state size channel-wise. Computation of bottlenecks and gates is divided
- into independent groups for further savings.
- """
-
- def __init__(self,
- filter_size,
- output_size,
- num_units,
- is_training,
- forget_bias=1.0,
- activation=tf.tanh,
- use_batch_norm=False,
- flatten_state=False,
- groups=4,
- clip_state=False,
- scale_state=False,
- output_bottleneck=False,
- pre_bottleneck=False,
- is_quantized=False,
- visualize_gates=False,
- conv_op_overrides=None):
- """Initialize the basic LSTM cell.
-
- Args:
- filter_size: collection, conv filter size
- output_size: collection, the width/height dimensions of the cell/output
- num_units: int, The number of channels in the LSTM cell.
- is_training: Whether the LSTM is in training mode.
- forget_bias: float, The bias added to forget gates (see above).
- activation: Activation function of the inner states.
- use_batch_norm: if True, use batch norm after convolution
- flatten_state: if True, state tensor will be flattened and stored as a 2-d
- tensor. Use for exporting the model to tfmini
- groups: Number of groups to split the state into. Must evenly divide
- num_units.
- clip_state: if True, clips state between [-6, 6].
- scale_state: if True, scales state so that all values are under 6 at all
- times.
- output_bottleneck: if True, the cell bottleneck will be concatenated to
- the cell output.
- pre_bottleneck: if True, the cell assumes that bottlenecking was performed
- before the cell was called.
- is_quantized: if True, the model is in quantize mode, which requires
- quantization friendly concat and separable_conv2d ops.
- visualize_gates: if True, add histogram summaries of all gates and outputs
- to tensorboard
- conv_op_overrides: A list of convolutional operations that override the
- 'bottleneck' and 'convolution' layers before the lstm gates. If None, the
- original separable_conv implementation is used. The list must have
- length two.
-
- Raises:
- ValueError: when both clip_state and scale_state are enabled.
- """
- if clip_state and scale_state:
- raise ValueError('clip_state and scale_state cannot both be enabled.')
-
- self._filter_size = list(filter_size)
- self._output_size = list(output_size)
- self._num_units = num_units
- self._is_training = is_training
- self._forget_bias = forget_bias
- self._activation = activation
- self._use_batch_norm = use_batch_norm
- self._viz_gates = visualize_gates
- self._flatten_state = flatten_state
- self._param_count = self._num_units
- self._groups = groups
- self._scale_state = scale_state
- self._clip_state = clip_state
- self._output_bottleneck = output_bottleneck
- self._pre_bottleneck = pre_bottleneck
- self._is_quantized = is_quantized
- for dim in self._output_size:
- self._param_count *= dim
- self._conv_op_overrides = conv_op_overrides
- if self._conv_op_overrides and len(self._conv_op_overrides) != 2:
- raise ValueError('Bottleneck and convolutional layers must be '
- 'overridden together.')
-
- @property
- def state_size(self):
- return contrib_rnn.LSTMStateTuple(self._output_size + [self._num_units],
- self._output_size + [self._num_units])
-
- @property
- def state_size_flat(self):
- return contrib_rnn.LSTMStateTuple([self._param_count], [self._param_count])
-
- @property
- def output_size(self):
- return self._output_size + [self._num_units]
-
- @property
- def filter_size(self):
- return self._filter_size
-
- @property
- def num_groups(self):
- return self._groups
-
- def __call__(self, inputs, state, scope=None):
- """Long short-term memory cell (LSTM) with bottlenecking.
-
- Includes logic for quantization-aware training. Note that all concats and
- activations use fixed ranges unless stated otherwise.
-
- Args:
- inputs: Input tensor at the current timestep.
- state: Tuple of tensors, the state at the previous timestep.
- scope: Optional scope.
-
- Returns:
- A tuple where the first element is the LSTM output and the second is
- an LSTMStateTuple of the state at the current timestep.
- """
- scope = scope or 'conv_lstm_cell'
- with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
- c, h = state
-
- # Set nodes to be under raw_inputs/ name scope for tfmini export.
- with tf.name_scope(None):
- c = tf.identity(c, name='raw_inputs/init_lstm_c')
- # When pre_bottleneck is enabled, the input h is handled in rnn_decoder.py.
- if not self._pre_bottleneck:
- h = tf.identity(h, name='raw_inputs/init_lstm_h')
-
- # unflatten state if necessary
- if self._flatten_state:
- c = tf.reshape(c, [-1] + self.output_size)
- h = tf.reshape(h, [-1] + self.output_size)
-
- c_list = tf.split(c, self._groups, axis=3)
- if self._pre_bottleneck:
- inputs_list = tf.split(inputs, self._groups, axis=3)
- else:
- h_list = tf.split(h, self._groups, axis=3)
- out_bottleneck = []
- out_c = []
- out_h = []
- # summary of input passed into cell
- if self._viz_gates:
- slim.summaries.add_histogram_summary(inputs, 'cell_input')
-
- for k in range(self._groups):
- if self._pre_bottleneck:
- bottleneck = inputs_list[k]
- else:
- if self._conv_op_overrides:
- bottleneck_fn = self._conv_op_overrides[0]
- else:
- bottleneck_fn = functools.partial(
- lstm_utils.quantizable_separable_conv2d,
- kernel_size=self._filter_size,
- activation_fn=self._activation)
- if self._use_batch_norm:
- b_x = bottleneck_fn(
- inputs=inputs,
- num_outputs=self._num_units // self._groups,
- is_quantized=self._is_quantized,
- depth_multiplier=1,
- normalizer_fn=None,
- scope='bottleneck_%d_x' % k)
- b_h = bottleneck_fn(
- inputs=h_list[k],
- num_outputs=self._num_units // self._groups,
- is_quantized=self._is_quantized,
- depth_multiplier=1,
- normalizer_fn=None,
- scope='bottleneck_%d_h' % k)
- b_x = slim.batch_norm(
- b_x,
- scale=True,
- is_training=self._is_training,
- scope='BatchNorm_%d_X' % k)
- b_h = slim.batch_norm(
- b_h,
- scale=True,
- is_training=self._is_training,
- scope='BatchNorm_%d_H' % k)
- bottleneck = b_x + b_h
- else:
- # All concats use fixed quantization ranges to prevent rescaling
- # at inference. Both |inputs| and |h_list| are tensors resulting
- # from Relu6 operations so we fix the ranges to [0, 6].
- bottleneck_concat = lstm_utils.quantizable_concat(
- [inputs, h_list[k]],
- axis=3,
- is_training=False,
- is_quantized=self._is_quantized,
- scope='bottleneck_%d/quantized_concat' % k)
- bottleneck = bottleneck_fn(
- inputs=bottleneck_concat,
- num_outputs=self._num_units // self._groups,
- is_quantized=self._is_quantized,
- depth_multiplier=1,
- normalizer_fn=None,
- scope='bottleneck_%d' % k)
-
- if self._conv_op_overrides:
- conv_fn = self._conv_op_overrides[1]
- else:
- conv_fn = functools.partial(
- lstm_utils.quantizable_separable_conv2d,
- kernel_size=self._filter_size,
- activation_fn=None)
- concat = conv_fn(
- inputs=bottleneck,
- num_outputs=4 * self._num_units // self._groups,
- is_quantized=self._is_quantized,
- depth_multiplier=1,
- normalizer_fn=None,
- scope='concat_conv_%d' % k)
-
- # Since there is no activation in the previous separable conv, we
- # quantize here. A starting range of [-6, 6] is used because the
- # tensors are input to a Sigmoid function that saturates at these
- # ranges.
- concat = lstm_utils.quantize_op(
- concat,
- is_training=self._is_training,
- default_min=-6,
- default_max=6,
- is_quantized=self._is_quantized,
- scope='gates_%d/act_quant' % k)
-
- # i = input_gate, j = new_input, f = forget_gate, o = output_gate
- i, j, f, o = tf.split(concat, 4, 3)
-
- f_add = f + self._forget_bias
- f_add = lstm_utils.quantize_op(
- f_add,
- is_training=self._is_training,
- default_min=-6,
- default_max=6,
- is_quantized=self._is_quantized,
- scope='forget_gate_%d/add_quant' % k)
- f_act = tf.sigmoid(f_add)
-
- a = c_list[k] * f_act
- a = lstm_utils.quantize_op(
- a,
- is_training=self._is_training,
- is_quantized=self._is_quantized,
- scope='forget_gate_%d/mul_quant' % k)
-
- i_act = tf.sigmoid(i)
-
- j_act = self._activation(j)
- # The quantization range is fixed for the relu6 to ensure that zero
- # is exactly representable.
- j_act = lstm_utils.fixed_quantize_op(
- j_act,
- fixed_min=0.0,
- fixed_max=6.0,
- is_quantized=self._is_quantized,
- scope='new_input_%d/act_quant' % k)
-
- b = i_act * j_act
- b = lstm_utils.quantize_op(
- b,
- is_training=self._is_training,
- is_quantized=self._is_quantized,
- scope='input_gate_%d/mul_quant' % k)
-
- new_c = a + b
- # The quantization range is fixed to [0, 6] due to an optimization in
- # TFLite. The order of operations is as follows:
- # Add -> FakeQuant -> Relu6 -> FakeQuant -> Concat.
- # The fakequant ranges to the concat must be fixed to ensure all inputs
- # to the concat have the same range, removing the need for rescaling.
- # The quantization ranges input to the relu6 are propagated to its
- # output. Any mismatch between these two ranges will cause an error.
- new_c = lstm_utils.fixed_quantize_op(
- new_c,
- fixed_min=0.0,
- fixed_max=6.0,
- is_quantized=self._is_quantized,
- scope='new_c_%d/add_quant' % k)
-
- if not self._is_quantized:
- if self._scale_state:
- normalizer = tf.maximum(1.0,
- tf.reduce_max(new_c, axis=(1, 2, 3)) / 6)
- new_c /= tf.reshape(normalizer, [tf.shape(new_c)[0], 1, 1, 1])
- elif self._clip_state:
- new_c = tf.clip_by_value(new_c, -6, 6)
-
- new_c_act = self._activation(new_c)
- # The quantization range is fixed for the relu6 to ensure that zero
- # is exactly representable.
- new_c_act = lstm_utils.fixed_quantize_op(
- new_c_act,
- fixed_min=0.0,
- fixed_max=6.0,
- is_quantized=self._is_quantized,
- scope='new_c_%d/act_quant' % k)
-
- o_act = tf.sigmoid(o)
-
- new_h = new_c_act * o_act
- # The quantization range is fixed since it is input to a concat.
- # A range of [0, 6] is used since |new_h| is a product of ranges [0, 6]
- # and [0, 1].
- new_h_act = lstm_utils.fixed_quantize_op(
- new_h,
- fixed_min=0.0,
- fixed_max=6.0,
- is_quantized=self._is_quantized,
- scope='new_h_%d/act_quant' % k)
-
- out_bottleneck.append(bottleneck)
- out_c.append(new_c_act)
- out_h.append(new_h_act)
-
- # Since all inputs to the below concats are already quantized, we can use
- # a regular concat operation.
- new_c = tf.concat(out_c, axis=3)
- new_h = tf.concat(out_h, axis=3)
-
- # |bottleneck| is input to a concat with |new_h|. We must use
- # quantizable_concat() with a fixed range that matches |new_h|.
- bottleneck = lstm_utils.quantizable_concat(
- out_bottleneck,
- axis=3,
- is_training=False,
- is_quantized=self._is_quantized,
- scope='out_bottleneck/quantized_concat')
-
- # summary of cell output and new state
- if self._viz_gates:
- slim.summaries.add_histogram_summary(new_h, 'cell_output')
- slim.summaries.add_histogram_summary(new_c, 'cell_state')
-
- output = new_h
- if self._output_bottleneck:
- output = lstm_utils.quantizable_concat(
- [new_h, bottleneck],
- axis=3,
- is_training=False,
- is_quantized=self._is_quantized,
- scope='new_output/quantized_concat')
-
- # reflatten state to store it
- if self._flatten_state:
- new_c = tf.reshape(new_c, [-1, self._param_count], name='lstm_c')
- new_h = tf.reshape(new_h, [-1, self._param_count], name='lstm_h')
-
- # Set nodes to be under raw_outputs/ name scope for tfmini export.
- with tf.name_scope(None):
- new_c = tf.identity(new_c, name='raw_outputs/lstm_c')
- new_h = tf.identity(new_h, name='raw_outputs/lstm_h')
- states_and_output = contrib_rnn.LSTMStateTuple(new_c, new_h)
-
- return output, states_and_output
-
- def init_state(self, state_name, batch_size, dtype, learned_state=False):
- """Creates an initial state compatible with this cell.
-
- Args:
- state_name: name of the state tensor
- batch_size: model batch size
- dtype: dtype for the tensor values i.e. tf.float32
- learned_state: whether the initial state should be learnable. If false,
- the initial state is set to all 0's
-
- Returns:
- ret: the created initial state
- """
- state_size = (
- self.state_size_flat if self._flatten_state else self.state_size)
- # A list of 2 zero tensors or learned variable tensors, depending on
- # whether learned_state is true.
- # pylint: disable=g-long-ternary,g-complex-comprehension
- ret_flat = [(contrib_variables.model_variable(
- state_name + str(i),
- shape=s,
- dtype=dtype,
- initializer=tf.truncated_normal_initializer(stddev=0.03))
- if learned_state else tf.zeros(
- [batch_size] + s, dtype=dtype, name=state_name))
- for i, s in enumerate(state_size)]
-
- # duplicates initial state across the batch axis if it's learned
- if learned_state:
- ret_flat = [tf.stack([tensor for i in range(int(batch_size))])
- for tensor in ret_flat]
- for s, r in zip(state_size, ret_flat):
- # Reassigning the loop variable would discard the reshape; set the
- # static shape in place instead, as BottleneckConvLSTMCell does.
- r.set_shape([None] + s)
- ret = tf.nest.pack_sequence_as(structure=[1, 1], flat_sequence=ret_flat)
- return ret
-
- def pre_bottleneck(self, inputs, state, input_index):
- """Apply pre-bottleneck projection to inputs.
-
- Pre-bottleneck operation maps features of different channels into the same
- dimension. The purpose of this op is to share the features from both large
- and small models in the same LSTM cell.
-
- Args:
- inputs: 4D Tensor with shape [batch_size x width x height x input_size].
- state: 4D Tensor with shape [batch_size x width x height x state_size].
- input_index: integer index indicating which base features the inputs
- correspond to.
-
- Returns:
- inputs: pre-bottlenecked inputs.
- Raises:
- ValueError: If pre_bottleneck is not set or inputs is not rank 4.
- """
- # Sometimes state is a tuple, in which case it cannot be modified, e.g.
- # during training, tf.contrib.training.SequenceQueueingStateSaver
- # returns the state as a tuple. This should not be an issue since we
- # only need to modify state[1] during export, when state should be a
- # list.
- if not self._pre_bottleneck:
- raise ValueError('Only applied when pre_bottleneck is set to true.')
- if len(inputs.shape) != 4:
- raise ValueError('Expect a rank 4 feature tensor.')
- if not self._flatten_state and len(state.shape) != 4:
- raise ValueError('Expect rank 4 state tensor.')
- if self._flatten_state and len(state.shape) != 2:
- raise ValueError('Expect rank 2 state tensor when flatten_state is set.')
-
- with tf.name_scope(None):
- state = tf.identity(
- state, name='raw_inputs/init_lstm_h_%d' % (input_index + 1))
- if self._flatten_state:
- batch_size = inputs.shape[0]
- height = inputs.shape[1]
- width = inputs.shape[2]
- state = tf.reshape(state, [batch_size, height, width, -1])
- with tf.variable_scope('conv_lstm_cell', reuse=tf.AUTO_REUSE):
- state_split = tf.split(state, self._groups, axis=3)
- with tf.variable_scope('bottleneck_%d' % input_index):
- bottleneck_out = []
- for k in range(self._groups):
- with tf.variable_scope('group_%d' % k):
- bottleneck_out.append(
- lstm_utils.quantizable_separable_conv2d(
- lstm_utils.quantizable_concat(
- [inputs, state_split[k]],
- axis=3,
- is_training=self._is_training,
- is_quantized=self._is_quantized,
- scope='quantized_concat'),
- self.output_size[-1] // self._groups,  # integer division for Python 3
- self._filter_size,
- is_quantized=self._is_quantized,
- depth_multiplier=1,
- activation_fn=tf.nn.relu6,
- normalizer_fn=None,
- scope='project'))
- inputs = lstm_utils.quantizable_concat(
- bottleneck_out,
- axis=3,
- is_training=self._is_training,
- is_quantized=self._is_quantized,
- scope='bottleneck_out/quantized_concat')
- # For exporting inference graph, we only mark the first timestep.
- with tf.name_scope(None):
- inputs = tf.identity(
- inputs, name='raw_outputs/base_endpoint_%d' % (input_index + 1))
- return inputs
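
The fixed [0, 6] quantization ranges discussed throughout GroupedConvLSTMCell can be approximated with TensorFlow's standard fake-quant op; the following is a sketch of the idea only, not of the lstm_utils implementation (which is not shown in this diff):

import tensorflow.compat.v1 as tf

# A Relu6 output already lies in [0, 6]; pinning the fake-quant range to the
# same interval means a downstream concat never needs rescaling in TFLite.
x = tf.nn.relu6(tf.random.normal([1, 4, 4, 8]) * 4.0)
x_quant = tf.fake_quant_with_min_max_args(x, min=0.0, max=6.0, num_bits=8)
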
diff --git a/research/lstm_object_detection/lstm/lstm_cells_test.py b/research/lstm_object_detection/lstm/lstm_cells_test.py
deleted file mode 100644
index b296310194dde2a10249c0af266d50ff762ec745..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/lstm/lstm_cells_test.py
+++ /dev/null
@@ -1,412 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for lstm_object_detection.lstm.lstm_cells."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-
-from lstm_object_detection.lstm import lstm_cells
-
-
-class BottleneckConvLstmCellsTest(tf.test.TestCase):
-
- def test_run_lstm_cell(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = False
-
- inputs = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units)
- init_state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- output, state_tuple = cell(inputs, init_state)
- self.assertAllEqual([4, 10, 10, 15], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state_tuple[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state_tuple[1].shape.as_list())
-
- def test_run_lstm_cell_with_flattened_state(self):
- filter_size = [3, 3]
- output_dim = 10
- output_size = [output_dim] * 2
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = False
-
- inputs = tf.zeros([batch_size, output_dim, output_dim, 3], dtype=tf.float32)
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- flatten_state=True)
- init_state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- output, state_tuple = cell(inputs, init_state)
- self.assertAllEqual([4, 10, 10, 15], output.shape.as_list())
- self.assertAllEqual([4, 1500], state_tuple[0].shape.as_list())
- self.assertAllEqual([4, 1500], state_tuple[1].shape.as_list())
-
- def test_run_lstm_cell_with_output_bottleneck(self):
- filter_size = [3, 3]
- output_dim = 10
- output_size = [output_dim] * 2
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = False
-
- inputs = tf.zeros([batch_size, output_dim, output_dim, 3], dtype=tf.float32)
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- output_bottleneck=True)
- init_state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- output, state_tuple = cell(inputs, init_state)
- self.assertAllEqual([4, 10, 10, 30], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state_tuple[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state_tuple[1].shape.as_list())
-
- def test_get_init_state(self):
- filter_size = [3, 3]
- output_dim = 10
- output_size = [output_dim] * 2
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = False
-
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units)
- init_c, init_h = cell.init_state(
- state_name, batch_size, dtype, learned_state)
-
- self.assertEqual(tf.float32, init_c.dtype)
- self.assertEqual(tf.float32, init_h.dtype)
- with self.test_session() as sess:
- init_c_res, init_h_res = sess.run([init_c, init_h])
- self.assertAllClose(np.zeros((4, 10, 10, 15)), init_c_res)
- self.assertAllClose(np.zeros((4, 10, 10, 15)), init_h_res)
-
- def test_get_init_learned_state(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = True
-
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units)
- init_c, init_h = cell.init_state(
- state_name, batch_size, dtype, learned_state)
-
- self.assertEqual(tf.float32, init_c.dtype)
- self.assertEqual(tf.float32, init_h.dtype)
- self.assertAllEqual([4, 10, 10, 15], init_c.shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], init_h.shape.as_list())
-
- def test_unroll(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- unroll = 10
- learned_state = False
-
- inputs = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units)
- state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- for step in range(unroll):
- output, state = cell(inputs, state)
- self.assertAllEqual([4, 10, 10, 15], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state[1].shape.as_list())
-
- def test_prebottleneck(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- unroll = 10
- learned_state = False
-
- inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
- inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- pre_bottleneck=True)
- state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- for step in range(unroll):
- if step % 2 == 0:
- inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
- else:
- inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
- output, state = cell(inputs, state)
- self.assertAllEqual([4, 10, 10, 15], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 15], state[1].shape.as_list())
-
- def test_flatten_state(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 15
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- unroll = 10
- learned_state = False
-
- inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
- inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- pre_bottleneck=True,
- flatten_state=True)
- state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- for step in range(unroll):
- if step % 2 == 0:
- inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
- else:
- inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
- output, state = cell(inputs, state)
- with self.test_session() as sess:
- sess.run(tf.global_variables_initializer())
- output_result, state_result = sess.run([output, state])
- self.assertAllEqual((4, 10, 10, 15), output_result.shape)
- self.assertAllEqual((4, 10*10*15), state_result[0].shape)
- self.assertAllEqual((4, 10*10*15), state_result[1].shape)
-
-
-class GroupedConvLstmCellsTest(tf.test.TestCase):
-
- def test_run_lstm_cell(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 16
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = False
-
- inputs = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- is_training=True)
- init_state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- output, state_tuple = cell(inputs, init_state)
- self.assertAllEqual([4, 10, 10, 16], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state_tuple[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state_tuple[1].shape.as_list())
-
- def test_run_lstm_cell_with_output_bottleneck(self):
- filter_size = [3, 3]
- output_dim = 10
- output_size = [output_dim] * 2
- num_units = 16
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = False
-
- inputs = tf.zeros([batch_size, output_dim, output_dim, 3], dtype=tf.float32)
- cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- is_training=True,
- output_bottleneck=True)
- init_state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- output, state_tuple = cell(inputs, init_state)
- self.assertAllEqual([4, 10, 10, 32], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state_tuple[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state_tuple[1].shape.as_list())
-
- def test_get_init_state(self):
- filter_size = [3, 3]
- output_dim = 10
- output_size = [output_dim] * 2
- num_units = 16
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = False
-
- cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- is_training=True)
- init_c, init_h = cell.init_state(
- state_name, batch_size, dtype, learned_state)
-
- self.assertEqual(tf.float32, init_c.dtype)
- self.assertEqual(tf.float32, init_h.dtype)
- with self.test_session() as sess:
- init_c_res, init_h_res = sess.run([init_c, init_h])
- self.assertAllClose(np.zeros((4, 10, 10, 16)), init_c_res)
- self.assertAllClose(np.zeros((4, 10, 10, 16)), init_h_res)
-
- def test_get_init_learned_state(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 16
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- learned_state = True
-
- cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- is_training=True)
- init_c, init_h = cell.init_state(
- state_name, batch_size, dtype, learned_state)
-
- self.assertEqual(tf.float32, init_c.dtype)
- self.assertEqual(tf.float32, init_h.dtype)
- self.assertAllEqual([4, 10, 10, 16], init_c.shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], init_h.shape.as_list())
-
- def test_unroll(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 16
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- unroll = 10
- learned_state = False
-
- inputs = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- is_training=True)
- state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- for step in range(unroll):
- output, state = cell(inputs, state)
- self.assertAllEqual([4, 10, 10, 16], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state[1].shape.as_list())
-
- def test_prebottleneck(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 16
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- unroll = 10
- learned_state = False
-
- inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
- inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- is_training=True,
- pre_bottleneck=True)
- state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- for step in range(unroll):
- if step % 2 == 0:
- inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
- else:
- inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
- output, state = cell(inputs, state)
- self.assertAllEqual([4, 10, 10, 16], output.shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state[0].shape.as_list())
- self.assertAllEqual([4, 10, 10, 16], state[1].shape.as_list())
-
- def test_flatten_state(self):
- filter_size = [3, 3]
- output_size = [10, 10]
- num_units = 16
- state_name = 'lstm_state'
- batch_size = 4
- dtype = tf.float32
- unroll = 10
- learned_state = False
-
- inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
- inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
- cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=filter_size,
- output_size=output_size,
- num_units=num_units,
- is_training=True,
- pre_bottleneck=True,
- flatten_state=True)
- state = cell.init_state(
- state_name, batch_size, dtype, learned_state)
- for step in range(unroll):
- if step % 2 == 0:
- inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
- else:
- inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
- output, state = cell(inputs, state)
- with self.test_session() as sess:
- sess.run(tf.global_variables_initializer())
- output_result, state_result = sess.run([output, state])
- self.assertAllEqual((4, 10, 10, 16), output_result.shape)
- self.assertAllEqual((4, 10*10*16), state_result[0].shape)
- self.assertAllEqual((4, 10*10*16), state_result[1].shape)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/lstm/rnn_decoder.py b/research/lstm_object_detection/lstm/rnn_decoder.py
deleted file mode 100644
index 185ca130396fa8687ba9359f91366b64d16d0255..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/lstm/rnn_decoder.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Custom RNN decoder."""
-
-import tensorflow.compat.v1 as tf
-import lstm_object_detection.lstm.utils as lstm_utils
-
-
-class _NoVariableScope(object):
-
- def __enter__(self):
- return
-
- def __exit__(self, exc_type, exc_value, traceback):
- return False
-
-
-def rnn_decoder(decoder_inputs,
- initial_state,
- cell,
- loop_function=None,
- scope=None):
- """RNN decoder for the LSTM-SSD model.
-
- This decoder returns a list of all states, rather than only the final state.
- Args:
- decoder_inputs: A list of 4D Tensors with shape
- [batch_size x height x width x input_size].
- initial_state: 2D Tensor with shape [batch_size x cell.state_size].
- cell: rnn_cell.RNNCell defining the cell function and size.
- loop_function: If not None, this function will be applied to the i-th output
- in order to generate the i+1-st input, and decoder_inputs will be ignored,
- except for the first element ("GO" symbol). This can be used for decoding,
- but also for training to emulate http://arxiv.org/abs/1506.03099.
- Signature -- loop_function(prev, i) = next
- * prev is a 2D Tensor of shape [batch_size x output_size],
- * i is an integer, the step number (when advanced control is needed),
- * next is a 2D Tensor of shape [batch_size x input_size].
- scope: optional VariableScope for the created subgraph.
- Returns:
- A tuple of the form (outputs, states), where:
- outputs: A list of the same length as decoder_inputs of 4D Tensors with
- shape [batch_size x height x width x output_size] containing generated
- outputs.
- states: A list of the same length as decoder_inputs, holding the state
- of the cell at each time-step as an LSTMStateTuple.
- """
- with tf.variable_scope(scope) if scope else _NoVariableScope():
- state_tuple = initial_state
- outputs = []
- states = []
- prev = None
- for local_step, decoder_input in enumerate(decoder_inputs):
- if loop_function is not None and prev is not None:
- with tf.variable_scope('loop_function', reuse=True):
- decoder_input = loop_function(prev, local_step)
- output, state_tuple = cell(decoder_input, state_tuple)
- outputs.append(output)
- states.append(state_tuple)
- if loop_function is not None:
- prev = output
- return outputs, states
-
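
A minimal usage sketch for rnn_decoder, borrowing the cell configuration from lstm_cells_test.py above (shapes are illustrative):

import tensorflow.compat.v1 as tf
from lstm_object_detection.lstm import lstm_cells
from lstm_object_detection.lstm import rnn_decoder

cell = lstm_cells.BottleneckConvLSTMCell(
    filter_size=[3, 3], output_size=[10, 10], num_units=15)
init_state = cell.init_state('lstm_state', batch_size=4, dtype=tf.float32)
decoder_inputs = [tf.zeros([4, 10, 10, 3]) for _ in range(3)]
# Returns one output tensor and one state tuple per timestep.
outputs, states = rnn_decoder.rnn_decoder(decoder_inputs, init_state, cell)
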
-def multi_input_rnn_decoder(decoder_inputs,
- initial_state,
- cell,
- sequence_step,
- selection_strategy='RANDOM',
- is_training=None,
- is_quantized=False,
- preprocess_fn_list=None,
- pre_bottleneck=False,
- flatten_state=False,
- scope=None):
- """RNN decoder for the Interleaved LSTM-SSD model.
-
- This decoder takes multiple sequences of inputs and selects the input to feed
- to the rnn at each timestep using its selection_strategy, which can be random,
- learned, or deterministic.
- This decoder returns a list of all states, rather than only the final state.
- Args:
- decoder_inputs: A list of lists of 4D Tensors of shape
- [batch_size x height x width x input_size], one inner list per sequence.
- initial_state: 2D Tensor with shape [batch_size x cell.state_size].
- cell: rnn_cell.RNNCell defining the cell function and size.
- sequence_step: Tensor [batch_size] of the step number of the first elements
- in the sequence.
- selection_strategy: Method for picking the decoder_input to use at each
- timestep. Must be 'RANDOM', or 'SKIPX' for an integer X, where X is the
- number of times to use the second input before using the first.
- is_training: boolean, whether the network is training. When using learned
- selection, attempts exploration if training.
- is_quantized: flag to enable/disable quantization mode.
- preprocess_fn_list: List of functions accepting two tensor arguments: one
- timestep of decoder_inputs and the lstm state. If not None,
- decoder_inputs[i] will be updated with preprocess_fn_list[i] at the start of
- each timestep.
- pre_bottleneck: if True, use separate bottleneck weights for each sequence.
- Useful when input sequences have differing numbers of channels. Final
- bottlenecks will have the same dimension.
- flatten_state: Whether the LSTM state is flattened.
- scope: optional VariableScope for the created subgraph.
- Returns:
- A tuple of the form (outputs, states), where:
- outputs: A list of the same length as decoder_inputs of 4D Tensors with
- shape [batch_size x height x width x output_size] containing generated
- outputs.
- states: A list of the same length as decoder_inputs, holding the state
- of the cell at each time-step as an LSTMStateTuple.
- Raises:
- ValueError: If selection_strategy is not recognized or the unroll length
- is unexpected.
- """
- if flatten_state and len(decoder_inputs[0]) > 1:
- raise ValueError('In export mode, unroll length should not be more than 1')
- with tf.variable_scope(scope) if scope else _NoVariableScope():
- state_tuple = initial_state
- outputs = []
- states = []
- batch_size = decoder_inputs[0][0].shape[0].value
- num_sequences = len(decoder_inputs)
- sequence_length = len(decoder_inputs[0])
-
- for local_step in range(sequence_length):
- for sequence_index in range(num_sequences):
- if preprocess_fn_list is not None:
- decoder_inputs[sequence_index][local_step] = (
- preprocess_fn_list[sequence_index](
- decoder_inputs[sequence_index][local_step], state_tuple[0]))
- if pre_bottleneck:
- decoder_inputs[sequence_index][local_step] = cell.pre_bottleneck(
- inputs=decoder_inputs[sequence_index][local_step],
- state=state_tuple[1],
- input_index=sequence_index)
-
- action = generate_action(selection_strategy, local_step, sequence_step,
- [batch_size, 1, 1, 1])
- inputs, _ = (
- select_inputs(decoder_inputs, action, local_step, is_training,
- is_quantized))
- # Mark base network endpoints under raw_inputs/
- with tf.name_scope(None):
- inputs = tf.identity(inputs, 'raw_inputs/base_endpoint')
- output, state_tuple_out = cell(inputs, state_tuple)
- state_tuple = select_state(state_tuple, state_tuple_out, action)
-
- outputs.append(output)
- states.append(state_tuple)
- return outputs, states
-
-
-def generate_action(selection_strategy, local_step, sequence_step,
- action_shape):
- """Generate current (binary) action based on selection strategy.
-
- Args:
- selection_strategy: Method for picking the decoder_input to use at each
- timestep. Must be 'RANDOM', or 'SKIPX' for an integer X, where X is the
- number of times to use the second input before using the first.
- local_step: Integer step number within the current unrolled batch.
- sequence_step: Tensor [batch_size] of the step number of the first elements
- in the sequence.
- action_shape: The shape of action tensor to be generated.
-
- Returns:
- A tensor of shape action_shape, each element is an individual action.
-
- Raises:
- ValueError: if selection_strategy is not supported or if 'SKIP' is not
- followed by an integer.
- """
- if selection_strategy.startswith('RANDOM'):
- action = tf.random.uniform(action_shape, maxval=2, dtype=tf.int32)
- action = tf.minimum(action, 1)
-
- # First step always runs large network.
- if local_step == 0 and sequence_step is not None:
- action *= tf.minimum(
- tf.reshape(tf.cast(sequence_step, tf.int32), action_shape), 1)
- elif selection_strategy.startswith('SKIP'):
- inter_count = int(selection_strategy[4:])
- if local_step % (inter_count + 1) == 0:
- action = tf.zeros(action_shape)
- else:
- action = tf.ones(action_shape)
- else:
- raise ValueError('Selection strategy %s not recognized' %
- selection_strategy)
- return tf.cast(action, tf.int32)
-
-
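
A worked example of the 'SKIP' branch above: with selection_strategy 'SKIP2', inter_count is 2, so steps 0, 3, 6, ... take action 0 (the first input) and the two steps in between take action 1:

inter_count = 2  # parsed from 'SKIP2'
schedule = [0 if step % (inter_count + 1) == 0 else 1 for step in range(7)]
assert schedule == [0, 1, 1, 0, 1, 1, 0]
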
-def select_inputs(decoder_inputs, action, local_step, is_training, is_quantized,
- get_alt_inputs=False):
- """Selects sequence from decoder_inputs based on 1D actions.
-
- Given multiple input batches, creates a single output batch by
- selecting from the action[i]-ith input for the i-th batch element.
-
- Args:
- decoder_inputs: A 2-D list of tensor inputs.
- action: A tensor of shape [batch_size]. Each element corresponds to an index
- of decoder_inputs to choose.
- local_step: The current timestep.
- is_training: boolean, whether the network is training. When using learned
- selection, attempts exploration if training.
- is_quantized: flag to enable/disable quantization mode.
- get_alt_inputs: Whether the non-chosen inputs should also be returned.
-
- Returns:
- The constructed output. Also outputs the elements that were not chosen
- if get_alt_inputs is True, otherwise None.
-
- Raises:
-    ValueError: if decoder_inputs contains anything other than two sequences.
- """
- num_seqs = len(decoder_inputs)
-  if num_seqs != 2:
- raise ValueError('Currently only supports two sets of inputs.')
- stacked_inputs = tf.stack(
- [decoder_inputs[seq_index][local_step] for seq_index in range(num_seqs)],
- axis=-1)
- action_index = tf.one_hot(action, num_seqs)
- selected_inputs = (
- lstm_utils.quantize_op(stacked_inputs * action_index, is_training,
- is_quantized, scope='quant_selected_inputs'))
- inputs = tf.reduce_sum(selected_inputs, axis=-1)
- inputs_alt = None
- # Only works for 2 models.
- if get_alt_inputs:
- # Reverse of action_index.
- action_index_alt = tf.one_hot(action, num_seqs, on_value=0.0, off_value=1.0)
- selected_inputs = (
- lstm_utils.quantize_op(stacked_inputs * action_index_alt, is_training,
- is_quantized, scope='quant_selected_inputs_alt'))
- inputs_alt = tf.reduce_sum(selected_inputs, axis=-1)
- return inputs, inputs_alt
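-
-# Selection sketch (illustrative): with the two sequences stacked on the last
-# axis, one_hot(action, 2) is [1., 0.] for action 0 and [0., 1.] for action 1;
-# multiplying and summing over that axis returns decoder_inputs[action] for
-# each batch element, with the non-chosen input zeroed out.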
-
-def select_state(previous_state, new_state, action):
- """Select state given action.
-
-  Currently only supports binary actions. If action is 0, the state was
-  generated by the large model, so we update the state. If action is 1, the
-  state was generated by the small model, and in the interleaved model we
-  skip this state update.
-
- Args:
- previous_state: A state tuple representing state from previous step.
- new_state: A state tuple representing newly computed state.
-    action: A tensor of the same shape as the state.
-
- Returns:
- A state tuple selected based on the given action.
- """
- action = tf.cast(action, tf.float32)
- state_c = previous_state[0] * action + new_state[0] * (1 - action)
- state_h = previous_state[1] * action + new_state[1] * (1 - action)
- return (state_c, state_h)
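-
-# Illustrative note: action broadcasts as a 0/1 mask, so each batch element
-# either takes the freshly computed (c, h) pair (action == 0, large-model
-# step) or keeps its previous pair (action == 1, small-model step).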
diff --git a/research/lstm_object_detection/lstm/rnn_decoder_test.py b/research/lstm_object_detection/lstm/rnn_decoder_test.py
deleted file mode 100644
index 480694f6fde57332b2f72357d5d6903ec7a12f87..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/lstm/rnn_decoder_test.py
+++ /dev/null
@@ -1,306 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for lstm_object_detection.lstm.rnn_decoder."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-
-from tensorflow.contrib import layers as contrib_layers
-from tensorflow.contrib import rnn as contrib_rnn
-from lstm_object_detection.lstm import rnn_decoder
-
-
-class MockRnnCell(contrib_rnn.RNNCell):
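-  """Fake RNN cell: output is concat(inputs, c); the new c is 2 * the old c."""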
-
- def __init__(self, input_size, num_units):
- self._input_size = input_size
- self._num_units = num_units
- self._filter_size = [3, 3]
-
- def __call__(self, inputs, state_tuple):
- outputs = tf.concat([inputs, state_tuple[0]], axis=3)
- new_state_tuple = (tf.multiply(state_tuple[0], 2), state_tuple[1])
- return outputs, new_state_tuple
-
- def state_size(self):
- return self._num_units
-
- def output_size(self):
- return self._input_size + self._num_units
-
- def pre_bottleneck(self, inputs, state, input_index):
- with tf.variable_scope('bottleneck_%d' % input_index, reuse=tf.AUTO_REUSE):
- inputs = contrib_layers.separable_conv2d(
- tf.concat([inputs, state], 3),
- self._input_size,
- self._filter_size,
- depth_multiplier=1,
- activation_fn=tf.nn.relu6,
- normalizer_fn=None)
- return inputs
-
-
-class RnnDecoderTest(tf.test.TestCase):
-
- def test_rnn_decoder_single_unroll(self):
- batch_size = 2
- num_unroll = 1
- num_units = 64
- width = 8
- height = 10
- input_channels = 128
-
- initial_state = tf.random_normal((batch_size, width, height, num_units))
- inputs = tf.random_normal([batch_size, width, height, input_channels])
-
- rnn_cell = MockRnnCell(input_channels, num_units)
- outputs, states = rnn_decoder.rnn_decoder(
- decoder_inputs=[inputs] * num_unroll,
- initial_state=(initial_state, initial_state),
- cell=rnn_cell)
-
- self.assertEqual(len(outputs), num_unroll)
- self.assertEqual(len(states), num_unroll)
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- results = sess.run((outputs, states, inputs, initial_state))
- outputs_results = results[0]
- states_results = results[1]
- inputs_results = results[2]
- initial_states_results = results[3]
- self.assertEqual(outputs_results[0].shape,
- (batch_size, width, height, input_channels + num_units))
- self.assertAllEqual(
- outputs_results[0],
- np.concatenate((inputs_results, initial_states_results), axis=3))
- self.assertEqual(states_results[0][0].shape,
- (batch_size, width, height, num_units))
- self.assertEqual(states_results[0][1].shape,
- (batch_size, width, height, num_units))
- self.assertAllEqual(states_results[0][0],
- np.multiply(initial_states_results, 2.0))
- self.assertAllEqual(states_results[0][1], initial_states_results)
-
- def test_rnn_decoder_multiple_unroll(self):
- batch_size = 2
- num_unroll = 3
- num_units = 64
- width = 8
- height = 10
- input_channels = 128
-
- initial_state = tf.random_normal((batch_size, width, height, num_units))
- inputs = tf.random_normal([batch_size, width, height, input_channels])
-
- rnn_cell = MockRnnCell(input_channels, num_units)
- outputs, states = rnn_decoder.rnn_decoder(
- decoder_inputs=[inputs] * num_unroll,
- initial_state=(initial_state, initial_state),
- cell=rnn_cell)
-
- self.assertEqual(len(outputs), num_unroll)
- self.assertEqual(len(states), num_unroll)
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- results = sess.run((outputs, states, inputs, initial_state))
- outputs_results = results[0]
- states_results = results[1]
- inputs_results = results[2]
- initial_states_results = results[3]
- for i in range(num_unroll):
- previous_state = ([initial_states_results, initial_states_results]
- if i == 0 else states_results[i - 1])
- self.assertEqual(
- outputs_results[i].shape,
- (batch_size, width, height, input_channels + num_units))
- self.assertAllEqual(
- outputs_results[i],
- np.concatenate((inputs_results, previous_state[0]), axis=3))
- self.assertEqual(states_results[i][0].shape,
- (batch_size, width, height, num_units))
- self.assertEqual(states_results[i][1].shape,
- (batch_size, width, height, num_units))
- self.assertAllEqual(states_results[i][0],
- np.multiply(previous_state[0], 2.0))
- self.assertAllEqual(states_results[i][1], previous_state[1])
-
-
-class MultiInputRnnDecoderTest(tf.test.TestCase):
-
- def test_rnn_decoder_single_unroll(self):
- batch_size = 2
- num_unroll = 1
- num_units = 12
- width = 8
- height = 10
- input_channels_large = 24
- input_channels_small = 12
- bottleneck_channels = 20
-
- initial_state_c = tf.random_normal((batch_size, width, height, num_units))
- initial_state_h = tf.random_normal((batch_size, width, height, num_units))
- initial_state = (initial_state_c, initial_state_h)
- inputs_large = tf.random_normal(
- [batch_size, width, height, input_channels_large])
- inputs_small = tf.random_normal(
- [batch_size, width, height, input_channels_small])
-
- rnn_cell = MockRnnCell(bottleneck_channels, num_units)
- outputs, states = rnn_decoder.multi_input_rnn_decoder(
- decoder_inputs=[[inputs_large] * num_unroll,
- [inputs_small] * num_unroll],
- initial_state=initial_state,
- cell=rnn_cell,
- sequence_step=tf.zeros([batch_size]),
- pre_bottleneck=True)
-
- self.assertEqual(len(outputs), num_unroll)
- self.assertEqual(len(states), num_unroll)
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- results = sess.run(
- (outputs, states, inputs_large, inputs_small, initial_state))
- outputs_results = results[0]
- states_results = results[1]
- initial_states_results = results[4]
- self.assertEqual(
- outputs_results[0].shape,
- (batch_size, width, height, bottleneck_channels + num_units))
- self.assertEqual(states_results[0][0].shape,
- (batch_size, width, height, num_units))
- self.assertEqual(states_results[0][1].shape,
- (batch_size, width, height, num_units))
- # The first step should always update state.
- self.assertAllEqual(states_results[0][0],
- np.multiply(initial_states_results[0], 2))
- self.assertAllEqual(states_results[0][1], initial_states_results[1])
-
- def test_rnn_decoder_multiple_unroll(self):
- batch_size = 2
- num_unroll = 3
- num_units = 12
- width = 8
- height = 10
- input_channels_large = 24
- input_channels_small = 12
- bottleneck_channels = 20
-
- initial_state_c = tf.random_normal((batch_size, width, height, num_units))
- initial_state_h = tf.random_normal((batch_size, width, height, num_units))
- initial_state = (initial_state_c, initial_state_h)
- inputs_large = tf.random_normal(
- [batch_size, width, height, input_channels_large])
- inputs_small = tf.random_normal(
- [batch_size, width, height, input_channels_small])
-
- rnn_cell = MockRnnCell(bottleneck_channels, num_units)
- outputs, states = rnn_decoder.multi_input_rnn_decoder(
- decoder_inputs=[[inputs_large] * num_unroll,
- [inputs_small] * num_unroll],
- initial_state=initial_state,
- cell=rnn_cell,
- sequence_step=tf.zeros([batch_size]),
- pre_bottleneck=True)
-
- self.assertEqual(len(outputs), num_unroll)
- self.assertEqual(len(states), num_unroll)
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- results = sess.run(
- (outputs, states, inputs_large, inputs_small, initial_state))
- outputs_results = results[0]
- states_results = results[1]
- initial_states_results = results[4]
-
- # The first step should always update state.
- self.assertAllEqual(states_results[0][0],
- np.multiply(initial_states_results[0], 2))
- self.assertAllEqual(states_results[0][1], initial_states_results[1])
- for i in range(num_unroll):
- self.assertEqual(
- outputs_results[i].shape,
- (batch_size, width, height, bottleneck_channels + num_units))
- self.assertEqual(states_results[i][0].shape,
- (batch_size, width, height, num_units))
- self.assertEqual(states_results[i][1].shape,
- (batch_size, width, height, num_units))
-
- def test_rnn_decoder_multiple_unroll_with_skip(self):
- batch_size = 2
- num_unroll = 5
- num_units = 12
- width = 8
- height = 10
- input_channels_large = 24
- input_channels_small = 12
- bottleneck_channels = 20
- skip = 2
-
- initial_state_c = tf.random_normal((batch_size, width, height, num_units))
- initial_state_h = tf.random_normal((batch_size, width, height, num_units))
- initial_state = (initial_state_c, initial_state_h)
- inputs_large = tf.random_normal(
- [batch_size, width, height, input_channels_large])
- inputs_small = tf.random_normal(
- [batch_size, width, height, input_channels_small])
-
- rnn_cell = MockRnnCell(bottleneck_channels, num_units)
- outputs, states = rnn_decoder.multi_input_rnn_decoder(
- decoder_inputs=[[inputs_large] * num_unroll,
- [inputs_small] * num_unroll],
- initial_state=initial_state,
- cell=rnn_cell,
- sequence_step=tf.zeros([batch_size]),
- pre_bottleneck=True,
- selection_strategy='SKIP%d' % skip)
-
- self.assertEqual(len(outputs), num_unroll)
- self.assertEqual(len(states), num_unroll)
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- results = sess.run(
- (outputs, states, inputs_large, inputs_small, initial_state))
- outputs_results = results[0]
- states_results = results[1]
- initial_states_results = results[4]
-
- for i in range(num_unroll):
- self.assertEqual(
- outputs_results[i].shape,
- (batch_size, width, height, bottleneck_channels + num_units))
- self.assertEqual(states_results[i][0].shape,
- (batch_size, width, height, num_units))
- self.assertEqual(states_results[i][1].shape,
- (batch_size, width, height, num_units))
-
- previous_state = (
- initial_states_results if i == 0 else states_results[i - 1])
- # State only updates during key frames
- if i % (skip + 1) == 0:
- self.assertAllEqual(states_results[i][0],
- np.multiply(previous_state[0], 2))
- self.assertAllEqual(states_results[i][1], previous_state[1])
- else:
- self.assertAllEqual(states_results[i][0], previous_state[0])
- self.assertAllEqual(states_results[i][1], previous_state[1])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/lstm/utils.py b/research/lstm_object_detection/lstm/utils.py
deleted file mode 100644
index 0c87db4bb208ece5102df327e5487fbffb2fe2ce..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/lstm/utils.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Quantization related ops for LSTM."""
-
-from __future__ import absolute_import
-from __future__ import division
-
-import tensorflow.compat.v1 as tf
-from tensorflow.contrib import framework as contrib_framework
-from tensorflow.contrib import layers as contrib_layers
-from tensorflow.python.training import moving_averages
-
-
-def _quant_var(
- name,
- initializer_val,
- vars_collection=tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
-):
- """Create an var for storing the min/max quantization range."""
- return contrib_framework.model_variable(
- name,
- shape=[],
- initializer=tf.constant_initializer(initializer_val),
- collections=[vars_collection],
- trainable=False)
-
-
-def quantizable_concat(inputs,
- axis,
- is_training,
- is_quantized=True,
- default_min=0,
- default_max=6,
- ema_decay=0.999,
- scope='quantized_concat'):
- """Concat replacement with quantization option.
-
-  Allows concat inputs to share the same min/max ranges; adapted from
-  experimental/gazelle/synthetic/model/tpu/utils.py.
-
- Args:
- inputs: list of tensors to concatenate.
- axis: dimension along which to concatenate.
- is_training: true if the graph is a training graph.
- is_quantized: flag to enable/disable quantization.
- default_min: default min value for fake quant op.
- default_max: default max value for fake quant op.
- ema_decay: the moving average decay for the quantization variables.
- scope: Optional scope for variable_scope.
-
- Returns:
- Tensor resulting from concatenation of input tensors
- """
- if is_quantized:
- with tf.variable_scope(scope):
- tf.logging.info('inputs: {}'.format(inputs))
- for t in inputs:
- tf.logging.info(t)
-
- min_var = _quant_var('min', default_min)
- max_var = _quant_var('max', default_max)
- if not is_training:
- # If we are building an eval graph just use the values in the variables.
- quant_inputs = [
- tf.fake_quant_with_min_max_vars(t, min_var, max_var) for t in inputs
- ]
- tf.logging.info('min_val: {}'.format(min_var))
- tf.logging.info('max_val: {}'.format(max_var))
- else:
- concat_tensors = tf.concat(inputs, axis=axis)
- tf.logging.info('concat_tensors: {}'.format(concat_tensors))
- # TFLite requires that 0.0 is always in the [min; max] range.
- range_min = tf.minimum(
- tf.reduce_min(concat_tensors), 0.0, name='SafeQuantRangeMin')
- range_max = tf.maximum(
- tf.reduce_max(concat_tensors), 0.0, name='SafeQuantRangeMax')
-      # Otherwise we need to keep track of the moving averages of the min and
-      # max of the elements of the input tensor.
- min_val = moving_averages.assign_moving_average(
- min_var,
- range_min,
- ema_decay,
- name='AssignMinEma')
- max_val = moving_averages.assign_moving_average(
- max_var,
- range_max,
- ema_decay,
- name='AssignMaxEma')
- tf.logging.info('min_val: {}'.format(min_val))
- tf.logging.info('max_val: {}'.format(max_val))
- quant_inputs = [
- tf.fake_quant_with_min_max_vars(t, min_val, max_val) for t in inputs
- ]
- tf.logging.info('quant_inputs: {}'.format(quant_inputs))
- outputs = tf.concat(quant_inputs, axis=axis)
- tf.logging.info('outputs: {}'.format(outputs))
- else:
- outputs = tf.concat(inputs, axis=axis)
- return outputs
-
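-# Usage sketch (illustrative; `branch_a` and `branch_b` are assumed tensors):
-# quantization-aware concat of two feature maps sharing one min/max range:
-#   fused = quantizable_concat([branch_a, branch_b], axis=3, is_training=True)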
-
-def quantizable_separable_conv2d(inputs,
- num_outputs,
- kernel_size,
- is_quantized=True,
- depth_multiplier=1,
- stride=1,
- activation_fn=tf.nn.relu6,
- normalizer_fn=None,
- weights_initializer=None,
- pointwise_initializer=None,
- scope=None):
- """Quantization friendly backward compatible separable conv2d.
-
-  This op has the same API as separable_conv2d. The main difference is that an
-  additional BiasAdd is manually inserted after the depthwise conv, so that
-  the depthwise bias does not have a name conflict with the pointwise bias.
-  The motivation for this op is that the quantization script needs a BiasAdd
-  in order to recognize the op, and a native call to separable_conv2d does
-  not create one for the depthwise conv.
-
- Args:
- inputs: A tensor of size [batch_size, height, width, channels].
-    num_outputs: The number of pointwise convolution output filters. If it is
-      None, the pointwise convolution stage is skipped.
- kernel_size: A list of length 2: [kernel_height, kernel_width] of the
- filters. Can be an int if both values are the same.
- is_quantized: flag to enable/disable quantization.
- depth_multiplier: The number of depthwise convolution output channels for
- each input channel. The total number of depthwise convolution output
- channels will be equal to num_filters_in * depth_multiplier.
- stride: A list of length 2: [stride_height, stride_width], specifying the
- depthwise convolution stride. Can be an int if both strides are the same.
- activation_fn: Activation function. The default value is a ReLU function.
- Explicitly set it to None to skip it and maintain a linear activation.
- normalizer_fn: Normalization function to use instead of biases.
- weights_initializer: An initializer for the depthwise weights.
- pointwise_initializer: An initializer for the pointwise weights.
- scope: Optional scope for variable_scope.
-
- Returns:
-    Tensor resulting from the separable convolution.
- """
- if is_quantized:
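-    # Note: in this quantized path the depthwise stage runs with stride 1 and
-    # the requested stride is applied by the 1x1 pointwise conv below; the
-    # non-quantized path strides in the depthwise conv instead.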
- outputs = contrib_layers.separable_conv2d(
- inputs,
- None,
- kernel_size,
- depth_multiplier=depth_multiplier,
- stride=1,
- activation_fn=None,
- normalizer_fn=None,
- biases_initializer=None,
- weights_initializer=weights_initializer,
- pointwise_initializer=None,
- scope=scope)
- outputs = contrib_layers.bias_add(
- outputs, trainable=True, scope='%s_bias' % scope)
- outputs = contrib_layers.conv2d(
- outputs,
- num_outputs, [1, 1],
- activation_fn=activation_fn,
- stride=stride,
- normalizer_fn=normalizer_fn,
- weights_initializer=pointwise_initializer,
- scope=scope)
- else:
- outputs = contrib_layers.separable_conv2d(
- inputs,
- num_outputs,
- kernel_size,
- depth_multiplier=depth_multiplier,
- stride=stride,
- activation_fn=activation_fn,
- normalizer_fn=normalizer_fn,
- weights_initializer=weights_initializer,
- pointwise_initializer=pointwise_initializer,
- scope=scope)
- return outputs
-
-
-def quantize_op(inputs,
- is_training=True,
- is_quantized=True,
- default_min=0,
- default_max=6,
- ema_decay=0.999,
- scope='quant'):
- """Inserts a fake quantization op after inputs.
-
- Args:
- inputs: A tensor of size [batch_size, height, width, channels].
- is_training: true if the graph is a training graph.
- is_quantized: flag to enable/disable quantization.
- default_min: default min value for fake quant op.
- default_max: default max value for fake quant op.
- ema_decay: the moving average decay for the quantization variables.
- scope: Optional scope for variable_scope.
-
- Returns:
- Tensor resulting from quantizing the input tensors.
- """
- if not is_quantized:
- return inputs
-
- with tf.variable_scope(scope):
- min_var = _quant_var('min', default_min)
- max_var = _quant_var('max', default_max)
- if not is_training:
- # Just use variables in the checkpoint.
- return tf.fake_quant_with_min_max_vars(inputs, min_var, max_var)
-
- # While training, collect EMAs of ranges seen, store in min_var, max_var.
- # TFLite requires that 0.0 is always in the [min; max] range.
- range_min = tf.minimum(tf.reduce_min(inputs), 0.0, 'SafeQuantRangeMin')
- # We set the lower_bound of max_range to prevent range collapse.
- range_max = tf.maximum(tf.reduce_max(inputs), 1e-5, 'SafeQuantRangeMax')
- min_val = moving_averages.assign_moving_average(
- min_var, range_min, ema_decay, name='AssignMinEma')
- max_val = moving_averages.assign_moving_average(
- max_var, range_max, ema_decay, name='AssignMaxEma')
- return tf.fake_quant_with_min_max_vars(inputs, min_val, max_val)
-
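-# Usage sketch (illustrative; `net` is an assumed activation tensor):
-#   net = quantize_op(net, is_training=True, scope='quant_act')
-# inserts a fake-quant op whose range is tracked by EMAs during training.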
-
-def fixed_quantize_op(inputs, is_quantized=True,
- fixed_min=0.0, fixed_max=6.0, scope='quant'):
- """Inserts a fake quantization op with fixed range after inputs.
-
- Args:
- inputs: A tensor of size [batch_size, height, width, channels].
- is_quantized: flag to enable/disable quantization.
- fixed_min: fixed min value for fake quant op.
- fixed_max: fixed max value for fake quant op.
- scope: Optional scope for variable_scope.
-
- Returns:
- Tensor resulting from quantizing the input tensors.
- """
- if not is_quantized:
- return inputs
-
- with tf.variable_scope(scope):
- # Just use fixed quantization range.
- return tf.fake_quant_with_min_max_args(inputs, fixed_min, fixed_max)
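-
-# Illustrative note: the default fixed range [0.0, 6.0] matches a ReLU6
-# activation, e.g. net = fixed_quantize_op(tf.nn.relu6(net), scope='quant6').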
diff --git a/research/lstm_object_detection/lstm/utils_test.py b/research/lstm_object_detection/lstm/utils_test.py
deleted file mode 100644
index f5f5bc75db8f7e7be44fc15898598e5179e51236..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/lstm/utils_test.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for lstm_object_detection.lstm.utils."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow.compat.v1 as tf
-from lstm_object_detection.lstm import utils
-
-
-class QuantizableUtilsTest(tf.test.TestCase):
-
- def test_quantizable_concat_is_training(self):
- inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
- inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
- concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
- axis=3,
- is_training=True)
- self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
- self._check_min_max_ema(tf.get_default_graph())
- self._check_min_max_vars(tf.get_default_graph())
-
- def test_quantizable_concat_inference(self):
- inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
- inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
- concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
- axis=3,
- is_training=False)
- self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
- self._check_no_min_max_ema(tf.get_default_graph())
- self._check_min_max_vars(tf.get_default_graph())
-
- def test_quantizable_concat_not_quantized_is_training(self):
- inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
- inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
- concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
- axis=3,
- is_training=True,
- is_quantized=False)
- self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
- self._check_no_min_max_ema(tf.get_default_graph())
- self._check_no_min_max_vars(tf.get_default_graph())
-
- def test_quantizable_concat_not_quantized_inference(self):
- inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
- inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
- concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
- axis=3,
- is_training=False,
- is_quantized=False)
- self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
- self._check_no_min_max_ema(tf.get_default_graph())
- self._check_no_min_max_vars(tf.get_default_graph())
-
- def test_quantize_op_is_training(self):
- inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
- outputs = utils.quantize_op(inputs)
- self.assertAllEqual(inputs.shape.as_list(), outputs.shape.as_list())
- self._check_min_max_ema(tf.get_default_graph())
- self._check_min_max_vars(tf.get_default_graph())
-
- def test_quantize_op_inference(self):
- inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
- outputs = utils.quantize_op(inputs, is_training=False)
- self.assertAllEqual(inputs.shape.as_list(), outputs.shape.as_list())
- self._check_no_min_max_ema(tf.get_default_graph())
- self._check_min_max_vars(tf.get_default_graph())
-
- def test_fixed_quantize_op(self):
- inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
- outputs = utils.fixed_quantize_op(inputs)
- self.assertAllEqual(inputs.shape.as_list(), outputs.shape.as_list())
- self._check_no_min_max_ema(tf.get_default_graph())
- self._check_no_min_max_vars(tf.get_default_graph())
-
- def _check_min_max_vars(self, graph):
- op_types = [op.type for op in graph.get_operations()]
- self.assertTrue(
- any('FakeQuantWithMinMaxVars' in op_type for op_type in op_types))
-
- def _check_min_max_ema(self, graph):
- op_names = [op.name for op in graph.get_operations()]
- self.assertTrue(any('AssignMinEma' in name for name in op_names))
- self.assertTrue(any('AssignMaxEma' in name for name in op_names))
- self.assertTrue(any('SafeQuantRangeMin' in name for name in op_names))
- self.assertTrue(any('SafeQuantRangeMax' in name for name in op_names))
-
- def _check_no_min_max_vars(self, graph):
- op_types = [op.type for op in graph.get_operations()]
- self.assertFalse(
- any('FakeQuantWithMinMaxVars' in op_type for op_type in op_types))
-
- def _check_no_min_max_ema(self, graph):
- op_names = [op.name for op in graph.get_operations()]
- self.assertFalse(any('AssignMinEma' in name for name in op_names))
- self.assertFalse(any('AssignMaxEma' in name for name in op_names))
- self.assertFalse(any('SafeQuantRangeMin' in name for name in op_names))
- self.assertFalse(any('SafeQuantRangeMax' in name for name in op_names))
-
-
-class QuantizableSeparableConv2dTest(tf.test.TestCase):
-
- def test_quantizable_separable_conv2d(self):
- inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
- num_outputs = 64
- kernel_size = [3, 3]
- scope = 'QuantSeparable'
- outputs = utils.quantizable_separable_conv2d(
- inputs, num_outputs, kernel_size, scope=scope)
- self.assertAllEqual([4, 10, 10, num_outputs], outputs.shape.as_list())
- self._check_depthwise_bias_add(tf.get_default_graph(), scope)
-
- def test_quantizable_separable_conv2d_not_quantized(self):
- inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
- num_outputs = 64
- kernel_size = [3, 3]
- scope = 'QuantSeparable'
- outputs = utils.quantizable_separable_conv2d(
- inputs, num_outputs, kernel_size, is_quantized=False, scope=scope)
- self.assertAllEqual([4, 10, 10, num_outputs], outputs.shape.as_list())
- self._check_no_depthwise_bias_add(tf.get_default_graph(), scope)
-
- def _check_depthwise_bias_add(self, graph, scope):
- op_names = [op.name for op in graph.get_operations()]
- self.assertTrue(
- any('%s_bias/BiasAdd' % scope in name for name in op_names))
-
- def _check_no_depthwise_bias_add(self, graph, scope):
- op_names = [op.name for op in graph.get_operations()]
- self.assertFalse(
- any('%s_bias/BiasAdd' % scope in name for name in op_names))
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/meta_architectures/__init__.py b/research/lstm_object_detection/meta_architectures/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/meta_architectures/lstm_ssd_meta_arch.py b/research/lstm_object_detection/meta_architectures/lstm_ssd_meta_arch.py
deleted file mode 100644
index 22edc97ee348df8a4a4ce8b885a4df6a6b891072..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/meta_architectures/lstm_ssd_meta_arch.py
+++ /dev/null
@@ -1,463 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""LSTM SSD Meta-architecture definition.
-
-General tensorflow implementation of convolutional Multibox/SSD detection
-models with LSTM states, for use on video data. This implementation supports
-both regular LSTM-SSD and interleaved LSTM-SSD framework.
-
-See https://arxiv.org/abs/1711.06368 and https://arxiv.org/abs/1903.10172
-for details.
-"""
-import abc
-import re
-import tensorflow.compat.v1 as tf
-
-from object_detection.core import box_list_ops
-from object_detection.core import matcher
-from object_detection.core import standard_fields as fields
-from object_detection.meta_architectures import ssd_meta_arch
-from object_detection.utils import ops
-from object_detection.utils import shape_utils
-
-
-class LSTMSSDMetaArch(ssd_meta_arch.SSDMetaArch):
- """LSTM Meta-architecture definition."""
-
- def __init__(self,
- is_training,
- anchor_generator,
- box_predictor,
- box_coder,
- feature_extractor,
- encode_background_as_zeros,
- image_resizer_fn,
- non_max_suppression_fn,
- score_conversion_fn,
- classification_loss,
- localization_loss,
- classification_loss_weight,
- localization_loss_weight,
- normalize_loss_by_num_matches,
- hard_example_miner,
- unroll_length,
- target_assigner_instance,
- add_summaries=True):
- super(LSTMSSDMetaArch, self).__init__(
- is_training=is_training,
- anchor_generator=anchor_generator,
- box_predictor=box_predictor,
- box_coder=box_coder,
- feature_extractor=feature_extractor,
- encode_background_as_zeros=encode_background_as_zeros,
- image_resizer_fn=image_resizer_fn,
- non_max_suppression_fn=non_max_suppression_fn,
- score_conversion_fn=score_conversion_fn,
- classification_loss=classification_loss,
- localization_loss=localization_loss,
- classification_loss_weight=classification_loss_weight,
- localization_loss_weight=localization_loss_weight,
- normalize_loss_by_num_matches=normalize_loss_by_num_matches,
- hard_example_miner=hard_example_miner,
- target_assigner_instance=target_assigner_instance,
- add_summaries=add_summaries)
- self._unroll_length = unroll_length
-
- @property
- def unroll_length(self):
- return self._unroll_length
-
- @unroll_length.setter
- def unroll_length(self, unroll_length):
- self._unroll_length = unroll_length
-
- def predict(self, preprocessed_inputs, true_image_shapes, states=None,
- state_name='lstm_state', feature_scope=None):
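-    """Predicts boxes and class scores for a batch of unrolled video frames.
-
-    Extracts LSTM features from the preprocessed inputs and applies the box
-    predictor to the resulting feature maps; see the parent class predict()
-    for the full argument and return value contract.
-    """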
- with tf.variable_scope(self._extract_features_scope,
- values=[preprocessed_inputs], reuse=tf.AUTO_REUSE):
- feature_maps = self._feature_extractor.extract_features(
- preprocessed_inputs, states, state_name,
- unroll_length=self._unroll_length, scope=feature_scope)
- feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
- image_shape = shape_utils.combined_static_and_dynamic_shape(
- preprocessed_inputs)
-    # Use integer division so the batch size stays an int under Python 3.
-    self._batch_size = (
-        preprocessed_inputs.shape[0].value // self._unroll_length)
- self._states = states
- anchors = self._anchor_generator.generate(feature_map_spatial_dims,
- im_height=image_shape[1],
- im_width=image_shape[2])
- with tf.variable_scope('MultipleGridAnchorGenerator', reuse=tf.AUTO_REUSE):
- self._anchors = box_list_ops.concatenate(anchors)
- prediction_dict = self._box_predictor.predict(
- feature_maps, self._anchor_generator.num_anchors_per_location())
- with tf.variable_scope('Loss', reuse=tf.AUTO_REUSE):
- box_encodings = tf.concat(prediction_dict['box_encodings'], axis=1)
- if box_encodings.shape.ndims == 4 and box_encodings.shape[2] == 1:
- box_encodings = tf.squeeze(box_encodings, axis=2)
- class_predictions_with_background = tf.concat(
- prediction_dict['class_predictions_with_background'], axis=1)
- predictions_dict = {
- 'preprocessed_inputs': preprocessed_inputs,
- 'box_encodings': box_encodings,
- 'class_predictions_with_background': class_predictions_with_background,
- 'feature_maps': feature_maps,
- 'anchors': self._anchors.get(),
- 'states_and_outputs': self._feature_extractor.states_and_outputs,
- }
-    # In cases such as exporting the model, the state is always zero, so the
-    # step should be ignored.
- if states is not None:
- predictions_dict['step'] = self._feature_extractor.step
- return predictions_dict
-
- def loss(self, prediction_dict, true_image_shapes, scope=None):
- """Computes scalar loss tensors with respect to provided groundtruth.
-
- Calling this function requires that groundtruth tensors have been
- provided via the provide_groundtruth function.
-
- Args:
- prediction_dict: a dictionary holding prediction tensors with
- 1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
- box_code_dimension] containing predicted boxes.
- 2) class_predictions_with_background: 3-D float tensor of shape
- [batch_size, num_anchors, num_classes+1] containing class predictions
- (logits) for each of the anchors. Note that this tensor *includes*
- background class predictions.
- true_image_shapes: int32 tensor of shape [batch, 3] where each row is
- of the form [height, width, channels] indicating the shapes
- of true images in the resized images, as resized images can be padded
- with zeros.
- scope: Optional scope name.
-
- Returns:
- a dictionary mapping loss keys (`localization_loss` and
- `classification_loss`) to scalar tensors representing corresponding loss
- values.
- """
- with tf.name_scope(scope, 'Loss', prediction_dict.values()):
- keypoints = None
- if self.groundtruth_has_field(fields.BoxListFields.keypoints):
- keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints)
- weights = None
- if self.groundtruth_has_field(fields.BoxListFields.weights):
- weights = self.groundtruth_lists(fields.BoxListFields.weights)
- (batch_cls_targets, batch_cls_weights, batch_reg_targets,
- batch_reg_weights, batch_match) = self._assign_targets(
- self.groundtruth_lists(fields.BoxListFields.boxes),
- self.groundtruth_lists(fields.BoxListFields.classes),
- keypoints, weights)
- match_list = [matcher.Match(match) for match in tf.unstack(batch_match)]
- if self._add_summaries:
- self._summarize_target_assignment(
- self.groundtruth_lists(fields.BoxListFields.boxes), match_list)
- location_losses = self._localization_loss(
- prediction_dict['box_encodings'],
- batch_reg_targets,
- ignore_nan_targets=True,
- weights=batch_reg_weights)
- cls_losses = ops.reduce_sum_trailing_dimensions(
- self._classification_loss(
- prediction_dict['class_predictions_with_background'],
- batch_cls_targets,
- weights=batch_cls_weights),
- ndims=2)
-
- if self._hard_example_miner:
- (loc_loss_list, cls_loss_list) = self._apply_hard_mining(
- location_losses, cls_losses, prediction_dict, match_list)
- localization_loss = tf.reduce_sum(tf.stack(loc_loss_list))
- classification_loss = tf.reduce_sum(tf.stack(cls_loss_list))
-
- if self._add_summaries:
- self._hard_example_miner.summarize()
- else:
- if self._add_summaries:
- class_ids = tf.argmax(batch_cls_targets, axis=2)
- flattened_class_ids = tf.reshape(class_ids, [-1])
- flattened_classification_losses = tf.reshape(cls_losses, [-1])
- self._summarize_anchor_classification_loss(
- flattened_class_ids, flattened_classification_losses)
- localization_loss = tf.reduce_sum(location_losses)
- classification_loss = tf.reduce_sum(cls_losses)
-
- # Optionally normalize by number of positive matches
- normalizer = tf.constant(1.0, dtype=tf.float32)
- if self._normalize_loss_by_num_matches:
- normalizer = tf.maximum(tf.to_float(tf.reduce_sum(batch_reg_weights)),
- 1.0)
-
- with tf.name_scope('localization_loss'):
- localization_loss_normalizer = normalizer
- if self._normalize_loc_loss_by_codesize:
- localization_loss_normalizer *= self._box_coder.code_size
- localization_loss = ((self._localization_loss_weight / (
- localization_loss_normalizer)) * localization_loss)
- with tf.name_scope('classification_loss'):
- classification_loss = ((self._classification_loss_weight / normalizer) *
- classification_loss)
-
- loss_dict = {
- 'localization_loss': localization_loss,
- 'classification_loss': classification_loss
- }
- return loss_dict
-
- def restore_map(self, fine_tune_checkpoint_type='lstm'):
- """Returns a map of variables to load from a foreign checkpoint.
-
- See parent class for details.
-
- Args:
-      fine_tune_checkpoint_type: the type of checkpoint to restore from: either
-        an SSD/LSTM detection checkpoint (with compatible variable names) or a
-        classification checkpoint for initialization prior to training.
-        Available options: `classification`, `detection`, `interleaved`,
-        `interleaved_pretrain`, and `lstm`.
-
- Returns:
- A dict mapping variable names (to load from a checkpoint) to variables in
- the model graph.
- Raises:
-      ValueError: if fine_tune_checkpoint_type is not among `classification`,
-        `detection`, `interleaved`, `interleaved_pretrain`, or `lstm`.
- """
- if fine_tune_checkpoint_type not in [
- 'classification', 'detection', 'interleaved', 'lstm',
- 'interleaved_pretrain'
- ]:
-      raise ValueError('Unsupported fine_tune_checkpoint_type: {}'.format(
- fine_tune_checkpoint_type))
-
- self._restored_networks += 1
- base_network_scope = self.get_base_network_scope()
- if base_network_scope:
- scope_to_replace = '{0}_{1}'.format(base_network_scope,
- self._restored_networks)
-
- interleaved_model = False
- for variable in tf.global_variables():
- if scope_to_replace in variable.op.name:
- interleaved_model = True
- break
-
- variables_to_restore = {}
- for variable in tf.global_variables():
- var_name = variable.op.name
- if 'global_step' in var_name:
- continue
-
- # Remove FeatureExtractor prefix for classification checkpoints.
- if (fine_tune_checkpoint_type == 'classification' or
- fine_tune_checkpoint_type == 'interleaved_pretrain'):
- var_name = (
- re.split('^' + self._extract_features_scope + '/', var_name)[-1])
-
- # When loading from single frame detection checkpoints, we need to
- # remap FeatureMaps variable names.
- if ('FeatureMaps' in var_name and
- fine_tune_checkpoint_type == 'detection'):
- var_name = var_name.replace('FeatureMaps',
- self.get_base_network_scope())
-
- # Load interleaved checkpoint specifically.
- if interleaved_model: # Interleaved LSTD.
- if 'interleaved' in fine_tune_checkpoint_type:
- variables_to_restore[var_name] = variable
- else:
- # Restore non-base layers from the first checkpoint only.
- if self._restored_networks == 1:
- if base_network_scope + '_' not in var_name: # LSTM and FeatureMap
- variables_to_restore[var_name] = variable
- if scope_to_replace in var_name:
- var_name = var_name.replace(scope_to_replace, base_network_scope)
- variables_to_restore[var_name] = variable
- else:
- # Restore from the first model of interleaved checkpoints
- if 'interleaved' in fine_tune_checkpoint_type:
- var_name = var_name.replace(self.get_base_network_scope(),
- self.get_base_network_scope() + '_1', 1)
-
- variables_to_restore[var_name] = variable
-
- return variables_to_restore
-
- def get_base_network_scope(self):
- """Returns the variable scope of the base network.
-
- Returns:
- The variable scope of the feature extractor base network, e.g. MobilenetV1
- """
- return self._feature_extractor.get_base_network_scope()
-
-
-class LSTMSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
- """LSTM SSD Meta-architecture Feature Extractor definition."""
-
- __metaclass__ = abc.ABCMeta
-
- @property
- def clip_state(self):
- return self._clip_state
-
- @clip_state.setter
- def clip_state(self, clip_state):
- self._clip_state = clip_state
-
- @property
- def depth_multipliers(self):
- return self._depth_multipliers
-
- @depth_multipliers.setter
- def depth_multipliers(self, depth_multipliers):
- self._depth_multipliers = depth_multipliers
-
- @property
- def lstm_state_depth(self):
- return self._lstm_state_depth
-
- @lstm_state_depth.setter
- def lstm_state_depth(self, lstm_state_depth):
- self._lstm_state_depth = lstm_state_depth
-
- @property
- def is_quantized(self):
- return self._is_quantized
-
- @is_quantized.setter
- def is_quantized(self, is_quantized):
- self._is_quantized = is_quantized
-
- @property
- def interleaved(self):
- return False
-
- @property
- def states_and_outputs(self):
- """LSTM states and outputs.
-
- This variable includes both LSTM states {C_t} and outputs {h_t}.
-
- Returns:
- states_and_outputs: A list of 4-D float tensors, including the lstm state
- and output at each timestep.
- """
- return self._states_out
-
- @property
- def step(self):
- return self._step
-
- def preprocess(self, resized_inputs):
- """SSD preprocessing.
-
- Maps pixel values to the range [-1, 1].
-
- Args:
- resized_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
-
- Returns:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of images.
- """
- return (2.0 / 255.0) * resized_inputs - 1.0
-
- def get_base_network_scope(self):
- """Returns the variable scope of the base network.
-
- Returns:
- The variable scope of the base network, e.g. MobilenetV1
- """
- return self._base_network_scope
-
- @abc.abstractmethod
- def create_lstm_cell(self, batch_size, output_size, state_saver, state_name):
- """Create the LSTM cell, and initialize state if necessary.
-
- Args:
- batch_size: input batch size.
- output_size: output size of the lstm cell, [width, height].
- state_saver: a state saver object with methods `state` and `save_state`.
- state_name: string, the name to use with the state_saver.
- Returns:
- lstm_cell: the lstm cell unit.
- init_state: initial state representations.
-      step: the current step tensor.
- """
- pass
-
-
-class LSTMSSDInterleavedFeatureExtractor(LSTMSSDFeatureExtractor):
- """LSTM SSD Meta-architecture Interleaved Feature Extractor definition."""
-
- __metaclass__ = abc.ABCMeta
-
- @property
- def pre_bottleneck(self):
- return self._pre_bottleneck
-
- @pre_bottleneck.setter
- def pre_bottleneck(self, pre_bottleneck):
- self._pre_bottleneck = pre_bottleneck
-
- @property
- def low_res(self):
- return self._low_res
-
- @low_res.setter
- def low_res(self, low_res):
- self._low_res = low_res
-
- @property
- def interleaved(self):
- return True
-
- @property
- def interleave_method(self):
- return self._interleave_method
-
- @interleave_method.setter
- def interleave_method(self, interleave_method):
- self._interleave_method = interleave_method
-
- @abc.abstractmethod
- def extract_base_features_large(self, preprocessed_inputs):
- """Extract the large base model features.
-
- Args:
- preprocessed_inputs: preprocessed input images of shape:
- [batch, width, height, depth].
-
- Returns:
- net: the last feature map created from the base feature extractor.
- end_points: a dictionary of feature maps created.
- """
- pass
-
- @abc.abstractmethod
- def extract_base_features_small(self, preprocessed_inputs):
- """Extract the small base model features.
-
- Args:
- preprocessed_inputs: preprocessed input images of shape:
- [batch, width, height, depth].
-
- Returns:
- net: the last feature map created from the base feature extractor.
- end_points: a dictionary of feature maps created.
- """
- pass
diff --git a/research/lstm_object_detection/meta_architectures/lstm_ssd_meta_arch_test.py b/research/lstm_object_detection/meta_architectures/lstm_ssd_meta_arch_test.py
deleted file mode 100644
index 03e8a1274603806c19bc36ad09022c9b4d6ca91b..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/meta_architectures/lstm_ssd_meta_arch_test.py
+++ /dev/null
@@ -1,320 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for meta_architectures.lstm_ssd_meta_arch."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-
-from lstm_object_detection.lstm import lstm_cells
-from lstm_object_detection.meta_architectures import lstm_ssd_meta_arch
-from object_detection.core import anchor_generator
-from object_detection.core import box_list
-from object_detection.core import losses
-from object_detection.core import post_processing
-from object_detection.core import region_similarity_calculator as sim_calc
-from object_detection.core import standard_fields as fields
-from object_detection.core import target_assigner
-from object_detection.models import feature_map_generators
-from object_detection.utils import test_case
-from object_detection.utils import test_utils
-
-
-MAX_TOTAL_NUM_BOXES = 5
-NUM_CLASSES = 1
-
-
-class FakeLSTMFeatureExtractor(
- lstm_ssd_meta_arch.LSTMSSDFeatureExtractor):
-
- def __init__(self):
- super(FakeLSTMFeatureExtractor, self).__init__(
- is_training=True,
- depth_multiplier=1.0,
- min_depth=0,
- pad_to_multiple=1,
- conv_hyperparams_fn=self.scope_fn)
- self._lstm_state_depth = 256
-
- def scope_fn(self):
- with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu6) as sc:
- return sc
-
- def create_lstm_cell(self):
- pass
-
- def extract_features(self, preprocessed_inputs, state_saver=None,
- state_name='lstm_state', unroll_length=5, scope=None):
- with tf.variable_scope('mock_model'):
- net = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32,
- kernel_size=1, scope='layer1')
- image_features = {'last_layer': net}
-
- self._states_out = {}
- feature_map_layout = {
- 'from_layer': ['last_layer'],
- 'layer_depth': [-1],
- 'use_explicit_padding': self._use_explicit_padding,
- 'use_depthwise': self._use_depthwise,
- }
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=feature_map_layout,
- depth_multiplier=(self._depth_multiplier),
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features)
- return list(feature_maps.values())
-
-
-class FakeLSTMInterleavedFeatureExtractor(
- lstm_ssd_meta_arch.LSTMSSDInterleavedFeatureExtractor):
-
- def __init__(self):
- super(FakeLSTMInterleavedFeatureExtractor, self).__init__(
- is_training=True,
- depth_multiplier=1.0,
- min_depth=0,
- pad_to_multiple=1,
- conv_hyperparams_fn=self.scope_fn)
- self._lstm_state_depth = 256
-
- def scope_fn(self):
- with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu6) as sc:
- return sc
-
- def create_lstm_cell(self):
- pass
-
- def extract_base_features_large(self, preprocessed_inputs):
- with tf.variable_scope('base_large'):
- net = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32,
- kernel_size=1, scope='layer1')
- return net
-
- def extract_base_features_small(self, preprocessed_inputs):
- with tf.variable_scope('base_small'):
- net = slim.conv2d(inputs=preprocessed_inputs, num_outputs=32,
- kernel_size=1, scope='layer1')
- return net
-
- def extract_features(self, preprocessed_inputs, state_saver=None,
- state_name='lstm_state', unroll_length=5, scope=None):
- with tf.variable_scope('mock_model'):
- net_large = self.extract_base_features_large(preprocessed_inputs)
- net_small = self.extract_base_features_small(preprocessed_inputs)
- net = slim.conv2d(
- inputs=tf.concat([net_large, net_small], axis=3),
- num_outputs=32,
- kernel_size=1,
- scope='layer1')
- image_features = {'last_layer': net}
-
- self._states_out = {}
- feature_map_layout = {
- 'from_layer': ['last_layer'],
- 'layer_depth': [-1],
- 'use_explicit_padding': self._use_explicit_padding,
- 'use_depthwise': self._use_depthwise,
- }
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=feature_map_layout,
- depth_multiplier=(self._depth_multiplier),
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features)
- return list(feature_maps.values())
-
-
-class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
- """Sets up a simple 2x2 anchor grid on the unit square."""
-
- def name_scope(self):
- return 'MockAnchorGenerator'
-
- def num_anchors_per_location(self):
- return [1]
-
- def _generate(self, feature_map_shape_list, im_height, im_width):
- return [box_list.BoxList(
- tf.constant([[0, 0, .5, .5],
- [0, .5, .5, 1],
- [.5, 0, 1, .5],
- [1., 1., 1.5, 1.5] # Anchor that is outside clip_window.
- ], tf.float32))]
-
- def num_anchors(self):
- return 4
-
-
-class LSTMSSDMetaArchTest(test_case.TestCase):
-
- def _create_model(self,
- interleaved=False,
- apply_hard_mining=True,
- normalize_loc_loss_by_codesize=False,
- add_background_class=True,
- random_example_sampling=False,
- use_expected_classification_loss_under_sampling=False,
- min_num_negative_samples=1,
- desired_negative_sampling_ratio=3,
- unroll_length=1):
- num_classes = NUM_CLASSES
- is_training = False
- mock_anchor_generator = MockAnchorGenerator2x2()
- mock_box_predictor = test_utils.MockBoxPredictor(is_training, num_classes)
- mock_box_coder = test_utils.MockBoxCoder()
- if interleaved:
- fake_feature_extractor = FakeLSTMInterleavedFeatureExtractor()
- else:
- fake_feature_extractor = FakeLSTMFeatureExtractor()
- mock_matcher = test_utils.MockMatcher()
- region_similarity_calculator = sim_calc.IouSimilarity()
- encode_background_as_zeros = False
- def image_resizer_fn(image):
- return [tf.identity(image), tf.shape(image)]
-
- classification_loss = losses.WeightedSigmoidClassificationLoss()
- localization_loss = losses.WeightedSmoothL1LocalizationLoss()
- non_max_suppression_fn = functools.partial(
- post_processing.batch_multiclass_non_max_suppression,
- score_thresh=-20.0,
- iou_thresh=1.0,
- max_size_per_class=5,
- max_total_size=MAX_TOTAL_NUM_BOXES)
- classification_loss_weight = 1.0
- localization_loss_weight = 1.0
- negative_class_weight = 1.0
- normalize_loss_by_num_matches = False
-
- hard_example_miner = None
- if apply_hard_mining:
- # This hard example miner is expected to be a no-op.
- hard_example_miner = losses.HardExampleMiner(
- num_hard_examples=None,
- iou_threshold=1.0)
-
- target_assigner_instance = target_assigner.TargetAssigner(
- region_similarity_calculator,
- mock_matcher,
- mock_box_coder,
- negative_class_weight=negative_class_weight)
-
- code_size = 4
- model = lstm_ssd_meta_arch.LSTMSSDMetaArch(
- is_training=is_training,
- anchor_generator=mock_anchor_generator,
- box_predictor=mock_box_predictor,
- box_coder=mock_box_coder,
- feature_extractor=fake_feature_extractor,
- encode_background_as_zeros=encode_background_as_zeros,
- image_resizer_fn=image_resizer_fn,
- non_max_suppression_fn=non_max_suppression_fn,
- score_conversion_fn=tf.identity,
- classification_loss=classification_loss,
- localization_loss=localization_loss,
- classification_loss_weight=classification_loss_weight,
- localization_loss_weight=localization_loss_weight,
- normalize_loss_by_num_matches=normalize_loss_by_num_matches,
- hard_example_miner=hard_example_miner,
- unroll_length=unroll_length,
- target_assigner_instance=target_assigner_instance,
- add_summaries=False)
- return model, num_classes, mock_anchor_generator.num_anchors(), code_size
-
- def _get_value_for_matching_key(self, dictionary, suffix):
- for key in dictionary.keys():
- if key.endswith(suffix):
- return dictionary[key]
- raise ValueError('key not found {}'.format(suffix))
-
- def test_predict_returns_correct_items_and_sizes(self):
- batch_size = 3
- height = width = 2
- num_unroll = 1
-
- graph = tf.Graph()
- with graph.as_default():
- model, num_classes, num_anchors, code_size = self._create_model()
- preprocessed_images = tf.random_uniform(
- [batch_size * num_unroll, height, width, 3],
- minval=-1.,
- maxval=1.)
- true_image_shapes = tf.tile(
- [[height, width, 3]], [batch_size, 1])
- prediction_dict = model.predict(preprocessed_images, true_image_shapes)
-
- self.assertIn('preprocessed_inputs', prediction_dict)
- self.assertIn('box_encodings', prediction_dict)
- self.assertIn('class_predictions_with_background', prediction_dict)
- self.assertIn('feature_maps', prediction_dict)
- self.assertIn('anchors', prediction_dict)
- self.assertAllEqual(
- [batch_size * num_unroll, height, width, 3],
- prediction_dict['preprocessed_inputs'].shape.as_list())
- self.assertAllEqual(
- [batch_size * num_unroll, num_anchors, code_size],
- prediction_dict['box_encodings'].shape.as_list())
- self.assertAllEqual(
- [batch_size * num_unroll, num_anchors, num_classes + 1],
- prediction_dict['class_predictions_with_background'].shape.as_list())
- self.assertAllEqual(
- [num_anchors, code_size],
- prediction_dict['anchors'].shape.as_list())
-
- def test_interleaved_predict_returns_correct_items_and_sizes(self):
- batch_size = 3
- height = width = 2
- num_unroll = 1
-
- graph = tf.Graph()
- with graph.as_default():
- model, num_classes, num_anchors, code_size = self._create_model(
- interleaved=True)
- preprocessed_images = tf.random_uniform(
- [batch_size * num_unroll, height, width, 3],
- minval=-1.,
- maxval=1.)
- true_image_shapes = tf.tile(
- [[height, width, 3]], [batch_size, 1])
- prediction_dict = model.predict(preprocessed_images, true_image_shapes)
-
- self.assertIn('preprocessed_inputs', prediction_dict)
- self.assertIn('box_encodings', prediction_dict)
- self.assertIn('class_predictions_with_background', prediction_dict)
- self.assertIn('feature_maps', prediction_dict)
- self.assertIn('anchors', prediction_dict)
- self.assertAllEqual(
- [batch_size * num_unroll, height, width, 3],
- prediction_dict['preprocessed_inputs'].shape.as_list())
- self.assertAllEqual(
- [batch_size * num_unroll, num_anchors, code_size],
- prediction_dict['box_encodings'].shape.as_list())
- self.assertAllEqual(
- [batch_size * num_unroll, num_anchors, num_classes + 1],
- prediction_dict['class_predictions_with_background'].shape.as_list())
- self.assertAllEqual(
- [num_anchors, code_size],
- prediction_dict['anchors'].shape.as_list())
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/metrics/__init__.py b/research/lstm_object_detection/metrics/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/metrics/coco_evaluation_all_frames.py b/research/lstm_object_detection/metrics/coco_evaluation_all_frames.py
deleted file mode 100644
index 8e6d336cbf71ecfdf5f438b6f74e078db1a6fb17..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/metrics/coco_evaluation_all_frames.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Class for evaluating video object detections with COCO metrics."""
-
-import tensorflow.compat.v1 as tf
-
-from object_detection.core import standard_fields
-from object_detection.metrics import coco_evaluation
-from object_detection.metrics import coco_tools
-
-
-class CocoEvaluationAllFrames(coco_evaluation.CocoDetectionEvaluator):
- """Class to evaluate COCO detection metrics for frame sequences.
-
- The class overrides two functions: add_single_ground_truth_image_info and
- add_single_detected_image_info.
-
-  For video sequence detection evaluation, the class iterates through the
-  entire groundtruth_dict so that every unrolled frame in one LSTM training
-  sample is considered. Both the groundtruth and the detection results of
-  all frames are therefore added for evaluation. This is used when all the
-  frames in the video object detection training job are labeled.
- """
-
- def add_single_ground_truth_image_info(self, image_id, groundtruth_dict):
- """Add groundtruth results of all frames to the eval pipeline.
-
- This method overrides the function defined in the base class.
-
- Args:
- image_id: A unique string/integer identifier for the image.
-      groundtruth_dict: A list of dictionaries, each containing -
- InputDataFields.groundtruth_boxes: float32 numpy array of shape
- [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format
- [ymin, xmin, ymax, xmax] in absolute image coordinates.
- InputDataFields.groundtruth_classes: integer numpy array of shape
- [num_boxes] containing 1-indexed groundtruth classes for the boxes.
- InputDataFields.groundtruth_is_crowd (optional): integer numpy array of
- shape [num_boxes] containing iscrowd flag for groundtruth boxes.
- """
- for idx, gt in enumerate(groundtruth_dict):
- if not gt:
- continue
-
- image_frame_id = '{}_{}'.format(image_id, idx)
- if image_frame_id in self._image_ids:
- tf.logging.warning(
- 'Ignoring ground truth with image id %s since it was '
- 'previously added', image_frame_id)
- continue
-
- self._groundtruth_list.extend(
- coco_tools.ExportSingleImageGroundtruthToCoco(
- image_id=image_frame_id,
- next_annotation_id=self._annotation_id,
- category_id_set=self._category_id_set,
- groundtruth_boxes=gt[
- standard_fields.InputDataFields.groundtruth_boxes],
- groundtruth_classes=gt[
- standard_fields.InputDataFields.groundtruth_classes]))
- self._annotation_id += (
- gt[standard_fields.InputDataFields.groundtruth_boxes].shape[0])
-
- # Boolean to indicate whether a detection has been added for this image.
- self._image_ids[image_frame_id] = False
-
- def add_single_detected_image_info(self, image_id, detections_dict):
- """Add detection results of all frames to the eval pipeline.
-
- This method overrides the function defined in the base class.
-
- Args:
- image_id: A unique string/integer identifier for the image.
-      detections_dict: A list of dictionaries, each containing -
- DetectionResultFields.detection_boxes: float32 numpy array of shape
- [num_boxes, 4] containing `num_boxes` detection boxes of the format
- [ymin, xmin, ymax, xmax] in absolute image coordinates.
- DetectionResultFields.detection_scores: float32 numpy array of shape
- [num_boxes] containing detection scores for the boxes.
- DetectionResultFields.detection_classes: integer numpy array of shape
- [num_boxes] containing 1-indexed detection classes for the boxes.
-
- Raises:
- ValueError: If groundtruth for the image_id is not available.
- """
- for idx, det in enumerate(detections_dict):
- if not det:
- continue
-
- image_frame_id = '{}_{}'.format(image_id, idx)
- if image_frame_id not in self._image_ids:
- raise ValueError(
- 'Missing groundtruth for image-frame id: {}'.format(image_frame_id))
-
- if self._image_ids[image_frame_id]:
- tf.logging.warning(
- 'Ignoring detection with image id %s since it was '
- 'previously added', image_frame_id)
- continue
-
- self._detection_boxes_list.extend(
- coco_tools.ExportSingleImageDetectionBoxesToCoco(
- image_id=image_frame_id,
- category_id_set=self._category_id_set,
- detection_boxes=det[
- standard_fields.DetectionResultFields.detection_boxes],
- detection_scores=det[
- standard_fields.DetectionResultFields.detection_scores],
- detection_classes=det[
- standard_fields.DetectionResultFields.detection_classes]))
- self._image_ids[image_frame_id] = True
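Both methods above fan one video sample out into per-frame COCO records keyed '<image_id>_<frame_index>'. A minimal sketch of that keying scheme; the helper is hypothetical, for illustration only:

def frame_ids(image_id, num_frames):
  """Per-frame ids CocoEvaluationAllFrames registers for one video sample."""
  return ['{}_{}'.format(image_id, idx) for idx in range(num_frames)]

# frame_ids('image1', 2) -> ['image1_0', 'image1_1']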
diff --git a/research/lstm_object_detection/metrics/coco_evaluation_all_frames_test.py b/research/lstm_object_detection/metrics/coco_evaluation_all_frames_test.py
deleted file mode 100644
index 9c1e7b7546b037d974bde9e3dadef94d7535235b..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/metrics/coco_evaluation_all_frames_test.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for video_object_detection.metrics.coco_video_evaluation."""
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-from lstm_object_detection.metrics import coco_evaluation_all_frames
-from object_detection.core import standard_fields
-
-
-class CocoEvaluationAllFramesTest(tf.test.TestCase):
-
- def testGroundtruthAndDetectionsDisagreeOnAllFrames(self):
- """Tests that mAP is calculated on several different frame results."""
- category_list = [{'id': 0, 'name': 'dog'}, {'id': 1, 'name': 'cat'}]
- video_evaluator = coco_evaluation_all_frames.CocoEvaluationAllFrames(
- category_list)
- video_evaluator.add_single_ground_truth_image_info(
- image_id='image1',
- groundtruth_dict=[{
- standard_fields.InputDataFields.groundtruth_boxes:
- np.array([[50., 50., 200., 200.]]),
- standard_fields.InputDataFields.groundtruth_classes:
- np.array([1])
- }, {
- standard_fields.InputDataFields.groundtruth_boxes:
- np.array([[50., 50., 100., 100.]]),
- standard_fields.InputDataFields.groundtruth_classes:
- np.array([1])
- }])
- video_evaluator.add_single_detected_image_info(
- image_id='image1',
-        # Detections match the groundtruth only on the last frame.
- detections_dict=[{
- standard_fields.DetectionResultFields.detection_boxes:
- np.array([[100., 100., 200., 200.]]),
- standard_fields.DetectionResultFields.detection_scores:
- np.array([.8]),
- standard_fields.DetectionResultFields.detection_classes:
- np.array([1])
- }, {
- standard_fields.DetectionResultFields.detection_boxes:
- np.array([[50., 50., 100., 100.]]),
- standard_fields.DetectionResultFields.detection_scores:
- np.array([.8]),
- standard_fields.DetectionResultFields.detection_classes:
- np.array([1])
- }])
-
- metrics = video_evaluator.evaluate()
- self.assertNotEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
-
- def testGroundtruthAndDetections(self):
- """Tests that mAP is calculated correctly on GT and Detections."""
- category_list = [{'id': 0, 'name': 'dog'}, {'id': 1, 'name': 'cat'}]
- video_evaluator = coco_evaluation_all_frames.CocoEvaluationAllFrames(
- category_list)
- video_evaluator.add_single_ground_truth_image_info(
- image_id='image1',
- groundtruth_dict=[{
- standard_fields.InputDataFields.groundtruth_boxes:
- np.array([[100., 100., 200., 200.]]),
- standard_fields.InputDataFields.groundtruth_classes:
- np.array([1])
- }])
- video_evaluator.add_single_ground_truth_image_info(
- image_id='image2',
- groundtruth_dict=[{
- standard_fields.InputDataFields.groundtruth_boxes:
- np.array([[50., 50., 100., 100.]]),
- standard_fields.InputDataFields.groundtruth_classes:
- np.array([1])
- }])
- video_evaluator.add_single_ground_truth_image_info(
- image_id='image3',
- groundtruth_dict=[{
- standard_fields.InputDataFields.groundtruth_boxes:
- np.array([[50., 100., 100., 120.]]),
- standard_fields.InputDataFields.groundtruth_classes:
- np.array([1])
- }])
- video_evaluator.add_single_detected_image_info(
- image_id='image1',
- detections_dict=[{
- standard_fields.DetectionResultFields.detection_boxes:
- np.array([[100., 100., 200., 200.]]),
- standard_fields.DetectionResultFields.detection_scores:
- np.array([.8]),
- standard_fields.DetectionResultFields.detection_classes:
- np.array([1])
- }])
- video_evaluator.add_single_detected_image_info(
- image_id='image2',
- detections_dict=[{
- standard_fields.DetectionResultFields.detection_boxes:
- np.array([[50., 50., 100., 100.]]),
- standard_fields.DetectionResultFields.detection_scores:
- np.array([.8]),
- standard_fields.DetectionResultFields.detection_classes:
- np.array([1])
- }])
- video_evaluator.add_single_detected_image_info(
- image_id='image3',
- detections_dict=[{
- standard_fields.DetectionResultFields.detection_boxes:
- np.array([[50., 100., 100., 120.]]),
- standard_fields.DetectionResultFields.detection_scores:
- np.array([.8]),
- standard_fields.DetectionResultFields.detection_classes:
- np.array([1])
- }])
- metrics = video_evaluator.evaluate()
- self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
-
- def testMissingDetectionResults(self):
-    """Tests that a ValueError is raised when groundtruth is missing."""
- category_list = [{'id': 0, 'name': 'dog'}]
- video_evaluator = coco_evaluation_all_frames.CocoEvaluationAllFrames(
- category_list)
- video_evaluator.add_single_ground_truth_image_info(
- image_id='image1',
- groundtruth_dict=[{
- standard_fields.InputDataFields.groundtruth_boxes:
- np.array([[100., 100., 200., 200.]]),
- standard_fields.InputDataFields.groundtruth_classes:
- np.array([1])
- }])
- with self.assertRaisesRegexp(ValueError,
- r'Missing groundtruth for image-frame id:.*'):
- video_evaluator.add_single_detected_image_info(
- image_id='image3',
- detections_dict=[{
- standard_fields.DetectionResultFields.detection_boxes:
- np.array([[100., 100., 200., 200.]]),
- standard_fields.DetectionResultFields.detection_scores:
- np.array([.8]),
- standard_fields.DetectionResultFields.detection_classes:
- np.array([1])
- }])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/model_builder.py b/research/lstm_object_detection/model_builder.py
deleted file mode 100644
index d622558cf75f6664f9a1b075e3ed690caf457f68..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/model_builder.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""A function to build a DetectionModel from configuration."""
-from lstm_object_detection.meta_architectures import lstm_ssd_meta_arch
-from lstm_object_detection.models import lstm_ssd_interleaved_mobilenet_v2_feature_extractor
-from lstm_object_detection.models import lstm_ssd_mobilenet_v1_feature_extractor
-from object_detection.builders import anchor_generator_builder
-from object_detection.builders import box_coder_builder
-from object_detection.builders import box_predictor_builder
-from object_detection.builders import hyperparams_builder
-from object_detection.builders import image_resizer_builder
-from object_detection.builders import losses_builder
-from object_detection.builders import matcher_builder
-from object_detection.builders import model_builder
-from object_detection.builders import post_processing_builder
-from object_detection.builders import region_similarity_calculator_builder as sim_calc
-from object_detection.core import target_assigner
-
-model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP.update({
- 'lstm_ssd_mobilenet_v1':
- lstm_ssd_mobilenet_v1_feature_extractor
- .LSTMSSDMobileNetV1FeatureExtractor,
- 'lstm_ssd_interleaved_mobilenet_v2':
- lstm_ssd_interleaved_mobilenet_v2_feature_extractor
- .LSTMSSDInterleavedMobilenetV2FeatureExtractor,
-})
-SSD_FEATURE_EXTRACTOR_CLASS_MAP = model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP
-
-
-def build(model_config, lstm_config, is_training):
- """Builds a DetectionModel based on the model config.
-
- Args:
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
- lstm_config: LstmModel config proto that specifies LSTM train/eval configs.
- is_training: True if this model is being built for training purposes.
-
- Returns:
- DetectionModel based on the config.
-
- Raises:
- ValueError: On invalid meta architecture or model.
- """
- return _build_lstm_model(model_config.ssd, lstm_config, is_training)
-
-
-def _build_lstm_feature_extractor(feature_extractor_config,
- is_training,
- lstm_config,
- reuse_weights=None):
- """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
-
- Args:
-    feature_extractor_config: An SSDFeatureExtractor proto config from
-      ssd.proto.
- is_training: True if this feature extractor is being built for training.
- lstm_config: LSTM-SSD specific configs.
- reuse_weights: If the feature extractor should reuse weights.
-
- Returns:
- ssd_meta_arch.SSDFeatureExtractor based on config.
-
- Raises:
- ValueError: On invalid feature extractor type.
- """
-
- feature_type = feature_extractor_config.type
- depth_multiplier = feature_extractor_config.depth_multiplier
- min_depth = feature_extractor_config.min_depth
- pad_to_multiple = feature_extractor_config.pad_to_multiple
- use_explicit_padding = feature_extractor_config.use_explicit_padding
- use_depthwise = feature_extractor_config.use_depthwise
- conv_hyperparams = hyperparams_builder.build(
- feature_extractor_config.conv_hyperparams, is_training)
- override_base_feature_extractor_hyperparams = (
- feature_extractor_config.override_base_feature_extractor_hyperparams)
-
- if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
- raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
-
- feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
- feature_extractor = feature_extractor_class(
- is_training, depth_multiplier, min_depth, pad_to_multiple,
- conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise,
- override_base_feature_extractor_hyperparams)
-
- # Extra configs for LSTM-SSD.
- feature_extractor.lstm_state_depth = lstm_config.lstm_state_depth
- feature_extractor.flatten_state = lstm_config.flatten_state
- feature_extractor.clip_state = lstm_config.clip_state
- feature_extractor.scale_state = lstm_config.scale_state
- feature_extractor.is_quantized = lstm_config.is_quantized
- feature_extractor.low_res = lstm_config.low_res
- # Extra configs for interleaved LSTM-SSD.
- if 'interleaved' in feature_extractor_config.type:
- feature_extractor.pre_bottleneck = lstm_config.pre_bottleneck
- feature_extractor.depth_multipliers = lstm_config.depth_multipliers
- if is_training:
- feature_extractor.interleave_method = lstm_config.train_interleave_method
- else:
- feature_extractor.interleave_method = lstm_config.eval_interleave_method
- return feature_extractor
-
-
-def _build_lstm_model(ssd_config, lstm_config, is_training):
- """Builds an LSTM detection model based on the model config.
-
- Args:
- ssd_config: A ssd.proto object containing the config for the desired
- LSTMSSDMetaArch.
- lstm_config: LstmModel config proto that specifies LSTM train/eval configs.
- is_training: True if this model is being built for training purposes.
-
- Returns:
-    LSTMSSDMetaArch based on the config.
-
-  Raises:
-    ValueError: If ssd_config.type is not recognized (i.e. not registered in
-      SSD_FEATURE_EXTRACTOR_CLASS_MAP), or if lstm_config.interleave_strategy
-      is not recognized.
- ValueError: If unroll_length is not specified in the config file.
- """
- feature_extractor = _build_lstm_feature_extractor(
- ssd_config.feature_extractor, is_training, lstm_config)
-
- box_coder = box_coder_builder.build(ssd_config.box_coder)
- matcher = matcher_builder.build(ssd_config.matcher)
- region_similarity_calculator = sim_calc.build(
- ssd_config.similarity_calculator)
-
- num_classes = ssd_config.num_classes
- ssd_box_predictor = box_predictor_builder.build(hyperparams_builder.build,
- ssd_config.box_predictor,
- is_training, num_classes)
- anchor_generator = anchor_generator_builder.build(ssd_config.anchor_generator)
- image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
- non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
- ssd_config.post_processing)
- (classification_loss, localization_loss, classification_weight,
- localization_weight, miner, _, _) = losses_builder.build(ssd_config.loss)
-
- normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
- encode_background_as_zeros = ssd_config.encode_background_as_zeros
- negative_class_weight = ssd_config.negative_class_weight
-
- # Extra configs for lstm unroll length.
- unroll_length = None
- if 'lstm' in ssd_config.feature_extractor.type:
- if is_training:
- unroll_length = lstm_config.train_unroll_length
- else:
- unroll_length = lstm_config.eval_unroll_length
- if unroll_length is None:
- raise ValueError('No unroll length found in the config file')
-
- target_assigner_instance = target_assigner.TargetAssigner(
- region_similarity_calculator,
- matcher,
- box_coder,
- negative_class_weight=negative_class_weight)
-
- lstm_model = lstm_ssd_meta_arch.LSTMSSDMetaArch(
- is_training=is_training,
- anchor_generator=anchor_generator,
- box_predictor=ssd_box_predictor,
- box_coder=box_coder,
- feature_extractor=feature_extractor,
- encode_background_as_zeros=encode_background_as_zeros,
- image_resizer_fn=image_resizer_fn,
- non_max_suppression_fn=non_max_suppression_fn,
- score_conversion_fn=score_conversion_fn,
- classification_loss=classification_loss,
- localization_loss=localization_loss,
- classification_loss_weight=classification_weight,
- localization_loss_weight=localization_weight,
- normalize_loss_by_num_matches=normalize_loss_by_num_matches,
- hard_example_miner=miner,
- unroll_length=unroll_length,
- target_assigner_instance=target_assigner_instance)
-
- return lstm_model
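A minimal sketch of how this builder was driven from a pipeline config, mirroring the test file below; the config path is a placeholder:

from google.protobuf import text_format
from lstm_object_detection import model_builder
from lstm_object_detection.protos import pipeline_pb2 as internal_pipeline_pb2
from object_detection.protos import pipeline_pb2

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with open('path/to/pipeline.config') as f:  # placeholder path
  text_format.Merge(f.read(), pipeline_config)

# The LSTM options live in a proto extension next to the regular model config.
lstm_config = pipeline_config.Extensions[internal_pipeline_pb2.lstm_model]
detection_model = model_builder.build(
    pipeline_config.model, lstm_config, is_training=True)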
diff --git a/research/lstm_object_detection/model_builder_test.py b/research/lstm_object_detection/model_builder_test.py
deleted file mode 100644
index 9d64b537cdc4044d5302845c53a1a3e4ac700f39..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/model_builder_test.py
+++ /dev/null
@@ -1,302 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for lstm_object_detection.tensorflow.model_builder."""
-
-import tensorflow.compat.v1 as tf
-from google.protobuf import text_format
-from lstm_object_detection import model_builder
-from lstm_object_detection.meta_architectures import lstm_ssd_meta_arch
-from lstm_object_detection.protos import pipeline_pb2 as internal_pipeline_pb2
-from object_detection.protos import pipeline_pb2
-
-
-class ModelBuilderTest(tf.test.TestCase):
-
- def create_train_model(self, model_config, lstm_config):
- """Builds a DetectionModel based on the model config.
-
- Args:
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
- lstm_config: LstmModel config proto that specifies LSTM train/eval
- configs.
-
- Returns:
- DetectionModel based on the config.
- """
- return model_builder.build(model_config, lstm_config, is_training=True)
-
- def create_eval_model(self, model_config, lstm_config):
- """Builds a DetectionModel based on the model config.
-
- Args:
- model_config: A model.proto object containing the config for the desired
- DetectionModel.
- lstm_config: LstmModel config proto that specifies LSTM train/eval
- configs.
-
- Returns:
- DetectionModel based on the config.
- """
- return model_builder.build(model_config, lstm_config, is_training=False)
-
- def get_model_configs_from_proto(self):
- """Creates a model text proto for testing.
-
- Returns:
- A dictionary of model configs.
- """
-
- model_text_proto = """
- [lstm_object_detection.protos.lstm_model] {
- train_unroll_length: 4
- eval_unroll_length: 4
- }
- model {
- ssd {
- feature_extractor {
- type: 'lstm_ssd_mobilenet_v1'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- negative_class_weight: 2.0
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- normalize_loc_loss_by_codesize: true
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }
- }"""
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- text_format.Merge(model_text_proto, pipeline_config)
-
- configs = {}
- configs['model'] = pipeline_config.model
- configs['lstm_model'] = pipeline_config.Extensions[
- internal_pipeline_pb2.lstm_model]
-
- return configs
-
- def get_interleaved_model_configs_from_proto(self):
- """Creates an interleaved model text proto for testing.
-
- Returns:
- A dictionary of model configs.
- """
-
- model_text_proto = """
- [lstm_object_detection.protos.lstm_model] {
- train_unroll_length: 4
- eval_unroll_length: 10
- lstm_state_depth: 320
- depth_multipliers: 1.4
- depth_multipliers: 0.35
- pre_bottleneck: true
- low_res: true
- train_interleave_method: 'RANDOM_SKIP_SMALL'
- eval_interleave_method: 'SKIP3'
- }
- model {
- ssd {
- feature_extractor {
- type: 'lstm_ssd_interleaved_mobilenet_v2'
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- negative_class_weight: 2.0
- box_coder {
- faster_rcnn_box_coder {
- }
- }
- matcher {
- argmax_matcher {
- }
- }
- similarity_calculator {
- iou_similarity {
- }
- }
- anchor_generator {
- ssd_anchor_generator {
- aspect_ratios: 1.0
- }
- }
- image_resizer {
- fixed_shape_resizer {
- height: 320
- width: 320
- }
- }
- box_predictor {
- convolutional_box_predictor {
- conv_hyperparams {
- regularizer {
- l2_regularizer {
- }
- }
- initializer {
- truncated_normal_initializer {
- }
- }
- }
- }
- }
- normalize_loc_loss_by_codesize: true
- loss {
- classification_loss {
- weighted_softmax {
- }
- }
- localization_loss {
- weighted_smooth_l1 {
- }
- }
- }
- }
- }"""
-
- pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
- text_format.Merge(model_text_proto, pipeline_config)
-
- configs = {}
- configs['model'] = pipeline_config.model
- configs['lstm_model'] = pipeline_config.Extensions[
- internal_pipeline_pb2.lstm_model]
-
- return configs
-
- def test_model_creation_from_valid_configs(self):
- configs = self.get_model_configs_from_proto()
- # Test model properties.
- self.assertEqual(configs['model'].ssd.negative_class_weight, 2.0)
- self.assertTrue(configs['model'].ssd.normalize_loc_loss_by_codesize)
- self.assertEqual(configs['model'].ssd.feature_extractor.type,
- 'lstm_ssd_mobilenet_v1')
-
- model = self.create_train_model(configs['model'], configs['lstm_model'])
-    # Test architecture type.
- self.assertIsInstance(model, lstm_ssd_meta_arch.LSTMSSDMetaArch)
- # Test LSTM unroll length.
- self.assertEqual(model.unroll_length, 4)
-
- model = self.create_eval_model(configs['model'], configs['lstm_model'])
-    # Test architecture type.
- self.assertIsInstance(model, lstm_ssd_meta_arch.LSTMSSDMetaArch)
- # Test LSTM configs.
- self.assertEqual(model.unroll_length, 4)
-
- def test_interleaved_model_creation_from_valid_configs(self):
- configs = self.get_interleaved_model_configs_from_proto()
- # Test model properties.
- self.assertEqual(configs['model'].ssd.negative_class_weight, 2.0)
- self.assertTrue(configs['model'].ssd.normalize_loc_loss_by_codesize)
- self.assertEqual(configs['model'].ssd.feature_extractor.type,
- 'lstm_ssd_interleaved_mobilenet_v2')
-
- model = self.create_train_model(configs['model'], configs['lstm_model'])
-    # Test architecture type.
- self.assertIsInstance(model, lstm_ssd_meta_arch.LSTMSSDMetaArch)
- # Test LSTM configs.
- self.assertEqual(model.unroll_length, 4)
- self.assertEqual(model._feature_extractor.lstm_state_depth, 320)
- self.assertAllClose(model._feature_extractor.depth_multipliers, (1.4, 0.35))
- self.assertTrue(model._feature_extractor.pre_bottleneck)
- self.assertTrue(model._feature_extractor.low_res)
- self.assertEqual(model._feature_extractor.interleave_method,
- 'RANDOM_SKIP_SMALL')
-
- model = self.create_eval_model(configs['model'], configs['lstm_model'])
-    # Test architecture type.
- self.assertIsInstance(model, lstm_ssd_meta_arch.LSTMSSDMetaArch)
- # Test LSTM configs.
- self.assertEqual(model.unroll_length, 10)
- self.assertEqual(model._feature_extractor.lstm_state_depth, 320)
- self.assertAllClose(model._feature_extractor.depth_multipliers, (1.4, 0.35))
- self.assertTrue(model._feature_extractor.pre_bottleneck)
- self.assertTrue(model._feature_extractor.low_res)
- self.assertEqual(model._feature_extractor.interleave_method, 'SKIP3')
-
- def test_model_creation_from_invalid_configs(self):
- configs = self.get_model_configs_from_proto()
- # Test model build failure with wrong input configs.
- with self.assertRaises(AttributeError):
- _ = self.create_train_model(configs['model'], configs['model'])
- with self.assertRaises(AttributeError):
- _ = self.create_eval_model(configs['model'], configs['model'])
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/models/__init__.py b/research/lstm_object_detection/models/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/models/lstm_ssd_interleaved_mobilenet_v2_feature_extractor.py b/research/lstm_object_detection/models/lstm_ssd_interleaved_mobilenet_v2_feature_extractor.py
deleted file mode 100644
index 5a2d4bd0bdceb39801b46b864f512273ae10f8bc..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/models/lstm_ssd_interleaved_mobilenet_v2_feature_extractor.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""LSTDInterleavedFeatureExtractor which interleaves multiple MobileNet V2."""
-
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-
-from tensorflow.python.framework import ops as tf_ops
-from lstm_object_detection.lstm import lstm_cells
-from lstm_object_detection.lstm import rnn_decoder
-from lstm_object_detection.meta_architectures import lstm_ssd_meta_arch
-from lstm_object_detection.models import mobilenet_defs
-from object_detection.models import feature_map_generators
-from object_detection.utils import ops
-from object_detection.utils import shape_utils
-from nets.mobilenet import mobilenet
-from nets.mobilenet import mobilenet_v2
-
-
-class LSTMSSDInterleavedMobilenetV2FeatureExtractor(
- lstm_ssd_meta_arch.LSTMSSDInterleavedFeatureExtractor):
- """LSTM-SSD Interleaved Feature Extractor using MobilenetV2 features."""
-
- def __init__(self,
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams_fn,
- reuse_weights=None,
- use_explicit_padding=False,
- use_depthwise=True,
- override_base_feature_extractor_hyperparams=False):
-    """Interleaved Feature Extractor for LSTM-SSD Models with MobileNet v2.
-
- Args:
- is_training: whether the network is in training mode.
- depth_multiplier: float depth multiplier for feature extractor.
- min_depth: minimum feature extractor depth.
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
- and separable_conv2d ops in the layers that are added on top of the
- base feature extractor.
- reuse_weights: Whether to reuse variables. Default is None.
- use_explicit_padding: Whether to use explicit padding when extracting
- features. Default is False.
- use_depthwise: Whether to use depthwise convolutions. Default is True.
- override_base_feature_extractor_hyperparams: Whether to override
- hyperparameters of the base feature extractor with the one from
- `conv_hyperparams_fn`.
- """
- super(LSTMSSDInterleavedMobilenetV2FeatureExtractor, self).__init__(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams_fn=conv_hyperparams_fn,
- reuse_weights=reuse_weights,
- use_explicit_padding=use_explicit_padding,
- use_depthwise=use_depthwise,
- override_base_feature_extractor_hyperparams=
- override_base_feature_extractor_hyperparams)
- # RANDOM_SKIP_SMALL means the training policy is random and the small model
- # does not update state during training.
- if self._is_training:
- self._interleave_method = 'RANDOM_SKIP_SMALL'
- else:
- self._interleave_method = 'SKIP9'
-
- self._flatten_state = False
- self._scale_state = False
- self._clip_state = True
- self._pre_bottleneck = True
- self._feature_map_layout = {
- 'from_layer': ['layer_19', '', '', '', ''],
- 'layer_depth': [-1, 256, 256, 256, 256],
- 'use_depthwise': self._use_depthwise,
- 'use_explicit_padding': self._use_explicit_padding,
- }
- self._low_res = True
- self._base_network_scope = 'MobilenetV2'
-
- def extract_base_features_large(self, preprocessed_inputs):
- """Extract the large base model features.
-
- Variables are created under the scope of /MobilenetV2_1/
-
- Args:
- preprocessed_inputs: preprocessed input images of shape:
-        [batch, height, width, depth].
-
- Returns:
- net: the last feature map created from the base feature extractor.
- end_points: a dictionary of feature maps created.
- """
- scope_name = self._base_network_scope + '_1'
- with tf.variable_scope(scope_name, reuse=self._reuse_weights) as base_scope:
- net, end_points = mobilenet_v2.mobilenet_base(
- preprocessed_inputs,
- depth_multiplier=self._depth_multipliers[0],
- conv_defs=mobilenet_defs.mobilenet_v2_lite_def(
- is_quantized=self._is_quantized),
- use_explicit_padding=self._use_explicit_padding,
- scope=base_scope)
- return net, end_points
-
- def extract_base_features_small(self, preprocessed_inputs):
- """Extract the small base model features.
-
- Variables are created under the scope of /MobilenetV2_2/
-
- Args:
- preprocessed_inputs: preprocessed input images of shape:
-        [batch, height, width, depth].
-
- Returns:
- net: the last feature map created from the base feature extractor.
- end_points: a dictionary of feature maps created.
- """
- scope_name = self._base_network_scope + '_2'
- with tf.variable_scope(scope_name, reuse=self._reuse_weights) as base_scope:
- if self._low_res:
- height_small = preprocessed_inputs.get_shape().as_list()[1] // 2
- width_small = preprocessed_inputs.get_shape().as_list()[2] // 2
- inputs_small = tf.image.resize_images(preprocessed_inputs,
- [height_small, width_small])
- # Create end point handle for tflite deployment.
- with tf.name_scope(None):
- inputs_small = tf.identity(
- inputs_small, name='normalized_input_image_tensor_small')
- else:
- inputs_small = preprocessed_inputs
- net, end_points = mobilenet_v2.mobilenet_base(
- inputs_small,
- depth_multiplier=self._depth_multipliers[1],
- conv_defs=mobilenet_defs.mobilenet_v2_lite_def(
- is_quantized=self._is_quantized, low_res=self._low_res),
- use_explicit_padding=self._use_explicit_padding,
- scope=base_scope)
- return net, end_points
-
- def create_lstm_cell(self, batch_size, output_size, state_saver, state_name,
- dtype=tf.float32):
- """Create the LSTM cell, and initialize state if necessary.
-
- Args:
- batch_size: input batch size.
- output_size: output size of the lstm cell, [width, height].
- state_saver: a state saver object with methods `state` and `save_state`.
- state_name: string, the name to use with the state_saver.
- dtype: dtype to initialize lstm state.
-
- Returns:
- lstm_cell: the lstm cell unit.
- init_state: initial state representations.
-      step: the step state tensor from the state saver, or None if no state
-        saver is provided.
- """
- lstm_cell = lstm_cells.GroupedConvLSTMCell(
- filter_size=(3, 3),
- output_size=output_size,
- num_units=max(self._min_depth, self._lstm_state_depth),
- is_training=self._is_training,
- activation=tf.nn.relu6,
- flatten_state=self._flatten_state,
- scale_state=self._scale_state,
- clip_state=self._clip_state,
- output_bottleneck=True,
- pre_bottleneck=self._pre_bottleneck,
- is_quantized=self._is_quantized,
- visualize_gates=False)
-
- if state_saver is None:
- init_state = lstm_cell.init_state('lstm_state', batch_size, dtype)
- step = None
- else:
- step = state_saver.state(state_name + '_step')
- c = state_saver.state(state_name + '_c')
- h = state_saver.state(state_name + '_h')
- c.set_shape([batch_size] + c.get_shape().as_list()[1:])
- h.set_shape([batch_size] + h.get_shape().as_list()[1:])
- init_state = (c, h)
- return lstm_cell, init_state, step
-
- def extract_features(self, preprocessed_inputs, state_saver=None,
- state_name='lstm_state', unroll_length=10, scope=None):
- """Extract features from preprocessed inputs.
-
- The features include the base network features, lstm features and SSD
- features, organized in the following name scope:
-
- /MobilenetV2_1/...
- /MobilenetV2_2/...
- /LSTM/...
- /FeatureMap/...
-
- Args:
- preprocessed_inputs: a [batch, height, width, channels] float tensor
- representing a batch of consecutive frames from video clips.
- state_saver: A state saver object with methods `state` and `save_state`.
- state_name: Python string, the name to use with the state_saver.
- unroll_length: number of steps to unroll the lstm.
- scope: Scope for the base network of the feature extractor.
-
- Returns:
- feature_maps: a list of tensors where the ith tensor has shape
-        [batch, height_i, width_i, depth_i].
-
-    Raises:
-      ValueError: if interleave_method is not recognized, or if the large and
-        small base networks output feature maps of different sizes.
- """
- preprocessed_inputs = shape_utils.check_min_image_dim(
- 33, preprocessed_inputs)
- preprocessed_inputs = ops.pad_to_multiple(
- preprocessed_inputs, self._pad_to_multiple)
- batch_size = preprocessed_inputs.shape[0].value // unroll_length
- batch_axis = 0
- nets = []
-
- # Batch processing of mobilenet features.
- with slim.arg_scope(mobilenet_v2.training_scope(
- is_training=self._is_training,
- bn_decay=0.9997)), \
- slim.arg_scope([mobilenet.depth_multiplier],
- min_depth=self._min_depth, divisible_by=8):
- # Big model.
- net, _ = self.extract_base_features_large(preprocessed_inputs)
- nets.append(net)
- large_base_feature_shape = net.shape
-
-      # Small model.
- net, _ = self.extract_base_features_small(preprocessed_inputs)
- nets.append(net)
- small_base_feature_shape = net.shape
- if not (large_base_feature_shape[1] == small_base_feature_shape[1] and
- large_base_feature_shape[2] == small_base_feature_shape[2]):
- raise ValueError('Large and Small base network feature map dimension '
- 'not equal!')
-
- with slim.arg_scope(self._conv_hyperparams_fn()):
- with tf.variable_scope('LSTM', reuse=self._reuse_weights):
- output_size = (large_base_feature_shape[1], large_base_feature_shape[2])
- lstm_cell, init_state, step = self.create_lstm_cell(
- batch_size, output_size, state_saver, state_name,
- dtype=preprocessed_inputs.dtype)
-
- nets_seq = [
- tf.split(net, unroll_length, axis=batch_axis) for net in nets
- ]
-
- net_seq, states_out = rnn_decoder.multi_input_rnn_decoder(
- nets_seq,
- init_state,
- lstm_cell,
- step,
- selection_strategy=self._interleave_method,
- is_training=self._is_training,
- is_quantized=self._is_quantized,
- pre_bottleneck=self._pre_bottleneck,
- flatten_state=self._flatten_state,
- scope=None)
- self._states_out = states_out
-
- image_features = {}
- if state_saver is not None:
- self._step = state_saver.state(state_name + '_step')
- batcher_ops = [
- state_saver.save_state(state_name + '_c', states_out[-1][0]),
- state_saver.save_state(state_name + '_h', states_out[-1][1]),
- state_saver.save_state(state_name + '_step', self._step + 1)]
- with tf_ops.control_dependencies(batcher_ops):
- image_features['layer_19'] = tf.concat(net_seq, 0)
- else:
- image_features['layer_19'] = tf.concat(net_seq, 0)
-
- # SSD layers.
- with tf.variable_scope('FeatureMap'):
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=self._feature_map_layout,
- depth_multiplier=self._depth_multiplier,
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features,
- pool_residual=True)
- return list(feature_maps.values())
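A minimal TF1-style construction sketch for this extractor, adapted from the test file below; conv_hyperparams_fn here is a bare-bones stand-in for the production hyperparameter builder:

import tensorflow.compat.v1 as tf
import tf_slim as slim

from lstm_object_detection.models import (
    lstm_ssd_interleaved_mobilenet_v2_feature_extractor as interleaved)


def conv_hyperparams_fn():
  with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm), \
      slim.arg_scope([slim.batch_norm], is_training=False) as sc:
    return sc

extractor = interleaved.LSTMSSDInterleavedMobilenetV2FeatureExtractor(
    is_training=False, depth_multiplier=1.0, min_depth=32,
    pad_to_multiple=1, conv_hyperparams_fn=conv_hyperparams_fn)
# Extra attributes that model_builder.py (above) normally fills in.
extractor.lstm_state_depth = 320
extractor.depth_multipliers = [1.0, 0.25]  # large and small towers
extractor.is_quantized = False

images = tf.random_uniform([2, 128, 128, 3])  # two unrolled frames
feature_maps = extractor.extract_features(images, unroll_length=2)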
diff --git a/research/lstm_object_detection/models/lstm_ssd_interleaved_mobilenet_v2_feature_extractor_test.py b/research/lstm_object_detection/models/lstm_ssd_interleaved_mobilenet_v2_feature_extractor_test.py
deleted file mode 100644
index b285f0e44417a309f54973327c16b55c1169260f..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/models/lstm_ssd_interleaved_mobilenet_v2_feature_extractor_test.py
+++ /dev/null
@@ -1,352 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for lstm_ssd_interleaved_mobilenet_v2_feature_extractor."""
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-from tensorflow.contrib import training as contrib_training
-
-from lstm_object_detection.models import lstm_ssd_interleaved_mobilenet_v2_feature_extractor
-from object_detection.models import ssd_feature_extractor_test
-
-
-class LSTMSSDInterleavedMobilenetV2FeatureExtractorTest(
- ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
-
- def _create_feature_extractor(self,
- depth_multiplier,
- pad_to_multiple,
- is_quantized=False):
- """Constructs a new feature extractor.
-
- Args:
- depth_multiplier: float depth multiplier for feature extractor
- pad_to_multiple: the nearest multiple to zero pad the input height and
- width dimensions to.
- is_quantized: whether to quantize the graph.
- Returns:
- an ssd_meta_arch.SSDFeatureExtractor object.
- """
- min_depth = 32
- def conv_hyperparams_fn():
- with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm), \
- slim.arg_scope([slim.batch_norm], is_training=False) as sc:
- return sc
- feature_extractor = (
- lstm_ssd_interleaved_mobilenet_v2_feature_extractor
- .LSTMSSDInterleavedMobilenetV2FeatureExtractor(False, depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams_fn))
- feature_extractor.lstm_state_depth = int(320 * depth_multiplier)
- feature_extractor.depth_multipliers = [
- depth_multiplier, depth_multiplier / 4.0
- ]
- feature_extractor.is_quantized = is_quantized
- return feature_extractor
-
- def test_feature_extractor_construct_with_expected_params(self):
- def conv_hyperparams_fn():
-      with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm), \
-          slim.arg_scope([slim.batch_norm], decay=0.97, epsilon=1e-3) as sc:
- return sc
-
- params = {
- 'is_training': True,
- 'depth_multiplier': .55,
- 'min_depth': 9,
- 'pad_to_multiple': 3,
- 'conv_hyperparams_fn': conv_hyperparams_fn,
- 'reuse_weights': False,
- 'use_explicit_padding': True,
- 'use_depthwise': False,
- 'override_base_feature_extractor_hyperparams': True}
-
- feature_extractor = (
- lstm_ssd_interleaved_mobilenet_v2_feature_extractor
- .LSTMSSDInterleavedMobilenetV2FeatureExtractor(**params))
-
- self.assertEqual(params['is_training'],
- feature_extractor._is_training)
- self.assertEqual(params['depth_multiplier'],
- feature_extractor._depth_multiplier)
- self.assertEqual(params['min_depth'],
- feature_extractor._min_depth)
- self.assertEqual(params['pad_to_multiple'],
- feature_extractor._pad_to_multiple)
- self.assertEqual(params['conv_hyperparams_fn'],
- feature_extractor._conv_hyperparams_fn)
- self.assertEqual(params['reuse_weights'],
- feature_extractor._reuse_weights)
- self.assertEqual(params['use_explicit_padding'],
- feature_extractor._use_explicit_padding)
- self.assertEqual(params['use_depthwise'],
- feature_extractor._use_depthwise)
- self.assertEqual(params['override_base_feature_extractor_hyperparams'],
- (feature_extractor.
- _override_base_feature_extractor_hyperparams))
-
- def test_extract_features_returns_correct_shapes_128(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(2, 4, 4, 640),
- (2, 2, 2, 256), (2, 1, 1, 256),
- (2, 1, 1, 256), (2, 1, 1, 256)]
- self.check_extract_features_returns_correct_shape(
- 2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_unroll10(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(10, 4, 4, 640),
- (10, 2, 2, 256), (10, 1, 1, 256),
- (10, 1, 1, 256), (10, 1, 1, 256)]
- self.check_extract_features_returns_correct_shape(
- 10, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape, unroll_length=10)
-
- def test_extract_features_returns_correct_shapes_320(self):
- image_height = 320
- image_width = 320
- depth_multiplier = 1.0
- pad_to_multiple = 1
- expected_feature_map_shape = [(2, 10, 10, 640),
- (2, 5, 5, 256), (2, 3, 3, 256),
- (2, 2, 2, 256), (2, 1, 1, 256)]
- self.check_extract_features_returns_correct_shape(
- 2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
- image_height = 320
- image_width = 320
- depth_multiplier = 0.5**12
- pad_to_multiple = 1
- expected_feature_map_shape = [(2, 10, 10, 64),
- (2, 5, 5, 32), (2, 3, 3, 32),
- (2, 2, 2, 32), (2, 1, 1, 32)]
- self.check_extract_features_returns_correct_shape(
- 2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
- image_height = 299
- image_width = 299
- depth_multiplier = 1.0
- pad_to_multiple = 32
- expected_feature_map_shape = [(2, 10, 10, 640),
- (2, 5, 5, 256), (2, 3, 3, 256),
- (2, 2, 2, 256), (2, 1, 1, 256)]
- self.check_extract_features_returns_correct_shape(
- 2, image_height, image_width, depth_multiplier, pad_to_multiple,
- expected_feature_map_shape)
-
- def test_preprocess_returns_correct_value_range(self):
- image_height = 128
- image_width = 128
- depth_multiplier = 1
- pad_to_multiple = 1
- test_image = np.random.rand(4, image_height, image_width, 3)
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(test_image)
- self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
-
- def test_variables_only_created_in_scope(self):
- depth_multiplier = 1
- pad_to_multiple = 1
- scope_names = ['MobilenetV2', 'LSTM', 'FeatureMap']
- self.check_feature_extractor_variables_under_scopes(
- depth_multiplier, pad_to_multiple, scope_names)
-
- def test_has_fused_batchnorm(self):
- image_height = 40
- image_width = 40
- depth_multiplier = 1
- pad_to_multiple = 32
- image_placeholder = tf.placeholder(tf.float32,
- [1, image_height, image_width, 3])
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(image_placeholder)
- _ = feature_extractor.extract_features(preprocessed_image, unroll_length=1)
- self.assertTrue(any(op.type.startswith('FusedBatchNorm')
- for op in tf.get_default_graph().get_operations()))
-
- def test_variables_for_tflite(self):
- image_height = 40
- image_width = 40
- depth_multiplier = 1
- pad_to_multiple = 32
- image_placeholder = tf.placeholder(tf.float32,
- [1, image_height, image_width, 3])
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(image_placeholder)
- tflite_unsupported = ['SquaredDifference']
- _ = feature_extractor.extract_features(preprocessed_image, unroll_length=1)
- self.assertFalse(any(op.type in tflite_unsupported
- for op in tf.get_default_graph().get_operations()))
-
- def test_output_nodes_for_tflite(self):
- image_height = 64
- image_width = 64
- depth_multiplier = 1.0
- pad_to_multiple = 1
- image_placeholder = tf.placeholder(tf.float32,
- [1, image_height, image_width, 3])
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- preprocessed_image = feature_extractor.preprocess(image_placeholder)
- _ = feature_extractor.extract_features(preprocessed_image, unroll_length=1)
-
- tflite_nodes = [
- 'raw_inputs/init_lstm_c',
- 'raw_inputs/init_lstm_h',
- 'raw_inputs/base_endpoint',
- 'raw_outputs/lstm_c',
- 'raw_outputs/lstm_h',
- 'raw_outputs/base_endpoint_1',
- 'raw_outputs/base_endpoint_2'
- ]
- ops_names = [op.name for op in tf.get_default_graph().get_operations()]
- for node in tflite_nodes:
- self.assertTrue(any(node in s for s in ops_names))
-
- def test_fixed_concat_nodes(self):
- image_height = 64
- image_width = 64
- depth_multiplier = 1.0
- pad_to_multiple = 1
- image_placeholder = tf.placeholder(tf.float32,
- [1, image_height, image_width, 3])
- feature_extractor = self._create_feature_extractor(
- depth_multiplier, pad_to_multiple, is_quantized=True)
- preprocessed_image = feature_extractor.preprocess(image_placeholder)
- _ = feature_extractor.extract_features(preprocessed_image, unroll_length=1)
-
- concat_nodes = [
- 'MobilenetV2_1/expanded_conv_16/project/Relu6',
- 'MobilenetV2_2/expanded_conv_16/project/Relu6'
- ]
- ops_names = [op.name for op in tf.get_default_graph().get_operations()]
- for node in concat_nodes:
- self.assertTrue(any(node in s for s in ops_names))
-
- def test_lstm_states(self):
- image_height = 256
- image_width = 256
- depth_multiplier = 1
- pad_to_multiple = 1
- state_channel = 320
- init_state1 = {
- 'lstm_state_c': tf.zeros(
- [image_height // 32, image_width // 32, state_channel]),
- 'lstm_state_h': tf.zeros(
- [image_height // 32, image_width // 32, state_channel]),
- 'lstm_state_step': tf.zeros([1])
- }
- init_state2 = {
- 'lstm_state_c': tf.random_uniform(
- [image_height // 32, image_width // 32, state_channel]),
- 'lstm_state_h': tf.random_uniform(
- [image_height // 32, image_width // 32, state_channel]),
- 'lstm_state_step': tf.zeros([1])
- }
- seq = {'dummy': tf.random_uniform([2, 1, 1, 1])}
- stateful_reader1 = contrib_training.SequenceQueueingStateSaver(
- batch_size=1,
- num_unroll=1,
- input_length=2,
- input_key='',
- input_sequences=seq,
- input_context={},
- initial_states=init_state1,
- capacity=1)
- stateful_reader2 = contrib_training.SequenceQueueingStateSaver(
- batch_size=1,
- num_unroll=1,
- input_length=2,
- input_key='',
- input_sequences=seq,
- input_context={},
- initial_states=init_state2,
- capacity=1)
- image = tf.random_uniform([1, image_height, image_width, 3])
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- with tf.variable_scope('zero_state'):
- feature_maps1 = feature_extractor.extract_features(
- image, stateful_reader1.next_batch, unroll_length=1)
- with tf.variable_scope('random_state'):
- feature_maps2 = feature_extractor.extract_features(
- image, stateful_reader2.next_batch, unroll_length=1)
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- sess.run(tf.local_variables_initializer())
- sess.run(tf.get_collection(tf.GraphKeys.TABLE_INITIALIZERS))
- sess.run([stateful_reader1.prefetch_op, stateful_reader2.prefetch_op])
- maps1, maps2 = sess.run([feature_maps1, feature_maps2])
- state = sess.run(stateful_reader1.next_batch.state('lstm_state_c'))
- # feature maps should be different because states are different
- self.assertFalse(np.all(np.equal(maps1[0], maps2[0])))
- # state should no longer be zero after update
- self.assertTrue(state.any())
-
- def check_extract_features_returns_correct_shape(
- self, batch_size, image_height, image_width, depth_multiplier,
- pad_to_multiple, expected_feature_map_shapes, unroll_length=1):
- def graph_fn(image_tensor):
- feature_extractor = self._create_feature_extractor(depth_multiplier,
- pad_to_multiple)
- feature_maps = feature_extractor.extract_features(
- image_tensor, unroll_length=unroll_length)
- return feature_maps
-
- image_tensor = np.random.rand(batch_size, image_height, image_width,
- 3).astype(np.float32)
- feature_maps = self.execute(graph_fn, [image_tensor])
- for feature_map, expected_shape in zip(
- feature_maps, expected_feature_map_shapes):
- self.assertAllEqual(feature_map.shape, expected_shape)
-
- def check_feature_extractor_variables_under_scopes(
- self, depth_multiplier, pad_to_multiple, scope_names):
- g = tf.Graph()
- with g.as_default():
- feature_extractor = self._create_feature_extractor(
- depth_multiplier, pad_to_multiple)
- preprocessed_inputs = tf.placeholder(tf.float32, (4, 320, 320, 3))
- feature_extractor.extract_features(
- preprocessed_inputs, unroll_length=1)
- variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
- for variable in variables:
- self.assertTrue(
- any([
- variable.name.startswith(scope_name)
- for scope_name in scope_names
- ]), 'Variable name: ' + variable.name +
- ' is not under any provided scopes: ' + ','.join(scope_names))
-
-
-if __name__ == '__main__':
- tf.test.main()
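create_lstm_cell() in both extractors expects a state saver exposing states named '<state_name>_c', '<state_name>_h' and '<state_name>_step'. The test above drives this with contrib_training.SequenceQueueingStateSaver; the class below is a bare-bones, hypothetical stand-in satisfying the same state/save_state interface, for illustration only:

import tensorflow.compat.v1 as tf


class DictStateSaver(object):
  """Minimal object with the `state`/`save_state` methods the extractors call."""

  def __init__(self, states):
    self._states = dict(states)  # e.g. maps 'lstm_state_c' to a tensor

  def state(self, name):
    return self._states[name]

  def save_state(self, name, value):
    # Returns an op so callers can place it under control_dependencies.
    self._states[name] = value
    return tf.identity(value, name='save_' + name)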
diff --git a/research/lstm_object_detection/models/lstm_ssd_mobilenet_v1_feature_extractor.py b/research/lstm_object_detection/models/lstm_ssd_mobilenet_v1_feature_extractor.py
deleted file mode 100644
index cccf740aadd337d29bec56a7fed93fc6937fc123..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/models/lstm_ssd_mobilenet_v1_feature_extractor.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""LSTMSSDFeatureExtractor for MobilenetV1 features."""
-
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-from tensorflow.python.framework import ops as tf_ops
-from lstm_object_detection.lstm import lstm_cells
-from lstm_object_detection.lstm import rnn_decoder
-from lstm_object_detection.meta_architectures import lstm_ssd_meta_arch
-from object_detection.models import feature_map_generators
-from object_detection.utils import context_manager
-from object_detection.utils import ops
-from object_detection.utils import shape_utils
-from nets import mobilenet_v1
-
-
-class LSTMSSDMobileNetV1FeatureExtractor(
- lstm_ssd_meta_arch.LSTMSSDFeatureExtractor):
- """LSTM Feature Extractor using MobilenetV1 features."""
-
- def __init__(self,
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- conv_hyperparams_fn,
- reuse_weights=None,
- use_explicit_padding=False,
- use_depthwise=True,
- override_base_feature_extractor_hyperparams=False,
- lstm_state_depth=256):
- """Initializes instance of MobileNetV1 Feature Extractor for LSTMSSD Models.
-
- Args:
- is_training: A boolean whether the network is in training mode.
- depth_multiplier: A float depth multiplier for feature extractor.
- min_depth: A number representing minimum feature extractor depth.
- pad_to_multiple: The nearest multiple to zero pad the input height and
- width dimensions to.
- conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
- and separable_conv2d ops in the layers that are added on top of the
- base feature extractor.
- reuse_weights: Whether to reuse variables. Default is None.
- use_explicit_padding: Whether to use explicit padding when extracting
- features. Default is False.
- use_depthwise: Whether to use depthwise convolutions. Default is True.
- override_base_feature_extractor_hyperparams: Whether to override
- hyperparameters of the base feature extractor with the one from
- `conv_hyperparams_fn`.
-      lstm_state_depth: An integer specifying the depth of the lstm state.
- """
- super(LSTMSSDMobileNetV1FeatureExtractor, self).__init__(
- is_training=is_training,
- depth_multiplier=depth_multiplier,
- min_depth=min_depth,
- pad_to_multiple=pad_to_multiple,
- conv_hyperparams_fn=conv_hyperparams_fn,
- reuse_weights=reuse_weights,
- use_explicit_padding=use_explicit_padding,
- use_depthwise=use_depthwise,
- override_base_feature_extractor_hyperparams=
- override_base_feature_extractor_hyperparams)
- self._feature_map_layout = {
- 'from_layer': ['Conv2d_13_pointwise_lstm', '', '', '', ''],
- 'layer_depth': [-1, 512, 256, 256, 128],
- 'use_explicit_padding': self._use_explicit_padding,
- 'use_depthwise': self._use_depthwise,
- }
- self._base_network_scope = 'MobilenetV1'
- self._lstm_state_depth = lstm_state_depth
-
- def create_lstm_cell(self, batch_size, output_size, state_saver, state_name,
- dtype=tf.float32):
- """Create the LSTM cell, and initialize state if necessary.
-
- Args:
- batch_size: input batch size.
- output_size: output size of the lstm cell, [width, height].
- state_saver: a state saver object with methods `state` and `save_state`.
- state_name: string, the name to use with the state_saver.
- dtype: dtype to initialize lstm state.
-
- Returns:
- lstm_cell: the lstm cell unit.
- init_state: initial state representations.
-      step: the step variable from the state saver, or None when no
-        state_saver is provided.
- """
- lstm_cell = lstm_cells.BottleneckConvLSTMCell(
- filter_size=(3, 3),
- output_size=output_size,
- num_units=max(self._min_depth, self._lstm_state_depth),
- activation=tf.nn.relu6,
- visualize_gates=False)
-
- if state_saver is None:
- init_state = lstm_cell.init_state(state_name, batch_size, dtype)
- step = None
- else:
- step = state_saver.state(state_name + '_step')
- c = state_saver.state(state_name + '_c')
- h = state_saver.state(state_name + '_h')
- init_state = (c, h)
- return lstm_cell, init_state, step
-
- def extract_features(self,
- preprocessed_inputs,
- state_saver=None,
- state_name='lstm_state',
- unroll_length=5,
- scope=None):
- """Extracts features from preprocessed inputs.
-
- The features include the base network features, lstm features and SSD
- features, organized in the following name scope:
-
- /MobilenetV1/...
- /LSTM/...
- /FeatureMaps/...
-
- Args:
- preprocessed_inputs: A [batch, height, width, channels] float tensor
- representing a batch of consecutive frames from video clips.
- state_saver: A state saver object with methods `state` and `save_state`.
- state_name: A python string for the name to use with the state_saver.
- unroll_length: The number of steps to unroll the lstm.
- scope: The scope for the base network of the feature extractor.
-
- Returns:
- A list of tensors where the ith tensor has shape [batch, height_i,
- width_i, depth_i]
- """
- preprocessed_inputs = shape_utils.check_min_image_dim(
- 33, preprocessed_inputs)
- with slim.arg_scope(
- mobilenet_v1.mobilenet_v1_arg_scope(is_training=self._is_training)):
- with (slim.arg_scope(self._conv_hyperparams_fn())
- if self._override_base_feature_extractor_hyperparams else
- context_manager.IdentityContextManager()):
- with slim.arg_scope([slim.batch_norm], fused=False):
- # Base network.
- with tf.variable_scope(
- scope, self._base_network_scope,
- reuse=self._reuse_weights) as scope:
- net, image_features = mobilenet_v1.mobilenet_v1_base(
- ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
- final_endpoint='Conv2d_13_pointwise',
- min_depth=self._min_depth,
- depth_multiplier=self._depth_multiplier,
- scope=scope)
-
- with slim.arg_scope(self._conv_hyperparams_fn()):
- with slim.arg_scope(
- [slim.batch_norm], fused=False, is_training=self._is_training):
- # ConvLSTM layers.
- batch_size = net.shape[0].value // unroll_length
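-          # The batch dimension stacks unroll_length consecutive frames per
-          # clip, so e.g. an input of 10 frames with unroll_length=5
-          # corresponds to 2 video clips.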
- with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
- lstm_cell, init_state, _ = self.create_lstm_cell(
- batch_size,
- (net.shape[1].value, net.shape[2].value),
- state_saver,
- state_name,
- dtype=preprocessed_inputs.dtype)
- net_seq = list(tf.split(net, unroll_length))
-
-          # Identities added for feeding state tensors in externally.
- c_ident = tf.identity(init_state[0], name='lstm_state_in_c')
- h_ident = tf.identity(init_state[1], name='lstm_state_in_h')
- init_state = (c_ident, h_ident)
-
- net_seq, states_out = rnn_decoder.rnn_decoder(
- net_seq, init_state, lstm_cell, scope=lstm_scope)
- batcher_ops = None
- self._states_out = states_out
- if state_saver is not None:
- self._step = state_saver.state('%s_step' % state_name)
- batcher_ops = [
- state_saver.save_state('%s_c' % state_name, states_out[-1][0]),
- state_saver.save_state('%s_h' % state_name, states_out[-1][1]),
- state_saver.save_state('%s_step' % state_name, self._step + 1)
- ]
- with tf_ops.control_dependencies(batcher_ops):
- image_features['Conv2d_13_pointwise_lstm'] = tf.concat(net_seq, 0)
-
- # Identities added for reading output states, to be reused externally.
- tf.identity(states_out[-1][0], name='lstm_state_out_c')
- tf.identity(states_out[-1][1], name='lstm_state_out_h')
-
- # SSD layers.
- with tf.variable_scope('FeatureMaps', reuse=self._reuse_weights):
- feature_maps = feature_map_generators.multi_resolution_feature_maps(
- feature_map_layout=self._feature_map_layout,
-              depth_multiplier=self._depth_multiplier,
- min_depth=self._min_depth,
- insert_1x1_conv=True,
- image_features=image_features)
-
- return list(feature_maps.values())
diff --git a/research/lstm_object_detection/models/lstm_ssd_mobilenet_v1_feature_extractor_test.py b/research/lstm_object_detection/models/lstm_ssd_mobilenet_v1_feature_extractor_test.py
deleted file mode 100644
index 56ad2745dae558acdb806c8f236d25754799cf49..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/models/lstm_ssd_mobilenet_v1_feature_extractor_test.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for models.lstm_ssd_mobilenet_v1_feature_extractor."""
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-from tensorflow.contrib import training as contrib_training
-
-from lstm_object_detection.models import lstm_ssd_mobilenet_v1_feature_extractor as feature_extractor
-from object_detection.models import ssd_feature_extractor_test
-
-
-class LstmSsdMobilenetV1FeatureExtractorTest(
- ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
-
- def _create_feature_extractor(self,
- depth_multiplier=1.0,
- pad_to_multiple=1,
- is_training=True,
- use_explicit_padding=False):
- """Constructs a new feature extractor.
-
- Args:
- depth_multiplier: A float depth multiplier for feature extractor.
- pad_to_multiple: The nearest multiple to zero pad the input height and
- width dimensions to.
- is_training: A boolean whether the network is in training mode.
- use_explicit_padding: A boolean whether to use explicit padding.
-
- Returns:
-      An LSTMSSDMobileNetV1FeatureExtractor object.
- """
- min_depth = 32
- extractor = (
- feature_extractor.LSTMSSDMobileNetV1FeatureExtractor(
- is_training,
- depth_multiplier,
- min_depth,
- pad_to_multiple,
- self.conv_hyperparams_fn,
- use_explicit_padding=use_explicit_padding))
- extractor.lstm_state_depth = int(256 * depth_multiplier)
- return extractor
-
- def test_feature_extractor_construct_with_expected_params(self):
- def conv_hyperparams_fn():
-      # Nest the two arg_scopes; combining them with `and` would enter only
-      # the second scope and silently drop the conv2d hyperparams.
-      with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm):
-        with slim.arg_scope([slim.batch_norm], decay=0.97, epsilon=1e-3) as sc:
-          return sc
-
- params = {
- 'is_training': True,
- 'depth_multiplier': .55,
- 'min_depth': 9,
- 'pad_to_multiple': 3,
- 'conv_hyperparams_fn': conv_hyperparams_fn,
- 'reuse_weights': False,
- 'use_explicit_padding': True,
- 'use_depthwise': False,
- 'override_base_feature_extractor_hyperparams': True}
-
- extractor = (
- feature_extractor.LSTMSSDMobileNetV1FeatureExtractor(**params))
-
- self.assertEqual(params['is_training'],
- extractor._is_training)
- self.assertEqual(params['depth_multiplier'],
- extractor._depth_multiplier)
- self.assertEqual(params['min_depth'],
- extractor._min_depth)
- self.assertEqual(params['pad_to_multiple'],
- extractor._pad_to_multiple)
- self.assertEqual(params['conv_hyperparams_fn'],
- extractor._conv_hyperparams_fn)
- self.assertEqual(params['reuse_weights'],
- extractor._reuse_weights)
- self.assertEqual(params['use_explicit_padding'],
- extractor._use_explicit_padding)
- self.assertEqual(params['use_depthwise'],
- extractor._use_depthwise)
-    self.assertEqual(params['override_base_feature_extractor_hyperparams'],
-                     extractor._override_base_feature_extractor_hyperparams)
-
- def test_extract_features_returns_correct_shapes_256(self):
- image_height = 256
- image_width = 256
- depth_multiplier = 1.0
- pad_to_multiple = 1
- batch_size = 5
-    expected_feature_map_shape = [(batch_size, 8, 8, 256),
-                                  (batch_size, 4, 4, 512),
-                                  (batch_size, 2, 2, 256),
-                                  (batch_size, 1, 1, 256)]
- self.check_extract_features_returns_correct_shape(
- batch_size,
- image_height,
- image_width,
- depth_multiplier,
- pad_to_multiple,
- expected_feature_map_shape,
- use_explicit_padding=False)
- self.check_extract_features_returns_correct_shape(
- batch_size,
- image_height,
- image_width,
- depth_multiplier,
- pad_to_multiple,
- expected_feature_map_shape,
- use_explicit_padding=True)
-
- def test_preprocess_returns_correct_value_range(self):
- test_image = np.random.rand(5, 128, 128, 3)
- extractor = self._create_feature_extractor()
- preprocessed_image = extractor.preprocess(test_image)
- self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))
-
- def test_variables_only_created_in_scope(self):
- scope_name = 'MobilenetV1'
- g = tf.Graph()
- with g.as_default():
- preprocessed_inputs = tf.placeholder(tf.float32, (5, 256, 256, 3))
- extractor = self._create_feature_extractor()
- extractor.extract_features(preprocessed_inputs)
- variables = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
- find_scope = False
- for variable in variables:
- if scope_name in variable.name:
- find_scope = True
- break
- self.assertTrue(find_scope)
-
- def test_lstm_non_zero_state(self):
- init_state = {
- 'lstm_state_c': tf.zeros([8, 8, 256]),
- 'lstm_state_h': tf.zeros([8, 8, 256]),
- 'lstm_state_step': tf.zeros([1])
- }
- seq = {'test': tf.random_uniform([3, 1, 1, 1])}
- stateful_reader = contrib_training.SequenceQueueingStateSaver(
- batch_size=1,
- num_unroll=1,
- input_length=2,
- input_key='',
- input_sequences=seq,
- input_context={},
- initial_states=init_state,
- capacity=1)
- extractor = self._create_feature_extractor()
- image = tf.random_uniform([5, 256, 256, 3])
- with tf.variable_scope('zero_state'):
- feature_map = extractor.extract_features(
- image, stateful_reader.next_batch)
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- sess.run([stateful_reader.prefetch_op])
- _ = sess.run([feature_map])
- # Update states with the next batch.
- state = sess.run(stateful_reader.next_batch.state('lstm_state_c'))
- # State should no longer be zero after update.
- self.assertTrue(state.any())
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/models/mobilenet_defs.py b/research/lstm_object_detection/models/mobilenet_defs.py
deleted file mode 100644
index 4f984240215b818c3e8c9b5481db3319b54ef8fd..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/models/mobilenet_defs.py
+++ /dev/null
@@ -1,142 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Definitions for modified MobileNet models used in LSTD."""
-
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-from nets import mobilenet_v1
-from nets.mobilenet import conv_blocks as mobilenet_convs
-from nets.mobilenet import mobilenet
-
-
-def mobilenet_v1_lite_def(depth_multiplier, low_res=False):
- """Conv definitions for a lite MobileNet v1 model.
-
- Args:
- depth_multiplier: float depth multiplier for MobileNet.
-    low_res: Whether the input is half resolution (for interleaved models).
-
- Returns:
- Array of convolutions.
-
- Raises:
- ValueError: On invalid channels with provided depth multiplier.
- """
- conv = mobilenet_v1.Conv
- sep_conv = mobilenet_v1.DepthSepConv
-
- def _find_target_depth(original, depth_multiplier):
- # Find the target depth such that:
- # int(target * depth_multiplier) == original
- pseudo_target = int(original / depth_multiplier)
- for target in range(pseudo_target - 1, pseudo_target + 2):
- if int(target * depth_multiplier) == original:
- return target
- raise ValueError('Cannot have %d channels with depth multiplier %0.2f' %
- (original, depth_multiplier))
-
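-  # Worked example (assumes the fixed final depth of 1024 below): with
-  # depth_multiplier=0.5, pseudo_target is 2048 and int(2048 * 0.5) == 1024,
-  # so 2048 is returned. With depth_multiplier=3.0, the candidates 340, 341
-  # and 342 give 1020, 1023 and 1026, so a ValueError is raised.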
- return [
- conv(kernel=[3, 3], stride=2, depth=32),
- sep_conv(kernel=[3, 3], stride=1, depth=64),
- sep_conv(kernel=[3, 3], stride=2, depth=128),
- sep_conv(kernel=[3, 3], stride=1, depth=128),
- sep_conv(kernel=[3, 3], stride=2, depth=256),
- sep_conv(kernel=[3, 3], stride=1, depth=256),
- sep_conv(kernel=[3, 3], stride=2, depth=512),
- sep_conv(kernel=[3, 3], stride=1, depth=512),
- sep_conv(kernel=[3, 3], stride=1, depth=512),
- sep_conv(kernel=[3, 3], stride=1, depth=512),
- sep_conv(kernel=[3, 3], stride=1, depth=512),
- sep_conv(kernel=[3, 3], stride=1, depth=512),
- sep_conv(kernel=[3, 3], stride=1 if low_res else 2, depth=1024),
- sep_conv(
- kernel=[3, 3],
- stride=1,
- depth=int(_find_target_depth(1024, depth_multiplier)))
- ]
-
-
-def mobilenet_v2_lite_def(reduced=False, is_quantized=False, low_res=False):
- """Conv definitions for a lite MobileNet v2 model.
-
- Args:
-    reduced: Determines the expansion factor for expanded conv. If True, a
-      factor of 3 is used; if False, a factor of 6 is used.
- is_quantized: Whether the model is trained in quantized mode.
- low_res: Whether the input to the model is of half resolution.
-
- Returns:
- Array of convolutions.
- """
- expanded_conv = mobilenet_convs.expanded_conv
- expand_input = mobilenet_convs.expand_input_by_factor
- op = mobilenet.op
- return dict(
- defaults={
- # Note: these parameters of batch norm affect the architecture
- # that's why they are here and not in training_scope.
- (slim.batch_norm,): {
- 'center': True,
- 'scale': True
- },
- (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
- 'normalizer_fn': slim.batch_norm,
- 'activation_fn': tf.nn.relu6
- },
- (expanded_conv,): {
- 'expansion_size': expand_input(6),
- 'split_expansion': 1,
- 'normalizer_fn': slim.batch_norm,
- 'residual': True
- },
- (slim.conv2d, slim.separable_conv2d): {
- 'padding': 'SAME'
- }
- },
- spec=[
- op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
- op(expanded_conv,
- expansion_size=expand_input(1, divisible_by=1),
- num_outputs=16),
- op(expanded_conv,
- expansion_size=(expand_input(3, divisible_by=1)
- if reduced else expand_input(6)),
- stride=2,
- num_outputs=24),
- op(expanded_conv,
- expansion_size=(expand_input(3, divisible_by=1)
- if reduced else expand_input(6)),
- stride=1,
- num_outputs=24),
- op(expanded_conv, stride=2, num_outputs=32),
- op(expanded_conv, stride=1, num_outputs=32),
- op(expanded_conv, stride=1, num_outputs=32),
- op(expanded_conv, stride=2, num_outputs=64),
- op(expanded_conv, stride=1, num_outputs=64),
- op(expanded_conv, stride=1, num_outputs=64),
- op(expanded_conv, stride=1, num_outputs=64),
- op(expanded_conv, stride=1, num_outputs=96),
- op(expanded_conv, stride=1, num_outputs=96),
- op(expanded_conv, stride=1, num_outputs=96),
- op(expanded_conv, stride=1 if low_res else 2, num_outputs=160),
- op(expanded_conv, stride=1, num_outputs=160),
- op(expanded_conv, stride=1, num_outputs=160),
- op(expanded_conv,
- stride=1,
- num_outputs=320,
- project_activation_fn=(tf.nn.relu6
- if is_quantized else tf.identity))
- ],
- )
diff --git a/research/lstm_object_detection/models/mobilenet_defs_test.py b/research/lstm_object_detection/models/mobilenet_defs_test.py
deleted file mode 100644
index f1b5bda504bb02ac89f55e3acd370862f513a3a3..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/models/mobilenet_defs_test.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for lstm_object_detection.models.mobilenet_defs."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow.compat.v1 as tf
-from lstm_object_detection.models import mobilenet_defs
-from nets import mobilenet_v1
-from nets.mobilenet import mobilenet_v2
-
-
-class MobilenetV1DefsTest(tf.test.TestCase):
-
- def test_mobilenet_v1_lite_def(self):
- net, _ = mobilenet_v1.mobilenet_v1_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- final_endpoint='Conv2d_13_pointwise',
- min_depth=8,
- depth_multiplier=1.0,
- conv_defs=mobilenet_defs.mobilenet_v1_lite_def(1.0),
- use_explicit_padding=True,
- scope='MobilenetV1')
- self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 1024])
-
- def test_mobilenet_v1_lite_def_depthmultiplier_half(self):
- net, _ = mobilenet_v1.mobilenet_v1_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- final_endpoint='Conv2d_13_pointwise',
- min_depth=8,
- depth_multiplier=0.5,
- conv_defs=mobilenet_defs.mobilenet_v1_lite_def(0.5),
- use_explicit_padding=True,
- scope='MobilenetV1')
- self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 1024])
-
- def test_mobilenet_v1_lite_def_depthmultiplier_2x(self):
- net, _ = mobilenet_v1.mobilenet_v1_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- final_endpoint='Conv2d_13_pointwise',
- min_depth=8,
- depth_multiplier=2.0,
- conv_defs=mobilenet_defs.mobilenet_v1_lite_def(2.0),
- use_explicit_padding=True,
- scope='MobilenetV1')
- self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 1024])
-
- def test_mobilenet_v1_lite_def_low_res(self):
- net, _ = mobilenet_v1.mobilenet_v1_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- final_endpoint='Conv2d_13_pointwise',
- min_depth=8,
- depth_multiplier=1.0,
- conv_defs=mobilenet_defs.mobilenet_v1_lite_def(1.0, low_res=True),
- use_explicit_padding=True,
- scope='MobilenetV1')
- self.assertEqual(net.get_shape().as_list(), [10, 20, 20, 1024])
-
-
-class MobilenetV2DefsTest(tf.test.TestCase):
-
- def test_mobilenet_v2_lite_def(self):
- net, features = mobilenet_v2.mobilenet_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- min_depth=8,
- depth_multiplier=1.0,
- conv_defs=mobilenet_defs.mobilenet_v2_lite_def(),
- use_explicit_padding=True,
- scope='MobilenetV2')
- self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 320])
- self._assert_contains_op('MobilenetV2/expanded_conv_16/project/Identity')
- self.assertEqual(
- features['layer_3/expansion_output'].get_shape().as_list(),
- [10, 160, 160, 96])
- self.assertEqual(
- features['layer_4/expansion_output'].get_shape().as_list(),
- [10, 80, 80, 144])
-
- def test_mobilenet_v2_lite_def_is_quantized(self):
- net, _ = mobilenet_v2.mobilenet_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- min_depth=8,
- depth_multiplier=1.0,
- conv_defs=mobilenet_defs.mobilenet_v2_lite_def(is_quantized=True),
- use_explicit_padding=True,
- scope='MobilenetV2')
- self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 320])
- self._assert_contains_op('MobilenetV2/expanded_conv_16/project/Relu6')
-
- def test_mobilenet_v2_lite_def_low_res(self):
- net, _ = mobilenet_v2.mobilenet_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- min_depth=8,
- depth_multiplier=1.0,
- conv_defs=mobilenet_defs.mobilenet_v2_lite_def(low_res=True),
- use_explicit_padding=True,
- scope='MobilenetV2')
- self.assertEqual(net.get_shape().as_list(), [10, 20, 20, 320])
-
- def test_mobilenet_v2_lite_def_reduced(self):
- net, features = mobilenet_v2.mobilenet_base(
- tf.placeholder(tf.float32, (10, 320, 320, 3)),
- min_depth=8,
- depth_multiplier=1.0,
- conv_defs=mobilenet_defs.mobilenet_v2_lite_def(reduced=True),
- use_explicit_padding=True,
- scope='MobilenetV2')
- self.assertEqual(net.get_shape().as_list(), [10, 10, 10, 320])
- self.assertEqual(
- features['layer_3/expansion_output'].get_shape().as_list(),
- [10, 160, 160, 48])
- self.assertEqual(
- features['layer_4/expansion_output'].get_shape().as_list(),
- [10, 80, 80, 72])
-
- def _assert_contains_op(self, op_name):
- op_names = [op.name for op in tf.get_default_graph().get_operations()]
- self.assertIn(op_name, op_names)
-
-
-if __name__ == '__main__':
- tf.test.main()
diff --git a/research/lstm_object_detection/protos/__init__.py b/research/lstm_object_detection/protos/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/research/lstm_object_detection/protos/input_reader_google.proto b/research/lstm_object_detection/protos/input_reader_google.proto
deleted file mode 100644
index 2c494a62e97321ee9206cebe28cd6601049f3293..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/protos/input_reader_google.proto
+++ /dev/null
@@ -1,32 +0,0 @@
-syntax = "proto2";
-
-package lstm_object_detection.protos;
-
-import "object_detection/protos/input_reader.proto";
-
-message GoogleInputReader {
- extend object_detection.protos.ExternalInputReader {
- optional GoogleInputReader google_input_reader = 444;
- }
-
- oneof input_reader {
- TFRecordVideoInputReader tf_record_video_input_reader = 1;
- }
-}
-
-message TFRecordVideoInputReader {
- // Path(s) to tfrecords of input data.
- repeated string input_path = 1;
-
- enum DataType {
- UNSPECIFIED = 0;
- TF_EXAMPLE = 1;
- TF_SEQUENCE_EXAMPLE = 2;
- }
- optional DataType data_type = 2 [default=TF_SEQUENCE_EXAMPLE];
-
-  // Length of the video sequence. All input video sequences must have the
-  // same length in frames, e.g. 5 frames.
- optional int32 video_length = 3;
-}
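-
-// A minimal text-proto sketch of this reader (the input path is
-// illustrative):
-//
-//   tf_record_video_input_reader {
-//     input_path: "/path/to/video_sequence_examples.tfrecord"
-//     data_type: TF_SEQUENCE_EXAMPLE
-//     video_length: 5
-//   }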
-
diff --git a/research/lstm_object_detection/protos/pipeline.proto b/research/lstm_object_detection/protos/pipeline.proto
deleted file mode 100644
index 10dd652554ad38e933acdedf8ce1479f15eed9d7..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/protos/pipeline.proto
+++ /dev/null
@@ -1,69 +0,0 @@
-syntax = "proto2";
-
-package lstm_object_detection.protos;
-
-import "object_detection/protos/pipeline.proto";
-import "lstm_object_detection/protos/quant_overrides.proto";
-
-extend object_detection.protos.TrainEvalPipelineConfig {
- optional LstmModel lstm_model = 205743444;
- optional QuantOverrides quant_overrides = 246059837;
-}
-
-// Message for extra fields needed for configuring LSTM model.
-message LstmModel {
- // Unroll length for training LSTMs.
- optional int32 train_unroll_length = 1;
-
- // Unroll length for evaluating LSTMs.
- optional int32 eval_unroll_length = 2;
-
- // Depth of the lstm feature map.
- optional int32 lstm_state_depth = 3 [default = 256];
-
- // Depth multipliers for multiple feature extractors. Used for interleaved
-  // or ensemble models.
- repeated float depth_multipliers = 4;
-
- // Specifies how models are interleaved when multiple feature extractors are
- // used during training. Must be in ['RANDOM', 'RANDOM_SKIP_SMALL'].
- optional string train_interleave_method = 5 [default = 'RANDOM'];
-
-  // Specifies how models are interleaved when multiple feature extractors are
-  // used during evaluation. Must be in ['RANDOM', 'RANDOM_SKIP', 'SKIPK'],
-  // where the K in 'SKIPK' is an integer, e.g. the default 'SKIP9'.
-  optional string eval_interleave_method = 6 [default = 'SKIP9'];
-
- // The stride of the lstm state.
- optional int32 lstm_state_stride = 7 [default = 32];
-
-  // Whether to flatten LSTM state and output. Note that this is typically
-  // intended only to be modified internally by export_tfmini_lstd_graph_lib
-  // to support flattened state for tfmini/tflite. Do not set this field in
-  // the pipeline config file unless necessary.
- optional bool flatten_state = 8 [default = false];
-
- // Whether to apply bottleneck layer before going into LSTM gates. This
- // allows multiple feature extractors to use separate bottleneck layers
- // instead of sharing the same one so that different base model output
- // feature dimensions are not forced to be the same.
- // For example:
- // Model 1 outputs feature map f_1 of depth d_1.
- // Model 2 outputs feature map f_2 of depth d_2.
- // Pre-bottlenecking allows lstm input to be either:
- // conv(concat([f_1, h])) or conv(concat([f_2, h])).
- optional bool pre_bottleneck = 9 [default = false];
-
- // Normalize LSTM state, default false.
- optional bool scale_state = 10 [default = false];
-
- // Clip LSTM state at [0, 6], default true.
- optional bool clip_state = 11 [default = true];
-
- // If the model is in quantized training. This field does NOT need to be set
- // manually. Instead, it will be overridden by configs in graph_rewriter.
- optional bool is_quantized = 12 [default = false];
-
- // Downsample input image when using the smaller network in interleaved
- // models, default false.
- optional bool low_res = 13 [default = false];
-}
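-
-// A minimal text-proto sketch of the fields (values are illustrative; in a
-// real pipeline config the message appears under the extension name
-// [lstm_object_detection.protos.lstm_model]):
-//
-//   train_unroll_length: 4
-//   eval_unroll_length: 4
-//   lstm_state_depth: 256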
diff --git a/research/lstm_object_detection/protos/quant_overrides.proto b/research/lstm_object_detection/protos/quant_overrides.proto
deleted file mode 100644
index 9dc0eaf86e5f507f87b87fe1571b4e3d82991df1..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/protos/quant_overrides.proto
+++ /dev/null
@@ -1,40 +0,0 @@
-syntax = "proto2";
-
-package lstm_object_detection.protos;
-
-// Message to override default quantization behavior.
-message QuantOverrides {
- repeated QuantConfig quant_configs = 1;
-}
-
-// Parameters to manually create fake quant ops outside of the generic
-// tensorflow/contrib/quantize/python/quantize.py script. This may be
-// used to override default behavior or quantize ops not already supported.
-message QuantConfig {
- // The name of the op to add a fake quant op to.
- required string op_name = 1;
-
- // The name of the fake quant op.
- required string quant_op_name = 2;
-
- // Whether the fake quant op uses fixed ranges. Otherwise, learned moving
- // average ranges are used.
- required bool fixed_range = 3 [default = false];
-
-  // The initial minimum value of the range.
- optional float min = 4 [default = -6];
-
- // The initial maximum value of the range.
- optional float max = 5 [default = 6];
-
- // Number of steps to delay before quantization takes effect during training.
- optional int32 delay = 6 [default = 500000];
-
- // Number of bits to use for quantizing weights.
- // Only 8 bit is supported for now.
- optional int32 weight_bits = 7 [default = 8];
-
- // Number of bits to use for quantizing activations.
- // Only 8 bit is supported for now.
- optional int32 activation_bits = 8 [default = 8];
-}
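-
-// A text-proto sketch of a single override (the op names are hypothetical):
-//
-//   quant_configs {
-//     op_name: "FeatureExtractor/some_conv/Relu6"
-//     quant_op_name: "FeatureExtractor/some_conv/Relu6_quant"
-//     fixed_range: true
-//     min: 0
-//     max: 6
-//   }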
diff --git a/research/lstm_object_detection/test_tflite_model.py b/research/lstm_object_detection/test_tflite_model.py
deleted file mode 100644
index a8b5e15e210ab6c191911d3c440cef33d936274c..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/test_tflite_model.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Test a tflite model using random input data."""
-
-from __future__ import print_function
-from absl import flags
-import numpy as np
-import tensorflow.compat.v1 as tf
-
-flags.DEFINE_string('model_path', None, 'Path to model.')
-FLAGS = flags.FLAGS
-
-
-def main(_):
-
- flags.mark_flag_as_required('model_path')
-
- # Load TFLite model and allocate tensors.
- interpreter = tf.lite.Interpreter(model_path=FLAGS.model_path)
- interpreter.allocate_tensors()
-
- # Get input and output tensors.
- input_details = interpreter.get_input_details()
- print('input_details:', input_details)
- output_details = interpreter.get_output_details()
- print('output_details:', output_details)
-
- # Test model on random input data.
- input_shape = input_details[0]['shape']
-  # Change the following line to feed in your own data.
- input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
- interpreter.set_tensor(input_details[0]['index'], input_data)
-
- interpreter.invoke()
- output_data = interpreter.get_tensor(output_details[0]['index'])
- print(output_data)
-
-
-if __name__ == '__main__':
- tf.app.run()
diff --git a/research/lstm_object_detection/tflite/BUILD b/research/lstm_object_detection/tflite/BUILD
deleted file mode 100644
index 66068925da4fde7eb99215d907d627e0ff1d3847..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/tflite/BUILD
+++ /dev/null
@@ -1,81 +0,0 @@
-package(
- default_visibility = ["//visibility:public"],
-)
-
-licenses(["notice"])
-
-cc_library(
- name = "mobile_ssd_client",
- srcs = ["mobile_ssd_client.cc"],
- hdrs = ["mobile_ssd_client.h"],
- deps = [
- "//protos:box_encodings_cc_proto",
- "//protos:detections_cc_proto",
- "//protos:labelmap_cc_proto",
- "//protos:mobile_ssd_client_options_cc_proto",
- "//utils:conversion_utils",
- "//utils:ssd_utils",
- "@com_google_absl//absl/base:core_headers",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/types:span",
- "@com_google_glog//:glog",
- "@gemmlowp",
- ],
-)
-
-config_setting(
- name = "enable_edgetpu",
- define_values = {"enable_edgetpu": "true"},
- visibility = ["//visibility:public"],
-)
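-
-# The select() branches in the libraries below key off this setting; a
-# hypothetical build enabling the EdgeTPU paths would look like:
-#   bazel build --define enable_edgetpu=true :mobile_lstd_tflite_client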
-
-cc_library(
- name = "mobile_ssd_tflite_client",
- srcs = ["mobile_ssd_tflite_client.cc"],
- hdrs = ["mobile_ssd_tflite_client.h"],
- defines = select({
- "//conditions:default": [],
- "enable_edgetpu": ["ENABLE_EDGETPU"],
- }),
- deps = [
- ":mobile_ssd_client",
- "@com_google_glog//:glog",
- "@com_google_absl//absl/memory",
- "@org_tensorflow//tensorflow/lite:arena_planner",
- "@org_tensorflow//tensorflow/lite:framework",
- "@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate",
- "@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
- "//protos:anchor_generation_options_cc_proto",
- "//utils:file_utils",
- "//utils:ssd_utils",
- ] + select({
- "//conditions:default": [],
- "enable_edgetpu": [
- "@libedgetpu//libedgetpu:header",
- ],
- }),
- alwayslink = 1,
-)
-
-cc_library(
- name = "mobile_lstd_tflite_client",
- srcs = ["mobile_lstd_tflite_client.cc"],
- hdrs = ["mobile_lstd_tflite_client.h"],
- defines = select({
- "//conditions:default": [],
- "enable_edgetpu": ["ENABLE_EDGETPU"],
- }),
- deps = [
- ":mobile_ssd_client",
- ":mobile_ssd_tflite_client",
- "@com_google_glog//:glog",
- "@com_google_absl//absl/base:core_headers",
- "@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
- ] + select({
- "//conditions:default": [],
- "enable_edgetpu": [
- "@libedgetpu//libedgetpu:header",
- ],
- }),
- alwayslink = 1,
-)
diff --git a/research/lstm_object_detection/tflite/WORKSPACE b/research/lstm_object_detection/tflite/WORKSPACE
deleted file mode 100644
index 3bce3814f365ec2bcc1122d7dfc8a5ba5f7d3dcb..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/tflite/WORKSPACE
+++ /dev/null
@@ -1,133 +0,0 @@
-workspace(name = "lstm_object_detection")
-
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
-load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
-
-http_archive(
- name = "bazel_skylib",
- sha256 = "bbccf674aa441c266df9894182d80de104cabd19be98be002f6d478aaa31574d",
- strip_prefix = "bazel-skylib-2169ae1c374aab4a09aa90e65efe1a3aad4e279b",
- urls = ["https://github.com/bazelbuild/bazel-skylib/archive/2169ae1c374aab4a09aa90e65efe1a3aad4e279b.tar.gz"],
-)
-load("@bazel_skylib//lib:versions.bzl", "versions")
-versions.check(minimum_bazel_version = "0.23.0")
-
-# ABSL cpp library.
-http_archive(
- name = "com_google_absl",
- urls = [
- "https://github.com/abseil/abseil-cpp/archive/a02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a.tar.gz",
- ],
- sha256 = "d437920d1434c766d22e85773b899c77c672b8b4865d5dc2cd61a29fdff3cf03",
- strip_prefix = "abseil-cpp-a02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a",
-)
-
-http_archive(
- name = "rules_cc",
- strip_prefix = "rules_cc-master",
- urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
-)
-
-# GoogleTest/GoogleMock framework. Used by most unit-tests.
-http_archive(
- name = "com_google_googletest",
- urls = ["https://github.com/google/googletest/archive/master.zip"],
- strip_prefix = "googletest-master",
-)
-
-# gflags needed by glog
-http_archive(
- name = "com_github_gflags_gflags",
- sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe",
- strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a",
- urls = [
- "https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
- "https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
- ],
-)
-
-# glog
-http_archive(
- name = "com_google_glog",
- sha256 = "f28359aeba12f30d73d9e4711ef356dc842886968112162bc73002645139c39c",
- strip_prefix = "glog-0.4.0",
- urls = ["https://github.com/google/glog/archive/v0.4.0.tar.gz"],
-)
-
-http_archive(
- name = "zlib",
- build_file = "@com_google_protobuf//:third_party/zlib.BUILD",
- sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1",
- strip_prefix = "zlib-1.2.11",
- urls = ["https://zlib.net/zlib-1.2.11.tar.gz"],
-)
-
-http_archive(
- name = "gemmlowp",
- sha256 = "6678b484d929f2d0d3229d8ac4e3b815a950c86bb9f17851471d143f6d4f7834",
- strip_prefix = "gemmlowp-12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3",
- urls = [
- "http://mirror.tensorflow.org/github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip",
- "https://github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip",
- ],
-)
-
-#-----------------------------------------------------------------------------
-# proto
-#-----------------------------------------------------------------------------
-# proto_library, cc_proto_library and java_proto_library rules implicitly depend
-# on @com_google_protobuf//:proto, @com_google_protobuf//:cc_toolchain and
-# @com_google_protobuf//:java_toolchain, respectively.
-# This statement defines the @com_google_protobuf repo.
-http_archive(
- name = "com_google_protobuf",
- strip_prefix = "protobuf-3.8.0",
- urls = ["https://github.com/google/protobuf/archive/v3.8.0.zip"],
- sha256 = "1e622ce4b84b88b6d2cdf1db38d1a634fe2392d74f0b7b74ff98f3a51838ee53",
-)
-
-# java_lite_proto_library rules implicitly depend on
-# @com_google_protobuf_javalite//:javalite_toolchain, which is the JavaLite proto
-# runtime (base classes and common utilities).
-http_archive(
- name = "com_google_protobuf_javalite",
- strip_prefix = "protobuf-384989534b2246d413dbcd750744faab2607b516",
- urls = ["https://github.com/google/protobuf/archive/384989534b2246d413dbcd750744faab2607b516.zip"],
- sha256 = "79d102c61e2a479a0b7e5fc167bcfaa4832a0c6aad4a75fa7da0480564931bcc",
-)
-
-#
-# http_archive(
-# name = "com_google_protobuf",
-# strip_prefix = "protobuf-master",
-# urls = ["https://github.com/protocolbuffers/protobuf/archive/master.zip"],
-# )
-
-# Needed by TensorFlow
-http_archive(
- name = "io_bazel_rules_closure",
- sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9",
- strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df",
- urls = [
- "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz",
- "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04
- ],
-)
-
-
-# TensorFlow r1.14-rc0
-http_archive(
- name = "org_tensorflow",
- strip_prefix = "tensorflow-1.14.0-rc0",
- sha256 = "76404a6157a45e8d7a07e4f5690275256260130145924c2a7c73f6eda2a3de10",
- urls = ["https://github.com/tensorflow/tensorflow/archive/v1.14.0-rc0.zip"],
-)
-
-load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
-tf_workspace(tf_repo_name = "org_tensorflow")
-
-git_repository(
- name = "libedgetpu",
- remote = "sso://coral.googlesource.com/edgetpu-native",
- commit = "83e47d1bcf22686fae5150ebb99281f6134ef062",
-)
diff --git a/research/lstm_object_detection/tflite/mobile_lstd_tflite_client.cc b/research/lstm_object_detection/tflite/mobile_lstd_tflite_client.cc
deleted file mode 100644
index 05a7bbac1b5c8a58c4f10476a2be4fb3a097a463..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/tflite/mobile_lstd_tflite_client.cc
+++ /dev/null
@@ -1,261 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "mobile_lstd_tflite_client.h"
-
-#include <algorithm>
-#include <cstring>
-
-#include <glog/logging.h>
-#include "absl/memory/memory.h"
-
-namespace lstm_object_detection {
-namespace tflite {
-
-std::unique_ptr<MobileLSTDTfLiteClient> MobileLSTDTfLiteClient::Create() {
-  auto client = absl::make_unique<MobileLSTDTfLiteClient>();
- if (!client->InitializeClient(CreateDefaultOptions())) {
- LOG(ERROR) << "Failed to initialize client";
- return nullptr;
- }
- return client;
-}
-
-protos::ClientOptions MobileLSTDTfLiteClient::CreateDefaultOptions() {
- const int kMaxDetections = 100;
- const int kClassesPerDetection = 1;
- const double kScoreThreshold = -2.0;
- const double kIouThreshold = 0.5;
-
- protos::ClientOptions options;
- options.set_max_detections(kMaxDetections);
- options.set_max_categories(kClassesPerDetection);
- options.set_score_threshold(kScoreThreshold);
- options.set_iou_threshold(kIouThreshold);
- options.set_agnostic_mode(false);
- options.set_quantize(false);
- options.set_num_keypoints(0);
-
- return options;
-}
-
-std::unique_ptr<MobileLSTDTfLiteClient> MobileLSTDTfLiteClient::Create(
-    const protos::ClientOptions& options) {
-  auto client = absl::make_unique<MobileLSTDTfLiteClient>();
- if (!client->InitializeClient(options)) {
- LOG(ERROR) << "Failed to initialize client";
- return nullptr;
- }
- return client;
-}
-
-bool MobileLSTDTfLiteClient::InitializeInterpreter(
- const protos::ClientOptions& options) {
- if (options.prefer_nnapi_delegate()) {
- LOG(ERROR) << "NNAPI not supported.";
- return false;
- } else {
- interpreter_->UseNNAPI(false);
- }
-
-#ifdef ENABLE_EDGETPU
- interpreter_->SetExternalContext(kTfLiteEdgeTpuContext,
- edge_tpu_context_.get());
-#endif
-
- // Inputs are: normalized_input_image_tensor, raw_inputs/init_lstm_c,
- // raw_inputs/init_lstm_h
- if (interpreter_->inputs().size() != 3) {
- LOG(ERROR) << "Invalid number of interpreter inputs: " <<
- interpreter_->inputs().size();
- return false;
- }
-
-  const std::vector<int> input_tensor_indices = interpreter_->inputs();
- const TfLiteTensor& input_lstm_c =
- *interpreter_->tensor(input_tensor_indices[1]);
- if (input_lstm_c.dims->size != 4) {
- LOG(ERROR) << "Invalid input lstm_c dimensions: " <<
- input_lstm_c.dims->size;
- return false;
- }
- if (input_lstm_c.dims->data[0] != 1) {
- LOG(ERROR) << "Invalid input lstm_c batch size: " <<
- input_lstm_c.dims->data[0];
- return false;
- }
- lstm_state_width_ = input_lstm_c.dims->data[1];
- lstm_state_height_ = input_lstm_c.dims->data[2];
- lstm_state_depth_ = input_lstm_c.dims->data[3];
- lstm_state_size_ = lstm_state_width_ * lstm_state_height_ * lstm_state_depth_;
-
- const TfLiteTensor& input_lstm_h =
- *interpreter_->tensor(input_tensor_indices[2]);
- if (!ValidateStateTensor(input_lstm_h, "input lstm_h")) {
- return false;
- }
-
- // Outputs are:
- // TFLite_Detection_PostProcess,
- // TFLite_Detection_PostProcess:1,
- // TFLite_Detection_PostProcess:2,
- // TFLite_Detection_PostProcess:3,
- // raw_outputs/lstm_c, raw_outputs/lstm_h
- if (interpreter_->outputs().size() != 6) {
- LOG(ERROR) << "Invalid number of interpreter outputs: " <<
- interpreter_->outputs().size();
- return false;
- }
-
-  const std::vector<int> output_tensor_indices = interpreter_->outputs();
- const TfLiteTensor& output_lstm_c =
- *interpreter_->tensor(output_tensor_indices[4]);
- if (!ValidateStateTensor(output_lstm_c, "output lstm_c")) {
- return false;
- }
- const TfLiteTensor& output_lstm_h =
- *interpreter_->tensor(output_tensor_indices[5]);
- if (!ValidateStateTensor(output_lstm_h, "output lstm_h")) {
- return false;
- }
-
- // Initialize state with all zeroes.
- lstm_c_data_.resize(lstm_state_size_);
- lstm_h_data_.resize(lstm_state_size_);
- lstm_c_data_uint8_.resize(lstm_state_size_);
- lstm_h_data_uint8_.resize(lstm_state_size_);
-
- if (interpreter_->AllocateTensors() != kTfLiteOk) {
- LOG(ERROR) << "Failed to allocate tensors";
- return false;
- }
-
- return true;
-}
-
-bool MobileLSTDTfLiteClient::ValidateStateTensor(const TfLiteTensor& tensor,
- const std::string& name) {
- if (tensor.dims->size != 4) {
- LOG(ERROR) << "Invalid " << name << " dimensions: " << tensor.dims->size;
- return false;
- }
- if (tensor.dims->data[0] != 1) {
- LOG(ERROR) << "Invalid " << name << " batch size: " << tensor.dims->data[0];
- return false;
- }
- if (tensor.dims->data[1] != lstm_state_width_ ||
- tensor.dims->data[2] != lstm_state_height_ ||
- tensor.dims->data[3] != lstm_state_depth_) {
- LOG(ERROR) << "Invalid " << name << " dimensions: [" <<
- tensor.dims->data[0] << ", " << tensor.dims->data[1] << ", " <<
- tensor.dims->data[2] << ", " << tensor.dims->data[3] << "]";
- return false;
- }
- return true;
-}
-
-bool MobileLSTDTfLiteClient::ComputeOutputLayerCount() {
- // Outputs are: raw_outputs/box_encodings, raw_outputs/class_predictions,
- // raw_outputs/lstm_c, raw_outputs/lstm_h
- CHECK_EQ(interpreter_->outputs().size(), 4);
- num_output_layers_ = 1;
- return true;
-}
-
-bool MobileLSTDTfLiteClient::FloatInference(const uint8_t* input_data) {
- // Inputs are: normalized_input_image_tensor, raw_inputs/init_lstm_c,
- // raw_inputs/init_lstm_h
- CHECK(input_data) << "Input data cannot be null.";
-  float* input = interpreter_->typed_input_tensor<float>(0);
- CHECK(input) << "Input tensor cannot be null.";
- // Normalize the uint8 input image with mean_value_, std_value_.
- NormalizeInputImage(input_data, input);
-
- // Copy input LSTM state into TFLite's input tensors.
-  float* lstm_c_input = interpreter_->typed_input_tensor<float>(1);
- CHECK(lstm_c_input) << "Input lstm_c tensor cannot be null.";
- std::copy(lstm_c_data_.begin(), lstm_c_data_.end(), lstm_c_input);
-
-  float* lstm_h_input = interpreter_->typed_input_tensor<float>(2);
- CHECK(lstm_h_input) << "Input lstm_h tensor cannot be null.";
- std::copy(lstm_h_data_.begin(), lstm_h_data_.end(), lstm_h_input);
-
- // Run inference on inputs.
- CHECK_EQ(interpreter_->Invoke(), kTfLiteOk) << "Invoking interpreter failed.";
-
- // Copy LSTM state out of TFLite's output tensors.
- // Outputs are: raw_outputs/box_encodings, raw_outputs/class_predictions,
- // raw_outputs/lstm_c, raw_outputs/lstm_h
-  float* lstm_c_output = interpreter_->typed_output_tensor<float>(2);
- CHECK(lstm_c_output) << "Output lstm_c tensor cannot be null.";
- std::copy(lstm_c_output, lstm_c_output + lstm_state_size_,
- lstm_c_data_.begin());
-
-  float* lstm_h_output = interpreter_->typed_output_tensor<float>(3);
- CHECK(lstm_h_output) << "Output lstm_h tensor cannot be null.";
- std::copy(lstm_h_output, lstm_h_output + lstm_state_size_,
- lstm_h_data_.begin());
- return true;
-}
-
-bool MobileLSTDTfLiteClient::QuantizedInference(const uint8_t* input_data) {
- // Inputs are: normalized_input_image_tensor, raw_inputs/init_lstm_c,
- // raw_inputs/init_lstm_h
- CHECK(input_data) << "Input data cannot be null.";
-  uint8_t* input = interpreter_->typed_input_tensor<uint8_t>(0);
- CHECK(input) << "Input tensor cannot be null.";
- memcpy(input, input_data, input_size_);
-
- // Copy input LSTM state into TFLite's input tensors.
-  uint8_t* lstm_c_input = interpreter_->typed_input_tensor<uint8_t>(1);
- CHECK(lstm_c_input) << "Input lstm_c tensor cannot be null.";
- std::copy(lstm_c_data_uint8_.begin(), lstm_c_data_uint8_.end(), lstm_c_input);
-
-  uint8_t* lstm_h_input = interpreter_->typed_input_tensor<uint8_t>(2);
- CHECK(lstm_h_input) << "Input lstm_h tensor cannot be null.";
- std::copy(lstm_h_data_uint8_.begin(), lstm_h_data_uint8_.end(), lstm_h_input);
-
- // Run inference on inputs.
- CHECK_EQ(interpreter_->Invoke(), kTfLiteOk) << "Invoking interpreter failed.";
-
- // Copy LSTM state out of TFLite's output tensors.
- // Outputs are:
- // TFLite_Detection_PostProcess,
- // TFLite_Detection_PostProcess:1,
- // TFLite_Detection_PostProcess:2,
- // TFLite_Detection_PostProcess:3,
- // raw_outputs/lstm_c, raw_outputs/lstm_h
-  uint8_t* lstm_c_output = interpreter_->typed_output_tensor<uint8_t>(4);
- CHECK(lstm_c_output) << "Output lstm_c tensor cannot be null.";
- std::copy(lstm_c_output, lstm_c_output + lstm_state_size_,
- lstm_c_data_uint8_.begin());
-
-  uint8_t* lstm_h_output = interpreter_->typed_output_tensor<uint8_t>(5);
- CHECK(lstm_h_output) << "Output lstm_h tensor cannot be null.";
- std::copy(lstm_h_output, lstm_h_output + lstm_state_size_,
- lstm_h_data_uint8_.begin());
- return true;
-}
-
-bool MobileLSTDTfLiteClient::Inference(const uint8_t* input_data) {
- if (input_data == nullptr) {
- LOG(ERROR) << "input_data cannot be null for inference.";
- return false;
- }
-  if (IsQuantizedModel()) {
-    return QuantizedInference(input_data);
-  }
-  return FloatInference(input_data);
-}
-
-} // namespace tflite
-} // namespace lstm_object_detection
diff --git a/research/lstm_object_detection/tflite/mobile_lstd_tflite_client.h b/research/lstm_object_detection/tflite/mobile_lstd_tflite_client.h
deleted file mode 100644
index e4f16bc945a6725025e285885967637629d0a5fc..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/tflite/mobile_lstd_tflite_client.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_MOBILE_LSTD_TFLITE_CLIENT_H_
-#define TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_MOBILE_LSTD_TFLITE_CLIENT_H_
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "mobile_ssd_client.h"
-#include "mobile_ssd_tflite_client.h"
-
-namespace lstm_object_detection {
-namespace tflite {
-
-// Client for LSTD MobileNet TfLite model.
-class MobileLSTDTfLiteClient : public MobileSSDTfLiteClient {
- public:
- MobileLSTDTfLiteClient() = default;
- // Create with default options.
-  static std::unique_ptr<MobileLSTDTfLiteClient> Create();
-  static std::unique_ptr<MobileLSTDTfLiteClient> Create(
- const protos::ClientOptions& options);
- ~MobileLSTDTfLiteClient() override = default;
- static protos::ClientOptions CreateDefaultOptions();
-
- protected:
- bool InitializeInterpreter(const protos::ClientOptions& options) override;
- bool ComputeOutputLayerCount() override;
- bool Inference(const uint8_t* input_data) override;
-
- private:
- // MobileLSTDTfLiteClient is neither copyable nor movable.
- MobileLSTDTfLiteClient(const MobileLSTDTfLiteClient&) = delete;
- MobileLSTDTfLiteClient& operator=(const MobileLSTDTfLiteClient&) = delete;
-
- bool ValidateStateTensor(const TfLiteTensor& tensor, const std::string& name);
-
- // Helper functions used by Inference functions.
- bool FloatInference(const uint8_t* input_data);
- bool QuantizedInference(const uint8_t* input_data);
-
- // LSTM model parameters.
- int lstm_state_width_ = 0;
- int lstm_state_height_ = 0;
- int lstm_state_depth_ = 0;
- int lstm_state_size_ = 0;
-
- // LSTM state stored between float inference runs.
-  std::vector<float> lstm_c_data_;
-  std::vector<float> lstm_h_data_;
-
- // LSTM state stored between uint8 inference runs.
-  std::vector<uint8_t> lstm_c_data_uint8_;
-  std::vector<uint8_t> lstm_h_data_uint8_;
-};
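-
-// Usage sketch: `auto client = MobileLSTDTfLiteClient::Create();` logs an
-// error and returns nullptr if initialization fails, so callers should
-// null-check the result before use.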
-
-} // namespace tflite
-} // namespace lstm_object_detection
-
-#endif // TENSORFLOW_MODELS_LSTM_OBJECT_DETECTION_TFLITE_MOBILE_LSTD_TFLITE_CLIENT_H_
diff --git a/research/lstm_object_detection/tflite/mobile_ssd_client.cc b/research/lstm_object_detection/tflite/mobile_ssd_client.cc
deleted file mode 100644
index 27bf70109e46d2b9612480bb192f01aa3c9bfde1..0000000000000000000000000000000000000000
--- a/research/lstm_object_detection/tflite/mobile_ssd_client.cc
+++ /dev/null
@@ -1,209 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "mobile_ssd_client.h"
-
-#include
-
-#include