Merge branch 'master' of github.com:tensorflow/models

f282f6ef · Alexander Gorban · 58a5da7b · a2970b03 · f282f6ef · f282f6ef
Commit f282f6ef authored Jul 05, 2017 by Alexander Gorban
20 changed files
--- a/im2txt/README.md
+++ b/im2txt/README.md
@@ -145,7 +145,8 @@ available space for storing the downloaded and processed data.
 MSCOCO_DIR="${HOME}/im2txt/data/mscoco"

 # Build the preprocessing script.
-bazel build im2txt/download_and_preprocess_mscoco
+cd tensorflow-models/im2txt
+bazel build //im2txt:download_and_preprocess_mscoco

 # Run the preprocessing script.
 bazel-bin/im2txt/download_and_preprocess_mscoco "${MSCOCO_DIR}"
@@ -211,7 +212,8 @@ INCEPTION_CHECKPOINT="${HOME}/im2txt/data/inception_v3.ckpt"
 MODEL_DIR="${HOME}/im2txt/model"

 # Build the model.
-bazel build -c opt im2txt/...
+cd tensorflow-models/im2txt
+bazel build -c opt //im2txt/...

 # Run the training script.
 bazel-bin/im2txt/train \
@@ -304,7 +306,8 @@ VOCAB_FILE="${HOME}/im2txt/data/mscoco/word_counts.txt"
 IMAGE_FILE="${HOME}/im2txt/data/mscoco/raw-data/val2014/COCO_val2014_000000224477.jpg"

 # Build the inference binary.
-bazel build -c opt im2txt/run_inference
+cd tensorflow-models/im2txt
+bazel build -c opt //im2txt:run_inference

 # Ignore GPU devices (only necessary if your GPU is currently memory
 # constrained, for example, by running the training script).

--- a/inception/README.md
+++ b/inception/README.md
@@ -86,7 +86,8 @@ you will not need to interact with the script again.
 DATA_DIR=$HOME/imagenet-data

 # build the preprocessing script.
-bazel build inception/download_and_preprocess_imagenet
+cd tensorflow-models/inception
+bazel build //inception:download_and_preprocess_imagenet

 # run it
 bazel-bin/inception/download_and_preprocess_imagenet "${DATA_DIR}"
@@ -153,7 +154,8 @@ To train this model, you simply need to specify the following:
 ```shell
 # Build the model. Note that we need to make sure the TensorFlow is ready to
 # use before this as this command will not build TensorFlow.
-bazel build inception/imagenet_train
+cd tensorflow-models/inception
+bazel build //inception:imagenet_train

 # run it
 bazel-bin/inception/imagenet_train --num_gpus=1 --batch_size=32 --train_dir=/tmp/imagenet_train --data_dir=/tmp/imagenet_data
@@ -189,7 +191,8 @@ GPU cards.
 ```shell
 # Build the model. Note that we need to make sure the TensorFlow is ready to
 # use before this as this command will not build TensorFlow.
-bazel build inception/imagenet_train
+cd tensorflow-models/inception
+bazel build //inception:imagenet_train

 # run it
 bazel-bin/inception/imagenet_train --num_gpus=2 --batch_size=64 --train_dir=/tmp/imagenet_train
@@ -288,7 +291,8 @@ running. Several things to note here:
 ```shell
 # Build the model. Note that we need to make sure the TensorFlow is ready to
 # use before this as this command will not build TensorFlow.
-bazel build inception/imagenet_distributed_train
+cd tensorflow-models/inception
+bazel build //inception:imagenet_distributed_train

 # To start worker 0, go to the worker0 host and run the following (Note that
 # task_id should be in the range [0, num_worker_tasks):
@@ -395,7 +399,8 @@ Briefly, one can evaluate the model by running:
 ```shell
 # Build the model. Note that we need to make sure the TensorFlow is ready to
 # use before this as this command will not build TensorFlow.
-bazel build inception/imagenet_eval
+cd tensorflow-models/inception
+bazel build //inception:imagenet_eval

 # run it
 bazel-bin/inception/imagenet_eval --checkpoint_dir=/tmp/imagenet_train --eval_dir=/tmp/imagenet_eval
@@ -450,7 +455,8 @@ but feel free to edit accordingly.
 FLOWERS_DATA_DIR=/tmp/flowers-data/

 # build the preprocessing script.
-bazel build inception/download_and_preprocess_flowers
+cd tensorflow-models/inception
+bazel build //inception:download_and_preprocess_flowers

 # run it
 bazel-bin/inception/download_and_preprocess_flowers "${FLOWERS_DATA_DIR}"
@@ -530,7 +536,8 @@ the flowers data set with the following command.
 ```shell
 # Build the model. Note that we need to make sure the TensorFlow is ready to
 # use before this as this command will not build TensorFlow.
-bazel build inception/flowers_train
+cd tensorflow-models/inception
+bazel build //inception:flowers_train

 # Path to the downloaded Inception-v3 model.
 MODEL_PATH="${INCEPTION_MODEL_DIR}/inception-v3/model.ckpt-157585"
@@ -566,7 +573,8 @@ fine-tuned model, you will need to run `flowers_eval`:
 ```shell
 # Build the model. Note that we need to make sure the TensorFlow is ready to
 # use before this as this command will not build TensorFlow.
-bazel build inception/flowers_eval
+cd tensorflow-models/inception
+bazel build //inception:flowers_eval

 # Directory where we saved the fine-tuned checkpoint and events files.
 TRAIN_DIR=/tmp/flowers_train/
@@ -654,7 +662,8 @@ To run `build_image_data.py`, you can run the following command line:
 OUTPUT_DIRECTORY=$HOME/my-custom-data/

 # build the preprocessing script.
-bazel build inception/build_image_data
+cd tensorflow-models/inception
+bazel build //inception:build_image_data

 # convert the data.
 bazel-bin/inception/build_image_data \

--- a/inception/inception/data/download_imagenet.sh
+++ b/inception/inception/data/download_imagenet.sh
@@ -40,7 +40,6 @@ fi

 OUTDIR="${1:-./imagenet-data}"
 SYNSETS_FILE="${2:-./synsets.txt}"
-SYNSETS_FILE="${PWD}/${SYNSETS_FILE}"

 echo "Saving downloaded files to $OUTDIR"
 mkdir -p "${OUTDIR}"

--- a/inception/inception/data/preprocess_imagenet_validation_data.py
+++ b/inception/inception/data/preprocess_imagenet_validation_data.py
@@ -76,7 +76,7 @@ if __name__ == '__main__':
    basename = 'ILSVRC2012_val_000%.5d.JPEG' % (i + 1)
    original_filename = os.path.join(data_dir, basename)
    if not os.path.exists(original_filename):
-      print('Failed to find: ' % original_filename)
+      print('Failed to find: %s' % original_filename)
      sys.exit(-1)
    new_filename = os.path.join(data_dir, labels[i], basename)
    os.rename(original_filename, new_filename)
--- a/inception/inception/data/process_bounding_boxes.py
+++ b/inception/inception/data/process_bounding_boxes.py
--- a/inception/inception/imagenet_eval.py
+++ b/inception/inception/imagenet_eval.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""A binary to evaluate Inception on the flowers data set.
+"""A binary to evaluate Inception on the ImageNet data set.

 Note that using the supplied pre-trained inception checkpoint, the eval should
 achieve:

--- a/inception/inception/slim/ops_test.py
+++ b/inception/inception/slim/ops_test.py
@@ -21,8 +21,6 @@ from __future__ import print_function
 import numpy as np
 import tensorflow as tf

-from tensorflow.python.ops import control_flow_ops
-
 from inception.slim import ops
 from inception.slim import scopes
 from inception.slim import variables
@@ -420,7 +418,7 @@ class DropoutTest(tf.test.TestCase):
    with self.test_session():
      images = tf.random_uniform((5, height, width, 3), seed=1)
      output = ops.dropout(images)
-      self.assertEquals(output.op.name, 'Dropout/dropout/mul_1')
+      self.assertEquals(output.op.name, 'Dropout/dropout/mul')
      output.get_shape().assert_is_compatible_with(images.get_shape())

  def testCreateDropoutNoTraining(self):
@@ -601,8 +599,7 @@ class BatchNormTest(tf.test.TestCase):
      output = ops.batch_norm(images, decay=0.1)
      update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
      with tf.control_dependencies(update_ops):
-        barrier = tf.no_op(name='gradient_barrier')
-        output = control_flow_ops.with_dependencies([barrier], output)
+        output = tf.identity(output)
      # Initialize all variables
      sess.run(tf.global_variables_initializer())
      moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
@@ -631,8 +628,7 @@ class BatchNormTest(tf.test.TestCase):
      output = ops.batch_norm(images, decay=0.1, is_training=False)
      update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
      with tf.control_dependencies(update_ops):
-        barrier = tf.no_op(name='gradient_barrier')
-        output = control_flow_ops.with_dependencies([barrier], output)
+        output = tf.identity(output)
      # Initialize all variables
      sess.run(tf.global_variables_initializer())
      moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]
@@ -665,8 +661,7 @@ class BatchNormTest(tf.test.TestCase):
      output = ops.batch_norm(images, decay=0.1, is_training=False)
      update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION)
      with tf.control_dependencies(update_ops):
-        barrier = tf.no_op(name='gradient_barrier')
-        output = control_flow_ops.with_dependencies([barrier], output)
+        output = tf.identity(output)
      # Initialize all variables
      sess.run(tf.global_variables_initializer())
      moving_mean = variables.get_variables('BatchNorm/moving_mean')[0]

--- a/lfads/README.md
+++ b/lfads/README.md
+# LFADS - Latent Factor Analysis via Dynamical Systems
+
+This code implements the model from the paper "[LFADS - Latent Factor Analysis via Dynamical Systems](http://biorxiv.org/content/early/2017/06/20/152884)". It is a sequential variational auto-encoder designed specifically for investigating neuroscience data, but can be applied widely to any time series data. In an unsupervised setting, LFADS is able to decompose time series data into various factors, such as an initial condition, a generative dynamical system, control inputs to that generator, and a low dimensional description of the observed data, called the factors. Additionally, the observation model is a loss on a probability distribution, so when LFADS processes a dataset, a denoised version of the dataset is also created. For example, if the dataset is raw spike counts, then with the negative log-likelihood loss under a Poisson distribution, the denoised data would be the inferred Poisson rates.
+
+
+## Prerequisites
+
+The code is written in Python 2.7.6. You will also need:
+
+* **TensorFlow** version 1.1 ([install](http://tflearn.org/installation/)) -
+  there is an incompatibility with LFADS and TF v1.2, which we are in the
+  process of resolving
+* **NumPy, SciPy, Matplotlib** ([install SciPy stack](https://www.scipy.org/install.html), contains all of them)
+* **h5py** ([install](https://pypi.python.org/pypi/h5py))
+
+
+## Getting started
+
+Before starting, run the following:
+
+<pre>
+$ export PYTHONPATH=$PYTHONPATH:/<b>path/to/your/directory</b>/lfads/
+</pre>
+
+where "path/to/your/directory" is replaced with the path to the LFADS repository (you can get this path by using the `pwd` command). This allows the nested directories to access modules from their parent directory.
+
+## Generate synthetic data
+
+To generate the synthetic datasets, first run the following from the top-level lfads directory:
+
+```sh
+$ cd synth_data
+$ ./run_generate_synth_data.sh
+$ cd ..
+```
+
+These synthetic datasets are provided (1) to give insight into how the LFADS algorithm operates, and (2) to give reasonable starting points for analyses you might be interested in running on your own data.
+
+## Train an LFADS model
+
+Now that we have our example datasets, we can train some models! To spin up an LFADS model on the synthetic data, run any of the following commands. For the examples that are in the paper, the important hyperparameters are roughly replicated. Most hyperparameters are insensitive to small changes or won't ever be changed unless you want a very fine level of control. In the first example, all hyperparameter flags are enumerated for easy copy-pasting, but for the rest of the examples only the most important flags (~the first 8) are specified for brevity. For a full list of flags, their descriptions, and their default values, refer to the top of `run_lfads.py`.  Please see Table 1 in the Online Methods of the associated paper for definitions of the most important hyperparameters.
+
+```sh
+# Run LFADS on chaotic rnn data with no input pulses (g = 1.5)
+$ python run_lfads.py --kind=train \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=chaotic_rnn_no_inputs \
+--lfads_save_dir=/tmp/lfads_chaotic_rnn_no_inputs \
+--co_dim=0 \
+--factors_dim=20 \
+--ext_input_dim=0 \
+--controller_input_lag=1 \
+--output_dist=poisson \
+--do_causal_controller=false \
+--batch_size=128 \
+--learning_rate_init=0.01 \
+--learning_rate_stop=1e-05 \
+--learning_rate_decay_factor=0.95 \
+--learning_rate_n_to_compare=6 \
+--do_reset_learning_rate=false \
+--keep_prob=0.95 \
+--con_dim=128 \
+--gen_dim=200 \
+--ci_enc_dim=128 \
+--ic_dim=64 \
+--ic_enc_dim=128 \
+--ic_prior_var_min=0.1 \
+--gen_cell_input_weight_scale=1.0 \
+--cell_weight_scale=1.0 \
+--do_feed_factors_to_controller=true \
+--kl_start_step=0 \
+--kl_increase_steps=2000 \
+--kl_ic_weight=1.0 \
+--l2_con_scale=0.0 \
+--l2_gen_scale=2000.0 \
+--l2_start_step=0 \
+--l2_increase_steps=2000 \
+--ic_prior_var_scale=0.1 \
+--ic_post_var_min=0.0001 \
+--kl_co_weight=1.0 \
+--prior_ar_nvar=0.1 \
+--cell_clip_value=5.0 \
+--max_ckpt_to_keep_lve=5 \
+--do_train_prior_ar_atau=true \
+--co_prior_var_scale=0.1 \
+--csv_log=fitlog \
+--feedback_factors_or_rates=factors \
+--do_train_prior_ar_nvar=true \
+--max_grad_norm=200.0 \
+--device=gpu:0 \
+--num_steps_for_gen_ic=100000000 \
+--ps_nexamples_to_process=100000000 \
+--checkpoint_name=lfads_vae \
+--temporal_spike_jitter_width=0 \
+--checkpoint_pb_load_name=checkpoint \
+--inject_ext_input_to_gen=false \
+--co_mean_corr_scale=0.0 \
+--gen_cell_rec_weight_scale=1.0 \
+--max_ckpt_to_keep=5 \
+--output_filename_stem="" \
+--ic_prior_var_max=0.1 \
+--prior_ar_atau=10.0 \
+--do_train_io_only=false
+
+# Run LFADS on chaotic rnn data with input pulses (g = 2.5)
+$ python run_lfads.py --kind=train \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=chaotic_rnn_inputs_g2p5 \
+--lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
+--co_dim=1 \
+--factors_dim=20
+
+# Run LFADS on multi-session RNN data
+$ python run_lfads.py --kind=train \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=chaotic_rnn_multisession \
+--lfads_save_dir=/tmp/lfads_chaotic_rnn_multisession \
+--factors_dim=10
+
+# Run LFADS on integration to bound model data
+$ python run_lfads.py --kind=train \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=itb_rnn \
+--lfads_save_dir=/tmp/lfads_itb_rnn \
+--co_dim=1 \
+--factors_dim=20 \
+--controller_input_lag=0
+
+# Run LFADS on chaotic RNN data with labels
+$ python run_lfads.py --kind=train \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=chaotic_rnns_labeled \
+--lfads_save_dir=/tmp/lfads_chaotic_rnns_labeled \
+--co_dim=0 \
+--factors_dim=20 \
+--controller_input_lag=0 \
+--ext_input_dim=1
+
+```
+
+**Tip**: If you are running LFADS on GPU and would like to run more than one model concurrently, set the `--allow_gpu_growth=True` flag on each job, otherwise one model will take up the entire GPU for performance purposes.  Also, one needs to install the TensorFlow libraries with GPU support.
+
+
+## Visualize a training model
+
+To visualize training curves and various other metrics while training an LFADS model, run the following command on your model directory. To launch a tensorboard on the chaotic RNN data with input pulses, for example:
+
+```sh
+tensorboard --logdir=/tmp/lfads_chaotic_rnn_inputs_g2p5
+```
+
+## Evaluate a trained model
+
+Once your model is finished training, there are multiple ways you can evaluate
+it. Below are some sample commands to evaluate an LFADS model trained on the
+chaotic rnn data with input pulses (g = 2.5). The key differences here are
+setting the `--kind` flag to the appropriate mode, as well as the
+`--checkpoint_pb_load_name` flag to `checkpoint_lve` and the `--batch_size` flag
+(if you'd like to make it larger or smaller). All other flags should be the
+same as used in training, so that the same model architecture is built.
+
+```sh
+# Take samples from posterior then average (denoising operation)
+$ python run_lfads.py --kind=posterior_sample_and_average \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=chaotic_rnn_inputs_g2p5 \
+--lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
+--co_dim=1 \
+--factors_dim=20 \
+--batch_size=1024 \
+--checkpoint_pb_load_name=checkpoint_lve
+
+# Sample from prior (generation of completely new samples)
+$ python run_lfads.py --kind=prior_sample \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=chaotic_rnn_inputs_g2p5 \
+--lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
+--co_dim=1 \
+--factors_dim=20 \
+--batch_size=50 \
+--checkpoint_pb_load_name=checkpoint_lve
+
+# Write down model parameters
+$ python run_lfads.py --kind=write_model_params \
+--data_dir=/tmp/rnn_synth_data_v1.0/ \
+--data_filename_stem=chaotic_rnn_inputs_g2p5 \
+--lfads_save_dir=/tmp/lfads_chaotic_rnn_inputs_g2p5 \
+--co_dim=1 \
+--factors_dim=20 \
+--checkpoint_pb_load_name=checkpoint_lve
+```
+
+## Contact
+
+File any issues with the [issue tracker](https://github.com/tensorflow/models/issues). For any questions or problems, this code is maintained by [@sussillo](https://github.com/sussillo) and [@jazcollins](https://github.com/jazcollins).
+
--- a/lfads/distributions.py
+++ b/lfads/distributions.py
--- a/lfads/lfads.py
+++ b/lfads/lfads.py
--- a/lfads/plot_lfads.py
+++ b/lfads/plot_lfads.py
--- a/lfads/run_lfads.py
+++ b/lfads/run_lfads.py
--- a/lfads/synth_data/generate_chaotic_rnn_data.py
+++ b/lfads/synth_data/generate_chaotic_rnn_data.py
--- a/lfads/synth_data/generate_itb_data.py
+++ b/lfads/synth_data/generate_itb_data.py
--- a/lfads/synth_data/generate_labeled_rnn_data.py
+++ b/lfads/synth_data/generate_labeled_rnn_data.py
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+from __future__ import print_function
+
+import os
+import h5py
+import numpy as np
+
+from synthetic_data_utils import generate_data, generate_rnn
+from synthetic_data_utils import get_train_n_valid_inds
+from synthetic_data_utils import nparray_and_transpose
+from synthetic_data_utils import spikify_data, split_list_by_inds
+import tensorflow as tf
+from utils import write_datasets
+
+DATA_DIR = "rnn_synth_data_v1.0"
+
+flags = tf.app.flags
+flags.DEFINE_string("save_dir", "/tmp/" + DATA_DIR + "/",
+                    "Directory for saving data.")
+flags.DEFINE_string("datafile_name", "conditioned_rnn_data",
+                    "Name of data file for input case.")
+flags.DEFINE_integer("synth_data_seed", 5, "Random seed for RNN generation.")
+flags.DEFINE_float("T", 1.0, "Time in seconds to generate.")
+flags.DEFINE_integer("C", 400, "Number of conditions")
+flags.DEFINE_integer("N", 50, "Number of units for the RNN")
+flags.DEFINE_float("train_percentage", 4.0/5.0,
+                   "Percentage of train vs validation trials")
+flags.DEFINE_integer("nspikifications", 10,
+                     "Number of spikifications of the same underlying rates.")
+flags.DEFINE_float("g", 1.5, "Complexity of dynamics")
+flags.DEFINE_float("x0_std", 1.0,
+                   "Volume from which to pull initial conditions (affects diversity of dynamics.")
+flags.DEFINE_float("tau", 0.025, "Time constant of RNN")
+flags.DEFINE_float("dt", 0.010, "Time bin")
+flags.DEFINE_float("max_firing_rate", 30.0, "Map 1.0 of RNN to a spikes per second")
+FLAGS = flags.FLAGS
+
+# Three separate random streams derived from the base seed: `rng` is used for
+# everything drawn directly in this script, and each of the two RNNs gets its
+# own generator (base seed + 1 and base seed + 2).
+rng = np.random.RandomState(seed=FLAGS.synth_data_seed)
+rnn_rngs = [np.random.RandomState(seed=FLAGS.synth_data_seed+1),
+            np.random.RandomState(seed=FLAGS.synth_data_seed+2)]
+# Short aliases for the flag values used repeatedly below.
+T = FLAGS.T
+C = FLAGS.C
+N = FLAGS.N
+nspikifications = FLAGS.nspikifications
+# Total number of trials: every condition is spikified nspikifications times.
+E = nspikifications * C
+train_percentage = FLAGS.train_percentage
+# Number of time bins per trial (duration divided by bin width, truncated).
+ntimesteps = int(T / FLAGS.dt)
+
+# Build two RNNs from identical hyperparameters; they differ only in the
+# random generator handed to generate_rnn.
+rnn_a = generate_rnn(rnn_rngs[0], N, FLAGS.g, FLAGS.tau, FLAGS.dt,
+                     FLAGS.max_firing_rate)
+rnn_b = generate_rnn(rnn_rngs[1], N, FLAGS.g, FLAGS.tau, FLAGS.dt,
+                     FLAGS.max_firing_rate)
+rnns = [rnn_a, rnn_b]
+
+# pick which RNN is used on each trial
+# (one draw of 0 or 1 per trial; 0 selects rnn_a, 1 selects rnn_b below)
+rnn_to_use = rng.randint(2, size=E)
+# Broadcast the per-trial choice over time: (E,) -> (E, 1) -> (E, ntimesteps),
+# then add a trailing singleton axis to get (E, ntimesteps, 1).
+ext_input = np.repeat(np.expand_dims(rnn_to_use, axis=1), ntimesteps, axis=1)
+ext_input = np.expand_dims(ext_input, axis=2)  # these are "a's" in the paper
+
+# Draw one random initial state per condition, replicate it once per
+# spikification, and label every replicated trial with its condition index.
+x0_columns = []
+condition_labels = []
+for condition_number in range(C):
+  x0 = FLAGS.x0_std * rng.randn(N, 1)
+  # (N, 1) tiled to (N, nspikifications): identical columns for this condition.
+  x0_columns.append(np.tile(x0, nspikifications))
+  condition_labels.extend([condition_number] * nspikifications)
+# Stack all conditions side by side, one column per trial.
+x0s = np.concatenate(x0_columns, axis=1)
+
+P_nxn = rng.randn(N, N) / np.sqrt(N)
+
+# generate trials for both RNNs
+rates_a, x0s_a, _ = generate_data(rnn_a, T=T, E=E, x0s=x0s, P_sxn=P_nxn,
+                                  input_magnitude=0.0, input_times=None)
+spikes_a = spikify_data(rates_a, rng, rnn_a['dt'], rnn_a['max_firing_rate'])
+
+rates_b, x0s_b, _ = generate_data(rnn_b, T=T, E=E, x0s=x0s, P_sxn=P_nxn,
+                                  input_magnitude=0.0, input_times=None)
+spikes_b = spikify_data(rates_b, rng, rnn_b['dt'], rnn_b['max_firing_rate'])
+
+# Assemble the final per-trial rates and spikes by taking, for each trial, the
+# output of whichever RNN was selected in rnn_to_use (0 -> rnn_a, else rnn_b).
+# Not the best way to do this, but E is small enough for a plain Python loop.
+rates = []
+spikes = []
+# range rather than xrange: matches the loops above and also runs on Python 3.
+for trial in range(E):
+  if rnn_to_use[trial] == 0:
+    rates.append(rates_a[trial])
+    spikes.append(spikes_a[trial])
+  else:
+    rates.append(rates_b[trial])
+    spikes.append(spikes_b[trial])
+
+# split into train and validation sets
+train_inds, valid_inds = get_train_n_valid_inds(E, train_percentage,
+                                                nspikifications)
+
+rates_train, rates_valid = split_list_by_inds(rates, train_inds, valid_inds)
+spikes_train, spikes_valid = split_list_by_inds(spikes, train_inds, valid_inds)
+condition_labels_train, condition_labels_valid = split_list_by_inds(
+    condition_labels, train_inds, valid_inds)
+ext_input_train, ext_input_valid = split_list_by_inds(
+    ext_input, train_inds, valid_inds)
+
+rates_train = nparray_and_transpose(rates_train)
+rates_valid = nparray_and_transpose(rates_valid)
+spikes_train = nparray_and_transpose(spikes_train)
+spikes_valid = nparray_and_transpose(spikes_valid)
+
+# add train_ext_input and valid_ext input
+data = {'train_truth': rates_train,
+        'valid_truth': rates_valid,
+        'train_data' : spikes_train,
+        'valid_data' : spikes_valid,
+        'train_ext_input' : np.array(ext_input_train),
+        'valid_ext_input': np.array(ext_input_valid),
+        'train_percentage' : train_percentage,
+        'nspikifications' : nspikifications,
+        'dt' : FLAGS.dt,
+        'P_sxn' : P_nxn,
+        'condition_labels_train' : condition_labels_train,
+        'condition_labels_valid' : condition_labels_valid,
+        'conversion_factor': 1.0 / rnn_a['conversion_factor']}
+
+# just one dataset here
+datasets = {}
+dataset_name = 'dataset_N' + str(N)
+datasets[dataset_name] = data
+
+# write out the dataset
+write_datasets(FLAGS.save_dir, FLAGS.datafile_name, datasets)
+print ('Saved to ', os.path.join(FLAGS.save_dir,
+                                 FLAGS.datafile_name + '_' + dataset_name))
--- a/lfads/synth_data/run_generate_synth_data.sh
+++ b/lfads/synth_data/run_generate_synth_data.sh
--- a/lfads/synth_data/synthetic_data_utils.py
+++ b/lfads/synth_data/synthetic_data_utils.py
--- a/lfads/synth_data/trained_itb/model-65000.data-00000-of-00001
+++ b/lfads/synth_data/trained_itb/model-65000.data-00000-of-00001
--- a/lfads/synth_data/trained_itb/model-65000.index
+++ b/lfads/synth_data/trained_itb/model-65000.index
--- a/lfads/synth_data/trained_itb/model-65000.meta
+++ b/lfads/synth_data/trained_itb/model-65000.meta