"example/vscode:/vscode.git/clone" did not exist on "abf4bdb9a9946c578d4801a79650e79938fb0e41"
Commit 3e93722a authored by Neal Wu's avatar Neal Wu Committed by GitHub
Browse files

Merge branch 'master' into master

parents 2335c9fc 4de34a4c
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generate the sprites tfrecords from raw_images."""
import os
import random
import re
import sys
import numpy as np
import scipy.misc
import tensorflow as tf
tf.flags.DEFINE_string('data_filepattern', '', 'The raw images.')
tf.flags.DEFINE_string('out_file', '',
'File name for the tfrecord output.')
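# Example invocation (hypothetical paths; raw frames are expected to be named
# image_<character>_<pose>_<frame>.jpg, as matched in _read_images below):
#   python sprites_gen.py \
#     --data_filepattern='/tmp/sprites/raw_images/*.jpg' \
#     --out_file=/tmp/sprites/sprites.tfrecords
# The out_file name must contain 'sprites': generate_input derives the train
# and test outputs by substituting 'sprites_train' and 'sprites_test'.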
def _read_images():
"""Read images from image files into data structure."""
sprites = dict()
files = tf.gfile.Glob(tf.flags.FLAGS.data_filepattern)
for f in files:
image = scipy.misc.imread(f)
m = re.search('image_([0-9]+)_([0-9]+)_([0-9]+).jpg', os.path.basename(f))
if m.group(1) not in sprites:
sprites[m.group(1)] = dict()
character = sprites[m.group(1)]
if m.group(2) not in character:
character[m.group(2)] = dict()
pose = character[m.group(2)]
pose[int(m.group(3))] = image
return sprites
def _images_to_example(image, image2):
"""Convert 2 consecutive image to a SequenceExample."""
example = tf.SequenceExample()
feature_list = example.feature_lists.feature_list['moving_objs']
feature = feature_list.feature.add()
feature.float_list.value.extend(np.reshape(image, [-1]).tolist())
feature = feature_list.feature.add()
feature.float_list.value.extend(np.reshape(image2, [-1]).tolist())
return example
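# Note: the two frames are stored as consecutive float vectors under the
# 'moving_objs' feature list. A reader sketch (assuming the standard TF1
# sequence-example API and a known flattened frame size):
#   _, seq = tf.parse_single_sequence_example(
#       serialized_example,
#       sequence_features={'moving_objs': tf.FixedLenSequenceFeature(
#           [64 * 64 * 3], dtype=tf.float32)})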
def generate_input():
"""Generate tfrecords."""
sprites = _read_images()
  sys.stderr.write('Finished reading images.\n')
train_writer = tf.python_io.TFRecordWriter(
tf.flags.FLAGS.out_file.replace('sprites', 'sprites_train'))
test_writer = tf.python_io.TFRecordWriter(
tf.flags.FLAGS.out_file.replace('sprites', 'sprites_test'))
train_examples = []
test_examples = []
for i in sprites:
if int(i) < 24:
examples = test_examples
else:
examples = train_examples
character = sprites[i]
for j in character.keys():
pose = character[j]
for k in xrange(1, len(pose), 1):
image = pose[k]
image2 = pose[k+1]
examples.append(_images_to_example(image, image2))
  sys.stderr.write('Finished generating examples: %d train, %d test.\n' %
                   (len(train_examples), len(test_examples)))
random.shuffle(train_examples)
  for ex in train_examples:
    train_writer.write(ex.SerializeToString())
  for ex in test_examples:
    test_writer.write(ex.SerializeToString())
  train_writer.close()
  test_writer.close()
def main(_):
generate_input()
if __name__ == '__main__':
tf.app.run()
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train the cross convolutional model."""
import os
import sys
import numpy as np
import tensorflow as tf
import model as cross_conv_model
import reader
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string('master', '', 'Session address.')
tf.flags.DEFINE_string('log_root', '/tmp/moving_obj', 'The root dir of output.')
tf.flags.DEFINE_string('data_filepattern', '',
                       'Training data file pattern.')
tf.flags.DEFINE_integer('image_size', 64, 'Image height and width.')
tf.flags.DEFINE_integer('batch_size', 1, 'Batch size.')
tf.flags.DEFINE_float('norm_scale', 1.0, 'Normalize the original image.')
tf.flags.DEFINE_float('scale', 10.0,
'Scale the image after norm_scale and move the diff '
'to the positive realm.')
tf.flags.DEFINE_integer('sequence_length', 2, 'tf.SequenceExample length.')
tf.flags.DEFINE_float('learning_rate', 0.8, 'Learning rate.')
tf.flags.DEFINE_bool('l2_loss', True, 'If true, include l2_loss.')
tf.flags.DEFINE_bool('reconstr_loss', False, 'If true, include reconstr_loss.')
tf.flags.DEFINE_bool('kl_loss', True, 'If true, include KL loss.')
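# Example invocation (hypothetical paths), matching the flags defined above:
#   python train.py \
#     --data_filepattern='/tmp/sprites_train*.tfrecords' \
#     --log_root=/tmp/moving_obj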
slim = tf.contrib.slim
def _Train():
params = dict()
params['batch_size'] = FLAGS.batch_size
params['seq_len'] = FLAGS.sequence_length
params['image_size'] = FLAGS.image_size
params['is_training'] = True
params['norm_scale'] = FLAGS.norm_scale
params['scale'] = FLAGS.scale
params['learning_rate'] = FLAGS.learning_rate
params['l2_loss'] = FLAGS.l2_loss
params['reconstr_loss'] = FLAGS.reconstr_loss
params['kl_loss'] = FLAGS.kl_loss
train_dir = os.path.join(FLAGS.log_root, 'train')
images = reader.ReadInput(FLAGS.data_filepattern, shuffle=True, params=params)
images *= params['scale']
  # Increasing this value makes training much faster.
image_diff_list = reader.SequenceToImageAndDiff(images)
model = cross_conv_model.CrossConvModel(image_diff_list, params)
model.Build()
tf.contrib.tfprof.model_analyzer.print_model_analysis(tf.get_default_graph())
summary_writer = tf.summary.FileWriter(train_dir)
sv = tf.train.Supervisor(logdir=FLAGS.log_root,
summary_op=None,
is_chief=True,
save_model_secs=60,
global_step=model.global_step)
sess = sv.prepare_or_wait_for_session(
FLAGS.master, config=tf.ConfigProto(allow_soft_placement=True))
total_loss = 0.0
step = 0
sample_z_mean = np.zeros(model.z_mean.get_shape().as_list())
sample_z_stddev_log = np.zeros(model.z_stddev_log.get_shape().as_list())
sample_step = 0
while True:
_, loss_val, total_steps, summaries, z_mean, z_stddev_log = sess.run(
[model.train_op, model.loss, model.global_step,
model.summary_op,
model.z_mean, model.z_stddev_log])
sample_z_mean += z_mean
sample_z_stddev_log += z_stddev_log
total_loss += loss_val
step += 1
sample_step += 1
if step % 100 == 0:
summary_writer.add_summary(summaries, total_steps)
sys.stderr.write('step: %d, loss: %f\n' %
(total_steps, total_loss / step))
total_loss = 0.0
step = 0
# Sampled z is used for eval.
# It seems 10k is better than 1k. Maybe try 100k next?
if sample_step % 10000 == 0:
with tf.gfile.Open(os.path.join(FLAGS.log_root, 'z_mean.npy'), 'w') as f:
np.save(f, sample_z_mean / sample_step)
with tf.gfile.Open(
os.path.join(FLAGS.log_root, 'z_stddev_log.npy'), 'w') as f:
np.save(f, sample_z_stddev_log / sample_step)
sample_z_mean = np.zeros(model.z_mean.get_shape().as_list())
sample_z_stddev_log = np.zeros(
model.z_stddev_log.get_shape().as_list())
sample_step = 0
def main(_):
_Train()
if __name__ == '__main__':
tf.app.run()
# Real NVP in TensorFlow
*A Tensorflow implementation of the training procedure of*
[*Density estimation using Real NVP*](https://arxiv.org/abs/1605.08803)*, by
Laurent Dinh, Jascha Sohl-Dickstein and Samy Bengio, for Imagenet
(32x32 and 64x64), CelebA and LSUN, including the scripts to
put the datasets in `.tfrecords` format.*
We are happy to open source the code for *Real NVP*, a novel approach to
density estimation using deep neural networks that enables tractable density
estimation and efficient one-pass inference and sampling. This model
successfully decomposes images into hierarchical features ranging from
high-level concepts to low-resolution details. Visualizations are available
[here](http://goo.gl/yco14s).
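At its core, Real NVP learns an invertible map `f` from images to a latent
space with a simple prior and trains by exact maximum likelihood through the
change of variables formula,
`log p_X(x) = log p_Z(f(x)) + log |det ∂f(x)/∂x|`,
with coupling layers designed so that the Jacobian determinant stays cheap to
compute.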
## Installation
* python 2.7:
* python 3 support is not available yet
* pip (python package manager)
* `apt-get install python-pip` on Ubuntu
* `brew` installs pip along with python on OSX
* Install the dependencies for [LSUN](https://github.com/fyu/lsun.git)
* Install [OpenCV](http://opencv.org/)
* `pip install numpy lmdb`
* Install the python dependencies
* `pip install scipy scikit-image Pillow`
* Install the
[latest Tensorflow Pip package](https://www.tensorflow.org/get_started/os_setup.html#using-pip)
for Python 2.7
## Getting Started
Once you have successfully installed the dependencies, you can start by
downloading the repository:
```shell
git clone --recursive https://github.com/tensorflow/models.git
```
Afterward, you can use the utilities in this folder to prepare the datasets.
## Preparing datasets
### CelebA
For [*CelebA*](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html), download
`img_align_celeba.zip` from the Dropbox link on this
[page](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) under the
link *Align&Cropped Images* in the *Img* directory and `list_eval_partition.txt`
under the link *Train/Val/Test Partitions* in the *Eval* directory. Then do:
```shell
mkdir celeba
cd celeba
unzip img_align_celeba.zip
```
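Each line of `list_eval_partition.txt` pairs an image filename with a subset
index, which is what the `--set` flag below selects (0 = train,
1 = validation, 2 = test). The first lines should look like:
```
000001.jpg 0
000002.jpg 0
```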
We'll format the training subset:
```shell
python2.7 ../models/real_nvp/celeba_formatting.py \
--partition_fn list_eval_partition.txt \
--file_out celeba_train \
--fn_root img_align_celeba \
--set 0
```
Then the validation subset:
```shell
python2.7 ../models/real_nvp/celeba_formatting.py \
--partition_fn list_eval_partition.txt \
--file_out celeba_valid \
--fn_root img_align_celeba \
--set 1
```
And finally the test subset:
```shell
python2.7 ../models/real_nvp/celeba_formatting.py \
--partition_fn list_eval_partition.txt \
--file_out celeba_test \
--fn_root img_align_celeba \
--set 2
```
Afterward:
```shell
cd ..
```
### Small Imagenet
Downloading the [*small Imagenet*](http://image-net.org/small/download.php)
dataset is more straightforward and can be done
entirely from the shell:
```shell
mkdir small_imnet
cd small_imnet
for FILENAME in train_32x32.tar valid_32x32.tar train_64x64.tar valid_64x64.tar
do
curl -O http://image-net.org/small/$FILENAME
tar -xvf $FILENAME
done
```
Then, you can format the datasets as follows:
```shell
for DIRNAME in train_32x32 valid_32x32 train_64x64 valid_64x64
do
python2.7 ../models/real_nvp/imnet_formatting.py \
--file_out $DIRNAME \
--fn_root $DIRNAME
done
cd ..
```
### LSUN
To prepare the [*LSUN*](http://lsun.cs.princeton.edu/2016/) dataset, we will
use the associated code:
```shell
git clone https://github.com/fyu/lsun.git
cd lsun
```
Then we'll download the db files:
```shell
for CATEGORY in bedroom church_outdoor tower
do
python2.7 download.py -c $CATEGORY
unzip "$CATEGORY"_train_lmdb.zip
unzip "$CATEGORY"_val_lmdb.zip
python2.7 data.py export "$CATEGORY"_train_lmdb \
--out_dir "$CATEGORY"_train --flat
python2.7 data.py export "$CATEGORY"_val_lmdb \
--out_dir "$CATEGORY"_val --flat
done
```
Finally, we format the dataset into `.tfrecords`:
```shell
for CATEGORY in bedroom church_outdoor tower
do
python2.7 ../models/real_nvp/lsun_formatting.py \
--file_out "$CATEGORY"_train \
--fn_root "$CATEGORY"_train
python2.7 ../models/real_nvp/lsun_formatting.py \
--file_out "$CATEGORY"_val \
--fn_root "$CATEGORY"_val
done
cd ..
```
## Training
We'll give an example of how to train a model on the small Imagenet
dataset (32x32):
```shell
cd models/real_nvp/
python2.7 real_nvp_multiscale_dataset.py \
--image_size 32 \
--hpconfig=n_scale=4,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset imnet \
--traindir /tmp/real_nvp_imnet32/train \
--logdir /tmp/real_nvp_imnet32/train \
--data_path ../../small_imnet/train_32x32_?????.tfrecords
```
In parallel, you can run the script to generate visualization from the model:
```shell
python2.7 real_nvp_multiscale_dataset.py \
--image_size 32 \
--hpconfig=n_scale=4,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset imnet \
--traindir /tmp/real_nvp_imnet32/train \
--logdir /tmp/real_nvp_imnet32/sample \
--data_path ../../small_imnet/valid_32x32_?????.tfrecords \
--mode sample
```
Additionally, you can also run the script to evaluate the model on the
validation set:
```shell
python2.7 real_nvp_multiscale_dataset.py \
--image_size 32 \
--hpconfig=n_scale=4,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset imnet \
--traindir /tmp/real_nvp_imnet32/train \
--logdir /tmp/real_nvp_imnet32/eval \
--data_path ../../small_imnet/valid_32x32_?????.tfrecords \
--eval_set_size 50000 \
--mode eval
```
The visualizations and validation set evaluation can be viewed through
[Tensorboard](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tensorboard/README.md).
Another example shows how to run the model on LSUN (church_outdoor category):
```shell
# train the model
python2.7 real_nvp_multiscale_dataset.py \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset lsun \
--traindir /tmp/real_nvp_church_outdoor/train \
--logdir /tmp/real_nvp_church_outdoor/train \
--data_path ../../lsun/church_outdoor_train_?????.tfrecords
```
```shell
# sample from the model
python2.7 real_nvp_multiscale_dataset.py \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset lsun \
--traindir /tmp/real_nvp_church_outdoor/train \
--logdir /tmp/real_nvp_church_outdoor/sample \
--data_path ../../lsun/church_outdoor_val_?????.tfrecords \
--mode sample
```
```shell
# evaluate the model
python2.7 real_nvp_multiscale_dataset.py \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset lsun \
--traindir /tmp/real_nvp_church_outdoor/train \
--logdir /tmp/real_nvp_church_outdoor/eval \
--data_path ../../lsun/church_outdoor_val_?????.tfrecords \
--eval_set_size 300 \
--mode eval
```
Finally, we'll give the commands to run the model on the CelebA dataset:
```shell
# train the model
python2.7 real_nvp_multiscale_dataset.py \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset celeba \
--traindir /tmp/real_nvp_celeba/train \
--logdir /tmp/real_nvp_celeba/train \
--data_path ../../celeba/celeba_train.tfrecords
```
```shell
# sample from the model
python2.7 real_nvp_multiscale_dataset.py \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset celeba \
--traindir /tmp/real_nvp_celeba/train \
--logdir /tmp/real_nvp_celeba/sample \
--data_path ../../celeba/celeba_valid.tfrecords \
--mode sample
```
```shell
# evaluate the model on validation set
python2.7 real_nvp_multiscale_dataset.py \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset celeba \
--traindir /tmp/real_nvp_celeba/train \
--logdir /tmp/real_nvp_celeba/eval_valid \
--data_path ../../celeba/celeba_valid.tfrecords \
--eval_set_size 19867 \
--mode eval
# evaluate the model on test set
python2.7 real_nvp_multiscale_dataset.py \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \
--dataset celeba \
--traindir /tmp/real_nvp_celeba/train \
--logdir /tmp/real_nvp_celeba/eval_test \
--data_path ../../celeba/celeba_test.tfrecords \
--eval_set_size 19962 \
--mode eval
```
## Credits
This code was written by Laurent Dinh
([@laurent-dinh](https://github.com/laurent-dinh)) with
the help of
Jascha Sohl-Dickstein ([@Sohl-Dickstein](https://github.com/Sohl-Dickstein)
and [jaschasd@google.com](mailto:jaschasd@google.com)),
Samy Bengio, Jon Shlens, Sherry Moore and
David Andersen.
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""CelebA dataset formating.
Download img_align_celeba.zip from
http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html under the
link "Align&Cropped Images" in the "Img" directory and list_eval_partition.txt
under the link "Train/Val/Test Partitions" in the "Eval" directory. Then do:
unzip img_align_celeba.zip
Use the script as follows:
python celeba_formatting.py \
--partition_fn [PARTITION_FILE_PATH] \
--file_out [OUTPUT_FILE_PATH_PREFIX] \
--fn_root [CELEBA_FOLDER] \
--set [SUBSET_INDEX]
"""
import os
import os.path
import scipy.ndimage
import tensorflow as tf
tf.flags.DEFINE_string("file_out", "",
"Filename of the output .tfrecords file.")
tf.flags.DEFINE_string("fn_root", "", "Name of root file path.")
tf.flags.DEFINE_string("partition_fn", "", "Partition file path.")
tf.flags.DEFINE_string("set", "", "Name of subset.")
FLAGS = tf.flags.FLAGS
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def main():
"""Main converter function."""
# Celeb A
with open(FLAGS.partition_fn, "r") as infile:
img_fn_list = infile.readlines()
img_fn_list = [elem.strip().split() for elem in img_fn_list]
img_fn_list = [elem[0] for elem in img_fn_list if elem[1] == FLAGS.set]
fn_root = FLAGS.fn_root
num_examples = len(img_fn_list)
file_out = "%s.tfrecords" % FLAGS.file_out
writer = tf.python_io.TFRecordWriter(file_out)
for example_idx, img_fn in enumerate(img_fn_list):
if example_idx % 1000 == 0:
print example_idx, "/", num_examples
image_raw = scipy.ndimage.imread(os.path.join(fn_root, img_fn))
rows = image_raw.shape[0]
cols = image_raw.shape[1]
depth = image_raw.shape[2]
image_raw = image_raw.tostring()
example = tf.train.Example(
features=tf.train.Features(
feature={
"height": _int64_feature(rows),
"width": _int64_feature(cols),
"depth": _int64_feature(depth),
"image_raw": _bytes_feature(image_raw)
}
)
)
writer.write(example.SerializeToString())
writer.close()
if __name__ == "__main__":
main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""LSUN dataset formatting.
Download and format the Imagenet dataset as follow:
mkdir [IMAGENET_PATH]
cd [IMAGENET_PATH]
for FILENAME in train_32x32.tar valid_32x32.tar train_64x64.tar valid_64x64.tar
do
curl -O http://image-net.org/small/$FILENAME
tar -xvf $FILENAME
done
Then use the script as follows:
for DIRNAME in train_32x32 valid_32x32 train_64x64 valid_64x64
do
python imnet_formatting.py \
--file_out $DIRNAME \
--fn_root $DIRNAME
done
"""
import os
import os.path
import scipy.ndimage
import tensorflow as tf
tf.flags.DEFINE_string("file_out", "",
"Filename of the output .tfrecords file.")
tf.flags.DEFINE_string("fn_root", "", "Name of root file path.")
FLAGS = tf.flags.FLAGS
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def main():
"""Main converter function."""
# LSUN
fn_root = FLAGS.fn_root
img_fn_list = os.listdir(fn_root)
img_fn_list = [img_fn for img_fn in img_fn_list
if img_fn.endswith('.png')]
num_examples = len(img_fn_list)
n_examples_per_file = 10000
for example_idx, img_fn in enumerate(img_fn_list):
if example_idx % n_examples_per_file == 0:
file_out = "%s_%05d.tfrecords"
file_out = file_out % (FLAGS.file_out,
example_idx // n_examples_per_file)
print "Writing on:", file_out
writer = tf.python_io.TFRecordWriter(file_out)
if example_idx % 1000 == 0:
print example_idx, "/", num_examples
image_raw = scipy.ndimage.imread(os.path.join(fn_root, img_fn))
rows = image_raw.shape[0]
cols = image_raw.shape[1]
depth = image_raw.shape[2]
image_raw = image_raw.astype("uint8")
image_raw = image_raw.tostring()
example = tf.train.Example(
features=tf.train.Features(
feature={
"height": _int64_feature(rows),
"width": _int64_feature(cols),
"depth": _int64_feature(depth),
"image_raw": _bytes_feature(image_raw)
}
)
)
writer.write(example.SerializeToString())
if example_idx % n_examples_per_file == (n_examples_per_file - 1):
writer.close()
writer.close()
if __name__ == "__main__":
main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""LSUN dataset formatting.
Download and format the LSUN dataset as follows:
git clone https://github.com/fyu/lsun.git
cd lsun
python2.7 download.py -c [CATEGORY]
Then unzip the downloaded .zip files before executing:
python2.7 data.py export [IMAGE_DB_PATH] --out_dir [LSUN_FOLDER] --flat
Then use the script as follows:
python lsun_formatting.py \
--file_out [OUTPUT_FILE_PATH_PREFIX] \
--fn_root [LSUN_FOLDER]
"""
import os
import os.path
import numpy
import skimage.transform
from PIL import Image
import tensorflow as tf
tf.flags.DEFINE_string("file_out", "",
"Filename of the output .tfrecords file.")
tf.flags.DEFINE_string("fn_root", "", "Name of root file path.")
FLAGS = tf.flags.FLAGS
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def main():
"""Main converter function."""
fn_root = FLAGS.fn_root
img_fn_list = os.listdir(fn_root)
img_fn_list = [img_fn for img_fn in img_fn_list
if img_fn.endswith('.webp')]
num_examples = len(img_fn_list)
n_examples_per_file = 10000
for example_idx, img_fn in enumerate(img_fn_list):
if example_idx % n_examples_per_file == 0:
file_out = "%s_%05d.tfrecords"
file_out = file_out % (FLAGS.file_out,
example_idx // n_examples_per_file)
print "Writing on:", file_out
writer = tf.python_io.TFRecordWriter(file_out)
if example_idx % 1000 == 0:
print example_idx, "/", num_examples
image_raw = numpy.array(Image.open(os.path.join(fn_root, img_fn)))
rows = image_raw.shape[0]
cols = image_raw.shape[1]
depth = image_raw.shape[2]
downscale = min(rows / 96., cols / 96.)
image_raw = skimage.transform.pyramid_reduce(image_raw, downscale)
image_raw *= 255.
image_raw = image_raw.astype("uint8")
rows = image_raw.shape[0]
cols = image_raw.shape[1]
depth = image_raw.shape[2]
image_raw = image_raw.tostring()
example = tf.train.Example(
features=tf.train.Features(
feature={
"height": _int64_feature(rows),
"width": _int64_feature(cols),
"depth": _int64_feature(depth),
"image_raw": _bytes_feature(image_raw)
}
)
)
writer.write(example.SerializeToString())
if example_idx % n_examples_per_file == (n_examples_per_file - 1):
writer.close()
writer.close()
if __name__ == "__main__":
main()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Script for training, evaluation and sampling for Real NVP.
$ python real_nvp_multiscale_dataset.py \
--alsologtostderr \
--image_size 64 \
--hpconfig=n_scale=5,base_dim=8 \
--dataset imnet \
--data_path [DATA_PATH]
"""
import time
from datetime import datetime
import os
import numpy
import tensorflow as tf
from tensorflow import gfile
from real_nvp_utils import (
batch_norm, batch_norm_log_diff, conv_layer,
squeeze_2x2, squeeze_2x2_ordered, standard_normal_ll,
standard_normal_sample, unsqueeze_2x2, variable_on_cpu)
tf.flags.DEFINE_string("master", "local",
"BNS name of the TensorFlow master, or local.")
tf.flags.DEFINE_string("logdir", "/tmp/real_nvp_multiscale",
"Directory to which writes logs.")
tf.flags.DEFINE_string("traindir", "/tmp/real_nvp_multiscale",
"Directory to which writes logs.")
tf.flags.DEFINE_integer("train_steps", 1000000000000000000,
"Number of steps to train for.")
tf.flags.DEFINE_string("data_path", "", "Path to the data.")
tf.flags.DEFINE_string("mode", "train",
"Mode of execution. Must be 'train', "
"'sample' or 'eval'.")
tf.flags.DEFINE_string("dataset", "imnet",
"Dataset used. Must be 'imnet', "
"'celeba' or 'lsun'.")
tf.flags.DEFINE_integer("recursion_type", 2,
"Type of the recursion.")
tf.flags.DEFINE_integer("image_size", 64,
"Size of the input image.")
tf.flags.DEFINE_integer("eval_set_size", 0,
"Size of evaluation dataset.")
tf.flags.DEFINE_string(
"hpconfig", "",
"A comma separated list of hyperparameters for the model. Format is "
"hp1=value1,hp2=value2,etc. If this FLAG is set, the model will be trained "
"with the specified hyperparameters, filling in missing hyperparameters "
"from the default_values in |hyper_params|.")
FLAGS = tf.flags.FLAGS
class HParams(object):
"""Dictionary of hyperparameters."""
def __init__(self, **kwargs):
self.dict_ = kwargs
self.__dict__.update(self.dict_)
def update_config(self, in_string):
"""Update the dictionary with a comma separated list."""
pairs = in_string.split(",")
pairs = [pair.split("=") for pair in pairs]
for key, val in pairs:
self.dict_[key] = type(self.dict_[key])(val)
self.__dict__.update(self.dict_)
return self
def __getitem__(self, key):
return self.dict_[key]
def __setitem__(self, key, val):
self.dict_[key] = val
self.__dict__.update(self.dict_)
def get_default_hparams():
"""Get the default hyperparameters."""
return HParams(
batch_size=64,
residual_blocks=2,
n_couplings=2,
n_scale=4,
learning_rate=0.001,
momentum=1e-1,
decay=1e-3,
l2_coeff=0.00005,
clip_gradient=100.,
optimizer="adam",
dropout_mask=0,
base_dim=32,
bottleneck=0,
use_batch_norm=1,
alternate=1,
use_aff=1,
skip=1,
data_constraint=.9,
n_opt=0)
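# A minimal sketch of the intended use, assuming FLAGS.hpconfig holds a
# string such as "n_scale=5,base_dim=8" (cf. the flag description above):
#   hps = get_default_hparams().update_config(FLAGS.hpconfig)
# Each "key=value" pair overrides a default and is cast to the type of the
# corresponding default value (see HParams.update_config).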
# RESNET UTILS
def residual_block(input_, dim, name, use_batch_norm=True,
train=True, weight_norm=True, bottleneck=False):
"""Residual convolutional block."""
with tf.variable_scope(name):
res = input_
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim, name="bn_in", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
if bottleneck:
res = conv_layer(
input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim,
name="h_0", stddev=numpy.sqrt(2. / (dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=(not use_batch_norm),
weight_norm=weight_norm, scale=False)
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim,
name="bn_0", scale=False, train=train,
epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
res = conv_layer(
input_=res, filter_size=[3, 3], dim_in=dim,
dim_out=dim, name="h_1", stddev=numpy.sqrt(2. / (1. * dim)),
strides=[1, 1, 1, 1], padding="SAME", nonlinearity=None,
bias=(not use_batch_norm),
weight_norm=weight_norm, scale=False)
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim, name="bn_1", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
res = conv_layer(
input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim,
name="out", stddev=numpy.sqrt(2. / (1. * dim)),
strides=[1, 1, 1, 1], padding="SAME", nonlinearity=None,
bias=True, weight_norm=weight_norm, scale=True)
else:
res = conv_layer(
input_=res, filter_size=[3, 3], dim_in=dim, dim_out=dim,
name="h_0", stddev=numpy.sqrt(2. / (dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=(not use_batch_norm),
weight_norm=weight_norm, scale=False)
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim, name="bn_0", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
res = conv_layer(
input_=res, filter_size=[3, 3], dim_in=dim, dim_out=dim,
name="out", stddev=numpy.sqrt(2. / (1. * dim)),
strides=[1, 1, 1, 1], padding="SAME", nonlinearity=None,
bias=True, weight_norm=weight_norm, scale=True)
res += input_
return res
def resnet(input_, dim_in, dim, dim_out, name, use_batch_norm=True,
train=True, weight_norm=True, residual_blocks=5,
bottleneck=False, skip=True):
"""Residual convolutional network."""
with tf.variable_scope(name):
res = input_
if residual_blocks != 0:
res = conv_layer(
input_=res, filter_size=[3, 3], dim_in=dim_in, dim_out=dim,
name="h_in", stddev=numpy.sqrt(2. / (dim_in)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=True,
weight_norm=weight_norm, scale=False)
if skip:
out = conv_layer(
input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim,
name="skip_in", stddev=numpy.sqrt(2. / (dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=True,
weight_norm=weight_norm, scale=True)
# residual blocks
for idx_block in xrange(residual_blocks):
res = residual_block(res, dim, "block_%d" % idx_block,
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
bottleneck=bottleneck)
if skip:
out += conv_layer(
input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim,
name="skip_%d" % idx_block, stddev=numpy.sqrt(2. / (dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=True,
weight_norm=weight_norm, scale=True)
# outputs
if skip:
res = out
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim, name="bn_pre_out", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
res = conv_layer(
input_=res, filter_size=[1, 1], dim_in=dim,
dim_out=dim_out,
name="out", stddev=numpy.sqrt(2. / (1. * dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=True,
weight_norm=weight_norm, scale=True)
else:
if bottleneck:
res = conv_layer(
input_=res, filter_size=[1, 1], dim_in=dim_in, dim_out=dim,
name="h_0", stddev=numpy.sqrt(2. / (dim_in)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=(not use_batch_norm),
weight_norm=weight_norm, scale=False)
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim, name="bn_0", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
res = conv_layer(
input_=res, filter_size=[3, 3], dim_in=dim,
dim_out=dim, name="h_1", stddev=numpy.sqrt(2. / (1. * dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None,
bias=(not use_batch_norm),
weight_norm=weight_norm, scale=False)
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim, name="bn_1", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
res = conv_layer(
input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim_out,
name="out", stddev=numpy.sqrt(2. / (1. * dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=True,
weight_norm=weight_norm, scale=True)
else:
res = conv_layer(
input_=res, filter_size=[3, 3], dim_in=dim_in, dim_out=dim,
name="h_0", stddev=numpy.sqrt(2. / (dim_in)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=(not use_batch_norm),
weight_norm=weight_norm, scale=False)
if use_batch_norm:
res = batch_norm(
input_=res, dim=dim, name="bn_0", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.nn.relu(res)
res = conv_layer(
input_=res, filter_size=[3, 3], dim_in=dim, dim_out=dim_out,
name="out", stddev=numpy.sqrt(2. / (1. * dim)),
strides=[1, 1, 1, 1], padding="SAME",
nonlinearity=None, bias=True,
weight_norm=weight_norm, scale=True)
return res
# COUPLING LAYERS
# masked convolution implementations
def masked_conv_aff_coupling(input_, mask_in, dim, name,
use_batch_norm=True, train=True, weight_norm=True,
reverse=False, residual_blocks=5,
bottleneck=False, use_width=1., use_height=1.,
mask_channel=0., skip=True):
"""Affine coupling with masked convolution."""
with tf.variable_scope(name) as scope:
if reverse or (not train):
scope.reuse_variables()
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
# build mask
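    # The lines below build a checkerboard pattern: entry (i, j) equals
    # (use_height * i + use_width * j + mask_in) mod 2, so mask_in flips the
    # checkerboard and use_width/use_height can disable one direction.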
mask = use_width * numpy.arange(width)
mask = use_height * numpy.arange(height).reshape((-1, 1)) + mask
mask = mask.astype("float32")
mask = tf.mod(mask_in + mask, 2)
mask = tf.reshape(mask, [-1, height, width, 1])
if mask.get_shape().as_list()[0] == 1:
mask = tf.tile(mask, [batch_size, 1, 1, 1])
res = input_ * tf.mod(mask_channel + mask, 2)
# initial input
if use_batch_norm:
res = batch_norm(
input_=res, dim=channels, name="bn_in", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res *= 2.
res = tf.concat_v2([res, -res], 3)
res = tf.concat_v2([res, mask], 3)
dim_in = 2. * channels + 1
res = tf.nn.relu(res)
res = resnet(input_=res, dim_in=dim_in, dim=dim,
dim_out=2 * channels,
name="resnet", use_batch_norm=use_batch_norm,
train=train, weight_norm=weight_norm,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
mask = tf.mod(mask_channel + mask, 2)
res = tf.split(res, 2, 3)
shift, log_rescaling = res[-2], res[-1]
scale = variable_on_cpu(
"rescaling_scale", [],
tf.constant_initializer(0.))
shift = tf.reshape(
shift, [batch_size, height, width, channels])
log_rescaling = tf.reshape(
log_rescaling, [batch_size, height, width, channels])
log_rescaling = scale * tf.tanh(log_rescaling)
if not use_batch_norm:
scale_shift = variable_on_cpu(
"scale_shift", [],
tf.constant_initializer(0.))
log_rescaling += scale_shift
shift *= (1. - mask)
log_rescaling *= (1. - mask)
if reverse:
res = input_
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res * (1. - mask), dim=channels, name="bn_out",
train=False, epsilon=1e-4, axes=[0, 1, 2])
log_var = tf.log(var)
res *= tf.exp(.5 * log_var * (1. - mask))
res += mean * (1. - mask)
res *= tf.exp(-log_rescaling)
res -= shift
log_diff = -log_rescaling
if use_batch_norm:
log_diff += .5 * log_var * (1. - mask)
else:
res = input_
res += shift
res *= tf.exp(log_rescaling)
log_diff = log_rescaling
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res * (1. - mask), dim=channels, name="bn_out",
train=train, epsilon=1e-4, axes=[0, 1, 2])
log_var = tf.log(var)
res -= mean * (1. - mask)
res *= tf.exp(-.5 * log_var * (1. - mask))
log_diff -= .5 * log_var * (1. - mask)
return res, log_diff
def masked_conv_add_coupling(input_, mask_in, dim, name,
use_batch_norm=True, train=True, weight_norm=True,
reverse=False, residual_blocks=5,
bottleneck=False, use_width=1., use_height=1.,
mask_channel=0., skip=True):
"""Additive coupling with masked convolution."""
with tf.variable_scope(name) as scope:
if reverse or (not train):
scope.reuse_variables()
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
# build mask
mask = use_width * numpy.arange(width)
mask = use_height * numpy.arange(height).reshape((-1, 1)) + mask
mask = mask.astype("float32")
mask = tf.mod(mask_in + mask, 2)
mask = tf.reshape(mask, [-1, height, width, 1])
if mask.get_shape().as_list()[0] == 1:
mask = tf.tile(mask, [batch_size, 1, 1, 1])
res = input_ * tf.mod(mask_channel + mask, 2)
# initial input
if use_batch_norm:
res = batch_norm(
input_=res, dim=channels, name="bn_in", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res *= 2.
res = tf.concat_v2([res, -res], 3)
res = tf.concat_v2([res, mask], 3)
dim_in = 2. * channels + 1
res = tf.nn.relu(res)
shift = resnet(input_=res, dim_in=dim_in, dim=dim, dim_out=channels,
name="resnet", use_batch_norm=use_batch_norm,
train=train, weight_norm=weight_norm,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
mask = tf.mod(mask_channel + mask, 2)
shift *= (1. - mask)
# use_batch_norm = False
if reverse:
res = input_
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res * (1. - mask),
dim=channels, name="bn_out", train=False, epsilon=1e-4)
log_var = tf.log(var)
res *= tf.exp(.5 * log_var * (1. - mask))
res += mean * (1. - mask)
res -= shift
log_diff = tf.zeros_like(res)
if use_batch_norm:
log_diff += .5 * log_var * (1. - mask)
else:
res = input_
res += shift
log_diff = tf.zeros_like(res)
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res * (1. - mask), dim=channels,
name="bn_out", train=train, epsilon=1e-4, axes=[0, 1, 2])
log_var = tf.log(var)
res -= mean * (1. - mask)
res *= tf.exp(-.5 * log_var * (1. - mask))
log_diff -= .5 * log_var * (1. - mask)
return res, log_diff
def masked_conv_coupling(input_, mask_in, dim, name,
use_batch_norm=True, train=True, weight_norm=True,
reverse=False, residual_blocks=5,
bottleneck=False, use_aff=True,
use_width=1., use_height=1.,
mask_channel=0., skip=True):
"""Coupling with masked convolution."""
if use_aff:
return masked_conv_aff_coupling(
input_=input_, mask_in=mask_in, dim=dim, name=name,
use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm,
reverse=reverse, residual_blocks=residual_blocks,
bottleneck=bottleneck, use_width=use_width, use_height=use_height,
mask_channel=mask_channel, skip=skip)
else:
return masked_conv_add_coupling(
input_=input_, mask_in=mask_in, dim=dim, name=name,
use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm,
reverse=reverse, residual_blocks=residual_blocks,
bottleneck=bottleneck, use_width=use_width, use_height=use_height,
mask_channel=mask_channel, skip=skip)
# channel-axis splitting implementations
def conv_ch_aff_coupling(input_, dim, name,
use_batch_norm=True, train=True, weight_norm=True,
reverse=False, residual_blocks=5,
bottleneck=False, change_bottom=True, skip=True):
"""Affine coupling with channel-wise splitting."""
with tf.variable_scope(name) as scope:
if reverse or (not train):
scope.reuse_variables()
if change_bottom:
input_, canvas = tf.split(input_, 2, 3)
else:
canvas, input_ = tf.split(input_, 2, 3)
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
res = input_
# initial input
if use_batch_norm:
res = batch_norm(
input_=res, dim=channels, name="bn_in", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.concat_v2([res, -res], 3)
dim_in = 2. * channels
res = tf.nn.relu(res)
res = resnet(input_=res, dim_in=dim_in, dim=dim, dim_out=2 * channels,
name="resnet", use_batch_norm=use_batch_norm,
train=train, weight_norm=weight_norm,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
shift, log_rescaling = tf.split(res, 2, 3)
scale = variable_on_cpu(
"scale", [],
tf.constant_initializer(1.))
shift = tf.reshape(
shift, [batch_size, height, width, channels])
log_rescaling = tf.reshape(
log_rescaling, [batch_size, height, width, channels])
log_rescaling = scale * tf.tanh(log_rescaling)
if not use_batch_norm:
scale_shift = variable_on_cpu(
"scale_shift", [],
tf.constant_initializer(0.))
log_rescaling += scale_shift
if reverse:
res = canvas
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res, dim=channels, name="bn_out", train=False,
epsilon=1e-4, axes=[0, 1, 2])
log_var = tf.log(var)
res *= tf.exp(.5 * log_var)
res += mean
res *= tf.exp(-log_rescaling)
res -= shift
log_diff = -log_rescaling
if use_batch_norm:
log_diff += .5 * log_var
else:
res = canvas
res += shift
res *= tf.exp(log_rescaling)
log_diff = log_rescaling
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res, dim=channels, name="bn_out", train=train,
epsilon=1e-4, axes=[0, 1, 2])
log_var = tf.log(var)
res -= mean
res *= tf.exp(-.5 * log_var)
log_diff -= .5 * log_var
if change_bottom:
res = tf.concat_v2([input_, res], 3)
log_diff = tf.concat_v2([tf.zeros_like(log_diff), log_diff], 3)
else:
res = tf.concat_v2([res, input_], 3)
log_diff = tf.concat_v2([log_diff, tf.zeros_like(log_diff)], 3)
return res, log_diff
def conv_ch_add_coupling(input_, dim, name,
use_batch_norm=True, train=True, weight_norm=True,
reverse=False, residual_blocks=5,
bottleneck=False, change_bottom=True, skip=True):
"""Additive coupling with channel-wise splitting."""
with tf.variable_scope(name) as scope:
if reverse or (not train):
scope.reuse_variables()
if change_bottom:
input_, canvas = tf.split(input_, 2, 3)
else:
canvas, input_ = tf.split(input_, 2, 3)
shape = input_.get_shape().as_list()
channels = shape[3]
res = input_
# initial input
if use_batch_norm:
res = batch_norm(
input_=res, dim=channels, name="bn_in", scale=False,
train=train, epsilon=1e-4, axes=[0, 1, 2])
res = tf.concat_v2([res, -res], 3)
dim_in = 2. * channels
res = tf.nn.relu(res)
shift = resnet(input_=res, dim_in=dim_in, dim=dim, dim_out=channels,
name="resnet", use_batch_norm=use_batch_norm,
train=train, weight_norm=weight_norm,
residual_blocks=residual_blocks,
bottleneck=bottleneck, skip=skip)
if reverse:
res = canvas
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res, dim=channels, name="bn_out", train=False,
epsilon=1e-4, axes=[0, 1, 2])
log_var = tf.log(var)
res *= tf.exp(.5 * log_var)
res += mean
res -= shift
log_diff = tf.zeros_like(res)
if use_batch_norm:
log_diff += .5 * log_var
else:
res = canvas
res += shift
log_diff = tf.zeros_like(res)
if use_batch_norm:
mean, var = batch_norm_log_diff(
input_=res, dim=channels, name="bn_out", train=train,
epsilon=1e-4, axes=[0, 1, 2])
log_var = tf.log(var)
res -= mean
res *= tf.exp(-.5 * log_var)
log_diff -= .5 * log_var
if change_bottom:
res = tf.concat_v2([input_, res], 3)
log_diff = tf.concat_v2([tf.zeros_like(log_diff), log_diff], 3)
else:
res = tf.concat_v2([res, input_], 3)
log_diff = tf.concat_v2([log_diff, tf.zeros_like(log_diff)], 3)
return res, log_diff
def conv_ch_coupling(input_, dim, name,
use_batch_norm=True, train=True, weight_norm=True,
reverse=False, residual_blocks=5,
bottleneck=False, use_aff=True, change_bottom=True,
skip=True):
"""Coupling with channel-wise splitting."""
if use_aff:
return conv_ch_aff_coupling(
input_=input_, dim=dim, name=name,
use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm,
reverse=reverse, residual_blocks=residual_blocks,
bottleneck=bottleneck, change_bottom=change_bottom, skip=skip)
else:
return conv_ch_add_coupling(
input_=input_, dim=dim, name=name,
use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm,
reverse=reverse, residual_blocks=residual_blocks,
bottleneck=bottleneck, change_bottom=change_bottom, skip=skip)
# RECURSIVE USE OF COUPLING LAYERS
def rec_masked_conv_coupling(input_, hps, scale_idx, n_scale,
use_batch_norm=True, weight_norm=True,
train=True):
"""Recursion on coupling layers."""
shape = input_.get_shape().as_list()
channels = shape[3]
residual_blocks = hps.residual_blocks
base_dim = hps.base_dim
mask = 1.
use_aff = hps.use_aff
res = input_
skip = hps.skip
log_diff = tf.zeros_like(input_)
dim = base_dim
if FLAGS.recursion_type < 4:
dim *= 2 ** scale_idx
with tf.variable_scope("scale_%d" % scale_idx):
# initial coupling layers
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=mask, dim=dim,
name="coupling_0",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=False, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=1. - mask, dim=dim,
name="coupling_1",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=False, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=mask, dim=dim,
name="coupling_2",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=False, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=True,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
if scale_idx < (n_scale - 1):
with tf.variable_scope("scale_%d" % scale_idx):
res = squeeze_2x2(res)
log_diff = squeeze_2x2(log_diff)
res, inc_log_diff = conv_ch_coupling(
input_=res,
change_bottom=True, dim=2 * dim,
name="coupling_4",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=False, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = conv_ch_coupling(
input_=res,
change_bottom=False, dim=2 * dim,
name="coupling_5",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=False, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = conv_ch_coupling(
input_=res,
change_bottom=True, dim=2 * dim,
name="coupling_6",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=False, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=True, skip=skip)
log_diff += inc_log_diff
res = unsqueeze_2x2(res)
log_diff = unsqueeze_2x2(log_diff)
if FLAGS.recursion_type > 1:
res = squeeze_2x2_ordered(res)
log_diff = squeeze_2x2_ordered(log_diff)
if FLAGS.recursion_type > 2:
res_1 = res[:, :, :, :channels]
res_2 = res[:, :, :, channels:]
log_diff_1 = log_diff[:, :, :, :channels]
log_diff_2 = log_diff[:, :, :, channels:]
else:
res_1, res_2 = tf.split(res, 2, 3)
log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
res_1, inc_log_diff = rec_masked_conv_coupling(
input_=res_1, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
train=train)
res = tf.concat_v2([res_1, res_2], 3)
log_diff_1 += inc_log_diff
log_diff = tf.concat_v2([log_diff_1, log_diff_2], 3)
res = squeeze_2x2_ordered(res, reverse=True)
log_diff = squeeze_2x2_ordered(log_diff, reverse=True)
else:
res = squeeze_2x2_ordered(res)
log_diff = squeeze_2x2_ordered(log_diff)
res, inc_log_diff = rec_masked_conv_coupling(
input_=res, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
train=train)
log_diff += inc_log_diff
res = squeeze_2x2_ordered(res, reverse=True)
log_diff = squeeze_2x2_ordered(log_diff, reverse=True)
else:
with tf.variable_scope("scale_%d" % scale_idx):
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=1. - mask, dim=dim,
name="coupling_3",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=False, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=True,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
return res, log_diff
def rec_masked_deconv_coupling(input_, hps, scale_idx, n_scale,
use_batch_norm=True, weight_norm=True,
train=True):
"""Recursion on inverting coupling layers."""
shape = input_.get_shape().as_list()
channels = shape[3]
residual_blocks = hps.residual_blocks
base_dim = hps.base_dim
mask = 1.
use_aff = hps.use_aff
res = input_
log_diff = tf.zeros_like(input_)
skip = hps.skip
dim = base_dim
if FLAGS.recursion_type < 4:
dim *= 2 ** scale_idx
if scale_idx < (n_scale - 1):
if FLAGS.recursion_type > 1:
res = squeeze_2x2_ordered(res)
log_diff = squeeze_2x2_ordered(log_diff)
if FLAGS.recursion_type > 2:
res_1 = res[:, :, :, :channels]
res_2 = res[:, :, :, channels:]
log_diff_1 = log_diff[:, :, :, :channels]
log_diff_2 = log_diff[:, :, :, channels:]
else:
res_1, res_2 = tf.split(res, 2, 3)
log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
res_1, log_diff_1 = rec_masked_deconv_coupling(
input_=res_1, hps=hps,
scale_idx=scale_idx + 1, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
train=train)
res = tf.concat_v2([res_1, res_2], 3)
log_diff = tf.concat_v2([log_diff_1, log_diff_2], 3)
res = squeeze_2x2_ordered(res, reverse=True)
log_diff = squeeze_2x2_ordered(log_diff, reverse=True)
else:
res = squeeze_2x2_ordered(res)
log_diff = squeeze_2x2_ordered(log_diff)
res, log_diff = rec_masked_deconv_coupling(
input_=res, hps=hps,
scale_idx=scale_idx + 1, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
train=train)
res = squeeze_2x2_ordered(res, reverse=True)
log_diff = squeeze_2x2_ordered(log_diff, reverse=True)
with tf.variable_scope("scale_%d" % scale_idx):
res = squeeze_2x2(res)
log_diff = squeeze_2x2(log_diff)
res, inc_log_diff = conv_ch_coupling(
input_=res,
change_bottom=True, dim=2 * dim,
name="coupling_6",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=True, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=True, skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = conv_ch_coupling(
input_=res,
change_bottom=False, dim=2 * dim,
name="coupling_5",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=True, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = conv_ch_coupling(
input_=res,
change_bottom=True, dim=2 * dim,
name="coupling_4",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=True, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip)
log_diff += inc_log_diff
res = unsqueeze_2x2(res)
log_diff = unsqueeze_2x2(log_diff)
else:
with tf.variable_scope("scale_%d" % scale_idx):
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=1. - mask, dim=dim,
name="coupling_3",
use_batch_norm=use_batch_norm, train=train,
weight_norm=weight_norm,
reverse=True, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=True,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
with tf.variable_scope("scale_%d" % scale_idx):
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=mask, dim=dim,
name="coupling_2",
use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm,
reverse=True, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=True,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=1. - mask, dim=dim,
name="coupling_1",
use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm,
reverse=True, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
res, inc_log_diff = masked_conv_coupling(
input_=res,
mask_in=mask, dim=dim,
name="coupling_0",
use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm,
reverse=True, residual_blocks=residual_blocks,
bottleneck=hps.bottleneck, use_aff=use_aff,
use_width=1., use_height=1., skip=skip)
log_diff += inc_log_diff
return res, log_diff
# ENCODER AND DECODER IMPLEMENTATIONS
# start the recursions
def encoder(input_, hps, n_scale, use_batch_norm=True,
weight_norm=True, train=True):
"""Encoding/gaussianization function."""
res = input_
log_diff = tf.zeros_like(input_)
res, inc_log_diff = rec_masked_conv_coupling(
input_=res, hps=hps, scale_idx=0, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
train=train)
log_diff += inc_log_diff
return res, log_diff
def decoder(input_, hps, n_scale, use_batch_norm=True,
weight_norm=True, train=True):
"""Decoding/generator function."""
res, log_diff = rec_masked_deconv_coupling(
input_=input_, hps=hps, scale_idx=0, n_scale=n_scale,
use_batch_norm=use_batch_norm, weight_norm=weight_norm,
train=train)
return res, log_diff
class RealNVP(object):
"""Real NVP model."""
def __init__(self, hps, sampling=False):
# DATA TENSOR INSTANTIATION
device = "/cpu:0"
if FLAGS.dataset == "imnet":
with tf.device(
tf.train.replica_device_setter(0, worker_device=device)):
filename_queue = tf.train.string_input_producer(
gfile.Glob(FLAGS.data_path), num_epochs=None)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
features={
"image_raw": tf.FixedLenFeature([], tf.string),
})
image = tf.decode_raw(features["image_raw"], tf.uint8)
image.set_shape([FLAGS.image_size * FLAGS.image_size * 3])
image = tf.cast(image, tf.float32)
if FLAGS.mode == "train":
images = tf.train.shuffle_batch(
[image], batch_size=hps.batch_size, num_threads=1,
capacity=1000 + 3 * hps.batch_size,
# Ensures a minimum amount of shuffling of examples.
min_after_dequeue=1000)
else:
images = tf.train.batch(
[image], batch_size=hps.batch_size, num_threads=1,
capacity=1000 + 3 * hps.batch_size)
self.x_orig = x_orig = images
image_size = FLAGS.image_size
x_in = tf.reshape(
x_orig,
[hps.batch_size, FLAGS.image_size, FLAGS.image_size, 3])
x_in = tf.clip_by_value(x_in, 0, 255)
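      # Adding uniform [0, 1) noise dequantizes the 8-bit pixel values so the
      # model fits a density over continuous inputs in [0, 1).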
x_in = (tf.cast(x_in, tf.float32)
+ tf.random_uniform(tf.shape(x_in))) / 256.
elif FLAGS.dataset == "celeba":
with tf.device(
tf.train.replica_device_setter(0, worker_device=device)):
filename_queue = tf.train.string_input_producer(
gfile.Glob(FLAGS.data_path), num_epochs=None)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
features={
"image_raw": tf.FixedLenFeature([], tf.string),
})
image = tf.decode_raw(features["image_raw"], tf.uint8)
image.set_shape([218 * 178 * 3]) # 218, 178
image = tf.cast(image, tf.float32)
image = tf.reshape(image, [218, 178, 3])
image = image[40:188, 15:163, :]
if FLAGS.mode == "train":
image = tf.image.random_flip_left_right(image)
images = tf.train.shuffle_batch(
[image], batch_size=hps.batch_size, num_threads=1,
capacity=1000 + 3 * hps.batch_size,
min_after_dequeue=1000)
else:
images = tf.train.batch(
[image], batch_size=hps.batch_size, num_threads=1,
capacity=1000 + 3 * hps.batch_size)
self.x_orig = x_orig = images
image_size = 64
x_in = tf.reshape(x_orig, [hps.batch_size, 148, 148, 3])
x_in = tf.image.resize_images(
x_in, [64, 64], method=0, align_corners=False)
x_in = (tf.cast(x_in, tf.float32)
+ tf.random_uniform(tf.shape(x_in))) / 256.
elif FLAGS.dataset == "lsun":
with tf.device(
tf.train.replica_device_setter(0, worker_device=device)):
filename_queue = tf.train.string_input_producer(
gfile.Glob(FLAGS.data_path), num_epochs=None)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
features={
"image_raw": tf.FixedLenFeature([], tf.string),
"height": tf.FixedLenFeature([], tf.int64),
"width": tf.FixedLenFeature([], tf.int64),
"depth": tf.FixedLenFeature([], tf.int64)
})
image = tf.decode_raw(features["image_raw"], tf.uint8)
height = tf.reshape((features["height"], tf.int64)[0], [1])
height = tf.cast(height, tf.int32)
width = tf.reshape((features["width"], tf.int64)[0], [1])
width = tf.cast(width, tf.int32)
depth = tf.reshape((features["depth"], tf.int64)[0], [1])
depth = tf.cast(depth, tf.int32)
image = tf.reshape(image, tf.concat_v2([height, width, depth], 0))
image = tf.random_crop(image, [64, 64, 3])
if FLAGS.mode == "train":
image = tf.image.random_flip_left_right(image)
images = tf.train.shuffle_batch(
[image], batch_size=hps.batch_size, num_threads=1,
capacity=1000 + 3 * hps.batch_size,
# Ensures a minimum amount of shuffling of examples.
min_after_dequeue=1000)
else:
images = tf.train.batch(
[image], batch_size=hps.batch_size, num_threads=1,
capacity=1000 + 3 * hps.batch_size)
self.x_orig = x_orig = images
image_size = 64
x_in = tf.reshape(x_orig, [hps.batch_size, 64, 64, 3])
x_in = (tf.cast(x_in, tf.float32)
+ tf.random_uniform(tf.shape(x_in))) / 256.
else:
raise ValueError("Unknown dataset.")
x_in = tf.reshape(x_in, [hps.batch_size, image_size, image_size, 3])
side_shown = int(numpy.sqrt(hps.batch_size))
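    # Tile the first side_shown**2 inputs into a single side_shown x
    # side_shown image grid for the TensorBoard summary below.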
shown_x = tf.transpose(
tf.reshape(
x_in[:(side_shown * side_shown), :, :, :],
[side_shown, image_size * side_shown, image_size, 3]),
[0, 2, 1, 3])
shown_x = tf.transpose(
tf.reshape(
shown_x,
[1, image_size * side_shown, image_size * side_shown, 3]),
[0, 2, 1, 3]) * 255.
tf.summary.image(
"inputs",
tf.cast(shown_x, tf.uint8),
max_outputs=1)
# restrict the data
FLAGS.image_size = image_size
data_constraint = hps.data_constraint
pre_logit_scale = numpy.log(data_constraint)
pre_logit_scale -= numpy.log(1. - data_constraint)
pre_logit_scale = tf.cast(pre_logit_scale, tf.float32)
logit_x_in = 2. * x_in # [0, 2]
logit_x_in -= 1. # [-1, 1]
logit_x_in *= data_constraint # [-.9, .9]
logit_x_in += 1. # [.1, 1.9]
logit_x_in /= 2. # [.05, .95]
# logit the data
logit_x_in = tf.log(logit_x_in) - tf.log(1. - logit_x_in)
transform_cost = tf.reduce_sum(
tf.nn.softplus(logit_x_in) + tf.nn.softplus(-logit_x_in)
- tf.nn.softplus(-pre_logit_scale),
[1, 2, 3])
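    # transform_cost is the log-det Jacobian of the squash + logit above,
    # log|d logit(s(x)) / dx|, written with softplus terms for numerical
    # stability; it is added to the log-likelihood below so the density is
    # reported in the original data space.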
# INFERENCE AND COSTS
z_out, log_diff = encoder(
input_=logit_x_in, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=True)
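    # Outside of training, rebuild the encoder with train=False so batch norm
    # uses its moving averages; the layers reuse the variables created above.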
if FLAGS.mode != "train":
z_out, log_diff = encoder(
input_=logit_x_in, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
final_shape = [image_size, image_size, 3]
prior_ll = standard_normal_ll(z_out)
prior_ll = tf.reduce_sum(prior_ll, [1, 2, 3])
log_diff = tf.reduce_sum(log_diff, [1, 2, 3])
log_diff += transform_cost
cost = -(prior_ll + log_diff)
self.x_in = x_in
self.z_out = z_out
self.cost = cost = tf.reduce_mean(cost)
l2_reg = sum(
[tf.reduce_sum(tf.square(v)) for v in tf.trainable_variables()
if ("magnitude" in v.name) or ("rescaling_scale" in v.name)])
bit_per_dim = ((cost + numpy.log(256.) * image_size * image_size * 3.)
/ (image_size * image_size * 3. * numpy.log(2.)))
self.bit_per_dim = bit_per_dim
# OPTIMIZATION
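    # hps.momentum and hps.decay appear to be specified as complements,
    # e.g. hps.momentum = .05 yields Adam beta1 = .95.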
momentum = 1. - hps.momentum
decay = 1. - hps.decay
if hps.optimizer == "adam":
optimizer = tf.train.AdamOptimizer(
learning_rate=hps.learning_rate,
beta1=momentum, beta2=decay, epsilon=1e-08,
use_locking=False, name="Adam")
elif hps.optimizer == "rmsprop":
optimizer = tf.train.RMSPropOptimizer(
learning_rate=hps.learning_rate, decay=decay,
momentum=momentum, epsilon=1e-04,
use_locking=False, name="RMSProp")
else:
optimizer = tf.train.MomentumOptimizer(hps.learning_rate,
momentum=momentum)
step = tf.get_variable(
"global_step", [], tf.int64,
tf.zeros_initializer(),
trainable=False)
self.step = step
grads_and_vars = optimizer.compute_gradients(
cost + hps.l2_coeff * l2_reg,
tf.trainable_variables())
grads, vars_ = zip(*grads_and_vars)
capped_grads, gradient_norm = tf.clip_by_global_norm(
grads, clip_norm=hps.clip_gradient)
gradient_norm = tf.check_numerics(gradient_norm,
"Gradient norm is NaN or Inf.")
l2_z = tf.reduce_sum(tf.square(z_out), [1, 2, 3])
if not sampling:
tf.summary.scalar("negative_log_likelihood", tf.reshape(cost, []))
tf.summary.scalar("gradient_norm", tf.reshape(gradient_norm, []))
tf.summary.scalar("bit_per_dim", tf.reshape(bit_per_dim, []))
tf.summary.scalar("log_diff", tf.reshape(tf.reduce_mean(log_diff), []))
tf.summary.scalar("prior_ll", tf.reshape(tf.reduce_mean(prior_ll), []))
tf.summary.scalar(
"log_diff_var",
tf.reshape(tf.reduce_mean(tf.square(log_diff))
- tf.square(tf.reduce_mean(log_diff)), []))
tf.summary.scalar(
"prior_ll_var",
tf.reshape(tf.reduce_mean(tf.square(prior_ll))
- tf.square(tf.reduce_mean(prior_ll)), []))
tf.summary.scalar("l2_z_mean", tf.reshape(tf.reduce_mean(l2_z), []))
tf.summary.scalar(
"l2_z_var",
tf.reshape(tf.reduce_mean(tf.square(l2_z))
- tf.square(tf.reduce_mean(l2_z)), []))
capped_grads_and_vars = zip(capped_grads, vars_)
self.train_step = optimizer.apply_gradients(
capped_grads_and_vars, global_step=step)
# SAMPLING AND VISUALIZATION
if sampling:
# SAMPLES
sample = standard_normal_sample([100] + final_shape)
sample, _ = decoder(
input_=sample, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=True)
sample = tf.nn.sigmoid(sample)
sample = tf.clip_by_value(sample, 0, 1) * 255.
sample = tf.reshape(sample, [100, image_size, image_size, 3])
sample = tf.transpose(
tf.reshape(sample, [10, image_size * 10, image_size, 3]),
[0, 2, 1, 3])
sample = tf.transpose(
tf.reshape(sample, [1, image_size * 10, image_size * 10, 3]),
[0, 2, 1, 3])
tf.summary.image(
"samples",
tf.cast(sample, tf.uint8),
max_outputs=1)
# CONCATENATION
concatenation, _ = encoder(
input_=logit_x_in, hps=hps,
n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
concatenation = tf.reshape(
concatenation,
[(side_shown * side_shown), image_size, image_size, 3])
concatenation = tf.transpose(
tf.reshape(
concatenation,
[side_shown, image_size * side_shown, image_size, 3]),
[0, 2, 1, 3])
concatenation = tf.transpose(
tf.reshape(
concatenation,
[1, image_size * side_shown, image_size * side_shown, 3]),
[0, 2, 1, 3])
concatenation, _ = decoder(
input_=concatenation, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
concatenation = tf.nn.sigmoid(concatenation) * 255.
tf.summary.image(
"concatenation",
tf.cast(concatenation, tf.uint8),
max_outputs=1)
# MANIFOLD
# Data basis
z_u, _ = encoder(
input_=logit_x_in[:8, :, :, :], hps=hps,
n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
u_1 = tf.reshape(z_u[0, :, :, :], [-1])
u_2 = tf.reshape(z_u[1, :, :, :], [-1])
u_3 = tf.reshape(z_u[2, :, :, :], [-1])
u_4 = tf.reshape(z_u[3, :, :, :], [-1])
u_5 = tf.reshape(z_u[4, :, :, :], [-1])
u_6 = tf.reshape(z_u[5, :, :, :], [-1])
u_7 = tf.reshape(z_u[6, :, :, :], [-1])
u_8 = tf.reshape(z_u[7, :, :, :], [-1])
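      # The 8 codes above act as a (non-orthogonal) basis; points on a
      # trigonometric "dome" parameterized by 3 angles in their span are
      # decoded below to visualize a manifold of samples.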
# 3D dome
manifold_side = 8
angle_1 = numpy.arange(manifold_side) * 1. / manifold_side
angle_2 = numpy.arange(manifold_side) * 1. / manifold_side
angle_1 *= 2. * numpy.pi
angle_2 *= 2. * numpy.pi
angle_1 = angle_1.astype("float32")
angle_2 = angle_2.astype("float32")
angle_1 = tf.reshape(angle_1, [1, -1, 1])
angle_1 += tf.zeros([manifold_side, manifold_side, 1])
angle_2 = tf.reshape(angle_2, [-1, 1, 1])
angle_2 += tf.zeros([manifold_side, manifold_side, 1])
n_angle_3 = 40
angle_3 = numpy.arange(n_angle_3) * 1. / n_angle_3
angle_3 *= 2 * numpy.pi
angle_3 = angle_3.astype("float32")
angle_3 = tf.reshape(angle_3, [-1, 1, 1, 1])
angle_3 += tf.zeros([n_angle_3, manifold_side, manifold_side, 1])
manifold = tf.cos(angle_1) * (
tf.cos(angle_2) * (
tf.cos(angle_3) * u_1 + tf.sin(angle_3) * u_2)
+ tf.sin(angle_2) * (
tf.cos(angle_3) * u_3 + tf.sin(angle_3) * u_4))
manifold += tf.sin(angle_1) * (
tf.cos(angle_2) * (
tf.cos(angle_3) * u_5 + tf.sin(angle_3) * u_6)
+ tf.sin(angle_2) * (
tf.cos(angle_3) * u_7 + tf.sin(angle_3) * u_8))
manifold = tf.reshape(
manifold,
[n_angle_3 * manifold_side * manifold_side] + final_shape)
manifold, _ = decoder(
input_=manifold, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
manifold = tf.nn.sigmoid(manifold)
manifold = tf.clip_by_value(manifold, 0, 1) * 255.
manifold = tf.reshape(
manifold,
[n_angle_3,
manifold_side * manifold_side,
image_size,
image_size,
3])
manifold = tf.transpose(
tf.reshape(
manifold,
[n_angle_3, manifold_side,
image_size * manifold_side, image_size, 3]), [0, 1, 3, 2, 4])
manifold = tf.transpose(
tf.reshape(
manifold,
[n_angle_3, image_size * manifold_side,
image_size * manifold_side, 3]),
[0, 2, 1, 3])
manifold = tf.transpose(manifold, [1, 2, 0, 3])
manifold = tf.reshape(
manifold,
[1, image_size * manifold_side,
image_size * manifold_side, 3 * n_angle_3])
tf.summary.image(
"manifold",
tf.cast(manifold[:, :, :, :3], tf.uint8),
max_outputs=1)
# COMPRESSION
z_complete, _ = encoder(
input_=logit_x_in[:hps.n_scale, :, :, :], hps=hps,
n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
z_compressed_list = [z_complete]
z_noisy_list = [z_complete]
z_lost = z_complete
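      # At each scale, squeeze and drop half of the channels (z_lost), keeping
      # a coarser code. Decoding with the dropped part zeroed shows lossy
      # compression; decoding with fresh Gaussian noise instead shows what the
      # discarded dimensions encode.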
for scale_idx in xrange(hps.n_scale - 1):
z_lost = squeeze_2x2_ordered(z_lost)
z_lost, _ = tf.split(z_lost, 2, 3)
z_compressed = z_lost
z_noisy = z_lost
for _ in xrange(scale_idx + 1):
z_compressed = tf.concat_v2(
[z_compressed, tf.zeros_like(z_compressed)], 3)
z_compressed = squeeze_2x2_ordered(
z_compressed, reverse=True)
z_noisy = tf.concat_v2(
[z_noisy, tf.random_normal(
z_noisy.get_shape().as_list())], 3)
z_noisy = squeeze_2x2_ordered(z_noisy, reverse=True)
z_compressed_list.append(z_compressed)
z_noisy_list.append(z_noisy)
self.z_reduced = z_lost
z_compressed = tf.concat_v2(z_compressed_list, 0)
z_noisy = tf.concat_v2(z_noisy_list, 0)
noisy_images, _ = decoder(
input_=z_noisy, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
compressed_images, _ = decoder(
input_=z_compressed, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=False)
noisy_images = tf.nn.sigmoid(noisy_images)
compressed_images = tf.nn.sigmoid(compressed_images)
noisy_images = tf.clip_by_value(noisy_images, 0, 1) * 255.
noisy_images = tf.reshape(
noisy_images,
[(hps.n_scale * hps.n_scale), image_size, image_size, 3])
noisy_images = tf.transpose(
tf.reshape(
noisy_images,
[hps.n_scale, image_size * hps.n_scale, image_size, 3]),
[0, 2, 1, 3])
noisy_images = tf.transpose(
tf.reshape(
noisy_images,
[1, image_size * hps.n_scale, image_size * hps.n_scale, 3]),
[0, 2, 1, 3])
tf.summary.image(
"noise",
tf.cast(noisy_images, tf.uint8),
max_outputs=1)
compressed_images = tf.clip_by_value(compressed_images, 0, 1) * 255.
compressed_images = tf.reshape(
compressed_images,
[(hps.n_scale * hps.n_scale), image_size, image_size, 3])
compressed_images = tf.transpose(
tf.reshape(
compressed_images,
[hps.n_scale, image_size * hps.n_scale, image_size, 3]),
[0, 2, 1, 3])
compressed_images = tf.transpose(
tf.reshape(
compressed_images,
[1, image_size * hps.n_scale, image_size * hps.n_scale, 3]),
[0, 2, 1, 3])
tf.summary.image(
"compression",
tf.cast(compressed_images, tf.uint8),
max_outputs=1)
# SAMPLES x2
final_shape[0] *= 2
final_shape[1] *= 2
big_sample = standard_normal_sample([25] + final_shape)
big_sample, _ = decoder(
input_=big_sample, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=True)
big_sample = tf.nn.sigmoid(big_sample)
big_sample = tf.clip_by_value(big_sample, 0, 1) * 255.
big_sample = tf.reshape(
big_sample,
[25, image_size * 2, image_size * 2, 3])
big_sample = tf.transpose(
tf.reshape(
big_sample,
[5, image_size * 10, image_size * 2, 3]), [0, 2, 1, 3])
big_sample = tf.transpose(
tf.reshape(
big_sample,
[1, image_size * 10, image_size * 10, 3]),
[0, 2, 1, 3])
tf.summary.image(
"big_sample",
tf.cast(big_sample, tf.uint8),
max_outputs=1)
# SAMPLES x10
final_shape[0] *= 5
final_shape[1] *= 5
extra_large = standard_normal_sample([1] + final_shape)
extra_large, _ = decoder(
input_=extra_large, hps=hps, n_scale=hps.n_scale,
use_batch_norm=hps.use_batch_norm, weight_norm=True,
train=True)
extra_large = tf.nn.sigmoid(extra_large)
extra_large = tf.clip_by_value(extra_large, 0, 1) * 255.
tf.summary.image(
"extra_large",
tf.cast(extra_large, tf.uint8),
max_outputs=1)
def eval_epoch(self, hps):
"""Evaluate bits/dim."""
n_eval_dict = {
"imnet": 50000,
"lsun": 300,
"celeba": 19962,
"svhn": 26032,
}
if FLAGS.eval_set_size == 0:
num_examples_eval = n_eval_dict[FLAGS.dataset]
else:
num_examples_eval = FLAGS.eval_set_size
n_epoch = num_examples_eval / hps.batch_size
eval_costs = []
bar_len = 70
for epoch_idx in xrange(n_epoch):
n_equal = epoch_idx * bar_len * 1. / n_epoch
n_equal = numpy.ceil(n_equal)
n_equal = int(n_equal)
n_dash = bar_len - n_equal
progress_bar = "[" + "=" * n_equal + "-" * n_dash + "]\r"
print progress_bar,
cost = self.bit_per_dim.eval()
eval_costs.append(cost)
print ""
return float(numpy.mean(eval_costs))
def train_model(hps, logdir):
"""Training."""
with tf.Graph().as_default():
with tf.device(tf.train.replica_device_setter(0)):
with tf.variable_scope("model"):
model = RealNVP(hps)
saver = tf.train.Saver(tf.global_variables())
# Build the summary operation from the last tower summaries.
summary_op = tf.summary.merge_all()
# Build an initialization operation to run below.
init = tf.global_variables_initializer()
# Start running operations on the Graph. allow_soft_placement must be set to
# True to build towers on GPU, as some of the ops do not have GPU
# implementations.
sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=True))
sess.run(init)
ckpt_state = tf.train.get_checkpoint_state(logdir)
if ckpt_state and ckpt_state.model_checkpoint_path:
print "Loading file %s" % ckpt_state.model_checkpoint_path
saver.restore(sess, ckpt_state.model_checkpoint_path)
# Start the queue runners.
tf.train.start_queue_runners(sess=sess)
summary_writer = tf.summary.FileWriter(
logdir,
graph=sess.graph)
local_step = 0
while True:
fetches = [model.step, model.bit_per_dim, model.train_step]
      # The chief worker evaluates the summaries every 100 steps.
should_eval_summaries = local_step % 100 == 0
if should_eval_summaries:
fetches += [summary_op]
start_time = time.time()
outputs = sess.run(fetches)
global_step_val = outputs[0]
loss = outputs[1]
duration = time.time() - start_time
      assert not numpy.isnan(loss), 'Model diverged with loss = NaN'
if local_step % 10 == 0:
examples_per_sec = hps.batch_size / float(duration)
format_str = ('%s: step %d, loss = %.2f '
'(%.1f examples/sec; %.3f '
'sec/batch)')
print format_str % (datetime.now(), global_step_val, loss,
examples_per_sec, duration)
if should_eval_summaries:
summary_str = outputs[-1]
summary_writer.add_summary(summary_str, global_step_val)
# Save the model checkpoint periodically.
if local_step % 1000 == 0 or (local_step + 1) == FLAGS.train_steps:
checkpoint_path = os.path.join(logdir, 'model.ckpt')
saver.save(
sess,
checkpoint_path,
global_step=global_step_val)
if outputs[0] >= FLAGS.train_steps:
break
local_step += 1
def evaluate(hps, logdir, traindir, subset="valid", return_val=False):
"""Evaluation."""
hps.batch_size = 100
with tf.Graph().as_default():
with tf.device("/cpu:0"):
with tf.variable_scope("model") as var_scope:
eval_model = RealNVP(hps)
summary_writer = tf.summary.FileWriter(logdir)
var_scope.reuse_variables()
saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=True))
tf.train.start_queue_runners(sess)
    previous_global_step = 0  # don't run eval for step = 0
with sess.as_default():
while True:
ckpt_state = tf.train.get_checkpoint_state(traindir)
if not (ckpt_state and ckpt_state.model_checkpoint_path):
print "No model to eval yet at %s" % traindir
time.sleep(30)
continue
print "Loading file %s" % ckpt_state.model_checkpoint_path
saver.restore(sess, ckpt_state.model_checkpoint_path)
current_step = tf.train.global_step(sess, eval_model.step)
if current_step == previous_global_step:
print "Waiting for the checkpoint to be updated."
time.sleep(30)
continue
previous_global_step = current_step
print "Evaluating..."
bit_per_dim = eval_model.eval_epoch(hps)
print ("Epoch: %d, %s -> %.3f bits/dim"
% (current_step, subset, bit_per_dim))
print "Writing summary..."
summary = tf.Summary()
summary.value.extend(
[tf.Summary.Value(
tag="bit_per_dim",
simple_value=bit_per_dim)])
summary_writer.add_summary(summary, current_step)
if return_val:
return current_step, bit_per_dim
def sample_from_model(hps, logdir, traindir):
"""Sampling."""
hps.batch_size = 100
with tf.Graph().as_default():
with tf.device("/cpu:0"):
with tf.variable_scope("model") as var_scope:
eval_model = RealNVP(hps, sampling=True)
summary_writer = tf.summary.FileWriter(logdir)
var_scope.reuse_variables()
summary_op = tf.summary.merge_all()
saver = tf.train.Saver()
sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=True))
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    previous_global_step = 0  # don't run eval for step = 0
initialized = False
with sess.as_default():
while True:
ckpt_state = tf.train.get_checkpoint_state(traindir)
if not (ckpt_state and ckpt_state.model_checkpoint_path):
if not initialized:
print "No model to eval yet at %s" % traindir
time.sleep(30)
continue
else:
print ("Loading file %s"
% ckpt_state.model_checkpoint_path)
saver.restore(sess, ckpt_state.model_checkpoint_path)
current_step = tf.train.global_step(sess, eval_model.step)
if current_step == previous_global_step:
print "Waiting for the checkpoint to be updated."
time.sleep(30)
continue
previous_global_step = current_step
fetches = [summary_op]
outputs = sess.run(fetches)
summary_writer.add_summary(outputs[0], current_step)
coord.request_stop()
coord.join(threads)
def main(unused_argv):
hps = get_default_hparams().update_config(FLAGS.hpconfig)
if FLAGS.mode == "train":
train_model(hps=hps, logdir=FLAGS.logdir)
elif FLAGS.mode == "sample":
sample_from_model(hps=hps, logdir=FLAGS.logdir,
traindir=FLAGS.traindir)
else:
hps.batch_size = 100
evaluate(hps=hps, logdir=FLAGS.logdir,
traindir=FLAGS.traindir, subset=FLAGS.mode)
if __name__ == "__main__":
tf.app.run()
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Utility functions for Real NVP.
"""
# pylint: disable=dangerous-default-value
import numpy
import tensorflow as tf
from tensorflow.python.framework import ops
DEFAULT_BN_LAG = .0
def stable_var(input_, mean=None, axes=[0]):
"""Numerically more stable variance computation."""
if mean is None:
mean = tf.reduce_mean(input_, axes)
res = tf.square(input_ - mean)
max_sqr = tf.reduce_max(res, axes)
res /= max_sqr
res = tf.reduce_mean(res, axes)
res *= max_sqr
return res
def variable_on_cpu(name, shape, initializer, trainable=True):
"""Helper to create a Variable stored on CPU memory.
Args:
name: name of the variable
shape: list of ints
initializer: initializer for Variable
trainable: boolean defining if the variable is for training
Returns:
Variable Tensor
"""
var = tf.get_variable(
name, shape, initializer=initializer, trainable=trainable)
return var
# layers
def conv_layer(input_,
filter_size,
dim_in,
dim_out,
name,
stddev=1e-2,
strides=[1, 1, 1, 1],
padding="SAME",
nonlinearity=None,
bias=False,
weight_norm=False,
scale=False):
"""Convolutional layer."""
with tf.variable_scope(name) as scope:
weights = variable_on_cpu(
"weights",
filter_size + [dim_in, dim_out],
tf.random_uniform_initializer(
minval=-stddev, maxval=stddev))
# weight normalization
if weight_norm:
weights /= tf.sqrt(tf.reduce_sum(tf.square(weights), [0, 1, 2]))
if scale:
magnitude = variable_on_cpu(
"magnitude", [dim_out],
tf.constant_initializer(
stddev * numpy.sqrt(dim_in * numpy.prod(filter_size) / 12.)))
weights *= magnitude
res = input_
# handling filter size bigger than image size
if hasattr(input_, "shape"):
if input_.get_shape().as_list()[1] < filter_size[0]:
pad_1 = tf.zeros([
input_.get_shape().as_list()[0],
filter_size[0] - input_.get_shape().as_list()[1],
input_.get_shape().as_list()[2],
input_.get_shape().as_list()[3]
])
pad_2 = tf.zeros([
            input_.get_shape().as_list()[0],
filter_size[0],
filter_size[1] - input_.get_shape().as_list()[2],
input_.get_shape().as_list()[3]
])
res = tf.concat(1, [pad_1, res])
res = tf.concat(2, [pad_2, res])
res = tf.nn.conv2d(
input=res,
filter=weights,
strides=strides,
padding=padding,
name=scope.name)
if hasattr(input_, "shape"):
if input_.get_shape().as_list()[1] < filter_size[0]:
res = tf.slice(res, [
0, filter_size[0] - input_.get_shape().as_list()[1],
filter_size[1] - input_.get_shape().as_list()[2], 0
], [-1, -1, -1, -1])
if bias:
biases = variable_on_cpu("biases", [dim_out], tf.constant_initializer(0.))
res = tf.nn.bias_add(res, biases)
if nonlinearity is not None:
res = nonlinearity(res)
return res
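# A minimal usage sketch for conv_layer (names and sizes are illustrative):
#   h = conv_layer(input_=images, filter_size=[3, 3], dim_in=3, dim_out=32,
#                  name="conv1", nonlinearity=tf.nn.relu,
#                  weight_norm=True, scale=True)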
def max_pool_2x2(input_):
"""Max pooling."""
return tf.nn.max_pool(
input_, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def depool_2x2(input_, stride=2):
"""Depooling."""
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
res = tf.reshape(input_, [batch_size, height, 1, width, 1, channels])
res = tf.concat(
2, [res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
res = tf.concat(4, [
res, tf.zeros([batch_size, height, stride, width, stride - 1, channels])
])
res = tf.reshape(res, [batch_size, stride * height, stride * width, channels])
return res
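# Example: with stride=2, a [b, h, w, c] input becomes [b, 2h, 2w, c], with
# each original pixel at the top-left of its 2x2 block and zeros elsewhere.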
# random flip on a batch of images
def batch_random_flip(input_):
"""Simultaneous horizontal random flip."""
if isinstance(input_, (float, int)):
return input_
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
res = tf.split(0, batch_size, input_)
res = [elem[0, :, :, :] for elem in res]
res = [tf.image.random_flip_left_right(elem) for elem in res]
res = [tf.reshape(elem, [1, height, width, channels]) for elem in res]
res = tf.concat(0, res)
return res
# build a one hot representation corresponding to the integer tensor
# the one-hot dimension is appended to the integer tensor shape
def as_one_hot(input_, n_indices):
"""Convert indices to one-hot."""
shape = input_.get_shape().as_list()
n_elem = numpy.prod(shape)
indices = tf.range(n_elem)
indices = tf.cast(indices, tf.int64)
indices_input = tf.concat(0, [indices, tf.reshape(input_, [-1])])
indices_input = tf.reshape(indices_input, [2, -1])
indices_input = tf.transpose(indices_input)
res = tf.sparse_to_dense(
indices_input, [n_elem, n_indices], 1., 0., name="flat_one_hot")
res = tf.reshape(res, [elem for elem in shape] + [n_indices])
return res
def squeeze_2x2(input_):
"""Squeezing operation: reshape to convert space to channels."""
return squeeze_nxn(input_, n_factor=2)
def squeeze_nxn(input_, n_factor=2):
"""Squeezing operation: reshape to convert space to channels."""
if isinstance(input_, (float, int)):
return input_
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
if height % n_factor != 0:
raise ValueError("Height not divisible by %d." % n_factor)
if width % n_factor != 0:
raise ValueError("Width not divisible by %d." % n_factor)
res = tf.reshape(
input_,
[batch_size,
height // n_factor,
n_factor, width // n_factor,
n_factor, channels])
res = tf.transpose(res, [0, 1, 3, 5, 2, 4])
res = tf.reshape(
res,
[batch_size,
height // n_factor,
width // n_factor,
channels * n_factor * n_factor])
return res
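# Example: squeeze_2x2 maps [b, 4, 4, c] to [b, 2, 2, 4 * c], packing each
# 2x2 spatial block into the channel dimension.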
def unsqueeze_2x2(input_):
"""Unsqueezing operation: reshape to convert channels into space."""
if isinstance(input_, (float, int)):
return input_
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
if channels % 4 != 0:
raise ValueError("Number of channels not divisible by 4.")
res = tf.reshape(input_, [batch_size, height, width, channels // 4, 2, 2])
res = tf.transpose(res, [0, 1, 4, 2, 5, 3])
res = tf.reshape(res, [batch_size, 2 * height, 2 * width, channels // 4])
return res
# batch norm
def batch_norm(input_,
dim,
name,
scale=True,
train=True,
epsilon=1e-8,
decay=.1,
axes=[0],
bn_lag=DEFAULT_BN_LAG):
"""Batch normalization."""
# create variables
with tf.variable_scope(name):
var = variable_on_cpu(
"var", [dim], tf.constant_initializer(1.), trainable=False)
mean = variable_on_cpu(
"mean", [dim], tf.constant_initializer(0.), trainable=False)
step = variable_on_cpu("step", [], tf.constant_initializer(0.), trainable=False)
if scale:
gamma = variable_on_cpu("gamma", [dim], tf.constant_initializer(1.))
beta = variable_on_cpu("beta", [dim], tf.constant_initializer(0.))
# choose the appropriate moments
if train:
used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm")
cur_mean, cur_var = used_mean, used_var
if bn_lag > 0.:
used_mean -= (1. - bn_lag) * (used_mean - tf.stop_gradient(mean))
used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var))
used_mean /= (1. - bn_lag**(step + 1))
used_var /= (1. - bn_lag**(step + 1))
else:
used_mean, used_var = mean, var
cur_mean, cur_var = used_mean, used_var
# normalize
res = (input_ - used_mean) / tf.sqrt(used_var + epsilon)
# de-normalize
if scale:
res *= gamma
res += beta
# update variables
if train:
with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]):
with ops.colocate_with(mean):
new_mean = tf.assign_sub(
mean,
tf.check_numerics(decay * (mean - cur_mean), "NaN in moving mean."))
with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]):
with ops.colocate_with(var):
new_var = tf.assign_sub(
var,
tf.check_numerics(decay * (var - cur_var),
"NaN in moving variance."))
with tf.name_scope(name, "IncrementTime", [step]):
with ops.colocate_with(step):
new_step = tf.assign_add(step, 1.)
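      # The zero-valued term below makes res depend on the assign ops, forcing
      # the moving-average updates to run whenever res is evaluated.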
res += 0. * new_mean * new_var * new_step
return res
# batch normalization taking into account the volume transformation
def batch_norm_log_diff(input_,
dim,
name,
train=True,
epsilon=1e-8,
decay=.1,
axes=[0],
reuse=None,
bn_lag=DEFAULT_BN_LAG):
"""Batch normalization with corresponding log determinant Jacobian."""
if reuse is None:
reuse = not train
# create variables
with tf.variable_scope(name) as scope:
if reuse:
scope.reuse_variables()
var = variable_on_cpu(
"var", [dim], tf.constant_initializer(1.), trainable=False)
mean = variable_on_cpu(
"mean", [dim], tf.constant_initializer(0.), trainable=False)
step = variable_on_cpu("step", [], tf.constant_initializer(0.), trainable=False)
# choose the appropriate moments
if train:
used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm")
cur_mean, cur_var = used_mean, used_var
if bn_lag > 0.:
used_var = stable_var(input_=input_, mean=used_mean, axes=axes)
cur_var = used_var
used_mean -= (1 - bn_lag) * (used_mean - tf.stop_gradient(mean))
used_mean /= (1. - bn_lag**(step + 1))
used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var))
used_var /= (1. - bn_lag**(step + 1))
else:
used_mean, used_var = mean, var
cur_mean, cur_var = used_mean, used_var
# update variables
if train:
with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]):
with ops.colocate_with(mean):
new_mean = tf.assign_sub(
mean,
tf.check_numerics(
decay * (mean - cur_mean), "NaN in moving mean."))
with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]):
with ops.colocate_with(var):
new_var = tf.assign_sub(
var,
tf.check_numerics(decay * (var - cur_var),
"NaN in moving variance."))
with tf.name_scope(name, "IncrementTime", [step]):
with ops.colocate_with(step):
new_step = tf.assign_add(step, 1.)
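      # As in batch_norm, this zero-valued term forces the moving-average
      # updates to run whenever the outputs are evaluated.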
used_var += 0. * new_mean * new_var * new_step
used_var += epsilon
return used_mean, used_var
def convnet(input_,
dim_in,
dim_hid,
filter_sizes,
dim_out,
name,
use_batch_norm=True,
train=True,
nonlinearity=tf.nn.relu):
"""Chaining of convolutional layers."""
dims_in = [dim_in] + dim_hid[:-1]
dims_out = dim_hid
res = input_
bias = (not use_batch_norm)
with tf.variable_scope(name):
for layer_idx in xrange(len(dim_hid)):
res = conv_layer(
input_=res,
filter_size=filter_sizes[layer_idx],
dim_in=dims_in[layer_idx],
dim_out=dims_out[layer_idx],
name="h_%d" % layer_idx,
stddev=1e-2,
nonlinearity=None,
bias=bias)
if use_batch_norm:
res = batch_norm(
input_=res,
dim=dims_out[layer_idx],
name="bn_%d" % layer_idx,
scale=(nonlinearity == tf.nn.relu),
train=train,
epsilon=1e-8,
axes=[0, 1, 2])
if nonlinearity is not None:
res = nonlinearity(res)
res = conv_layer(
input_=res,
filter_size=filter_sizes[-1],
dim_in=dims_out[-1],
dim_out=dim_out,
name="out",
stddev=1e-2,
nonlinearity=None)
return res
# distributions
# log-likelihood estimation
def standard_normal_ll(input_):
"""Log-likelihood of standard Gaussian distribution."""
res = -.5 * (tf.square(input_) + numpy.log(2. * numpy.pi))
return res
def standard_normal_sample(shape):
"""Samples from standard Gaussian distribution."""
return tf.random_normal(shape)
SQUEEZE_MATRIX = numpy.array([[[[1., 0., 0., 0.]], [[0., 0., 1., 0.]]],
[[[0., 0., 0., 1.]], [[0., 1., 0., 0.]]]])
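# SQUEEZE_MATRIX is a fixed 2x2 convolution kernel (shape [2, 2, 1, 4]) that
# routes each pixel of a 2x2 block to its own output channel;
# squeeze_2x2_ordered tiles and permutes it so the channel ordering is
# deterministic in both directions.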
def squeeze_2x2_ordered(input_, reverse=False):
"""Squeezing operation with a controlled ordering."""
shape = input_.get_shape().as_list()
batch_size = shape[0]
height = shape[1]
width = shape[2]
channels = shape[3]
if reverse:
if channels % 4 != 0:
raise ValueError("Number of channels not divisible by 4.")
channels /= 4
else:
if height % 2 != 0:
raise ValueError("Height not divisible by 2.")
if width % 2 != 0:
raise ValueError("Width not divisible by 2.")
weights = numpy.zeros((2, 2, channels, 4 * channels))
for idx_ch in xrange(channels):
slice_2 = slice(idx_ch, (idx_ch + 1))
slice_3 = slice((idx_ch * 4), ((idx_ch + 1) * 4))
weights[:, :, slice_2, slice_3] = SQUEEZE_MATRIX
shuffle_channels = [idx_ch * 4 for idx_ch in xrange(channels)]
shuffle_channels += [idx_ch * 4 + 1 for idx_ch in xrange(channels)]
shuffle_channels += [idx_ch * 4 + 2 for idx_ch in xrange(channels)]
shuffle_channels += [idx_ch * 4 + 3 for idx_ch in xrange(channels)]
shuffle_channels = numpy.array(shuffle_channels)
weights = weights[:, :, :, shuffle_channels].astype("float32")
if reverse:
res = tf.nn.conv2d_transpose(
value=input_,
filter=weights,
output_shape=[batch_size, height * 2, width * 2, channels],
strides=[1, 2, 2, 1],
padding="SAME",
name="unsqueeze_2x2")
else:
res = tf.nn.conv2d(
input=input_,
filter=weights,
strides=[1, 2, 2, 1],
padding="SAME",
name="squeeze_2x2")
return res
...@@ -225,7 +225,7 @@ class ResNet(object):
  def _bottleneck_residual(self, x, in_filter, out_filter, stride,
                           activate_before_residual=False):
    """Bottleneck residual unit with 3 sub layers."""
    if activate_before_residual:
      with tf.variable_scope('common_bn_relu'):
        x = self._batch_norm('init_bn', x)
......
...@@ -454,7 +454,7 @@ class VGSLImageModel(object):
      self.labels = tf.slice(self.labels, [0, 0], [-1, 1])
      self.labels = tf.reshape(self.labels, [-1])
      cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=self.labels, name='xent')
    else:
      # TODO(rays) Labels need an extra dimension for logistic, so different
      # padding functions are needed, as well as a different loss function.
......
Subproject commit aab099711d7e04034cf742ddb9b00dd15edbe99c
...@@ -123,7 +123,7 @@ y_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
# %% Define loss/eval/training functions
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=y))
opt = tf.train.AdamOptimizer()
optimizer = opt.minimize(cross_entropy)
grads = opt.compute_gradients(cross_entropy, [b_fc_loc2])
......
# Tutorial Models
This repository contains models referenced from the [TensorFlow tutorials](https://www.tensorflow.org/tutorials/). We recommend installing TensorFlow from the [nightly builds](https://github.com/tensorflow/tensorflow#installation) rather than the r0.12 release before running these models.
# Description:
# TensorFlow model for word2vec
package(default_visibility = ["//tensorflow:internal"])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py")
py_library(
name = "package",
srcs = [
"__init__.py",
],
srcs_version = "PY2AND3",
visibility = ["//tensorflow:__subpackages__"],
deps = [
":gen_word2vec",
":word2vec",
":word2vec_optimized",
],
)
py_binary(
name = "word2vec",
srcs = [
"word2vec.py",
],
srcs_version = "PY2AND3",
deps = [
":gen_word2vec",
":word2vec_kernels",
"//tensorflow:tensorflow_py",
"//tensorflow/python:platform",
],
)
py_binary(
name = "word2vec_optimized",
srcs = [
"word2vec_optimized.py",
],
srcs_version = "PY2AND3",
deps = [
":gen_word2vec",
":word2vec_kernels",
"//tensorflow:tensorflow_py",
"//tensorflow/python:platform",
],
)
py_test(
name = "word2vec_test",
size = "small",
srcs = ["word2vec_test.py"],
srcs_version = "PY2AND3",
tags = [
"notsan", # b/25864127
],
deps = [
":word2vec",
"//tensorflow:tensorflow_py",
],
)
py_test(
name = "word2vec_optimized_test",
size = "small",
srcs = ["word2vec_optimized_test.py"],
srcs_version = "PY2AND3",
tags = [
"notsan",
],
deps = [
":word2vec_optimized",
"//tensorflow:tensorflow_py",
],
)
cc_library(
name = "word2vec_ops",
srcs = [
"word2vec_ops.cc",
],
linkstatic = 1,
visibility = ["//tensorflow:internal"],
deps = [
"//tensorflow/core:framework",
],
alwayslink = 1,
)
cc_library(
name = "word2vec_kernels",
srcs = [
"word2vec_kernels.cc",
],
linkstatic = 1,
visibility = ["//tensorflow:internal"],
deps = [
":word2vec_ops",
"//tensorflow/core",
],
alwayslink = 1,
)
tf_gen_op_wrapper_py(
name = "gen_word2vec",
out = "gen_word2vec.py",
deps = [":word2vec_ops"],
)
filegroup(
name = "all_files",
srcs = glob(
["**/*"],
exclude = [
"**/METADATA",
"**/OWNERS",
],
),
visibility = ["//tensorflow:__subpackages__"],
)
...@@ -7,9 +7,9 @@ ICLR 2013.
Detailed instructions on how to get started and use them are available in the
tutorials. Brief instructions are below.

* [Word2Vec Tutorial](http://tensorflow.org/tutorials/word2vec)

Assuming you have cloned the git repository, navigate into this directory. To download the example text and evaluation data:

```shell
wget http://mattmahoney.net/dc/text8.zip -O text8.zip
...@@ -19,21 +19,18 @@ unzip -p source-archive.zip word2vec/trunk/questions-words.txt > questions-word
rm source-archive.zip
```

You will need to compile the ops as follows:

```shell
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
g++ -std=c++11 -shared word2vec_ops.cc word2vec_kernels.cc -o word2vec_ops.so -fPIC -I $TF_INC -O2 -D_GLIBCXX_USE_CXX11_ABI=0
```

(For an explanation of what this is doing, see the tutorial on [Adding a New Op to TensorFlow](https://www.tensorflow.org/how_tos/adding_an_op/#building_the_op_library). The flag `-D_GLIBCXX_USE_CXX11_ABI=0` is included to support newer versions of g++.)

Then run using:

```shell
python word2vec_optimized.py \
  --train_data=text8 \
  --eval_data=questions-words.txt \
  --save_path=/tmp/
```
......
...@@ -17,5 +17,3 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function