Unverified Commit 5a5d3305 authored by Lukasz Kaiser, committed by GitHub

Merge pull request #2969 from coreylynch/master

Adding TCN.
parents 69cf6fca aa3d4422
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for svtcn_loss.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from estimators import svtcn_loss
import tensorflow as tf
class SVTCNLoss(tf.test.TestCase):
def testSVTCNLoss(self):
with self.test_session():
num_data = 64
num_sequences = 2
num_data_per_seq = num_data // num_sequences
feat_dim = 6
margin = 1.0
times = np.tile(np.arange(num_data_per_seq, dtype=np.int32),
num_sequences)
times = np.reshape(times, [times.shape[0], 1])
sequence_ids = np.concatenate(
[np.ones(num_data_per_seq)*i for i in range(num_sequences)])
sequence_ids = np.reshape(sequence_ids, [sequence_ids.shape[0], 1])
pos_radius = 6
neg_radius = 12
embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
# Compute the loss in NP
# Get a positive mask, i.e. indices for each time index
# that are inside the positive range.
in_pos_range = np.less_equal(
np.abs(times - times.transpose()), pos_radius)
# Get a negative mask, i.e. indices for each time index
# that are in the negative range (> t + neg_radius or
# < t - neg_radius).
in_neg_range = np.greater(np.abs(times - times.transpose()), neg_radius)
sequence_adjacency = sequence_ids == sequence_ids.T
sequence_adjacency_not = np.logical_not(sequence_adjacency)
pdist_matrix = euclidean_distances(embedding, squared=True)
loss_np = 0.0
num_positives = 0.0
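# Reference (numpy) implementation of the single-view TCN loss: for each
# anchor i and positive j within pos_radius, pick the smallest negative
# distance that exceeds the positive distance (a semi-hard negative), then
# accumulate the margin hinge and average over all positives.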
for i in range(num_data):
for j in range(num_data):
if in_pos_range[i, j] and i != j and sequence_adjacency[i, j]:
num_positives += 1.0
pos_distance = pdist_matrix[i][j]
neg_distances = []
for k in range(num_data):
if in_neg_range[i, k] or sequence_adjacency_not[i, k]:
neg_distances.append(pdist_matrix[i][k])
neg_distances.sort() # sort by distance
chosen_neg_distance = neg_distances[0]
for l in range(len(neg_distances)):
chosen_neg_distance = neg_distances[l]
if chosen_neg_distance > pos_distance:
break
loss_np += np.maximum(
0.0, margin - chosen_neg_distance + pos_distance)
loss_np /= num_positives
# Compute the loss in TF
loss_tf = svtcn_loss.singleview_tcn_loss(
embeddings=tf.convert_to_tensor(embedding),
timesteps=tf.convert_to_tensor(times),
pos_radius=pos_radius,
neg_radius=neg_radius,
margin=margin,
sequence_ids=tf.convert_to_tensor(sequence_ids),
multiseq=True
)
loss_tf = loss_tf.eval()
self.assertAllClose(loss_np, loss_tf)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Calculates running validation of TCN models (and baseline comparisons)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string('master', 'local',
'BNS name of the TensorFlow master to use')
tf.app.flags.DEFINE_string(
'logdir', '/tmp/tcn', 'Directory where to write event logs.')
FLAGS = tf.app.flags.FLAGS
def main(_):
"""Runs main eval loop."""
# Parse config dict from yaml config files / command line flags.
logdir = FLAGS.logdir
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Choose an estimator based on training strategy.
estimator = get_estimator(config, logdir)
# Wait for the first checkpoint file to be written.
while not tf.train.latest_checkpoint(logdir):
tf.logging.info('Waiting for a checkpoint file...')
time.sleep(10)
# Run validation.
while True:
estimator.evaluate()
if __name__ == '__main__':
tf.app.run()
(Image file added in this commit; the image diff is not shown.)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Generates imitation videos.
Generate single pairwise imitation videos:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--mode single \
--num_query_sequences 1 \
--num_target_sequences -1
Generate imitation videos with multiple sequences in the target set:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--num_multi_targets 1 \
--mode multi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import tensorflow as tf
import os
import matplotlib
matplotlib.use("pdf")
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
from estimators.get_estimator import get_estimator
from utils import util
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Checkpoint iter to use.')
tf.app.flags.DEFINE_integer(
'num_multi_targets', -1,
'Number of target sequences to combine into the target set per imitation video.')
tf.app.flags.DEFINE_string(
'outdir', '/tmp/tcn', 'Path to write embeddings to.')
tf.app.flags.DEFINE_string(
'mode', 'single', 'single | multi. Single means generate imitation vids '
'where query is being imitated by a single sequence. Multi '
'means generate imitation vids where query is being '
'imitated by multiple.')
tf.app.flags.DEFINE_string('query_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_string('target_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_integer('query_view', 1,
'Viewpoint of the query video.')
tf.app.flags.DEFINE_integer('target_view', 0,
'Viewpoint of the imitation video.')
tf.app.flags.DEFINE_integer('smoothing_window', 5,
'Number of frames to smooth over.')
tf.app.flags.DEFINE_integer('num_query_sequences', -1,
'Number of query sequences to embed.')
tf.app.flags.DEFINE_integer('num_target_sequences', -1,
'Number of target sequences to embed.')
FLAGS = tf.app.flags.FLAGS
def SmoothEmbeddings(embs):
"""Temporally smoothes a sequence of embeddings."""
new_embs = []
window = int(FLAGS.smoothing_window)
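# Average each embedding with its neighbors in the window
# [i - window, i + window), clipped to the sequence boundaries.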
for i in range(len(embs)):
min_i = max(i-window, 0)
max_i = min(i+window, len(embs))
new_embs.append(np.mean(embs[min_i:max_i, :], axis=0))
return np.array(new_embs)
def MakeImitationVideo(
outdir, vidname, query_im_strs, knn_im_strs, height=640, width=360):
"""Creates a KNN imitation video.
For each frame in vid0, pair with the frame at index in knn_indices in
vids1. Write video to disk.
Args:
outdir: String, directory to write videos.
vidname: String, name of video.
query_im_strs: Numpy array holding query image strings.
knn_im_strs: Numpy array holding knn image strings.
height: Int, height of raw images.
width: Int, width of raw images.
"""
if not tf.gfile.Exists(outdir):
tf.gfile.MakeDirs(outdir)
vid_path = os.path.join(outdir, vidname)
combined = zip(query_im_strs, knn_im_strs)
# Create and write the video.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_aspect('equal')
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
im = ax.imshow(
np.zeros((height, width*2, 3)), cmap='gray', interpolation='nearest')
im.set_clim([0, 1])
plt.tight_layout(pad=0, w_pad=0, h_pad=0)
# pylint: disable=invalid-name
def update_img(pair):
"""Decode pairs of image strings, update a video."""
im_i, im_j = pair
nparr_i = np.fromstring(str(im_i), np.uint8)
img_np_i = cv2.imdecode(nparr_i, 1)
img_np_i = img_np_i[..., [2, 1, 0]]
nparr_j = np.fromstring(str(im_j), np.uint8)
img_np_j = cv2.imdecode(nparr_j, 1)
img_np_j = img_np_j[..., [2, 1, 0]]
# Optionally reshape the images to be same size.
frame = np.concatenate([img_np_i, img_np_j], axis=1)
im.set_data(frame)
return im
ani = animation.FuncAnimation(fig, update_img, combined, interval=15)
writer = animation.writers['ffmpeg'](fps=15)
dpi = 100
tf.logging.info('Writing video to:\n %s \n' % vid_path)
ani.save('%s.mp4' % vid_path, writer=writer, dpi=dpi)
def GenerateImitationVideo(
vid_name, query_ims, query_embs, target_ims, target_embs, height, width):
"""Generates a single cross-sequence imitation video.
For each frame in some query sequence, find the nearest neighbor from
some target sequence in embedding space.
Args:
vid_name: String, the name of the video.
query_ims: Numpy array of shape [query sequence length, height, width, 3].
query_embs: Numpy array of shape [query sequence length, embedding size].
target_ims: Numpy array of shape [target sequence length, height, width,
3].
target_embs: Numpy array of shape [target sequence length, embedding
size].
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# For each query frame, find the index of the nearest neighbor in the
# target video.
knn_indices = [util.KNNIds(q, target_embs, k=1)[0] for q in query_embs]
# Create and write out the video.
assert knn_indices
knn_ims = np.array([target_ims[k] for k in knn_indices])
MakeImitationVideo(FLAGS.outdir, vid_name, query_ims, knn_ims, height, width)
def SingleImitationVideos(
query_records, target_records, config, height, width):
"""Generates pairwise imitation videos.
This creates all pairs of target imitating query videos, where each frame
on the left is matched to a nearest neighbor coming from a single
embedded target video.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.iteritems():
for task_j, data_j in target_sequences_to_data.iteritems():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_j, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_j, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def MultiImitationVideos(
query_records, target_records, config, height, width):
"""Creates multi-imitation videos.
This creates videos where every frame on the left is matched to a nearest
neighbor coming from a set of multiple embedded target videos.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.iteritems():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
all_target_embs = []
all_target_ims = []
# If num_multi_targets is -1, add all seq embeddings to the target set.
if FLAGS.num_multi_targets == -1:
num_multi_targets = len(target_sequences_to_data)
else:
# Else, add some specified number of seq embeddings to the target set.
num_multi_targets = FLAGS.num_multi_targets
for j in range(num_multi_targets):
task_j = target_sequences_to_data.keys()[j]
data_j = target_sequences_to_data[task_j]
print('Adding %s to target set' % task_j)
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
all_target_embs.extend(target_embs)
all_target_ims.extend(target_ims)
# Generate a "j imitating i" video.
tf.logging.info('Generating all imitating %s video.' % task_i)
vid_name = 'q%sv%s_multiv%s' % (task_i, qview, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
all_target_ims, all_target_embs, height, width)
def SameSequenceVideos(query_records, config, height, width):
"""Generate same sequence, cross-view imitation videos."""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# sequences_to_data.
sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=FLAGS.num_query_sequences):
sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
# Loop over query videos.
qview = FLAGS.query_view
tview = FLAGS.target_view
for task_i, data_i in sequences_to_data.iteritems():
ims = data_i['images']
embs = data_i['embeddings']
query_embs = SmoothEmbeddings(embs[qview])
query_ims = ims[qview]
target_embs = SmoothEmbeddings(embs[tview])
target_ims = ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_i, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_i, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def EmbedQueryTargetData(query_records, target_records, config):
"""Embeds the full set of query_records and target_records.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
Returns:
query_sequences_to_data: A dict holding 'embeddings' and 'images'
target_sequences_to_data: A dict holding 'embeddings' and 'images'
"""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# query_sequences_to_data.
num_query_sequences = FLAGS.num_query_sequences
num_target_sequences = FLAGS.num_target_sequences
query_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=num_query_sequences):
query_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
if (query_records == target_records) and (
num_query_sequences == num_target_sequences):
target_sequences_to_data = query_sequences_to_data
else:
# Embed num_sequences target sequences, store embeddings and image strings
# in target_sequences_to_data.
target_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings,
seqname) in estimator.inference(
target_records, checkpoint_path, batch_size,
num_sequences=num_target_sequences):
target_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
return query_sequences_to_data, target_sequences_to_data
def main(_):
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Get tables to embed.
query_records_dir = FLAGS.query_records_dir
query_records = util.GetFilesRecursively(query_records_dir)
target_records_dir = FLAGS.target_records_dir
target_records = util.GetFilesRecursively(target_records_dir)
height = config.data.raw_height
width = config.data.raw_width
mode = FLAGS.mode
if mode == 'multi':
# Generate videos where target set is composed of multiple videos.
MultiImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'single':
# Generate videos where target set is a single video.
SingleImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'same':
# Generate videos where target set is the same as query, but diff view.
SameSequenceVideos(query_records, config, height, width)
else:
raise ValueError('Unknown mode %s' % mode)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates test Recall@K statistics on labeled classification problems."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import defaultdict
import os
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances
import data_providers
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'mode', 'validation',
'Which dataset to evaluate: `validation` | `test`.')
tf.app.flags.DEFINE_string('master', 'local',
'BNS name of the TensorFlow master to use')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Evaluate this specific checkpoint.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string('outdir', '/tmp/tcn', 'Path to write summaries to.')
FLAGS = tf.app.flags.FLAGS
def nearest_cross_sequence_neighbors(data, tasks, n_neighbors=1):
"""Computes the n_neighbors nearest neighbors for every row in data.
Args:
data: A np.float32 array of shape [num_data, embedding size] holding
an embedded validation / test dataset.
tasks: A list of strings of size [num_data] holding the task or sequence
name that each row belongs to.
n_neighbors: The number of knn indices to return for each row.
Returns:
indices: an np.int32 array of size [num_data, n_neighbors] holding the
n_neighbors nearest indices for every row in data. These are
restricted to be from different named sequences (as defined in `tasks`).
"""
# Compute the pairwise sequence adjacency matrix from `tasks`.
num_data = data.shape[0]
tasks = np.array(tasks)
tasks = np.reshape(tasks, (num_data, 1))
assert len(tasks.shape) == 2
not_adjacent = (tasks != tasks.T)
# Compute the symmetric pairwise distance matrix.
pdist = pairwise_distances(data, metric='sqeuclidean')
# For every row in the pairwise distance matrix, only consider
# cross-sequence columns.
indices = np.zeros((num_data, n_neighbors), dtype=np.int32)
for idx in range(num_data):
# Restrict to cross_sequence neighbors.
distances = [(
pdist[idx][i], i) for i in xrange(num_data) if not_adjacent[idx][i]]
_, nearest_indices = zip(*sorted(
distances, key=lambda x: x[0])[:n_neighbors])
indices[idx] = nearest_indices
return indices
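# Hypothetical usage sketch (toy values, not from the original code):
#   data = np.random.randn(4, 8).astype(np.float32)
#   tasks = ['seq_a', 'seq_a', 'seq_b', 'seq_b']
#   nn = nearest_cross_sequence_neighbors(data, tasks, n_neighbors=1)
#   # nn has shape (4, 1); nn[0] and nn[1] can only index rows 2 or 3.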
def compute_cross_sequence_recall_at_k(retrieved_labels, labels, k_list):
"""Compute recall@k for a given list of k values.
Recall is one if an example of the same class is retrieved among the
top k nearest neighbors of a query example, and zero otherwise.
Averaging these values per class and then across classes gives the
recall@k score.
Args:
retrieved_labels: 2-D Numpy array of KNN labels for every embedding.
labels: 1-D Numpy array of shape [number of data].
k_list: List of k values to evaluate recall@k.
Returns:
recall_list: List of recall@k values.
"""
kvalue_to_recall = dict(zip(k_list, np.zeros(len(k_list))))
# For each value of K.
for k in k_list:
matches = defaultdict(float)
counts = defaultdict(float)
# For each (row index, label value) in the query labels.
for i, label_value in enumerate(labels):
# Loop over the K nearest retrieved labels.
if label_value in retrieved_labels[i][:k]:
matches[label_value] += 1.
# Increment the denominator.
counts[label_value] += 1.
kvalue_to_recall[k] = np.mean(
[matches[l]/counts[l] for l in matches])
return [kvalue_to_recall[i] for i in k_list]
def compute_cross_sequence_recalls_at_k(
embeddings, labels, label_attr_keys, tasks, k_list, summary_writer,
training_step):
"""Computes and reports the recall@k for each classification problem.
This takes an embedding matrix and an array of multiclass labels
with size [num_data, number of classification problems], then
computes the average recall@k for each classification problem
as well as the average across problems.
Args:
embeddings: A np.float32 array of size [num_data, embedding_size]
representing the embedded validation or test dataset.
labels: A np.int32 array of size [num_data, num_classification_problems]
holding multiclass labels for each embedding for each problem.
label_attr_keys: List of strings, holds the names of the classification
problems.
tasks: A list of strings describing the video sequence each row
belongs to. This is used to restrict the recall@k computation
to cross-sequence examples.
k_list: A list of ints, the k values to evaluate recall@k.
summary_writer: A tf.summary.FileWriter.
training_step: Int, the current training step we're evaluating.
"""
num_data = float(embeddings.shape[0])
assert labels.shape[0] == num_data
# Compute knn indices.
indices = nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
# Compute the recall@k for each classification problem.
recall_lists = []
for idx, label_attr in enumerate(label_attr_keys):
problem_labels = labels[:, idx]
# Take all indices, all k labels for the problem indexed by idx.
problem_retrieved = retrieved_labels[:, :, idx]
recall_list = compute_cross_sequence_recall_at_k(
retrieved_labels=problem_retrieved,
labels=problem_labels,
k_list=k_list)
recall_lists.append(recall_list)
for (k, recall) in zip(k_list, recall_list):
recall_error = 1-recall
summ = tf.Summary(value=[tf.Summary.Value(
tag='validation/classification/%s error@top%d' % (
label_attr, k),
simple_value=recall_error)])
print('%s error@K=%d' % (label_attr, k), recall_error)
summary_writer.add_summary(summ, int(training_step))
# Report an average recall@k across problems.
recall_lists = np.array(recall_lists)
for i in range(recall_lists.shape[1]):
average_recall = np.mean(recall_lists[:, i])
recall_error = 1 - average_recall
summ = tf.Summary(value=[tf.Summary.Value(
tag='validation/classification/average error@top%d' % k_list[i],
simple_value=recall_error)])
print('Average error@K=%d' % k_list[i], recall_error)
summary_writer.add_summary(summ, int(training_step))
def evaluate_once(
estimator, input_fn_by_view, batch_size, checkpoint_path,
label_attr_keys, embedding_size, num_views, k_list):
"""Compute the recall@k for a given checkpoint path.
Args:
estimator: an `Estimator` object to evaluate.
input_fn_by_view: An input_fn to an `Estimator's` predict method. Takes
a view index and returns a dict holding ops for getting raw images for
the view.
batch_size: Int, size of the labeled eval batch.
checkpoint_path: String, path to the specific checkpoint being evaluated.
label_attr_keys: A list of Strings, holding each attribute name.
embedding_size: Int, the size of the embedding.
num_views: Int, number of views in the dataset.
k_list: List of ints, list of K values to compute recall at K for.
"""
feat_matrix = np.zeros((0, embedding_size))
label_vect = np.zeros((0, len(label_attr_keys)))
tasks = []
eval_tensor_keys = ['embeddings', 'tasks', 'classification_labels']
# Iterate all views in the dataset.
for view_index in range(num_views):
# Set up a graph for embedding entire dataset.
predictions = estimator.inference(
input_fn_by_view(view_index), checkpoint_path,
batch_size, predict_keys=eval_tensor_keys)
# Enumerate predictions.
for i, p in enumerate(predictions):
if i % 100 == 0:
tf.logging.info('Embedded %d images for view %d' % (i, view_index))
label = p['classification_labels']
task = p['tasks']
embedding = p['embeddings']
# Collect (embedding, label, task) data.
feat_matrix = np.append(feat_matrix, [embedding], axis=0)
label_vect = np.append(label_vect, [label], axis=0)
tasks.append(task)
# Compute recall statistics.
ckpt_step = int(checkpoint_path.split('-')[-1])
summary_dir = os.path.join(FLAGS.outdir, 'labeled_eval_summaries')
summary_writer = tf.summary.FileWriter(summary_dir)
compute_cross_sequence_recalls_at_k(
feat_matrix, label_vect, label_attr_keys, tasks, k_list,
summary_writer, ckpt_step)
def get_labeled_tables(config):
"""Gets either labeled test or validation tables, based on flags."""
# Get a list of filenames corresponding to labeled data.
mode = FLAGS.mode
if mode == 'validation':
labeled_tables = util.GetFilesRecursively(config.data.labeled.validation)
elif mode == 'test':
labeled_tables = util.GetFilesRecursively(config.data.labeled.test)
else:
raise ValueError('Unknown dataset: %s' % mode)
return labeled_tables
def main(_):
"""Runs main labeled eval loop."""
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Choose an estimator based on training strategy.
checkpointdir = FLAGS.checkpointdir
estimator = get_estimator(config, checkpointdir)
# Get data configs.
image_attr_keys = config.data.labeled.image_attr_keys
label_attr_keys = config.data.labeled.label_attr_keys
embedding_size = config.embedding_size
num_views = config.data.num_views
k_list = config.val.recall_at_k_list
batch_size = config.data.batch_size
# Get either labeled validation or test tables.
labeled_tables = get_labeled_tables(config)
def input_fn_by_view(view_index):
"""Returns an input_fn for use with a tf.Estimator by view."""
def input_fn():
# Get raw labeled images.
(preprocessed_images, labels,
tasks) = data_providers.labeled_data_provider(
labeled_tables,
estimator.preprocess_data, view_index, image_attr_keys,
label_attr_keys, batch_size=batch_size)
return {
'batch_preprocessed': preprocessed_images,
'tasks': tasks,
'classification_labels': labels,
}, None
return input_fn
# If evaluating a specific checkpoint, do that.
if FLAGS.checkpoint_iter:
checkpoint_path = os.path.join(
'%s/model.ckpt-%s' % (checkpointdir, FLAGS.checkpoint_iter))
evaluate_once(
estimator, input_fn_by_view, batch_size, checkpoint_path,
label_attr_keys, embedding_size, num_views, k_list)
else:
for checkpoint_path in tf.contrib.training.checkpoints_iterator(
checkpointdir):
evaluate_once(
estimator, input_fn_by_view, batch_size, checkpoint_path,
label_attr_keys, embedding_size, num_views, k_list)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tcn.labeled_eval."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import labeled_eval
import tensorflow as tf
class LabeledEvalTest(tf.test.TestCase):
def testNearestCrossSequenceNeighbors(self):
# Generate embeddings.
num_data = 64
embedding_size = 4
num_tasks = 8
n_neighbors = 2
data = np.random.randn(num_data, embedding_size)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
# Get nearest cross-sequence indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
data, tasks, n_neighbors=n_neighbors)
# Assert that no nearest neighbor indices come from the same task.
repeated_tasks = np.tile(np.reshape(tasks, (num_data, 1)), n_neighbors)
self.assertTrue(np.all(np.not_equal(repeated_tasks, tasks[indices])))
def testPerfectCrossSequenceRecall(self):
# Make sure cross-sequence recall@k returns 1.0 for near-duplicate features.
embeddings = np.random.randn(10, 2)
embeddings[5:, :] = 0.00001 + embeddings[:5, :]
tasks = np.repeat([0, 1], 5)
labels = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
# find k=1, k=2 nearest neighbors.
k_list = [1, 2]
# Compute knn indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array([1.0, 1.0])))
def testRelativeRecall(self):
# Make sure cross-sequence recall@k is non-decreasing in k.
num_data = 100
num_tasks = 10
embeddings = np.random.randn(100, 5)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
labels = np.random.randint(0, 5, 100)
k_list = [1, 2, 4, 8, 16, 32, 64]
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
recall_list_sorted = sorted(recall_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array(recall_list_sorted)))
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model implementations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import inception
from tensorflow.contrib.slim.python.slim.nets import resnet_v2 as resnet_v2
from tensorflow.contrib.slim.python.slim.nets import resnet_utils as resnet_utils
def get_embedder(
embedder_strategy, config, images, is_training, reuse=False,
l2_normalize_embedding=True):
"""Returns an embedder based on config.
Args:
embedder_strategy: String, name of embedder version to return.
config: LuaTable object, training config.
images: 4-D float `Tensor` containing batch images.
is_training: Boolean or placeholder for boolean,
indicator for whether or not we're training.
reuse: Boolean, whether or not to reuse the embedder variable scope.
l2_normalize_embedding: Boolean, whether or not to l2 normalize the
embedding.
Returns:
embedder: An `Embedder` object.
Raises:
ValueError: if unknown embedder_strategy specified.
"""
if embedder_strategy == 'inception_baseline':
pretrained_ckpt = config.inception_conv_ss_fc.pretrained_checkpoint
return InceptionBaselineEmbedder(
images,
pretrained_ckpt,
random_projection=config.random_projection,
random_projection_dim=config.random_projection_dim)
strategy_to_embedder = {
'inception_conv_ss_fc': InceptionConvSSFCEmbedder,
'resnet': ResnetEmbedder,
}
if embedder_strategy not in strategy_to_embedder:
raise ValueError('unknown embedder_strategy', embedder_strategy)
embedding_size = config.embedding_size
l2_reg_weight = config.learning.l2_reg_weight
embedder = strategy_to_embedder[embedder_strategy](
config[embedder_strategy], images, embedding_size,
is_training, embedding_l2=l2_normalize_embedding,
l2_reg_weight=l2_reg_weight, reuse=reuse)
return embedder
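# Hypothetical usage sketch (assumes a config with the fields read above):
#   images = tf.placeholder(tf.float32, [None, 299, 299, 3])
#   embedder = get_embedder('resnet', config, images, is_training=True)
#   embeddings = embedder.construct_embedding()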
def build_inceptionv3_graph(images, endpoint, is_training, checkpoint,
reuse=False):
"""Builds an InceptionV3 model graph.
Args:
images: A 4-D float32 `Tensor` of batch images.
endpoint: String, name of the InceptionV3 endpoint.
is_training: Boolean, whether to build a training (True) or inference (False) graph.
checkpoint: String, path to the pretrained model checkpoint.
reuse: Boolean, whether or not we are reusing the embedder.
Returns:
inception_output: `Tensor` holding the InceptionV3 output.
inception_variables: List of inception variables.
init_fn: Function to initialize the weights (if not reusing, then None).
"""
with slim.arg_scope(inception.inception_v3_arg_scope()):
_, endpoints = inception.inception_v3(
images, num_classes=1001, is_training=is_training)
inception_output = endpoints[endpoint]
inception_variables = slim.get_variables_to_restore()
inception_variables = [
i for i in inception_variables if 'global_step' not in i.name]
if is_training and not reuse:
init_saver = tf.train.Saver(inception_variables)
def init_fn(scaffold, sess):
del scaffold
init_saver.restore(sess, checkpoint)
else:
init_fn = None
return inception_output, inception_variables, init_fn
class InceptionBaselineEmbedder(object):
"""Produces pre-trained InceptionV3 embeddings."""
def __init__(self, images, pretrained_ckpt, reuse=False,
random_projection=False, random_projection_dim=32):
# Build InceptionV3 graph.
(inception_output,
self.inception_variables,
self.init_fn) = build_inceptionv3_graph(
images, 'Mixed_7c', False, pretrained_ckpt, reuse)
# Pool 8x8x2048 -> 1x1x2048.
embedding = slim.avg_pool2d(inception_output, [8, 8], stride=1)
embedding = tf.squeeze(embedding, [1, 2])
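# Optionally project the 2048-d pooled features to a lower dimension with a
# fixed (seeded) random matrix.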
if random_projection:
embedding = tf.matmul(
embedding, tf.random_normal(
shape=[2048, random_projection_dim], seed=123))
self.embedding = embedding
class PretrainedEmbedder(object):
"""Base class for embedders that take pre-trained networks as input."""
__metaclass__ = ABCMeta
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
"""Constructor.
Args:
config: A T object holding training config.
images: A 4-D float32 `Tensor` holding images to embed.
embedding_size: Int, the size of the embedding.
is_training: Boolean, whether this is a training or inference-time
graph.
embedding_l2: Boolean, whether or not to l2 normalize the embedding.
l2_reg_weight: Float, weight applied to l2 weight regularization.
reuse: Boolean, whether or not we're reusing this graph.
"""
# Pull out all the embedder hyperparameters.
self._config = config
self._embedding_size = embedding_size
self._l2_reg_weight = l2_reg_weight
self._embedding_l2 = embedding_l2
self._is_training = is_training
self._reuse = reuse
# Pull out pretrained hparams.
pretrained_checkpoint = config.pretrained_checkpoint
pretrained_layer = config.pretrained_layer
pretrained_keep_prob = config.dropout.keep_pretrained
# Build pretrained graph.
(pretrained_output,
self._pretrained_variables,
self.init_fn) = self.build_pretrained_graph(
images, pretrained_layer, pretrained_checkpoint, is_training, reuse)
# Optionally drop out the activations.
pretrained_output = slim.dropout(
pretrained_output, keep_prob=pretrained_keep_prob,
is_training=is_training)
self._pretrained_output = pretrained_output
@abstractmethod
def build_pretrained_graph(self, images, layer, pretrained_checkpoint,
is_training, reuse):
"""Builds the graph for the pre-trained network.
Method to be overridden by implementations.
Args:
images: A 4-D tf.float32 `Tensor` holding images to embed.
layer: String, defining which pretrained layer to take as input
to adaptation layers.
pretrained_checkpoint: String, path to a checkpoint used to load
pretrained weights.
is_training: Boolean, whether or not we're in training mode.
reuse: Boolean, whether or not to reuse embedder weights.
Returns:
pretrained_output: A 2 or 3-d tf.float32 `Tensor` holding pretrained
activations.
"""
pass
@abstractmethod
def construct_embedding(self):
"""Builds an embedding function on top of images.
Method to be overridden by implementations.
Returns:
embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size]
holding the embedded images.
"""
pass
def get_trainable_variables(self):
"""Gets a list of variables to optimize."""
if self._config.finetune:
return tf.trainable_variables()
else:
adaptation_only_vars = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._adaptation_scope)
return adaptation_only_vars
class ResnetEmbedder(PretrainedEmbedder):
"""Resnet TCN.
ResnetV2 -> resnet adaptation layers -> optional l2 normalize -> embedding.
"""
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
super(ResnetEmbedder, self).__init__(
config, images, embedding_size, is_training, embedding_l2,
l2_reg_weight, reuse)
def build_pretrained_graph(
self, images, resnet_layer, checkpoint, is_training, reuse=False):
"""See baseclass."""
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
_, endpoints = resnet_v2.resnet_v2_50(
images, is_training=is_training, reuse=reuse)
resnet_layer = 'resnet_v2_50/block%d' % resnet_layer
resnet_output = endpoints[resnet_layer]
resnet_variables = slim.get_variables_to_restore()
resnet_variables = [
i for i in resnet_variables if 'global_step' not in i.name]
if is_training and not reuse:
init_saver = tf.train.Saver(resnet_variables)
def init_fn(scaffold, sess):
del scaffold
init_saver.restore(sess, checkpoint)
else:
init_fn = None
return resnet_output, resnet_variables, init_fn
def construct_embedding(self):
"""Builds an embedding function on top of images.
Method to be overridden by implementations.
Returns:
embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size]
holding the embedded images.
"""
with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
self._adaptation_scope = vs.name
net = self._pretrained_output
# Define some adaptation blocks on top of the pre-trained resnet output.
adaptation_blocks = []
adaptation_block_params = [map(
int, i.split('_')) for i in self._config.adaptation_blocks.split('-')]
for i, (depth, num_units) in enumerate(adaptation_block_params):
block = resnet_v2.resnet_v2_block(
'adaptation_block_%d' % i, base_depth=depth, num_units=num_units,
stride=1)
adaptation_blocks.append(block)
# Stack them on top of the resnet output.
net = resnet_utils.stack_blocks_dense(
net, adaptation_blocks, output_stride=None)
# Average pool the output.
net = tf.reduce_mean(net, [1, 2], name='adaptation_pool', keep_dims=True)
if self._config.emb_connection == 'fc':
# Use fully connected layer to project to embedding layer.
fc_hidden_sizes = self._config.fc_hidden_sizes
if fc_hidden_sizes == 'None':
fc_hidden_sizes = []
else:
fc_hidden_sizes = map(int, fc_hidden_sizes.split('_'))
fc_hidden_keep_prob = self._config.dropout.keep_fc
net = tf.squeeze(net)
for fc_hidden_size in fc_hidden_sizes:
net = slim.layers.fully_connected(net, fc_hidden_size)
if fc_hidden_keep_prob < 1.0:
net = slim.dropout(net, keep_prob=fc_hidden_keep_prob,
is_training=self._is_training)
# Connect last FC layer to embedding.
embedding = slim.layers.fully_connected(net, self._embedding_size,
activation_fn=None)
else:
# Use 1x1 conv layer to project to embedding layer.
embedding = slim.conv2d(
net, self._embedding_size, [1, 1], activation_fn=None,
normalizer_fn=None, scope='embedding')
embedding = tf.squeeze(embedding)
# Optionally L2 normalize the embedding.
if self._embedding_l2:
embedding = tf.nn.l2_normalize(embedding, dim=1)
return embedding
def get_trainable_variables(self):
"""Gets a list of variables to optimize."""
if self._config.finetune:
return tf.trainable_variables()
else:
adaptation_only_vars = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._adaptation_scope)
return adaptation_only_vars
class InceptionEmbedderBase(PretrainedEmbedder):
"""Base class for embedders that take pre-trained InceptionV3 activations."""
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
super(InceptionEmbedderBase, self).__init__(
config, images, embedding_size, is_training, embedding_l2,
l2_reg_weight, reuse)
def build_pretrained_graph(
self, images, inception_layer, checkpoint, is_training, reuse=False):
"""See baseclass."""
# Build InceptionV3 graph.
inception_output, inception_variables, init_fn = build_inceptionv3_graph(
images, inception_layer, is_training, checkpoint, reuse)
return inception_output, inception_variables, init_fn
class InceptionConvSSFCEmbedder(InceptionEmbedderBase):
"""TCN Embedder V1.
InceptionV3 (mixed_5d) -> conv layers -> spatial softmax ->
fully connected -> optional l2 normalize -> embedding.
"""
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
super(InceptionConvSSFCEmbedder, self).__init__(
config, images, embedding_size, is_training, embedding_l2,
l2_reg_weight, reuse)
# Pull out all the hyperparameters specific to this embedder.
self._additional_conv_sizes = config.additional_conv_sizes
self._conv_hidden_keep_prob = config.dropout.keep_conv
self._fc_hidden_sizes = config.fc_hidden_sizes
self._fc_hidden_keep_prob = config.dropout.keep_fc
def construct_embedding(self):
"""Builds a conv -> spatial softmax -> FC adaptation network."""
is_training = self._is_training
normalizer_params = {'is_training': is_training}
with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
self._adaptation_scope = vs.name
with slim.arg_scope(
[slim.layers.conv2d],
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params,
weights_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight),
biases_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight)):
with slim.arg_scope(
[slim.layers.fully_connected],
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params,
weights_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight),
biases_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight)):
# Input to embedder is pre-trained inception output.
net = self._pretrained_output
# Optionally add more conv layers.
for num_filters in self._additional_conv_sizes:
net = slim.layers.conv2d(
net, num_filters, kernel_size=[3, 3], stride=[1, 1])
net = slim.dropout(net, keep_prob=self._conv_hidden_keep_prob,
is_training=is_training)
# Take the spatial soft arg-max of the last convolutional layer.
# This is a form of spatial attention over the activations.
# See more here: http://arxiv.org/abs/1509.06113.
net = tf.contrib.layers.spatial_softmax(net)
self.spatial_features = net
# Add fully connected layers.
net = slim.layers.flatten(net)
for fc_hidden_size in self._fc_hidden_sizes:
net = slim.layers.fully_connected(net, fc_hidden_size)
if self._fc_hidden_keep_prob < 1.0:
net = slim.dropout(net, keep_prob=self._fc_hidden_keep_prob,
is_training=is_training)
# Connect last FC layer to embedding.
net = slim.layers.fully_connected(net, self._embedding_size,
activation_fn=None)
# Optionally L2 normalize the embedding.
if self._embedding_l2:
net = tf.nn.l2_normalize(net, dim=1)
return net
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Image preprocessing helpers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
from scipy import ndimage
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
def apply_with_random_selector(x, func, num_cases):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
pipfied.
Source:
https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py
Args:
x: input Tensor.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
return control_flow_ops.merge([
func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
for case in range(num_cases)])[0]
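# Hypothetical usage sketch (pattern from inception preprocessing):
#   image = apply_with_random_selector(
#       image,
#       lambda x, ordering: distort_color(x, ordering, fast_mode=False),
#       num_cases=4)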
def distorted_bounding_box_crop(image,
bbox,
min_object_covered=0.1,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.05, 1.0),
max_attempts=100,
scope=None):
"""Generates cropped_image using a one of the bboxes randomly distorted.
TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
pipfied.
Source:
https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py
See `tf.image.sample_distorted_bounding_box` for more documentation.
Args:
image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged
as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole
image.
min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
area of the image must contain at least this fraction of any bounding box
supplied.
aspect_ratio_range: An optional list of `floats`. The cropped area of the
image must have an aspect ratio = width / height within this range.
area_range: An optional list of `floats`. The cropped area of the image
must contain a fraction of the supplied image within this range.
max_attempts: An optional `int`. Number of attempts at generating a cropped
region of the image of the specified constraints. After `max_attempts`
failures, return the entire image.
scope: Optional scope for name_scope.
Returns:
A tuple, a 3-D Tensor cropped_image and the distorted bbox
"""
with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
# Each bounding box has shape [1, num_boxes, box coords] and
# the coordinates are ordered [ymin, xmin, ymax, xmax].
# A large fraction of image datasets contain a human-annotated bounding
# box delineating the region of the image containing the object of interest.
# We choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.shape(image),
bounding_boxes=bbox,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
# Crop the image to the specified bounding box.
cropped_image = tf.slice(image, bbox_begin, bbox_size)
return cropped_image, distort_bbox
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
"""Distort the color of a Tensor image.
TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
pipfied.
Source:
https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py
Each color distortion is non-commutative and thus ordering of the color ops
matters. Ideally we would randomly permute the ordering of the color ops.
Rather than adding that level of complication, we select a distinct ordering
of color ops for each preprocessing thread.
Args:
image: 3-D Tensor containing single image in [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0-3).
fast_mode: Avoids slower ops (random_hue and random_contrast)
scope: Optional scope for name_scope.
Returns:
3-D Tensor color-distorted image on range [0, 1]
Raises:
ValueError: if color_ordering not in [0, 3]
"""
with tf.name_scope(scope, 'distort_color', [image]):
if fast_mode:
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
else:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
else:
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
elif color_ordering == 1:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
elif color_ordering == 2:
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
elif color_ordering == 3:
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
else:
raise ValueError('color_ordering must be in [0, 3]')
# The random_* ops do not necessarily clamp.
return tf.clip_by_value(image, 0.0, 1.0)
def crop_center(image):
"""Returns a cropped square image."""
shape = tf.shape(image)
new_shape = tf.minimum(shape[0], shape[1])
offset_y = tf.maximum(shape[0] - shape[1], 0) // 2
offset_x = tf.maximum(shape[1] - shape[0], 0) // 2
image = tf.image.crop_to_bounding_box(
image, offset_y, offset_x, new_shape, new_shape)
return image
def pad(image):
"""Returns an image padded to be square."""
shape = tf.shape(image)
new_shape = tf.maximum(shape[0], shape[1])
height = shape[0]
width = shape[1]
offset_x = tf.maximum((height-width), 0) // 2
offset_y = tf.maximum((width-height), 0) // 2
image = tf.image.pad_to_bounding_box(
image, offset_y, offset_x, new_shape, new_shape)
return image
def pad_200(image):
"""Returns an image padded width-padded with 200 pixels."""
shape = tf.shape(image)
image = tf.image.pad_to_bounding_box(
image, 0, 200, shape[0], shape[1]+400)
shape = tf.shape(image)
new_shape = tf.minimum(shape[0], shape[1])
offset_y = tf.maximum(shape[0] - shape[1], 0) // 2
offset_x = tf.maximum(shape[1] - shape[0], 0) // 2
image = tf.image.crop_to_bounding_box(
image, offset_y, offset_x, new_shape, new_shape)
return image
def pad_crop_central(image, central_fraction=0.875):
"""Pads the image to the maximum length, crops the central fraction."""
# Pad the image to be square.
image = pad(image)
# Crop the central region of the image with an area containing 87.5% of
# the original image.
image = tf.image.central_crop(image, central_fraction=central_fraction)
return image
def crop_image_by_strategy(image, cropping):
"""Crops an image according to a strategy defined in config.
Args:
image: 3-d image tensor.
cropping: str, name of cropping strategy.
Returns:
image: cropped image.
Raises:
ValueError: When unknown cropping strategy is specified.
"""
strategy_to_method = {
'crop_center': crop_center,
'pad': pad,
'pad200': pad_200,
'pad_crop_central': pad_crop_central
}
tf.logging.info('Cropping strategy: %s.' % cropping)
if cropping not in strategy_to_method:
raise ValueError('Unknown cropping strategy: %s' % cropping)
return strategy_to_method[cropping](image)
def scale_augment_crop(image, central_bbox, area_range, min_object_covered):
"""Training time scale augmentation.
Args:
image: 3-d float tensor.
central_bbox: Bounding box defining the central region of interest.
area_range: Range of allowed areas for the augmented bounding box.
min_object_covered: Constraint for the fraction of original image in
augmented bounding box.
Returns:
distorted_image: The scaled, cropped image.
"""
(distorted_image, _) = distorted_bounding_box_crop(
image, central_bbox, area_range=area_range,
aspect_ratio_range=(1.0, 1.0),
min_object_covered=min_object_covered)
# Restore the shape since the dynamic slice based upon the bbox_size loses
# the third dimension.
distorted_image.set_shape([None, None, 3])
return distorted_image
def scale_to_inception_range(image):
"""Scales an image in the range [0,1] to [-1,1] as expected by inception."""
# Assert that incoming images have been properly scaled to [0,1].
with tf.control_dependencies(
[tf.assert_less_equal(tf.reduce_max(image), 1.),
tf.assert_greater_equal(tf.reduce_min(image), 0.)]):
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
return image
def resize_image(image, height, width):
"""Resizes an image to a target height and width."""
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
image = tf.squeeze(image, [0])
return image
def crop_or_pad(image, curr_height, curr_width, new, height=True, crop=True):
"""Crops or pads an image.
Args:
image: 3-D float32 `Tensor` image.
curr_height: Int, current height.
curr_width: Int, current width.
new: Int, new width or height.
height: Boolean, cropping or padding for height.
crop: Boolean, True if we're cropping, False if we're padding.
Returns:
image: 3-D float32 `Tensor` image.
"""
# Crop the image to fit the new shape.
abs_diff = tf.abs(new-curr_height)//2 if height else tf.abs(new-curr_width)//2
offset_x = 0 if height else abs_diff
offset_y = abs_diff if height else 0
# We process height first, so always pad/crop to new height.
target_height = new
# We process height first, so pad/crop to new width only if not doing height.
target_width = curr_width if height else new
if crop:
image = tf.image.crop_to_bounding_box(
image, offset_y, offset_x, target_height, target_width)
else:
image = tf.image.pad_to_bounding_box(
image, offset_y, offset_x, target_height, target_width)
return image
def get_central_bbox(min_side, new_size):
"""Gets the central bounding box for an image.
If image is square, returns bounding box [0,0,1,1].
Otherwise, returns the bounding box containing the central
smallest side x smallest side square.
Args:
min_side: Int, size of smallest side in pixels.
new_size: Int, resize image to a square of new_size x new_size pixels.
Returns:
bbox: A float32 `Tensor` of shape [1, 1, 4], holding the normalized
coordinates of the central bounding box.
"""
max_shape = tf.cast(new_size, tf.float32)
min_shape = tf.cast(min_side, tf.float32)
top_xy = ((max_shape-min_shape)/2)/max_shape
bottom_xy = (min_shape+(max_shape-min_shape)/2)/max_shape
# Create a bbox for the center region of interest.
bbox = tf.stack([[[top_xy, top_xy, bottom_xy, bottom_xy]]])
bbox.set_shape([1, 1, 4])
return bbox
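# Worked example (illustrative, not part of the original module): for
# min_side=1080 and new_size=1322, the central 1080x1080 square maps to
# top_xy = (1322 - 1080) / 2 / 1322 ~= 0.0915 and
# bottom_xy = (1080 + 121) / 1322 ~= 0.9085, i.e. a bbox of approximately
# [[[0.0915, 0.0915, 0.9085, 0.9085]]].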
def pad_to_max(image, max_scale):
"""Pads an image to max_scale times the current center crop size.
E.g.: For an image with dimensions 1920x1080 and a max_scale of 1.5,
returns a square image whose area is 1.5 * (1080 * 1080).
Args:
image: 3-D float32 `Tensor` image.
max_scale: Float, maximum scale of the image, as a multiplier on the
central bounding box.
Returns:
image: 3-D float32 `Tensor` image, cropped or padded to the new square size.
original_bounding_box: A float32 `Tensor` of shape [1, 1, 4] locating the
original central crop within the returned image.
"""
orig_shape = tf.shape(image)
orig_height = orig_shape[0]
orig_width = orig_shape[1]
# Find the smallest side and corresponding new size.
min_side = tf.cast(tf.minimum(orig_height, orig_width), tf.float32)
new_shape = tf.cast(tf.sqrt(max_scale*min_side*min_side), tf.int32)
# Crop or pad height.
# pylint: disable=g-long-lambda
image = tf.cond(
orig_height >= new_shape,
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=True, crop=True),
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=True, crop=False))
# Crop or pad width.
image = tf.cond(
orig_width >= new_shape,
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=False, crop=True),
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=False, crop=False))
# Get the bounding box of the original centered box in the new resized image.
original_bounding_box = get_central_bbox(min_side, new_shape)
return image, original_bounding_box
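# Worked example (illustrative, not part of the original module): for a
# 1920x1080 image and max_scale=1.5, min_side = 1080 and
# new_shape = int(sqrt(1.5 * 1080 * 1080)) = 1322. The height (1080 < 1322) is
# zero-padded and the width (1920 >= 1322) is center-cropped, yielding a
# 1322x1322 square whose area is ~1.5x the original 1080x1080 central crop;
# that crop's location is returned via get_central_bbox(1080, 1322).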
def scale_up_augmentation(image, max_scale):
"""Scales an image randomly >100% up to some max scale."""
# Pad to max size.
image, original_central_bbox = pad_to_max(image, max_scale)
# Determine area range of the augmented crop, as a percentage of the
# new max area.
# aug_max == 100% of new max area.
aug_max = 1.0
# aug_min == original_area/new_area == original_area/(max_scale*original_area)
# == 1/max_scale.
aug_min = 1.0/max_scale
area_range = (aug_min, aug_max)
# Since we're doing >100% scale, always have the full original crop in frame.
min_object_covered = 1.0
# Get a random scaled, cropped image.
image = scale_augment_crop(image, original_central_bbox, area_range,
min_object_covered)
return image
def scale_down_augmentation(image, min_scale):
"""Scales an image randomly <100% down to some min scale."""
# Crop the center, and consider the whole image the bounding box ROI.
image = crop_center(image)
bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
# Determine area range of the augmented crop, as a percentage of the
# original crop center area.
# aug_max == 100% of original area.
area_range = (min_scale, 1.0)
# Get a random scaled, cropped image.
image = scale_augment_crop(image, bbox, area_range, min_scale)
return image
def augment_image_scale(image, min_scale, max_scale, p_scale_up):
"""Training time scale augmentation.
Args:
image: 3-d float tensor representing image.
min_scale: minimum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
max_scale: maximum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
p_scale_up: Fraction of images scaled up.
Returns:
image: The scale-augmented image.
"""
assert max_scale >= 1.0
assert min_scale <= 1.0
if min_scale == max_scale == 1.0:
tf.logging.info('Min and max scale are 1.0, skipping scale augmentation.')
# Do no augmentation, just crop the center.
return crop_center(image)
elif (max_scale == 1.0) and (min_scale < 1.0):
tf.logging.info('Max scale is 1.0, only scale down augment.')
# Always do <100% augmentation.
return scale_down_augmentation(image, min_scale)
elif (min_scale == 1.0) and (max_scale > 1.0):
tf.logging.info('Min scale is 1.0, only scale up augment.')
# Always do >100% augmentation.
return scale_up_augmentation(image, max_scale)
else:
tf.logging.info('Sample both augmentations.')
# Scale up with probability p_scale_up, otherwise scale down.
rn = tf.random_uniform([], minval=0., maxval=1., dtype=tf.float32)
image = tf.cond(rn < p_scale_up,
lambda: scale_up_augmentation(image, max_scale),
lambda: scale_down_augmentation(image, min_scale))
return image
def decode_image(image_str):
"""Decodes a jpeg-encoded image string into a image in range [0,1]."""
# Decode jpeg string into np.uint8 tensor.
image = tf.image.decode_jpeg(image_str, channels=3)
# Convert the image to range [0,1].
if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
return image
def decode_images(image_strs):
"""Decodes a tensor of image strings."""
return tf.map_fn(decode_image, image_strs, dtype=tf.float32)
def preprocess_training_images(images, height, width, min_scale, max_scale,
p_scale_up, aug_color=True, fast_mode=True):
"""Preprocesses a batch of images for training.
This applies training-time scale and color augmentation, crops/resizes,
and scales images to the [-1,1] range expected by pre-trained Inception nets.
Args:
images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
height: Int, height in pixels to resize image to.
width: Int, width in pixels to resize image to.
min_scale: Float, minimum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
max_scale: Float, maximum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
p_scale_up: Float, fraction of images scaled up.
aug_color: Whether or not to do color augmentation.
fast_mode: Boolean, avoids slower ops (random_hue and random_contrast).
Returns:
preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
"""
def _prepro_train(im):
"""Map this preprocessing function over each image in the batch."""
return preprocess_training_image(
im, height, width, min_scale, max_scale, p_scale_up,
aug_color=aug_color, fast_mode=fast_mode)
return tf.map_fn(_prepro_train, images)
def preprocess_training_image(
image, height, width, min_scale, max_scale, p_scale_up,
aug_color=True, fast_mode=True):
"""Preprocesses an image for training.
Args:
image: A 3-d float tensor representing the image.
height: Target image height.
width: Target image width.
min_scale: Minimum scale of bounding box (as a percentage of full
bounding box) used to crop image during scale augmentation.
max_scale: Maximum scale of bounding box (as a percentage of full
bounding box) used to crop image during scale augmentation.
p_scale_up: Fraction of images to scale >100%.
aug_color: Whether or not to do color augmentation.
fast_mode: Avoids slower ops (random_hue and random_contrast).
Returns:
scaled_image: A scaled image tensor in the range [-1,1].
"""
# Get a random scaled, cropped image.
image = augment_image_scale(image, min_scale, max_scale, p_scale_up)
# Resize image to desired height, width.
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
image = tf.squeeze(image, [0])
# Optionally augment the color.
# pylint: disable=g-long-lambda
if aug_color:
image = apply_with_random_selector(
image,
lambda x, ordering: distort_color(
x, ordering, fast_mode=fast_mode), num_cases=4)
# Scale to [-1,1] range as expected by inception.
scaled_image = scale_to_inception_range(image)
return scaled_image
def preprocess_test_image(image, height, width, crop_strategy):
"""Preprocesses an image for test/inference.
Args:
image: A 3-d float tensor representing the image.
height: Target image height.
width: Target image width.
crop_strategy: String, name of the strategy used to crop test-time images.
Can be: 'crop_center', 'pad', 'pad200', 'pad_crop_central'.
Returns:
scaled_image: A scaled image tensor in the range [-1,1].
"""
image = crop_image_by_strategy(image, crop_strategy)
# Resize.
image = resize_image(image, height, width)
# Scale the input range to [-1,1] as expected by inception.
image = scale_to_inception_range(image)
return image
def preprocess_test_images(images, height, width, crop_strategy):
"""Apply test-time preprocessing to a batch of images.
This crops images (given a named strategy for doing so), resizes them,
and scales them to the [-1,1] range expected by pre-trained Inception nets.
Args:
images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
height: Int, height in pixels to resize image to.
width: Int, width in pixels to resize image to.
crop_strategy: String, name of the strategy used to crop test-time images.
Can be: 'crop_center', 'pad', 'pad200', 'pad_crop_central'.
Returns:
preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
"""
def _prepro_test(im):
"""Map this preprocessing function over each image in the batch."""
return preprocess_test_image(im, height, width, crop_strategy)
if len(images.shape) == 3:
return _prepro_test(images)
else:
return tf.map_fn(_prepro_test, images)
def preprocess_images(
images, is_training, height, width,
min_scale=1.0, max_scale=1.0, p_scale_up=0.0,
aug_color=True, fast_mode=True,
crop_strategy='pad_crop_central'):
"""Preprocess a batch of images.
Args:
images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
is_training: Boolean, whether to preprocess them for training or test.
height: Int, height in pixels to resize image to.
width: Int, width in pixels to resize image to.
min_scale: Float, minimum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
max_scale: Float, maximum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
p_scale_up: Float, fraction of images scaled up.
aug_color: Whether or not to do color augmentation.
fast_mode: Boolean, avoids slower ops (random_hue and random_contrast).
crop_strategy: String, name of the strategy used to crop test-time images.
Can be: 'crop_center', 'pad', 'pad200', 'pad_crop_central'.
Returns:
preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
"""
if is_training:
return preprocess_training_images(
images, height, width, min_scale, max_scale,
p_scale_up, aug_color, fast_mode)
else:
return preprocess_test_images(
images, height, width, crop_strategy)
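# Usage sketch (illustrative, not part of the original module), assuming a
# batch of images already decoded to float32 in [0, 1]:
# >>> raw = tf.random_uniform([4, 480, 640, 3], 0., 1.)
# >>> train_ims = preprocess_images(raw, is_training=True, height=299,
# ...                               width=299, min_scale=0.8, max_scale=1.2,
# ...                               p_scale_up=0.5)
# >>> test_ims = preprocess_images(raw, is_training=False, height=299,
# ...                              width=299, crop_strategy='crop_center')
# Both results are [4, 299, 299, 3] float32 tensors scaled to [-1, 1].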
def cv2rotateimage(image, angle):
"""Efficient rotation if 90 degrees rotations, slow otherwise.
Not a tensorflow function, using cv2 and scipy on numpy arrays.
Args:
image: a numpy array with shape [height, width, channels].
angle: the rotation angle in degrees in the range [-180, 180].
Returns:
The rotated image.
"""
# Limit angle to [-180, 180] degrees.
assert angle <= 180 and angle >= -180
if angle == 0:
return image
# Efficient rotations.
if angle == -90:
image = cv2.transpose(image)
image = cv2.flip(image, 0)
elif angle == 90:
image = cv2.transpose(image)
image = cv2.flip(image, 1)
elif angle == 180 or angle == -180:
image = cv2.flip(image, 0)
image = cv2.flip(image, 1)
else: # Slow rotation.
image = ndimage.interpolation.rotate(image, angle)
return image
def cv2resizeminedge(image, min_edge_size):
"""Resize smallest edge of image to min_edge_size."""
assert min_edge_size >= 0
height, width = (image.shape[0], image.shape[1])
new_height, new_width = (0, 0)
if height > width:
new_width = min_edge_size
new_height = int(height * new_width / float(width))
else:
new_height = min_edge_size
new_width = int(width * new_height / float(height))
return cv2.resize(image, (new_width, new_height),
interpolation=cv2.INTER_AREA)
def shapestring(array):
"""Returns a compact string describing shape of an array."""
shape = array.shape
s = str(shape[0])
for i in range(1, len(shape)):
s += 'x' + str(shape[i])
return s
def unscale_jpeg_encode(ims):
"""Unscales pixel values and jpeg encodes preprocessed image.
Args:
ims: A 4-D float32 `Tensor` holding preprocessed images.
Returns:
im_strings: A 1-D string `Tensor` holding images that have been unscaled
(reversing the inception [-1,1] scaling), and jpeg encoded.
"""
ims /= 2.0
ims += 0.5
ims *= 255.0
ims = tf.clip_by_value(ims, 0, 255)
ims = tf.cast(ims, tf.uint8)
im_strings = tf.map_fn(
lambda x: tf.image.encode_jpeg(x, format='rgb', quality=100),
ims, dtype=tf.string)
return im_strings
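# Round-trip sketch (illustrative, not part of the original module): given
# `ims`, an [N, H, W, 3] float32 tensor in [-1, 1] produced by the
# preprocessing above, the images can be recovered as JPEG strings, e.g. for
# writing image summaries:
# >>> jpeg_strings = unscale_jpeg_encode(ims)  # 1-D tf.string tensor, length N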
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Trains TCN models (and baseline comparisons)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string('master', 'local',
'BNS name of the TensorFlow master to use')
tf.app.flags.DEFINE_string(
'logdir', '/tmp/tcn', 'Directory where to write event logs.')
tf.app.flags.DEFINE_integer(
'task', 0, 'Task id of the replica running the training.')
tf.app.flags.DEFINE_integer(
'ps_tasks', 0, 'Number of tasks in the ps job. If 0 no ps job is used.')
FLAGS = tf.app.flags.FLAGS
def main(_):
"""Runs main training loop."""
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(
FLAGS.config_paths, FLAGS.model_params, save=True, logdir=FLAGS.logdir)
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.logdir)
# Run training
estimator.train()
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=line-too-long,g-explicit-length-test
"""A convenience class replicating some lua table syntax with a python dict.
In general, should behave like a dictionary except that we can use dot notation
to access keys. Users should be careful to only provide keys suitable for
instance variable names.
Nota bene: do not use the key "keys" since it will collide with the method keys.
Usage example:
>>> t = T(a=5,b='kaw', c=T(v=[],x=33))
>>> t.a
5
>>> t.z = None
>>> print t
T(a=5, z=None, c=T(x=33, v=[]), b='kaw')
>>> t2 = T({'h':'f','x':4})
>>> t2
T(h='f', x=4)
>>> t2['x']
4
"""
class T(object):
"""Class for emulating lua tables."""
def __init__(self, *args, **kwargs):
if len(args) > 1 or (len(args) == 1 and len(kwargs) > 0):
errmsg = ('constructor only allows a single dict as a positional '
'argument or keyword arguments')
raise ValueError(errmsg)
if len(args) == 1 and isinstance(args[0], dict):
self.__dict__.update(args[0])
else:
self.__dict__.update(kwargs)
def __repr__(self):
fmt = ', '.join('%s=%s' for i in range(len(self.__dict__)))
kwargstr = fmt % tuple(
x for tup in self.__dict__.items() for x in [str(tup[0]), repr(tup[1])])
return 'T(' + kwargstr + ')'
def __getitem__(self, key):
return self.__dict__[key]
def __setitem__(self, key, val):
self.__dict__[key] = val
def __delitem__(self, key):
del self.__dict__[key]
def __iter__(self):
return iter(self.__dict__)
def __len__(self):
return len(self.__dict__)
def keys(self): # Needed for dict(T( ... )) to work.
return self.__dict__.keys()
def iteritems(self):
return self.__dict__.iteritems()
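# Illustrative example (not part of the original module): because T exposes
# keys() and __getitem__, it round-trips through dict():
# >>> dict(T(a=1, b=T(c=2)))['a']
# 1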
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A utility class for reporting processing progress."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
class Progress(object):
"""A utility class for reporting processing progress."""
def __init__(self, target_size):
self.target_size = target_size
self.current_size = 0
self.start_time = datetime.datetime.now()
def Update(self, current_size):
"""Replaces internal current_size with current_size."""
self.current_size = current_size
def Add(self, size):
"""Increments internal current_size by size."""
self.current_size += size
def __str__(self):
processed = 1e-5 + self.current_size / float(self.target_size)
current_time = datetime.datetime.now()
elapsed = current_time - self.start_time
eta = datetime.timedelta(
seconds=elapsed.total_seconds() / processed - elapsed.total_seconds())
return "%d / %d (elapsed %s eta %s)" % (
self.current_size, self.target_size,
str(elapsed).split(".")[0],
str(eta).split(".")[0])
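# Usage sketch (illustrative, not part of the original module):
# >>> progress = Progress(target_size=1000)
# >>> progress.Add(100)
# >>> print(progress)  # e.g. "100 / 1000 (elapsed 0:00:01 eta 0:00:09)"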
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""General utility functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import six
from utils.luatables import T
import tensorflow as tf
import yaml
from yaml.constructor import ConstructorError
# pylint: disable=invalid-name
def GetFilesRecursively(topdir):
"""Gets all records recursively for some topdir.
Args:
topdir: String, path to top directory.
Returns:
allpaths: List of Strings, full paths to all leaf records.
Raises:
ValueError: If there are no files found for this directory.
"""
assert topdir
topdir = os.path.expanduser(topdir)
allpaths = []
for path, _, leaffiles in tf.gfile.Walk(topdir):
if leaffiles:
allpaths.extend([os.path.join(path, i) for i in leaffiles])
if not allpaths:
raise ValueError('No files found for top directory %s' % topdir)
return allpaths
def NoDuplicatesConstructor(loader, node, deep=False):
"""Check for duplicate keys."""
mapping = {}
for key_node, value_node in node.value:
key = loader.construct_object(key_node, deep=deep)
value = loader.construct_object(value_node, deep=deep)
if key in mapping:
raise ConstructorError('while constructing a mapping', node.start_mark,
'found duplicate key (%s)' % key,
key_node.start_mark)
mapping[key] = value
return loader.construct_mapping(node, deep)
def WriteConfigAsYaml(config, logdir, filename):
"""Writes a config dict as yaml to logdir/experiment.yml."""
if not tf.gfile.Exists(logdir):
tf.gfile.MakeDirs(logdir)
config_filename = os.path.join(logdir, filename)
with tf.gfile.GFile(config_filename, 'w') as f:
f.write(yaml.dump(config))
tf.logging.info('wrote config to %s', config_filename)
def LoadConfigDict(config_paths, model_params):
"""Loads config dictionary from specified yaml files or command line yaml."""
# Ensure that no duplicate keys can be loaded (causing pain).
yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
NoDuplicatesConstructor)
# Handle either ',' or '#' separated config lists, since borg will only
# accept '#'.
sep = ',' if ',' in config_paths else '#'
# Load flags from config file.
final_config = {}
if config_paths:
for config_path in config_paths.split(sep):
config_path = config_path.strip()
if not config_path:
continue
config_path = os.path.abspath(config_path)
tf.logging.info('Loading config from %s', config_path)
with tf.gfile.GFile(config_path.strip()) as config_file:
config_flags = yaml.load(config_file)
final_config = DeepMergeDict(final_config, config_flags)
if model_params:
model_params = MaybeLoadYaml(model_params)
final_config = DeepMergeDict(final_config, model_params)
tf.logging.info('Final Config:\n%s', yaml.dump(final_config))
return final_config
def MaybeLoadYaml(item):
"""Parses item if it's a string. If it's a dictionary it's returned as-is."""
if isinstance(item, six.string_types):
return yaml.load(item)
elif isinstance(item, dict):
return item
else:
raise ValueError('Got {}, expected YAML string or dict'.format(type(item)))
def DeepMergeDict(dict_x, dict_y, path=None):
"""Recursively merges dict_y into dict_x."""
if path is None: path = []
for key in dict_y:
if key in dict_x:
if isinstance(dict_x[key], dict) and isinstance(dict_y[key], dict):
DeepMergeDict(dict_x[key], dict_y[key], path + [str(key)])
elif dict_x[key] == dict_y[key]:
pass # same leaf value
else:
dict_x[key] = dict_y[key]
else:
dict_x[key] = dict_y[key]
return dict_x
def ParseConfigsToLuaTable(config_paths, extra_model_params=None,
save=False, save_name='final_training_config.yml',
logdir=None):
"""Maps config_paths and extra_model_params to a Luatable-like object."""
# Parse config dict from yaml config files / command line flags.
config = LoadConfigDict(config_paths, extra_model_params)
if save:
WriteConfigAsYaml(config, logdir, save_name)
# Convert config dictionary to T object with dot notation.
config = RecursivelyConvertToLuatable(config)
return config
def SetNestedValue(d, keys, value):
"""Sets a value in a nested dictionary.
Example:
d = {}, keys = ['data','augmentation','minscale'], value = 1.0.
returns {'data': {'augmentation' : {'minscale': 1.0 }}}
Args:
d: A dictionary to set a nested value in.
keys: list of dict keys nesting left to right.
value: the nested value to set.
Returns:
None
"""
for key in keys[:-1]:
d = d.setdefault(key, {})
d[keys[-1]] = value
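# Illustrative example (not part of the original module), mirroring the
# docstring above:
# >>> d = {}
# >>> SetNestedValue(d, ['data', 'augmentation', 'minscale'], 1.0)
# >>> d
# {'data': {'augmentation': {'minscale': 1.0}}}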
def RecursivelyConvertToLuatable(yaml_dict):
"""Converts a dictionary to a LuaTable-like T object."""
if isinstance(yaml_dict, dict):
yaml_dict = T(yaml_dict)
for key, item in yaml_dict.iteritems():
if isinstance(item, dict):
yaml_dict[key] = RecursivelyConvertToLuatable(item)
return yaml_dict
def KNNIds(query_vec, target_seq, k=1):
"""Gets the knn ids to the query vec from the target sequence."""
sorted_distances = KNNIdsWithDistances(query_vec, target_seq, k)
return [i[0] for i in sorted_distances]
def KNNIdsWithDistances(query_vec, target_seq, k=1):
"""Gets the knn ids to the query vec from the target sequence."""
if not isinstance(target_seq, np.ndarray):
target_seq = np.array(target_seq)
assert np.shape(query_vec) == np.shape(target_seq[0])
distances = [(i, np.linalg.norm(query_vec-target_vec)) for (
i, target_vec) in enumerate(target_seq)]
sorted_distances = sorted(distances, key=lambda x: x[1])
return sorted_distances[:k]
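# Illustrative example (not part of the original module):
# >>> seq = np.array([[0., 0.], [1., 0.], [3., 0.]])
# >>> KNNIdsWithDistances(np.array([1., 0.]), seq, k=2)
# [(1, 0.0), (0, 1.0)]
# >>> KNNIds(np.array([1., 0.]), seq, k=2)
# [1, 0]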
def CopyLocalConfigsToCNS(outdir, configs, gfs_user):
"""Copies experiment yaml config files to the job_logdir on /cns."""
assert configs
assert outdir
conf_files = configs.split(',')
for conf_file in conf_files:
copy_command = 'fileutil --gfs_user %s cp -f %s %s' % (
gfs_user, conf_file, outdir)
tf.logging.info(copy_command)
os.system(copy_command)
def pairwise_distances(feature, squared=True):
"""Computes the pairwise distance matrix in numpy.
Args:
feature: 2-D numpy array of size [number of data, feature dimension]
squared: Boolean. If true, output is the pairwise squared euclidean
distance matrix; else, output is the pairwise euclidean distance matrix.
Returns:
pdists: 2-D numpy array of size
[number of data, number of data].
"""
triu = np.triu_indices(feature.shape[0], 1)
upper_tri_pdists = np.linalg.norm(feature[triu[1]] - feature[triu[0]], axis=1)
if squared:
upper_tri_pdists **= 2.
num_data = feature.shape[0]
pdists = np.zeros((num_data, num_data))
pdists[np.triu_indices(num_data, 1)] = upper_tri_pdists
# Make symmetrical.
pdists = pdists + pdists.T - np.diag(
pdists.diagonal())
return pdists
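# Illustrative check (not part of the original module): squared pairwise
# distances between the 1-D points 0, 1 and 3:
# >>> pairwise_distances(np.array([[0.], [1.], [3.]]), squared=True)
# array([[0., 1., 9.],
#        [1., 0., 4.],
#        [9., 4., 0.]])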
def is_tfrecord_input(inp):
"""Checks if input is a TFRecord or list of TFRecords."""
def _is_tfrecord(inp):
if not isinstance(inp, str):
return False
_, extension = os.path.splitext(inp)
return extension == '.tfrecord'
if isinstance(inp, str):
return _is_tfrecord(inp)
if isinstance(inp, list):
return all(map(_is_tfrecord, inp))
return False
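# Illustrative example (not part of the original module):
# >>> is_tfrecord_input(['a.tfrecord', 'b.tfrecord'])
# True
# >>> is_tfrecord_input('data.csv')
# False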
def is_np_array(inp):
"""Checks if input is a numpy array or a list of numpy arrays."""
if isinstance(inp, np.ndarray):
return True
if isinstance(inp, list):
return all([isinstance(i, np.ndarray) for i in inp])
return False
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Visualizes embeddings in tensorboard.
Usage:
root=experimental/users/sermanet/imitation/mirror && \
blaze build -c opt --copt=-mavx --config=cuda $root:visualize_embeddings && \
blaze-bin/$root/visualize_embeddings \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--embedding_records $embedding_records \
--outdir $outdir \
--num_embed 1000 \
--sprite_dim 64 \
--config_paths $configs \
--logtostderr
blaze build third_party/tensorboard && \
blaze-bin/third_party/tensorboard/tensorboard --logdir=$outdir
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
import cv2
import numpy as np
from scipy.misc import imresize
from scipy.misc import imsave
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Evaluate this specific checkpoint.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string(
'outdir', '/tmp/tcn', 'Path to write tensorboard info to.')
tf.app.flags.DEFINE_integer(
'num_embed', 4000, 'Number of embeddings.')
tf.app.flags.DEFINE_integer(
'num_sequences', -1, 'Number of sequences, -1 for all.')
tf.app.flags.DEFINE_integer(
'sprite_dim', 64, 'Height, width of the square sprite image.')
tf.app.flags.DEFINE_string(
'embedding_records', None, 'path to embedding records')
FLAGS = tf.app.flags.FLAGS
def images_to_sprite(data):
"""Creates the sprite image along with any necessary padding.
Taken from: https://github.com/tensorflow/tensorflow/issues/6322
Args:
data: NxHxW[x3] tensor containing the images.
Returns:
data: Properly shaped HxWx3 image with any necessary padding.
"""
if len(data.shape) == 3:
data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))
data = data.astype(np.float32)
min_v = np.min(data.reshape((data.shape[0], -1)), axis=1)
data = (data.transpose(1, 2, 3, 0) - min_v).transpose(3, 0, 1, 2)
max_v = np.max(data.reshape((data.shape[0], -1)), axis=1)
data = (data.transpose(1, 2, 3, 0) / max_v).transpose(3, 0, 1, 2)
n = int(np.ceil(np.sqrt(data.shape[0])))
padding = ((0, n ** 2 - data.shape[0]), (0, 0),
(0, 0)) + ((0, 0),) * (data.ndim - 3)
data = np.pad(data, padding, mode='constant',
constant_values=0)
# Tile the individual thumbnails into an image.
data = data.reshape((n, n) + data.shape[1:]).transpose(
(0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
data = (data * 255).astype(np.uint8)
return data
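# Usage sketch (illustrative, not part of the original script): ten random
# 64x64 grayscale thumbnails are tiled into the smallest square grid that
# fits them (4x4 cells here, with missing cells zero-padded):
# >>> thumbs = np.random.rand(10, 64, 64)
# >>> images_to_sprite(thumbs).shape
# (256, 256, 3)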
def main(_):
"""Runs main labeled eval loop."""
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Choose an estimator based on training strategy.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(
checkpointdir, 'model.ckpt-%s' % FLAGS.checkpoint_iter)
estimator = get_estimator(config, checkpointdir)
# Get records to embed.
validation_dir = FLAGS.embedding_records
validation_records = util.GetFilesRecursively(validation_dir)
sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
validation_records, checkpoint_path, config.data.embed_batch_size,
num_sequences=FLAGS.num_sequences):
sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
all_embeddings = np.zeros((0, config.embedding_size))
all_ims = []
all_seqnames = []
num_embeddings = FLAGS.num_embed
# Concatenate all views from all sequences into a big flat list.
for seqname, data in sequences_to_data.iteritems():
embs = data['embeddings']
ims = data['images']
for v in range(config.data.num_views):
for (emb, im) in zip(embs[v], ims[v]):
all_embeddings = np.append(all_embeddings, [emb], axis=0)
all_ims.append(im)
all_seqnames.append(seqname)
# Choose N indices uniformly from all images.
random_indices = range(all_embeddings.shape[0])
random.shuffle(random_indices)
viz_indices = random_indices[:num_embeddings]
# Extract embs.
viz_embs = np.array(all_embeddings[viz_indices])
# Extract and decode ims.
viz_ims = list(np.array(all_ims)[viz_indices])
decoded_ims = []
sprite_dim = FLAGS.sprite_dim
for i, im in enumerate(viz_ims):
if i % 100 == 0:
print('Decoding image %d/%d.' % (i, num_embeddings))
nparr_i = np.fromstring(str(im), np.uint8)
img_np = cv2.imdecode(nparr_i, 1)
img_np = img_np[..., [2, 1, 0]]
img_np = imresize(img_np, [sprite_dim, sprite_dim, 3])
decoded_ims.append(img_np)
decoded_ims = np.array(decoded_ims)
# Extract sequence names.
outdir = FLAGS.outdir
# The embedding variable, which needs to be stored.
# Note: this must be a Variable, not a Tensor!
embedding_var = tf.Variable(viz_embs, name='viz_embs')
with tf.Session() as sess:
sess.run(embedding_var.initializer)
summary_writer = tf.summary.FileWriter(outdir)
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
# Comment out if you don't want sprites
embedding.sprite.image_path = os.path.join(outdir, 'sprite.png')
embedding.sprite.single_image_dim.extend(
[decoded_ims.shape[1], decoded_ims.shape[1]])
projector.visualize_embeddings(summary_writer, config)
saver = tf.train.Saver([embedding_var])
saver.save(sess, os.path.join(outdir, 'model2.ckpt'), 1)
sprite = images_to_sprite(decoded_ims)
imsave(os.path.join(outdir, 'sprite.png'), sprite)
if __name__ == '__main__':
tf.app.run(main)