Commit cbd571f2 authored by Corey Lynch

Adding TCN.

parent 69cf6fca
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SVTCN estimator implementation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import data_providers
import model as model_module
from estimators import base_estimator
from estimators import svtcn_loss
import tensorflow as tf
class SVTCNEstimator(base_estimator.BaseEstimator):
"""Single-view TCN Estimator base class."""
def __init__(self, config, logdir):
super(SVTCNEstimator, self).__init__(config, logdir)
def construct_input_fn(self, records, is_training):
"""See base class."""
config = self._config
num_views = config.data.num_views
num_parallel_calls = config.data.num_parallel_calls
sequence_prefetch_size = config.data.sequence_prefetch_size
batch_prefetch_size = config.data.batch_prefetch_size
def input_fn():
"""Provides input to SVTCN models."""
(images_preprocessed,
images_raw,
timesteps) = data_providers.singleview_tcn_provider(
file_list=records,
preprocess_fn=self.preprocess_data,
num_views=num_views,
is_training=is_training,
batch_size=self._batch_size,
num_parallel_calls=num_parallel_calls,
sequence_prefetch_size=sequence_prefetch_size,
batch_prefetch_size=batch_prefetch_size)
if config.logging.summary.image_summaries and is_training:
tf.summary.image('training/svtcn_images', images_raw)
features = {'batch_preprocessed': images_preprocessed}
return (features, timesteps)
return input_fn
def forward(self, images, is_training, reuse=False):
"""See base class."""
embedder_strategy = self._config.embedder_strategy
embedder = model_module.get_embedder(
embedder_strategy,
self._config,
images,
is_training=is_training, reuse=reuse)
embeddings = embedder.construct_embedding()
if is_training:
self.variables_to_train = embedder.get_trainable_variables()
self.pretrained_init_fn = embedder.init_fn
return embeddings
class SVTCNTripletEstimator(SVTCNEstimator):
"""Single-View TCN with semihard triplet loss."""
def __init__(self, config, logdir):
super(SVTCNTripletEstimator, self).__init__(config, logdir)
def define_loss(self, embeddings, timesteps, is_training):
"""See base class."""
pos_radius = self._config.svtcn.pos_radius
neg_radius = self._config.svtcn.neg_radius
margin = self._config.triplet_semihard.margin
loss = svtcn_loss.singleview_tcn_loss(
embeddings, timesteps, pos_radius, neg_radius, margin=margin)
self._loss = loss
if is_training:
tf.summary.scalar('training/svtcn_loss', loss)
return loss
def define_eval_metric_ops(self):
"""See base class."""
return {'validation/svtcn_loss': tf.metrics.mean(self._loss)}
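# Hedged sketch, not part of the original commit: base_estimator.BaseEstimator
# is defined elsewhere in the repo, and the assumption here is only that it
# wires the overridden pieces together roughly like this under TF 1.x:
# construct_input_fn() yields (features, timesteps), forward() embeds the
# preprocessed batch, and define_loss() reduces embeddings plus timesteps to a
# scalar training loss. `estimator` and `records` are placeholders.
def _sketch_svtcn_training_graph(estimator, records):
  """Illustrates how the overridden methods above compose into a loss."""
  input_fn = estimator.construct_input_fn(records, is_training=True)
  features, timesteps = input_fn()
  embeddings = estimator.forward(
      features['batch_preprocessed'], is_training=True)
  return estimator.define_loss(embeddings, timesteps, is_training=True)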
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""This implements single view TCN triplet loss."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def pairwise_squared_distance(feature):
"""Computes the squared pairwise distance matrix.
output[i, j] = || feature[i, :] - feature[j, :] ||_2^2
Args:
feature: 2-D Tensor of size [number of data, feature dimension]
Returns:
pairwise_squared_distances: 2-D Tensor of size
[number of data, number of data]
"""
pairwise_squared_distances = tf.add(
tf.reduce_sum(
tf.square(feature), axis=1, keep_dims=True),
tf.reduce_sum(
tf.square(tf.transpose(feature)), axis=0,
keep_dims=True)) - 2.0 * tf.matmul(feature, tf.transpose(feature))
# Deal with numerical inaccuracies. Set small negatives to zero.
pairwise_squared_distances = tf.maximum(pairwise_squared_distances, 0.0)
return pairwise_squared_distances
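# Hedged reference check (not part of the original file): the graph op above
# relies on the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b applied to all
# pairs at once. The NumPy mirror below assumes only numpy and is meant for
# sanity-checking small inputs against pairwise_squared_distance.
def _pairwise_squared_distance_np(feature):
  """NumPy reference for pairwise_squared_distance on small arrays."""
  import numpy as np
  sq_norms = np.sum(np.square(feature), axis=1)
  distances = sq_norms[:, None] + sq_norms[None, :] - 2.0 * feature.dot(
      feature.T)
  # Clamp tiny negatives introduced by floating point round-off.
  return np.maximum(distances, 0.0)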
def masked_maximum(data, mask, dim=1):
"""Computes the axis wise maximum over chosen elements.
Args:
data: N-D Tensor.
mask: N-D Tensor of zeros or ones.
dim: The dimension over which to compute the maximum.
Returns:
masked_maximums: N-D Tensor.
The maximized dimension is of size 1 after the operation.
"""
axis_minimums = tf.reduce_min(data, dim, keep_dims=True)
masked_maximums = tf.reduce_max(
tf.multiply(
data - axis_minimums, mask), dim, keep_dims=True) + axis_minimums
return masked_maximums
def masked_minimum(data, mask, dim=1):
"""Computes the axis wise minimum over chosen elements.
Args:
data: 2-D Tensor of size [n, m].
mask: 2-D Boolean Tensor of size [n, m].
dim: The dimension over which to compute the minimum.
Returns:
masked_minimums: N-D Tensor.
The minimized dimension is of size 1 after the operation.
"""
axis_maximums = tf.reduce_max(data, dim, keep_dims=True)
masked_minimums = tf.reduce_min(
tf.multiply(
data - axis_maximums, mask), dim, keep_dims=True) + axis_maximums
return masked_minimums
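# Hedged NumPy mirror (illustrative only, numpy assumed) of masked_minimum,
# showing why the functions above shift by the axis max/min before masking:
# multiplying by the mask zeroes out excluded entries, and the shift guarantees
# that a zeroed-out entry can never win against a genuinely selected value,
# even when every selected value is positive (for the min) or negative (for
# the max).
def _masked_minimum_np(data, mask, dim=1):
  """NumPy reference for masked_minimum on small arrays."""
  import numpy as np
  axis_maximums = np.max(data, axis=dim, keepdims=True)
  # Kept entries become <= 0, excluded entries become exactly 0.
  shifted = (data - axis_maximums) * mask
  return np.min(shifted, axis=dim, keepdims=True) + axis_maximums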
def singleview_tcn_loss(
embeddings, timesteps, pos_radius, neg_radius, margin=1.0,
sequence_ids=None, multiseq=False):
"""Computes the single view triplet loss with semi-hard negative mining.
The loss encourages the positive distances (between a pair of embeddings
within the positive range of each other) to be smaller than the minimum
negative distance among those negatives that are at least greater than the
positive distance plus the margin constant (the semi-hard negatives) in the
mini-batch. If no such negative exists, the largest negative distance is used
instead.
Anchor, positive, negative selection is as follows:
Anchors: We consider every embedding timestep as an anchor.
Positives: pos_radius defines a radius (in timesteps) around each anchor from
which positives can be drawn. E.g. An anchor with t=10 and a pos_radius of
2 produces a set of 4 (anchor,pos) pairs [(a=10, p=8), ... (a=10, p=12)].
Negatives: neg_radius defines a boundary (in timesteps) around each anchor,
outside of which negatives can be drawn. E.g. An anchor with t=10 and a
neg_radius of 4 means negatives can be any t_neg where t_neg < 6 or
t_neg > 14.
Args:
embeddings: 2-D Tensor of embedding vectors.
timesteps: Tensor of sequence timesteps, shape [batch_size] or [batch_size, 1].
pos_radius: int32; the size of the window (in timesteps) around each anchor
timestep that a positive can be drawn from.
neg_radius: int32; the size of the window (in timesteps) around each anchor
timestep that defines a negative boundary. Negatives can only be chosen
where negative timestep t is < negative boundary min or > negative
boundary max.
margin: Float; the triplet loss margin hyperparameter.
sequence_ids: (Optional) Tensor of sequence ids, shape [batch_size] or
[batch_size, 1]. Together (sequence_id, sequence_timestep) give us a unique index for
each image if we have multiple sequences in a batch.
multiseq: Boolean, whether or not the batch is composed of multiple
sequences (with possibly colliding timesteps).
Returns:
triplet_loss: tf.float32 scalar.
"""
assert neg_radius > pos_radius
# If timesteps shape isn't [batch_size, 1], reshape to [batch_size, 1].
tshape = tf.shape(timesteps)
assert tshape.shape == 2 or tshape.shape == 1
if tshape.shape == 1:
timesteps = tf.reshape(timesteps, [tshape[0], 1])
# Build pairwise squared distance matrix.
pdist_matrix = pairwise_squared_distance(embeddings)
# Build pairwise binary adjacency matrix, where adjacency[i,j] is True
# if timestep j is inside the positive range for timestep i and both
# timesteps come from the same sequence.
pos_radius = tf.cast(pos_radius, tf.int32)
if multiseq:
# If sequence_ids shape isn't [batch_size, 1], reshape to [batch_size, 1].
tshape = tf.shape(sequence_ids)
assert tshape.shape == 2 or tshape.shape == 1
if tshape.shape == 1:
sequence_ids = tf.reshape(sequence_ids, [tshape[0], 1])
# Build pairwise binary adjacency matrix based on sequence_ids
sequence_adjacency = tf.equal(sequence_ids, tf.transpose(sequence_ids))
# Invert so we can select negatives only.
sequence_adjacency_not = tf.logical_not(sequence_adjacency)
in_pos_range = tf.logical_and(
tf.less_equal(
tf.abs(timesteps - tf.transpose(timesteps)), pos_radius),
sequence_adjacency)
# Build pairwise binary discordance matrix, where discordance[i,j] is True
# if timestep j is inside the negative range for timestep i or if the
# timesteps come from different sequences.
in_neg_range = tf.logical_or(
tf.greater(tf.abs(timesteps - tf.transpose(timesteps)), neg_radius),
sequence_adjacency_not
)
else:
in_pos_range = tf.less_equal(
tf.abs(timesteps - tf.transpose(timesteps)), pos_radius)
in_neg_range = tf.greater(tf.abs(timesteps - tf.transpose(timesteps)),
neg_radius)
batch_size = tf.size(timesteps)
# Compute the semi-hard negative mask: for each (anchor, positive) pair, mark
# negatives that are inside the negative range and farther from the anchor
# than the positive is.
pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
mask = tf.logical_and(
tf.tile(in_neg_range, [batch_size, 1]),
tf.greater(pdist_matrix_tile,
tf.reshape(tf.transpose(pdist_matrix), [-1, 1])))
mask_final = tf.reshape(
tf.greater(
tf.reduce_sum(
tf.cast(
mask, dtype=tf.float32), 1, keep_dims=True),
0.0), [batch_size, batch_size])
mask_final = tf.transpose(mask_final)
in_neg_range = tf.cast(in_neg_range, dtype=tf.float32)
mask = tf.cast(mask, dtype=tf.float32)
# negatives_outside: smallest D_an where D_an > D_ap
negatives_outside = tf.reshape(
masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
negatives_outside = tf.transpose(negatives_outside)
# negatives_inside: largest D_an
negatives_inside = tf.tile(
masked_maximum(pdist_matrix, in_neg_range), [1, batch_size])
semi_hard_negatives = tf.where(
mask_final, negatives_outside, negatives_inside)
loss_mat = tf.add(margin, pdist_matrix - semi_hard_negatives)
mask_positives = tf.cast(
in_pos_range, dtype=tf.float32) - tf.diag(tf.ones([batch_size]))
# In lifted-struct, the authors multiply the loss by 0.5 for upper-triangular
# pairs; in semihard, all positive pairs except the diagonal are used.
num_positives = tf.reduce_sum(mask_positives)
triplet_loss = tf.truediv(
tf.reduce_sum(tf.maximum(tf.multiply(loss_mat, mask_positives), 0.0)),
num_positives,
name='triplet_svtcn_loss')
return triplet_loss
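# Hedged usage sketch (not in the original commit): wiring the loss above to
# toy data under TF 1.x. Shapes follow the docstring (embeddings [batch, dim],
# timesteps [batch, 1]); the radii and margin values below are made up purely
# for illustration.
def _demo_singleview_tcn_loss():
  import numpy as np
  embeddings = tf.constant(np.random.rand(32, 8).astype(np.float32))
  timesteps = tf.constant(np.arange(32, dtype=np.int32).reshape(32, 1))
  loss = singleview_tcn_loss(
      embeddings, timesteps, pos_radius=2, neg_radius=6, margin=1.0)
  with tf.Session() as sess:
    print('toy svtcn loss: %f' % sess.run(loss))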
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for svtcn_loss.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from estimators import svtcn_loss
import tensorflow as tf
class SVTCNLoss(tf.test.TestCase):
def testSVTCNLoss(self):
with self.test_session():
num_data = 64
num_sequences = 2
num_data_per_seq = num_data // num_sequences
feat_dim = 6
margin = 1.0
times = np.tile(np.arange(num_data_per_seq, dtype=np.int32),
num_sequences)
times = np.reshape(times, [times.shape[0], 1])
sequence_ids = np.concatenate(
[np.ones(num_data_per_seq)*i for i in range(num_sequences)])
sequence_ids = np.reshape(sequence_ids, [sequence_ids.shape[0], 1])
pos_radius = 6
neg_radius = 12
embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
# Compute the loss in NP
# Get a positive mask, i.e. indices for each time index
# that are inside the positive range.
in_pos_range = np.less_equal(
np.abs(times - times.transpose()), pos_radius)
# Get a negative mask, i.e. indices for each time index
# that are inside the negative range (> t + neg_radius
# or < t - neg_radius).
in_neg_range = np.greater(np.abs(times - times.transpose()), neg_radius)
sequence_adjacency = sequence_ids == sequence_ids.T
sequence_adjacency_not = np.logical_not(sequence_adjacency)
pdist_matrix = euclidean_distances(embedding, squared=True)
loss_np = 0.0
num_positives = 0.0
for i in range(num_data):
for j in range(num_data):
if in_pos_range[i, j] and i != j and sequence_adjacency[i, j]:
num_positives += 1.0
pos_distance = pdist_matrix[i][j]
neg_distances = []
for k in range(num_data):
if in_neg_range[i, k] or sequence_adjacency_not[i, k]:
neg_distances.append(pdist_matrix[i][k])
neg_distances.sort() # sort by distance
chosen_neg_distance = neg_distances[0]
for l in range(len(neg_distances)):
chosen_neg_distance = neg_distances[l]
if chosen_neg_distance > pos_distance:
break
loss_np += np.maximum(
0.0, margin - chosen_neg_distance + pos_distance)
loss_np /= num_positives
# Compute the loss in TF
loss_tf = svtcn_loss.singleview_tcn_loss(
embeddings=tf.convert_to_tensor(embedding),
timesteps=tf.convert_to_tensor(times),
pos_radius=pos_radius,
neg_radius=neg_radius,
margin=margin,
sequence_ids=tf.convert_to_tensor(sequence_ids),
multiseq=True
)
loss_tf = loss_tf.eval()
self.assertAllClose(loss_np, loss_tf)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Calculates running validation of TCN models (and baseline comparisons)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Paths to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string('master', 'local',
'BNS name of the TensorFlow master to use')
tf.app.flags.DEFINE_string(
'logdir', '/tmp/tcn', 'Directory where to write event logs.')
FLAGS = tf.app.flags.FLAGS
def main(_):
"""Runs main eval loop."""
# Parse config dict from yaml config files / command line flags.
logdir = FLAGS.logdir
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Choose an estimator based on training strategy.
estimator = get_estimator(config, logdir)
# Wait for the first checkpoint file to be written.
while not tf.train.latest_checkpoint(logdir):
tf.logging.info('Waiting for a checkpoint file...')
time.sleep(10)
# Run validation.
while True:
estimator.evaluate()
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Generates imitation videos.
Generate single pairwise imitation videos:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--mode single \
--num_query_sequences 1 \
--num_target_sequences -1
Generate imitation videos with multiple sequences in the target set:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--mode multi \
--num_multi_targets 1
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import tensorflow as tf
import os
import matplotlib
matplotlib.use("pdf")
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
from estimators.get_estimator import get_estimator
from utils import util
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Paths to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Checkpoint iter to use.')
tf.app.flags.DEFINE_integer(
'num_multi_targets', -1,
'Number of imitation vids in the target set per imitation video.')
tf.app.flags.DEFINE_string(
'outdir', '/tmp/tcn', 'Path to write embeddings to.')
tf.app.flags.DEFINE_string(
'mode', 'single', 'single | multi | same. Single means generate imitation '
'vids where the query is imitated by a single target sequence. Multi '
'means the query is imitated by a set of multiple target sequences. '
'Same means the query is imitated by itself from a different view.')
tf.app.flags.DEFINE_string('query_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_string('target_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_integer('query_view', 1,
'Viewpoint of the query video.')
tf.app.flags.DEFINE_integer('target_view', 0,
'Viewpoint of the imitation video.')
tf.app.flags.DEFINE_integer('smoothing_window', 5,
'Number of frames to smooth over.')
tf.app.flags.DEFINE_integer('num_query_sequences', -1,
'Number of query sequences to embed.')
tf.app.flags.DEFINE_integer('num_target_sequences', -1,
'Number of target sequences to embed.')
FLAGS = tf.app.flags.FLAGS
def SmoothEmbeddings(embs):
"""Temporally smoothes a sequence of embeddings."""
new_embs = []
window = int(FLAGS.smoothing_window)
for i in range(len(embs)):
min_i = max(i-window, 0)
max_i = min(i+window, len(embs))
new_embs.append(np.mean(embs[min_i:max_i, :], axis=0))
return np.array(new_embs)
def MakeImitationVideo(
outdir, vidname, query_im_strs, knn_im_strs, height=640, width=360):
"""Creates a KNN imitation video.
For each frame in vid0, pair with the frame at index in knn_indices in
vids1. Write video to disk.
Args:
outdir: String, directory to write videos.
vidname: String, name of video.
query_im_strs: Numpy array holding query image strings.
knn_im_strs: Numpy array holding knn image strings.
height: Int, height of raw images.
width: Int, width of raw images.
"""
if not tf.gfile.Exists(outdir):
tf.gfile.MakeDirs(outdir)
vid_path = os.path.join(outdir, vidname)
combined = zip(query_im_strs, knn_im_strs)
# Create and write the video.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_aspect('equal')
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
im = ax.imshow(
np.zeros((height, width*2, 3)), cmap='gray', interpolation='nearest')
im.set_clim([0, 1])
plt.tight_layout(pad=0, w_pad=0, h_pad=0)
# pylint: disable=invalid-name
def update_img(pair):
"""Decode pairs of image strings, update a video."""
im_i, im_j = pair
nparr_i = np.fromstring(str(im_i), np.uint8)
img_np_i = cv2.imdecode(nparr_i, 1)
img_np_i = img_np_i[..., [2, 1, 0]]
nparr_j = np.fromstring(str(im_j), np.uint8)
img_np_j = cv2.imdecode(nparr_j, 1)
img_np_j = img_np_j[..., [2, 1, 0]]
# Optionally reshape the images to be same size.
frame = np.concatenate([img_np_i, img_np_j], axis=1)
im.set_data(frame)
return im
ani = animation.FuncAnimation(fig, update_img, combined, interval=15)
writer = animation.writers['ffmpeg'](fps=15)
dpi = 100
tf.logging.info('Writing video to:\n %s \n' % vid_path)
ani.save('%s.mp4' % vid_path, writer=writer, dpi=dpi)
def GenerateImitationVideo(
vid_name, query_ims, query_embs, target_ims, target_embs, height, width):
"""Generates a single cross-sequence imitation video.
For each frame in some query sequence, find the nearest neighbor from
some target sequence in embedding space.
Args:
vid_name: String, the name of the video.
query_ims: Numpy array of shape [query sequence length, height, width, 3].
query_embs: Numpy array of shape [query sequence length, embedding size].
target_ims: Numpy array of shape [target sequence length, height, width,
3].
target_embs: Numpy array of shape [target sequence length, embedding
size].
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# For each query frame, find the index of the nearest neighbor in the
# target video.
knn_indices = [util.KNNIds(q, target_embs, k=1)[0] for q in query_embs]
# Create and write out the video.
assert knn_indices
knn_ims = np.array([target_ims[k] for k in knn_indices])
MakeImitationVideo(FLAGS.outdir, vid_name, query_ims, knn_ims, height, width)
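# util.KNNIds is defined elsewhere in the repo; the hedged NumPy sketch below
# captures only the behavior GenerateImitationVideo relies on (an assumption,
# not the repo's implementation): given one query embedding, return the
# indices of the k closest target embeddings by Euclidean distance, nearest
# first.
def _knn_ids_np(query_emb, target_embs, k=1):
  dists = np.linalg.norm(target_embs - query_emb[None, :], axis=1)
  return list(np.argsort(dists)[:k])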
def SingleImitationVideos(
query_records, target_records, config, height, width):
"""Generates pairwise imitation videos.
This creates all pairs of target imitating query videos, where each frame
on the left is matched to a nearest neighbor coming from a single
embedded target video.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.iteritems():
for task_j, data_j in target_sequences_to_data.iteritems():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_j, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_j, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def MultiImitationVideos(
query_records, target_records, config, height, width):
"""Creates multi-imitation videos.
This creates videos where every frame on the left is matched to a nearest
neighbor coming from a set of multiple embedded target videos.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.iteritems():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
all_target_embs = []
all_target_ims = []
# If num_multi_targets is -1, add all sequence embeddings to the target set.
if FLAGS.num_multi_targets == -1:
num_multi_targets = len(target_sequences_to_data)
else:
# Else, add some specified number of seq embeddings to the target set.
num_multi_targets = FLAGS.num_multi_targets
for j in range(num_multi_targets):
task_j = target_sequences_to_data.keys()[j]
data_j = target_sequences_to_data[task_j]
print('Adding %s to target set' % task_j)
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
all_target_embs.extend(target_embs)
all_target_ims.extend(target_ims)
# Generate a "j imitating i" video.
tf.logging.info('Generating all imitating %s video.' % task_i)
vid_name = 'q%sv%s_multiv%s' % (task_i, qview, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
all_target_ims, all_target_embs, height, width)
def SameSequenceVideos(query_records, config, height, width):
"""Generate same sequence, cross-view imitation videos."""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# query_sequences_to_data.
sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=FLAGS.num_query_sequences):
sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
# Loop over query videos.
qview = FLAGS.query_view
tview = FLAGS.target_view
for task_i, data_i in sequences_to_data.iteritems():
ims = data_i['images']
embs = data_i['embeddings']
query_embs = SmoothEmbeddings(embs[qview])
query_ims = ims[qview]
target_embs = SmoothEmbeddings(embs[tview])
target_ims = ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_i, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_i, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def EmbedQueryTargetData(query_records, target_records, config):
"""Embeds the full set of query_records and target_records.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
Returns:
query_sequences_to_data: A dict holding 'embeddings' and 'images'
target_sequences_to_data: A dict holding 'embeddings' and 'images'
"""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# query_sequences_to_data.
num_query_sequences = FLAGS.num_query_sequences
num_target_sequences = FLAGS.num_target_sequences
query_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=num_query_sequences):
query_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
if (query_records == target_records) and (
num_query_sequences == num_target_sequences):
target_sequences_to_data = query_sequences_to_data
else:
# Embed num_sequences target sequences, store embeddings and image strings
# in sequences_to_data.
target_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings,
seqname) in estimator.inference(
target_records, checkpoint_path, batch_size,
num_sequences=num_target_sequences):
target_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
return query_sequences_to_data, target_sequences_to_data
def main(_):
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Get tables to embed.
query_records_dir = FLAGS.query_records_dir
query_records = util.GetFilesRecursively(query_records_dir)
target_records_dir = FLAGS.target_records_dir
target_records = util.GetFilesRecursively(target_records_dir)
height = config.data.raw_height
width = config.data.raw_width
mode = FLAGS.mode
if mode == 'multi':
# Generate videos where target set is composed of multiple videos.
MultiImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'single':
# Generate videos where target set is a single video.
SingleImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'same':
# Generate videos where target set is the same as query, but diff view.
SameSequenceVideos(query_records, config, height, width)
else:
raise ValueError('Unknown mode %s' % mode)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tcn.labeled_eval."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import labeled_eval
import tensorflow as tf
class LabeledEvalTest(tf.test.TestCase):
def testNearestCrossSequenceNeighbors(self):
# Generate embeddings.
num_data = 64
embedding_size = 4
num_tasks = 8
n_neighbors = 2
data = np.random.randn(num_data, embedding_size)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
# Get nearest cross-sequence indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
data, tasks, n_neighbors=n_neighbors)
# Assert that no nearest neighbor indices come from the same task.
repeated_tasks = np.tile(np.reshape(tasks, (num_data, 1)), n_neighbors)
self.assertTrue(np.all(np.not_equal(repeated_tasks, tasks[indices])))
def testPerfectCrossSequenceRecall(self):
# Make sure cross-sequence recall@k returns 1.0 for near-duplicate features.
embeddings = np.random.randn(10, 2)
embeddings[5:, :] = 0.00001 + embeddings[:5, :]
tasks = np.repeat([0, 1], 5)
labels = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
# find k=1, k=2 nearest neighbors.
k_list = [1, 2]
# Compute knn indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array([1.0, 1.0])))
def testRelativeRecall(self):
# Make sure cross-sequence recall@k is non-decreasing over k.
num_data = 100
num_tasks = 10
embeddings = np.random.randn(100, 5)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
labels = np.random.randint(0, 5, 100)
k_list = [1, 2, 4, 8, 16, 32, 64]
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
recall_list_sorted = sorted(recall_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array(recall_list_sorted)))
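# labeled_eval itself is not part of this diff; as a hedged reference for the
# quantity exercised above (illustrative NumPy sketch, not necessarily the
# repo's implementation), recall@k counts a query as a hit if its own label
# appears among the labels of its k nearest cross-sequence neighbors.
def _recall_at_k_np(retrieved_labels, labels, k_list):
  recalls = []
  for k in k_list:
    hits = [labels[i] in retrieved_labels[i, :k] for i in range(len(labels))]
    recalls.append(np.mean(hits))
  return recalls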
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A utility class for reporting processing progress."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
class Progress(object):
"""A utility class for reporting processing progress."""
def __init__(self, target_size):
self.target_size = target_size
self.current_size = 0
self.start_time = datetime.datetime.now()
def Update(self, current_size):
"""Replaces internal current_size with current_size."""
self.current_size = current_size
def Add(self, size):
"""Increments internal current_size by size."""
self.current_size += size
def __str__(self):
processed = 1e-5 + self.current_size / float(self.target_size)
current_time = datetime.datetime.now()
elapsed = current_time - self.start_time
eta = datetime.timedelta(
seconds=elapsed.total_seconds() / processed - elapsed.total_seconds())
return "%d / %d (elapsed %s eta %s)" % (
self.current_size, self.target_size,
str(elapsed).split(".")[0],
str(eta).split(".")[0])
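# Brief usage sketch (not part of the original file): Progress is meant to be
# polled inside a loop over a known amount of work; the printed string is
# illustrative.
def _demo_progress():
  progress = Progress(target_size=1000)
  for _ in range(10):
    progress.Add(100)      # Account for another 100 processed items.
    print(str(progress))   # e.g. "300 / 1000 (elapsed 0:00:03 eta 0:00:07)"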