Commit 68a18b70 authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

Merge pull request #1 from tensorflow/master

update to tensorflow/model master
parents bc70271a 2c4fea8d
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for setting up the CMP graph.
"""
import os, numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim import arg_scope
import logging
from src import utils
import src.file_utils as fu
from tfcode import tf_utils
resnet_v2 = tf_utils.resnet_v2
custom_residual_block = tf_utils.custom_residual_block
def value_iteration_network(
fr, num_iters, val_neurons, action_neurons, kernel_size, share_wts=False,
name='vin', wt_decay=0.0001, activation_fn=None, shape_aware=False):
"""
Constructs a Value Iteration Network, convolutions and max pooling across
channels.
Input:
fr: NxWxHxC
val_neurons: Number of channels for maintaining the value.
action_neurons: Computes action_neurons * val_neurons at each iteration to
max pool over.
Output:
value image: NxHxWx(val_neurons)
"""
init_var = np.sqrt(2.0/(kernel_size**2)/(val_neurons*action_neurons))
vals = []
with tf.variable_scope(name) as varscope:
if shape_aware == False:
fr_shape = tf.unstack(tf.shape(fr))
val_shape = tf.stack(fr_shape[:-1] + [val_neurons])
val = tf.zeros(val_shape, name='val_init')
else:
val = tf.expand_dims(tf.zeros_like(fr[:,:,:,0]), dim=-1) * \
tf.constant(0., dtype=tf.float32, shape=[1,1,1,val_neurons])
val_shape = tf.shape(val)
vals.append(val)
for i in range(num_iters):
if share_wts:
# The first Value Iteration maybe special, so it can have its own
# paramterss.
scope = 'conv'
if i == 0: scope = 'conv_0'
if i > 1: varscope.reuse_variables()
else:
scope = 'conv_{:d}'.format(i)
val = slim.conv2d(tf.concat([val, fr], 3, name='concat_{:d}'.format(i)),
num_outputs=action_neurons*val_neurons,
kernel_size=kernel_size, stride=1, activation_fn=activation_fn,
scope=scope, normalizer_fn=None,
weights_regularizer=slim.l2_regularizer(wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var),
biases_initializer=tf.zeros_initializer())
val = tf.reshape(val, [-1, action_neurons*val_neurons, 1, 1],
name='re_{:d}'.format(i))
val = slim.max_pool2d(val, kernel_size=[action_neurons,1],
stride=[action_neurons,1], padding='VALID',
scope='val_{:d}'.format(i))
val = tf.reshape(val, val_shape, name='unre_{:d}'.format(i))
vals.append(val)
return val, vals
def rotate_preds(loc_on_map, relative_theta, map_size, preds,
output_valid_mask):
with tf.name_scope('rotate'):
flow_op = tf_utils.get_flow(loc_on_map, relative_theta, map_size=map_size)
if type(preds) != list:
rotated_preds, valid_mask_warps = tf_utils.dense_resample(preds, flow_op,
output_valid_mask)
else:
rotated_preds = [] ;valid_mask_warps = []
for pred in preds:
rotated_pred, valid_mask_warp = tf_utils.dense_resample(pred, flow_op,
output_valid_mask)
rotated_preds.append(rotated_pred)
valid_mask_warps.append(valid_mask_warp)
return rotated_preds, valid_mask_warps
def get_visual_frustum(map_size, shape_like, expand_dims=[0,0]):
with tf.name_scope('visual_frustum'):
l = np.tril(np.ones(map_size)) ;l = l + l[:,::-1]
l = (l == 2).astype(np.float32)
for e in expand_dims:
l = np.expand_dims(l, axis=e)
confs_probs = tf.constant(l, dtype=tf.float32)
confs_probs = tf.ones_like(shape_like, dtype=tf.float32) * confs_probs
return confs_probs
def deconv(x, is_training, wt_decay, neurons, strides, layers_per_block,
kernel_size, conv_fn, name, offset=0):
"""Generates a up sampling network with residual connections.
"""
batch_norm_param = {'center': True, 'scale': True,
'activation_fn': tf.nn.relu,
'is_training': is_training}
outs = []
for i, (neuron, stride) in enumerate(zip(neurons, strides)):
for s in range(layers_per_block):
scope = '{:s}_{:d}_{:d}'.format(name, i+1+offset,s+1)
x = custom_residual_block(x, neuron, kernel_size, stride, scope,
is_training, wt_decay, use_residual=True,
residual_stride_conv=True, conv_fn=conv_fn,
batch_norm_param=batch_norm_param)
stride = 1
outs.append((x,True))
return x, outs
def fr_v2(x, output_neurons, inside_neurons, is_training, name='fr',
wt_decay=0.0001, stride=1, updates_collections=tf.GraphKeys.UPDATE_OPS):
"""Performs fusion of information between the map and the reward map.
Inputs
x: NxHxWxC1
Outputs
fr map: NxHxWx(output_neurons)
"""
if type(stride) != list:
stride = [stride]
with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope(
is_training=is_training, weight_decay=wt_decay)):
with slim.arg_scope([slim.batch_norm], updates_collections=updates_collections) as arg_sc:
# Change the updates_collections for the conv normalizer_params to None
for i in range(len(arg_sc.keys())):
if 'convolution' in arg_sc.keys()[i]:
arg_sc.values()[i]['normalizer_params']['updates_collections'] = updates_collections
with slim.arg_scope(arg_sc):
bottleneck = resnet_v2.bottleneck
blocks = []
for i, s in enumerate(stride):
b = resnet_v2.resnet_utils.Block(
'block{:d}'.format(i + 1), bottleneck, [{
'depth': output_neurons,
'depth_bottleneck': inside_neurons,
'stride': stride[i]
}])
blocks.append(b)
x, outs = resnet_v2.resnet_v2(x, blocks, num_classes=None, global_pool=False,
output_stride=None, include_root_block=False,
reuse=False, scope=name)
return x, outs
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various losses for training navigation agents.
Defines various loss functions for navigation agents,
compute_losses_multi_or.
"""
import os, numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim import arg_scope
from tensorflow.contrib.slim.nets import resnet_v2
from tensorflow.python.training import moving_averages
import logging
from src import utils
import src.file_utils as fu
from tfcode import tf_utils
def compute_losses_multi_or(logits, actions_one_hot, weights=None,
num_actions=-1, data_loss_wt=1., reg_loss_wt=1.,
ewma_decay=0.99, reg_loss_op=None):
assert(num_actions > 0), 'num_actions must be specified and must be > 0.'
with tf.name_scope('loss'):
if weights is None:
weight = tf.ones_like(actions_one_hot, dtype=tf.float32, name='weight')
actions_one_hot = tf.cast(tf.reshape(actions_one_hot, [-1, num_actions],
're_actions_one_hot'), tf.float32)
weights = tf.reduce_sum(tf.reshape(weights, [-1, num_actions], 're_weight'),
reduction_indices=1)
total = tf.reduce_sum(weights)
action_prob = tf.nn.softmax(logits)
action_prob = tf.reduce_sum(tf.multiply(action_prob, actions_one_hot),
reduction_indices=1)
example_loss = -tf.log(tf.maximum(tf.constant(1e-4), action_prob))
data_loss_op = tf.reduce_sum(example_loss * weights) / total
if reg_loss_op is None:
if reg_loss_wt > 0:
reg_loss_op = tf.add_n(tf.losses.get_regularization_losses())
else:
reg_loss_op = tf.constant(0.)
if reg_loss_wt > 0:
total_loss_op = data_loss_wt*data_loss_op + reg_loss_wt*reg_loss_op
else:
total_loss_op = data_loss_wt*data_loss_op
is_correct = tf.cast(tf.greater(action_prob, 0.5, name='pred_class'), tf.float32)
acc_op = tf.reduce_sum(is_correct*weights) / total
ewma_acc_op = moving_averages.weighted_moving_average(
acc_op, ewma_decay, weight=total, name='ewma_acc')
acc_ops = [ewma_acc_op]
return reg_loss_op, data_loss_op, total_loss_op, acc_ops
def get_repr_from_image(images_reshaped, modalities, data_augment, encoder,
freeze_conv, wt_decay, is_training):
# Pass image through lots of convolutional layers, to obtain pool5
if modalities == ['rgb']:
with tf.name_scope('pre_rgb'):
x = (images_reshaped + 128.) / 255. # Convert to brightness between 0 and 1.
if data_augment.relight and is_training:
x = tf_utils.distort_image(x, fast_mode=data_augment.relight_fast)
x = (x-0.5)*2.0
scope_name = encoder
elif modalities == ['depth']:
with tf.name_scope('pre_d'):
d_image = images_reshaped
x = 2*(d_image[...,0] - 80.0)/100.0
y = d_image[...,1]
d_image = tf.concat([tf.expand_dims(x, -1), tf.expand_dims(y, -1)], 3)
x = d_image
scope_name = 'd_'+encoder
resnet_is_training = is_training and (not freeze_conv)
with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope(resnet_is_training)):
fn = getattr(tf_utils, encoder)
x, end_points = fn(x, num_classes=None, global_pool=False,
output_stride=None, reuse=None,
scope=scope_name)
vars_ = slim.get_variables_to_restore()
conv_feat = x
return conv_feat, vars_
def default_train_step_kwargs(m, obj, logdir, rng_seed, is_chief, num_steps,
iters, train_display_interval,
dagger_sample_bn_false):
train_step_kwargs = {}
train_step_kwargs['obj'] = obj
train_step_kwargs['m'] = m
# rng_data has 2 independent rngs, one for sampling episodes and one for
# sampling perturbs (so that we can make results reproducible.
train_step_kwargs['rng_data'] = [np.random.RandomState(rng_seed),
np.random.RandomState(rng_seed)]
train_step_kwargs['rng_action'] = np.random.RandomState(rng_seed)
if is_chief:
train_step_kwargs['writer'] = tf.summary.FileWriter(logdir) #, m.tf_graph)
else:
train_step_kwargs['writer'] = None
train_step_kwargs['iters'] = iters
train_step_kwargs['train_display_interval'] = train_display_interval
train_step_kwargs['num_steps'] = num_steps
train_step_kwargs['logdir'] = logdir
train_step_kwargs['dagger_sample_bn_false'] = dagger_sample_bn_false
return train_step_kwargs
# Utilities for visualizing and analysing validation output.
def save_d_at_t(outputs, global_step, output_dir, metric_summary, N):
"""Save distance to goal at all time steps.
Args:
outputs : [gt_dist_to_goal].
global_step : number of iterations.
output_dir : output directory.
metric_summary : to append scalars to summary.
N : number of outputs to process.
"""
d_at_t = np.concatenate(map(lambda x: x[0][:,:,0]*1, outputs), axis=0)
fig, axes = utils.subplot(plt, (1,1), (5,5))
axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.')
axes.set_xlabel('time step')
axes.set_ylabel('dist to next goal')
axes.grid('on')
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step))
utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True)
plt.close(fig)
return None
def save_all(outputs, global_step, output_dir, metric_summary, N):
"""Save numerous statistics.
Args:
outputs : [locs, goal_loc, gt_dist_to_goal, node_ids, perturbs]
global_step : number of iterations.
output_dir : output directory.
metric_summary : to append scalars to summary.
N : number of outputs to process.
"""
all_locs = np.concatenate(map(lambda x: x[0], outputs), axis=0)
all_goal_locs = np.concatenate(map(lambda x: x[1], outputs), axis=0)
all_d_at_t = np.concatenate(map(lambda x: x[2][:,:,0]*1, outputs), axis=0)
all_node_ids = np.concatenate(map(lambda x: x[3], outputs), axis=0)
all_perturbs = np.concatenate(map(lambda x: x[4], outputs), axis=0)
file_name = os.path.join(output_dir, 'all_locs_at_t_{:d}.pkl'.format(global_step))
vars = [all_locs, all_goal_locs, all_d_at_t, all_node_ids, all_perturbs]
var_names = ['all_locs', 'all_goal_locs', 'all_d_at_t', 'all_node_ids', 'all_perturbs']
utils.save_variables(file_name, vars, var_names, overwrite=True)
return None
def eval_ap(outputs, global_step, output_dir, metric_summary, N, num_classes=4):
"""Processes the collected outputs to compute AP for action prediction.
Args:
outputs : [logits, labels]
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
num_classes : number of classes to compute AP over, and to reshape tensors.
"""
if N >= 0:
outputs = outputs[:N]
logits = np.concatenate(map(lambda x: x[0], outputs), axis=0).reshape((-1, num_classes))
labels = np.concatenate(map(lambda x: x[1], outputs), axis=0).reshape((-1, num_classes))
aps = []
for i in range(logits.shape[1]):
ap, rec, prec = utils.calc_pr(labels[:,i], logits[:,i])
ap = ap[0]
tf_utils.add_value_to_summary(metric_summary, 'aps/ap_{:d}: '.format(i), ap)
aps.append(ap)
return aps
def eval_dist(outputs, global_step, output_dir, metric_summary, N):
"""Processes the collected outputs during validation to
1. Plot the distance over time curve.
2. Compute mean and median distances.
3. Plots histogram of end distances.
Args:
outputs : [locs, goal_loc, gt_dist_to_goal].
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
"""
SUCCESS_THRESH = 3
if N >= 0:
outputs = outputs[:N]
# Plot distance at time t.
d_at_t = []
for i in range(len(outputs)):
locs, goal_loc, gt_dist_to_goal = outputs[i]
d_at_t.append(gt_dist_to_goal[:,:,0]*1)
# Plot the distance.
fig, axes = utils.subplot(plt, (1,1), (5,5))
d_at_t = np.concatenate(d_at_t, axis=0)
axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.')
axes.set_xlabel('time step')
axes.set_ylabel('dist to next goal')
axes.grid('on')
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step))
utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True)
plt.close(fig)
# Plot the trajectories and the init_distance and final distance.
d_inits = []
d_ends = []
for i in range(len(outputs)):
locs, goal_loc, gt_dist_to_goal = outputs[i]
d_inits.append(gt_dist_to_goal[:,0,0]*1)
d_ends.append(gt_dist_to_goal[:,-1,0]*1)
# Plot the distance.
fig, axes = utils.subplot(plt, (1,1), (5,5))
d_inits = np.concatenate(d_inits, axis=0)
d_ends = np.concatenate(d_ends, axis=0)
axes.plot(d_inits+np.random.rand(*(d_inits.shape))-0.5,
d_ends+np.random.rand(*(d_ends.shape))-0.5, '.', mec='red', mew=1.0)
axes.set_xlabel('init dist'); axes.set_ylabel('final dist');
axes.grid('on'); axes.axis('equal');
title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}'
title_str = title_str.format(
np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75),
100*(np.mean(d_ends <= SUCCESS_THRESH)))
axes.set_title(title_str)
file_name = os.path.join(output_dir, 'dist_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
file_name = os.path.join(output_dir, 'dist_{:d}.pkl'.format(global_step))
utils.save_variables(file_name, [d_inits, d_ends], ['d_inits', 'd_ends'],
overwrite=True)
plt.close(fig)
# Plot the histogram of the end_distance.
with plt.style.context('seaborn-white'):
d_ends_ = np.sort(d_ends)
d_inits_ = np.sort(d_inits)
leg = [];
fig, ax = utils.subplot(plt, (1,1), (5,5))
ax.grid('on')
ax.set_xlabel('Distance from goal'); ax.xaxis.label.set_fontsize(16);
ax.set_ylabel('Fraction of data'); ax.yaxis.label.set_fontsize(16);
ax.plot(d_ends_, np.arange(d_ends_.size)*1./d_ends_.size, 'r')
ax.plot(d_inits_, np.arange(d_inits_.size)*1./d_inits_.size, 'k')
leg.append('Final'); leg.append('Init');
ax.legend(leg, fontsize='x-large');
ax.set_axis_on()
title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}'
title_str = title_str.format(
np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75),
100*(np.mean(d_ends <= SUCCESS_THRESH)))
ax.set_title(title_str)
file_name = os.path.join(output_dir, 'dist_hist_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
# Log distance metrics.
tf_utils.add_value_to_summary(metric_summary, 'dists/success_init: ',
100*(np.mean(d_inits <= SUCCESS_THRESH)))
tf_utils.add_value_to_summary(metric_summary, 'dists/success_end: ',
100*(np.mean(d_ends <= SUCCESS_THRESH)))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (75): ',
np.percentile(d_inits, q=75))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (75): ',
np.percentile(d_ends, q=75))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (median): ',
np.median(d_inits))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (median): ',
np.median(d_ends))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (mean): ',
np.mean(d_inits))
tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (mean): ',
np.mean(d_ends))
return np.median(d_inits), np.median(d_ends), np.mean(d_inits), np.mean(d_ends), \
np.percentile(d_inits, q=75), np.percentile(d_ends, q=75), \
100*(np.mean(d_inits) <= SUCCESS_THRESH), 100*(np.mean(d_ends) <= SUCCESS_THRESH)
def plot_trajectories(outputs, global_step, output_dir, metric_summary, N):
"""Processes the collected outputs during validation to plot the trajectories
in the top view.
Args:
outputs : [locs, orig_maps, goal_loc].
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
"""
if N >= 0:
outputs = outputs[:N]
N = len(outputs)
plt.set_cmap('gray')
fig, axes = utils.subplot(plt, (N, outputs[0][1].shape[0]), (5,5))
axes = axes.ravel()[::-1].tolist()
for i in range(N):
locs, orig_maps, goal_loc = outputs[i]
is_semantic = np.isnan(goal_loc[0,0,1])
for j in range(orig_maps.shape[0]):
ax = axes.pop();
ax.plot(locs[j,0,0], locs[j,0,1], 'ys')
# Plot one by one, so that they come in different colors.
for k in range(goal_loc.shape[1]):
if not is_semantic:
ax.plot(goal_loc[j,k,0], goal_loc[j,k,1], 's')
if False:
ax.plot(locs[j,:,0], locs[j,:,1], 'r.', ms=3)
ax.imshow(orig_maps[j,0,:,:,0], origin='lower')
ax.set_axis_off();
else:
ax.scatter(locs[j,:,0], locs[j,:,1], c=np.arange(locs.shape[1]),
cmap='jet', s=10, lw=0)
ax.imshow(orig_maps[j,0,:,:,0], origin='lower', vmin=-1.0, vmax=2.0)
if not is_semantic:
xymin = np.minimum(np.min(goal_loc[j,:,:], axis=0), np.min(locs[j,:,:], axis=0))
xymax = np.maximum(np.max(goal_loc[j,:,:], axis=0), np.max(locs[j,:,:], axis=0))
else:
xymin = np.min(locs[j,:,:], axis=0)
xymax = np.max(locs[j,:,:], axis=0)
xy1 = (xymax+xymin)/2. - np.maximum(np.max(xymax-xymin), 12)
xy2 = (xymax+xymin)/2. + np.maximum(np.max(xymax-xymin), 12)
ax.set_xlim([xy1[0], xy2[0]])
ax.set_ylim([xy1[1], xy2[1]])
ax.set_axis_off()
file_name = os.path.join(output_dir, 'trajectory_{:d}.png'.format(global_step))
with fu.fopen(file_name, 'w') as f:
fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0)
plt.close(fig)
return None
def add_default_summaries(mode, arop_full_summary_iters, summarize_ops,
summarize_names, to_aggregate, action_prob_op,
input_tensors, scope_name):
assert(mode == 'train' or mode == 'val' or mode == 'test'), \
'add_default_summaries mode is neither train or val or test.'
s_ops = tf_utils.get_default_summary_ops()
if mode == 'train':
s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries(
summarize_ops, summarize_names, mode, to_aggregate=False,
scope_name=scope_name)
s_ops.additional_return_ops += additional_return_ops
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
elif mode == 'val':
s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries(
summarize_ops, summarize_names, mode, to_aggregate=to_aggregate,
scope_name=scope_name)
s_ops.additional_return_ops += additional_return_ops
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
elif mode == 'test':
s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries(
[], [], mode, to_aggregate=[], scope_name=scope_name)
s_ops.additional_return_ops += additional_return_ops
s_ops.arop_summary_iters += arop_summary_iters
s_ops.arop_eval_fns += arop_eval_fns
if mode == 'val':
arop = s_ops.additional_return_ops
arop += [[action_prob_op, input_tensors['train']['action']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['goal_loc'],
input_tensors['step']['gt_dist_to_goal']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['orig_maps'],
input_tensors['common']['goal_loc']]]
s_ops.arop_summary_iters += [-1, arop_full_summary_iters,
arop_full_summary_iters]
s_ops.arop_eval_fns += [eval_ap, eval_dist, plot_trajectories]
elif mode == 'test':
arop = s_ops.additional_return_ops
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['goal_loc'],
input_tensors['step']['gt_dist_to_goal']]]
arop += [[input_tensors['step']['gt_dist_to_goal']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['goal_loc'],
input_tensors['step']['gt_dist_to_goal'],
input_tensors['step']['node_ids'],
input_tensors['step']['perturbs']]]
arop += [[input_tensors['step']['loc_on_map'],
input_tensors['common']['orig_maps'],
input_tensors['common']['goal_loc']]]
s_ops.arop_summary_iters += [-1, -1, -1, arop_full_summary_iters]
s_ops.arop_eval_fns += [eval_dist, save_d_at_t, save_all,
plot_trajectories]
return s_ops
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import sys
import tensorflow as tf
import src.utils as utils
import logging
from tensorflow.contrib import slim
from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops
from tensorflow.contrib.slim import arg_scope
from tensorflow.contrib.slim.nets import resnet_v2
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope
sys.path.insert(0, '../slim')
from preprocessing import inception_preprocessing as ip
resnet_v2_50 = resnet_v2.resnet_v2_50
def custom_residual_block(x, neurons, kernel_size, stride, name, is_training,
wt_decay=0.0001, use_residual=True,
residual_stride_conv=True, conv_fn=slim.conv2d,
batch_norm_param=None):
# batch norm x and relu
init_var = np.sqrt(2.0/(kernel_size**2)/neurons)
with arg_scope([conv_fn],
weights_regularizer=slim.l2_regularizer(wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var),
biases_initializer=tf.zeros_initializer()):
if batch_norm_param is None:
batch_norm_param = {'center': True, 'scale': False,
'activation_fn':tf.nn.relu,
'is_training': is_training}
y = slim.batch_norm(x, scope=name+'_bn', **batch_norm_param)
y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size, stride=stride,
activation_fn=None, scope=name+'_1',
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param)
y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size,
stride=1, activation_fn=None, scope=name+'_2')
if use_residual:
if stride != 1 or x.get_shape().as_list()[-1] != neurons:
batch_norm_param_ = dict(batch_norm_param)
batch_norm_param_['activation_fn'] = None
x = conv_fn(x, num_outputs=neurons, kernel_size=1,
stride=stride if residual_stride_conv else 1,
activation_fn=None, scope=name+'_0_1x1',
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param_)
if not residual_stride_conv:
x = slim.avg_pool2d(x, 1, stride=stride, scope=name+'_0_avg')
y = tf.add(x, y, name=name+'_add')
return y
def step_gt_prob(step, step_number_op):
# Change samping probability from 1 to -1 at step steps.
with tf.name_scope('step_gt_prob'):
out = tf.cond(tf.less(step_number_op, step),
lambda: tf.constant(1.), lambda: tf.constant(-1.))
return out
def inverse_sigmoid_decay(k, global_step_op):
with tf.name_scope('inverse_sigmoid_decay'):
k = tf.constant(k, dtype=tf.float32)
tmp = k*tf.exp(-tf.cast(global_step_op, tf.float32)/k)
tmp = tmp / (1. + tmp)
return tmp
def dense_resample(im, flow_im, output_valid_mask, name='dense_resample'):
""" Resample reward at particular locations.
Args:
im: ...xHxWxC matrix to sample from.
flow_im: ...xHxWx2 matrix, samples the image using absolute offsets as given
by the flow_im.
"""
with tf.name_scope(name):
valid_mask = None
x, y = tf.unstack(flow_im, axis=-1)
x = tf.cast(tf.reshape(x, [-1]), tf.float32)
y = tf.cast(tf.reshape(y, [-1]), tf.float32)
# constants
shape = tf.unstack(tf.shape(im))
channels = shape[-1]
width = shape[-2]
height = shape[-3]
num_batch = tf.cast(tf.reduce_prod(tf.stack(shape[:-3])), 'int32')
zero = tf.constant(0, dtype=tf.int32)
# Round up and down.
x0 = tf.cast(tf.floor(x), 'int32'); x1 = x0 + 1;
y0 = tf.cast(tf.floor(y), 'int32'); y1 = y0 + 1;
if output_valid_mask:
valid_mask = tf.logical_and(
tf.logical_and(tf.less_equal(x, tf.cast(width, tf.float32)-1.), tf.greater_equal(x, 0.)),
tf.logical_and(tf.less_equal(y, tf.cast(height, tf.float32)-1.), tf.greater_equal(y, 0.)))
valid_mask = tf.reshape(valid_mask, shape=shape[:-1] + [1])
x0 = tf.clip_by_value(x0, zero, width-1)
x1 = tf.clip_by_value(x1, zero, width-1)
y0 = tf.clip_by_value(y0, zero, height-1)
y1 = tf.clip_by_value(y1, zero, height-1)
dim2 = width; dim1 = width * height;
# Create base index
base = tf.reshape(tf.range(num_batch) * dim1, shape=[-1,1])
base = tf.reshape(tf.tile(base, [1, height*width]), shape=[-1])
base_y0 = base + y0 * dim2
base_y1 = base + y1 * dim2
idx_a = base_y0 + x0
idx_b = base_y1 + x0
idx_c = base_y0 + x1
idx_d = base_y1 + x1
# use indices to lookup pixels in the flat image and restore channels dim
sh = tf.stack([tf.constant(-1,dtype=tf.int32), channels])
im_flat = tf.cast(tf.reshape(im, sh), dtype=tf.float32)
pixel_a = tf.gather(im_flat, idx_a)
pixel_b = tf.gather(im_flat, idx_b)
pixel_c = tf.gather(im_flat, idx_c)
pixel_d = tf.gather(im_flat, idx_d)
# and finally calculate interpolated values
x1_f = tf.to_float(x1)
y1_f = tf.to_float(y1)
wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1)
wb = tf.expand_dims((x1_f - x) * (1.0 - (y1_f - y)), 1)
wc = tf.expand_dims(((1.0 - (x1_f - x)) * (y1_f - y)), 1)
wd = tf.expand_dims(((1.0 - (x1_f - x)) * (1.0 - (y1_f - y))), 1)
output = tf.add_n([wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d])
output = tf.reshape(output, shape=tf.shape(im))
return output, valid_mask
def get_flow(t, theta, map_size, name_scope='gen_flow'):
"""
Rotates the map by theta and translates the rotated map by t.
Assume that the robot rotates by an angle theta and then moves forward by
translation t. This function returns the flow field field. For every pixel in
the new image it tells us which pixel in the original image it came from:
NewI(x, y) = OldI(flow_x(x,y), flow_y(x,y)).
Assume there is a point p in the original image. Robot rotates by R and moves
forward by t. p1 = Rt*p; p2 = p1 - t; (the world moves in opposite direction.
So, p2 = Rt*p - t, thus p2 came from R*(p2+t), which is what this function
calculates.
t: ... x 2 (translation for B batches of N motions each).
theta: ... x 1 (rotation for B batches of N motions each).
Output: ... x map_size x map_size x 2
"""
with tf.name_scope(name_scope):
tx, ty = tf.unstack(tf.reshape(t, shape=[-1, 1, 1, 1, 2]), axis=4)
theta = tf.reshape(theta, shape=[-1, 1, 1, 1])
c = tf.constant((map_size-1.)/2., dtype=tf.float32)
x, y = np.meshgrid(np.arange(map_size), np.arange(map_size))
x = tf.constant(x[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='x',
shape=[1, map_size, map_size, 1])
y = tf.constant(y[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='y',
shape=[1,map_size, map_size, 1])
x = x-(-tx+c)
y = y-(-ty+c)
sin_theta = tf.sin(theta)
cos_theta = tf.cos(theta)
xr = cos_theta*x - sin_theta*y
yr = sin_theta*x + cos_theta*y
xr = xr + c
yr = yr + c
flow = tf.stack([xr, yr], axis=-1)
sh = tf.unstack(tf.shape(t), axis=0)
sh = tf.stack(sh[:-1]+[tf.constant(_, dtype=tf.int32) for _ in [map_size, map_size, 2]])
flow = tf.reshape(flow, shape=sh)
return flow
def distort_image(im, fast_mode=False):
# All images in the same batch are transformed the same way, but over
# iterations you see different distortions.
# im should be float with values between 0 and 1.
im_ = tf.reshape(im, shape=(-1,1,3))
im_ = ip.apply_with_random_selector(
im_, lambda x, ordering: ip.distort_color(x, ordering, fast_mode),
num_cases=4)
im_ = tf.reshape(im_, tf.shape(im))
return im_
def fc_network(x, neurons, wt_decay, name, num_pred=None, offset=0,
batch_norm_param=None, dropout_ratio=0.0, is_training=None):
if dropout_ratio > 0:
assert(is_training is not None), \
'is_training needs to be defined when trainnig with dropout.'
repr = []
for i, neuron in enumerate(neurons):
init_var = np.sqrt(2.0/neuron)
if batch_norm_param is not None:
x = slim.fully_connected(x, neuron, activation_fn=None,
weights_initializer=tf.random_normal_initializer(stddev=init_var),
weights_regularizer=slim.l2_regularizer(wt_decay),
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param,
biases_initializer=tf.zeros_initializer(),
scope='{:s}_{:d}'.format(name, offset+i))
else:
x = slim.fully_connected(x, neuron, activation_fn=tf.nn.relu,
weights_initializer=tf.random_normal_initializer(stddev=init_var),
weights_regularizer=slim.l2_regularizer(wt_decay),
biases_initializer=tf.zeros_initializer(),
scope='{:s}_{:d}'.format(name, offset+i))
if dropout_ratio > 0:
x = slim.dropout(x, keep_prob=1-dropout_ratio, is_training=is_training,
scope='{:s}_{:d}'.format('dropout_'+name, offset+i))
repr.append(x)
if num_pred is not None:
init_var = np.sqrt(2.0/num_pred)
x = slim.fully_connected(x, num_pred,
weights_regularizer=slim.l2_regularizer(wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var),
biases_initializer=tf.zeros_initializer(),
activation_fn=None,
scope='{:s}_pred'.format(name))
return x, repr
def concat_state_x_list(f, names):
af = {}
for i, k in enumerate(names):
af[k] = np.concatenate([x[i] for x in f], axis=1)
return af
def concat_state_x(f, names):
af = {}
for k in names:
af[k] = np.concatenate([x[k] for x in f], axis=1)
# af[k] = np.swapaxes(af[k], 0, 1)
return af
def sample_action(rng, action_probs, optimal_action, sample_gt_prob,
type='sample', combine_type='one_or_other'):
optimal_action_ = optimal_action/np.sum(optimal_action+0., 1, keepdims=True)
action_probs_ = action_probs/np.sum(action_probs+0.001, 1, keepdims=True)
batch_size = action_probs_.shape[0]
action = np.zeros((batch_size), dtype=np.int32)
action_sample_wt = np.zeros((batch_size), dtype=np.float32)
if combine_type == 'add':
sample_gt_prob_ = np.minimum(np.maximum(sample_gt_prob, 0.), 1.)
for i in range(batch_size):
if combine_type == 'one_or_other':
sample_gt = rng.rand() < sample_gt_prob
if sample_gt: distr_ = optimal_action_[i,:]*1.
else: distr_ = action_probs_[i,:]*1.
elif combine_type == 'add':
distr_ = optimal_action_[i,:]*sample_gt_prob_ + \
(1.-sample_gt_prob_)*action_probs_[i,:]
distr_ = distr_ / np.sum(distr_)
if type == 'sample':
action[i] = np.argmax(rng.multinomial(1, distr_, size=1))
elif type == 'argmax':
action[i] = np.argmax(distr_)
action_sample_wt[i] = action_probs_[i, action[i]] / distr_[action[i]]
return action, action_sample_wt
def train_step_custom_online_sampling(sess, train_op, global_step,
train_step_kwargs, mode='train'):
m = train_step_kwargs['m']
obj = train_step_kwargs['obj']
rng_data = train_step_kwargs['rng_data']
rng_action = train_step_kwargs['rng_action']
writer = train_step_kwargs['writer']
iters = train_step_kwargs['iters']
num_steps = train_step_kwargs['num_steps']
logdir = train_step_kwargs['logdir']
dagger_sample_bn_false = train_step_kwargs['dagger_sample_bn_false']
train_display_interval = train_step_kwargs['train_display_interval']
if 'outputs' not in m.train_ops:
m.train_ops['outputs'] = []
s_ops = m.summary_ops[mode]
val_additional_ops = []
# Print all variables here.
if False:
v = tf.get_collection(tf.GraphKeys.VARIABLES)
v_op = [_.value() for _ in v]
v_op_value = sess.run(v_op)
filter = lambda x, y: 'Adam' in x.name
# filter = lambda x, y: np.is_any_nan(y)
ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if filter(_, __)]
v = [v[i] for i in ind]
v_op_value = [v_op_value[i] for i in ind]
for i in range(len(v)):
logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.',
v[i].name, np.any(np.isnan(v_op_value[i])),
np.linalg.norm(v_op_value[i]))
tt = utils.Timer()
for i in range(iters):
tt.tic()
# Sample a room.
e = obj.sample_env(rng_data)
# Initialize the agent.
init_env_state = e.reset(rng_data)
# Get and process the common data.
input = e.get_common_data()
input = e.pre_common_data(input)
feed_dict = prepare_feed_dict(m.input_tensors['common'], input)
if dagger_sample_bn_false:
feed_dict[m.train_ops['batch_norm_is_training_op']] = False
common_data = sess.run(m.train_ops['common'], feed_dict=feed_dict)
states = []
state_features = []
state_targets = []
net_state_to_input = []
step_data_cache = []
executed_actions = []
rewards = []
action_sample_wts = []
states.append(init_env_state)
net_state = sess.run(m.train_ops['init_state'], feed_dict=feed_dict)
net_state = dict(zip(m.train_ops['state_names'], net_state))
net_state_to_input.append(net_state)
for j in range(num_steps):
f = e.get_features(states[j], j)
f = e.pre_features(f)
f.update(net_state)
f['step_number'] = np.ones((1,1,1), dtype=np.int32)*j
state_features.append(f)
feed_dict = prepare_feed_dict(m.input_tensors['step'], state_features[-1])
optimal_action = e.get_optimal_action(states[j], j)
for x, v in zip(m.train_ops['common'], common_data):
feed_dict[x] = v
if dagger_sample_bn_false:
feed_dict[m.train_ops['batch_norm_is_training_op']] = False
outs = sess.run([m.train_ops['step'], m.sample_gt_prob_op,
m.train_ops['step_data_cache'],
m.train_ops['updated_state'],
m.train_ops['outputs']], feed_dict=feed_dict)
action_probs = outs[0]
sample_gt_prob = outs[1]
step_data_cache.append(dict(zip(m.train_ops['step_data_cache'], outs[2])))
net_state = outs[3]
if hasattr(e, 'update_state'):
outputs = outs[4]
outputs = dict(zip(m.train_ops['output_names'], outputs))
e.update_state(outputs, j)
state_targets.append(e.get_targets(states[j], j))
if j < num_steps-1:
# Sample from action_probs and optimal action.
action, action_sample_wt = sample_action(
rng_action, action_probs, optimal_action, sample_gt_prob,
m.sample_action_type, m.sample_action_combine_type)
next_state, reward = e.take_action(states[j], action, j)
executed_actions.append(action)
states.append(next_state)
rewards.append(reward)
action_sample_wts.append(action_sample_wt)
net_state = dict(zip(m.train_ops['state_names'], net_state))
net_state_to_input.append(net_state)
# Concatenate things together for training.
rewards = np.array(rewards).T
action_sample_wts = np.array(action_sample_wts).T
executed_actions = np.array(executed_actions).T
all_state_targets = concat_state_x(state_targets, e.get_targets_name())
all_state_features = concat_state_x(state_features,
e.get_features_name()+['step_number'])
# all_state_net = concat_state_x(net_state_to_input,
# m.train_ops['state_names'])
all_step_data_cache = concat_state_x(step_data_cache,
m.train_ops['step_data_cache'])
dict_train = dict(input)
dict_train.update(all_state_features)
dict_train.update(all_state_targets)
# dict_train.update(all_state_net)
dict_train.update(net_state_to_input[0])
dict_train.update(all_step_data_cache)
dict_train.update({'rewards': rewards,
'action_sample_wts': action_sample_wts,
'executed_actions': executed_actions})
feed_dict = prepare_feed_dict(m.input_tensors['train'], dict_train)
for x in m.train_ops['step_data_cache']:
feed_dict[x] = all_step_data_cache[x]
if mode == 'train':
n_step = sess.run(global_step)
if np.mod(n_step, train_display_interval) == 0:
total_loss, np_global_step, summary, print_summary = sess.run(
[train_op, global_step, s_ops.summary_ops, s_ops.print_summary_ops],
feed_dict=feed_dict)
logging.error("")
else:
total_loss, np_global_step, summary = sess.run(
[train_op, global_step, s_ops.summary_ops], feed_dict=feed_dict)
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
should_stop = sess.run(m.should_stop_op)
if mode != 'train':
arop = [[] for j in range(len(s_ops.additional_return_ops))]
for j in range(len(s_ops.additional_return_ops)):
if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]:
arop[j] = s_ops.additional_return_ops[j]
val = sess.run(arop, feed_dict=feed_dict)
val_additional_ops.append(val)
tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters),
type='time')
if mode != 'train':
# Write the default val summaries.
summary, print_summary, np_global_step = sess.run(
[s_ops.summary_ops, s_ops.print_summary_ops, global_step])
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
# write custom validation ops
val_summarys = []
val_additional_ops = zip(*val_additional_ops)
if len(s_ops.arop_eval_fns) > 0:
val_metric_summary = tf.summary.Summary()
for i in range(len(s_ops.arop_eval_fns)):
val_summary = None
if s_ops.arop_eval_fns[i] is not None:
val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i],
np_global_step, logdir,
val_metric_summary,
s_ops.arop_summary_iters[i])
val_summarys.append(val_summary)
if writer is not None:
writer.add_summary(val_metric_summary, np_global_step)
# Return the additional val_ops
total_loss = (val_additional_ops, val_summarys)
should_stop = None
return total_loss, should_stop
def train_step_custom_v2(sess, train_op, global_step, train_step_kwargs,
mode='train'):
m = train_step_kwargs['m']
obj = train_step_kwargs['obj']
rng = train_step_kwargs['rng']
writer = train_step_kwargs['writer']
iters = train_step_kwargs['iters']
logdir = train_step_kwargs['logdir']
train_display_interval = train_step_kwargs['train_display_interval']
s_ops = m.summary_ops[mode]
val_additional_ops = []
# Print all variables here.
if False:
v = tf.get_collection(tf.GraphKeys.VARIABLES)
v_op = [_.value() for _ in v]
v_op_value = sess.run(v_op)
filter = lambda x, y: 'Adam' in x.name
# filter = lambda x, y: np.is_any_nan(y)
ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if filter(_, __)]
v = [v[i] for i in ind]
v_op_value = [v_op_value[i] for i in ind]
for i in range(len(v)):
logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.',
v[i].name, np.any(np.isnan(v_op_value[i])),
np.linalg.norm(v_op_value[i]))
tt = utils.Timer()
for i in range(iters):
tt.tic()
e = obj.sample_env(rng)
rngs = e.gen_rng(rng)
input_data = e.gen_data(*rngs)
input_data = e.pre_data(input_data)
feed_dict = prepare_feed_dict(m.input_tensors, input_data)
if mode == 'train':
n_step = sess.run(global_step)
if np.mod(n_step, train_display_interval) == 0:
total_loss, np_global_step, summary, print_summary = sess.run(
[train_op, global_step, s_ops.summary_ops, s_ops.print_summary_ops],
feed_dict=feed_dict)
else:
total_loss, np_global_step, summary = sess.run(
[train_op, global_step, s_ops.summary_ops],
feed_dict=feed_dict)
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
should_stop = sess.run(m.should_stop_op)
if mode != 'train':
arop = [[] for j in range(len(s_ops.additional_return_ops))]
for j in range(len(s_ops.additional_return_ops)):
if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]:
arop[j] = s_ops.additional_return_ops[j]
val = sess.run(arop, feed_dict=feed_dict)
val_additional_ops.append(val)
tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters),
type='time')
if mode != 'train':
# Write the default val summaries.
summary, print_summary, np_global_step = sess.run(
[s_ops.summary_ops, s_ops.print_summary_ops, global_step])
if writer is not None and summary is not None:
writer.add_summary(summary, np_global_step)
# write custom validation ops
val_summarys = []
val_additional_ops = zip(*val_additional_ops)
if len(s_ops.arop_eval_fns) > 0:
val_metric_summary = tf.summary.Summary()
for i in range(len(s_ops.arop_eval_fns)):
val_summary = None
if s_ops.arop_eval_fns[i] is not None:
val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i],
np_global_step, logdir,
val_metric_summary,
s_ops.arop_summary_iters[i])
val_summarys.append(val_summary)
if writer is not None:
writer.add_summary(val_metric_summary, np_global_step)
# Return the additional val_ops
total_loss = (val_additional_ops, val_summarys)
should_stop = None
return total_loss, should_stop
def train_step_custom(sess, train_op, global_step, train_step_kwargs,
mode='train'):
m = train_step_kwargs['m']
params = train_step_kwargs['params']
rng = train_step_kwargs['rng']
writer = train_step_kwargs['writer']
iters = train_step_kwargs['iters']
gen_rng = train_step_kwargs['gen_rng']
logdir = train_step_kwargs['logdir']
gen_data = train_step_kwargs['gen_data']
pre_data = train_step_kwargs['pre_data']
train_display_interval = train_step_kwargs['train_display_interval']
val_additional_ops = []
# Print all variables here.
if False:
v = tf.get_collection(tf.GraphKeys.VARIABLES)
for _ in v:
val = sess.run(_.value())
logging.info('variable: %30s, is_any_nan: %5s, norm: %f.', _.name,
np.any(np.isnan(val)), np.linalg.norm(val))
for i in range(iters):
rngs = gen_rng(params, rng)
input_data = gen_data(params, *rngs)
input_data = pre_data(params, input_data)
feed_dict = prepare_feed_dict(m.input_tensors, input_data)
if mode == 'train':
n_step = sess.run(global_step)
if np.mod(n_step, train_display_interval) == 0:
total_loss, np_global_step, summary, print_summary = sess.run(
[train_op, global_step, m.summary_op[mode], m.print_summary_op[mode]],
feed_dict=feed_dict)
else:
total_loss, np_global_step, summary = sess.run(
[train_op, global_step, m.summary_op[mode]],
feed_dict=feed_dict)
if writer is not None:
writer.add_summary(summary, np_global_step)
should_stop = sess.run(m.should_stop_op)
if mode == 'val':
val = sess.run(m.agg_update_op[mode] + m.additional_return_op[mode],
feed_dict=feed_dict)
val_additional_ops.append(val[len(m.agg_update_op[mode]):])
if mode == 'val':
summary, print_summary, np_global_step = sess.run(
[m.summary_op[mode], m.print_summary_op[mode], global_step])
if writer is not None:
writer.add_summary(summary, np_global_step)
sess.run([m.agg_reset_op[mode]])
# write custom validation ops
if m.eval_metrics_fn[mode] is not None:
val_metric_summary = m.eval_metrics_fn[mode](val_additional_ops,
np_global_step, logdir)
if writer is not None:
writer.add_summary(val_metric_summary, np_global_step)
total_loss = val_additional_ops
should_stop = None
return total_loss, should_stop
def setup_training(loss_op, initial_learning_rate, steps_per_decay,
learning_rate_decay, momentum, max_steps,
sync=False, adjust_lr_sync=True,
num_workers=1, replica_id=0, vars_to_optimize=None,
clip_gradient_norm=0, typ=None, momentum2=0.999,
adam_eps=1e-8):
if sync and adjust_lr_sync:
initial_learning_rate = initial_learning_rate * num_workers
max_steps = np.int(max_steps / num_workers)
steps_per_decay = np.int(steps_per_decay / num_workers)
global_step_op = slim.get_or_create_global_step()
lr_op = tf.train.exponential_decay(initial_learning_rate,
global_step_op, steps_per_decay, learning_rate_decay, staircase=True)
if typ == 'sgd':
optimizer = tf.train.MomentumOptimizer(lr_op, momentum)
elif typ == 'adam':
optimizer = tf.train.AdamOptimizer(learning_rate=lr_op, beta1=momentum,
beta2=momentum2, epsilon=adam_eps)
if sync:
sync_optimizer = tf.train.SyncReplicasOptimizer(optimizer,
replicas_to_aggregate=num_workers,
replica_id=replica_id,
total_num_replicas=num_workers)
train_op = slim.learning.create_train_op(loss_op, sync_optimizer,
variables_to_train=vars_to_optimize,
clip_gradient_norm=clip_gradient_norm)
else:
sync_optimizer = None
train_op = slim.learning.create_train_op(loss_op, optimizer,
variables_to_train=vars_to_optimize,
clip_gradient_norm=clip_gradient_norm)
should_stop_op = tf.greater_equal(global_step_op, max_steps)
return lr_op, global_step_op, train_op, should_stop_op, optimizer, sync_optimizer
def add_value_to_summary(metric_summary, tag, val, log=True, tag_str=None):
"""Adds a scalar summary to the summary object. Optionally also logs to
logging."""
new_value = metric_summary.value.add();
new_value.tag = tag
new_value.simple_value = val
if log:
if tag_str is None:
tag_str = tag + '%f'
logging.info(tag_str, val)
def add_scalar_summary_op(tensor, name=None,
summary_key='summaries', print_summary_key='print_summaries', prefix=''):
collections = []
op = tf.summary.scalar(name, tensor, collections=collections)
if summary_key != print_summary_key:
tf.add_to_collection(summary_key, op)
op = tf.Print(op, [tensor], ' {:-<25s}: '.format(name) + prefix)
tf.add_to_collection(print_summary_key, op)
return op
def setup_inputs(inputs):
input_tensors = {}
input_shapes = {}
for (name, typ, sz) in inputs:
_ = tf.placeholder(typ, shape=sz, name=name)
input_tensors[name] = _
input_shapes[name] = sz
return input_tensors, input_shapes
def prepare_feed_dict(input_tensors, inputs):
feed_dict = {}
for n in input_tensors.keys():
feed_dict[input_tensors[n]] = inputs[n].astype(input_tensors[n].dtype.as_numpy_dtype)
return feed_dict
def simple_add_summaries(summarize_ops, summarize_names,
summary_key='summaries',
print_summary_key='print_summaries', prefix=''):
for op, name, in zip(summarize_ops, summarize_names):
add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
summary_op = tf.summary.merge_all(summary_key)
print_summary_op = tf.summary.merge_all(print_summary_key)
return summary_op, print_summary_op
def add_summary_ops(m, summarize_ops, summarize_names, to_aggregate=None,
summary_key='summaries',
print_summary_key='print_summaries', prefix=''):
if type(to_aggregate) != list:
to_aggregate = [to_aggregate for _ in summarize_ops]
# set up aggregating metrics
if np.any(to_aggregate):
agg_ops = []
for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate):
if to_agg:
# agg_ops.append(slim.metrics.streaming_mean(op, return_reset_op=True))
agg_ops.append(tf.contrib.metrics.streaming_mean(op))
# agg_ops.append(tf.contrib.metrics.streaming_mean(op, return_reset_op=True))
else:
agg_ops.append([None, None, None])
# agg_values_op, agg_update_op, agg_reset_op = zip(*agg_ops)
# agg_update_op = [x for x in agg_update_op if x is not None]
# agg_reset_op = [x for x in agg_reset_op if x is not None]
agg_values_op, agg_update_op = zip(*agg_ops)
agg_update_op = [x for x in agg_update_op if x is not None]
agg_reset_op = [tf.no_op()]
else:
agg_values_op = [None for _ in to_aggregate]
agg_update_op = [tf.no_op()]
agg_reset_op = [tf.no_op()]
for op, name, to_agg, agg_op in zip(summarize_ops, summarize_names, to_aggregate, agg_values_op):
if to_agg:
add_scalar_summary_op(agg_op, name, summary_key, print_summary_key, prefix)
else:
add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
summary_op = tf.summary.merge_all(summary_key)
print_summary_op = tf.summary.merge_all(print_summary_key)
return summary_op, print_summary_op, agg_update_op, agg_reset_op
def accum_val_ops(outputs, names, global_step, output_dir, metric_summary, N):
"""Processes the collected outputs to compute AP for action prediction.
Args:
outputs : List of scalar ops to summarize.
names : Name of the scalar ops.
global_step : global_step.
output_dir : where to store results.
metric_summary : summary object to add summaries to.
N : number of outputs to process.
"""
outs = []
if N >= 0:
outputs = outputs[:N]
for i in range(len(outputs[0])):
scalar = np.array(map(lambda x: x[i], outputs))
assert(scalar.ndim == 1)
add_value_to_summary(metric_summary, names[i], np.mean(scalar),
tag_str='{:>27s}: [{:s}]: %f'.format(names[i], ''))
outs.append(np.mean(scalar))
return outs
def get_default_summary_ops():
return utils.Foo(summary_ops=None, print_summary_ops=None,
additional_return_ops=[], arop_summary_iters=[],
arop_eval_fns=[])
def simple_summaries(summarize_ops, summarize_names, mode, to_aggregate=False,
scope_name='summary'):
if type(to_aggregate) != list:
to_aggregate = [to_aggregate for _ in summarize_ops]
summary_key = '{:s}_summaries'.format(mode)
print_summary_key = '{:s}_print_summaries'.format(mode)
prefix=' [{:s}]: '.format(mode)
# Default ops for things that dont need to be aggregated.
if not np.all(to_aggregate):
for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate):
if not to_agg:
add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix)
summary_ops = tf.summary.merge_all(summary_key)
print_summary_ops = tf.summary.merge_all(print_summary_key)
else:
summary_ops = tf.no_op()
print_summary_ops = tf.no_op()
# Default ops for things that dont need to be aggregated.
if np.any(to_aggregate):
additional_return_ops = [[summarize_ops[i]
for i, x in enumerate(to_aggregate )if x]]
arop_summary_iters = [-1]
s_names = ['{:s}/{:s}'.format(scope_name, summarize_names[i])
for i, x in enumerate(to_aggregate) if x]
fn = lambda outputs, global_step, output_dir, metric_summary, N: \
accum_val_ops(outputs, s_names, global_step, output_dir, metric_summary,
N)
arop_eval_fns = [fn]
else:
additional_return_ops = []
arop_summary_iters = []
arop_eval_fns = []
return summary_ops, print_summary_ops, additional_return_ops, \
arop_summary_iters, arop_eval_fns
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim
import logging
from tensorflow.python.platform import app
from tensorflow.python.platform import flags
from src import utils
import src.file_utils as fu
import tfcode.nav_utils as nu
from tfcode import tf_utils
setup_train_step_kwargs = nu.default_train_step_kwargs
compute_losses_multi_or = nu.compute_losses_multi_or
get_repr_from_image = nu.get_repr_from_image
_save_d_at_t = nu.save_d_at_t
_save_all = nu.save_all
_eval_ap = nu.eval_ap
_eval_dist = nu.eval_dist
_plot_trajectories = nu.plot_trajectories
def lstm_online(cell_fn, num_steps, inputs, state, varscope):
# inputs is B x num_steps x C, C channels.
# state is 2 tuple with B x 1 x C1, B x 1 x C2
# Output state is always B x 1 x C
inputs = tf.unstack(inputs, axis=1, num=num_steps)
state = tf.unstack(state, axis=1, num=1)[0]
outputs = []
if num_steps > 1:
varscope.reuse_variables()
for s in range(num_steps):
output, state = cell_fn(inputs[s], state)
outputs.append(output)
outputs = tf.stack(outputs, axis=1)
state = tf.stack([state], axis=1)
return outputs, state
def _inputs(problem, lstm_states, lstm_state_dims):
# Set up inputs.
with tf.name_scope('inputs'):
n_views = problem.n_views
inputs = []
inputs.append(('orig_maps', tf.float32,
(problem.batch_size, 1, None, None, 1)))
inputs.append(('goal_loc', tf.float32,
(problem.batch_size, problem.num_goals, 2)))
# For initing LSTM.
inputs.append(('rel_goal_loc_at_start', tf.float32,
(problem.batch_size, problem.num_goals,
problem.rel_goal_loc_dim)))
common_input_data, _ = tf_utils.setup_inputs(inputs)
inputs = []
inputs.append(('imgs', tf.float32, (problem.batch_size, None, n_views,
problem.img_height, problem.img_width,
problem.img_channels)))
# Goal location as a tuple of delta location and delta theta.
inputs.append(('rel_goal_loc', tf.float32, (problem.batch_size, None,
problem.rel_goal_loc_dim)))
if problem.outputs.visit_count:
inputs.append(('visit_count', tf.int32, (problem.batch_size, None, 1)))
inputs.append(('last_visit', tf.int32, (problem.batch_size, None, 1)))
for i, (state, dim) in enumerate(zip(lstm_states, lstm_state_dims)):
inputs.append((state, tf.float32, (problem.batch_size, 1, dim)))
if problem.outputs.egomotion:
inputs.append(('incremental_locs', tf.float32,
(problem.batch_size, None, 2)))
inputs.append(('incremental_thetas', tf.float32,
(problem.batch_size, None, 1)))
inputs.append(('step_number', tf.int32, (1, None, 1)))
inputs.append(('node_ids', tf.int32, (problem.batch_size, None,
problem.node_ids_dim)))
inputs.append(('perturbs', tf.float32, (problem.batch_size, None,
problem.perturbs_dim)))
# For plotting result plots
inputs.append(('loc_on_map', tf.float32, (problem.batch_size, None, 2)))
inputs.append(('gt_dist_to_goal', tf.float32, (problem.batch_size, None, 1)))
step_input_data, _ = tf_utils.setup_inputs(inputs)
inputs = []
inputs.append(('executed_actions', tf.int32, (problem.batch_size, None)))
inputs.append(('rewards', tf.float32, (problem.batch_size, None)))
inputs.append(('action_sample_wts', tf.float32, (problem.batch_size, None)))
inputs.append(('action', tf.int32, (problem.batch_size, None,
problem.num_actions)))
train_data, _ = tf_utils.setup_inputs(inputs)
train_data.update(step_input_data)
train_data.update(common_input_data)
return common_input_data, step_input_data, train_data
def _add_summaries(m, summary_mode, arop_full_summary_iters):
summarize_ops = [m.lr_op, m.global_step_op, m.sample_gt_prob_op,
m.total_loss_op, m.data_loss_op, m.reg_loss_op] + m.acc_ops
summarize_names = ['lr', 'global_step', 'sample_gt_prob_op', 'total_loss',
'data_loss', 'reg_loss'] + \
['acc_{:d}'.format(i) for i in range(len(m.acc_ops))]
to_aggregate = [0, 0, 0, 1, 1, 1] + [1]*len(m.acc_ops)
scope_name = 'summary'
with tf.name_scope(scope_name):
s_ops = nu.add_default_summaries(summary_mode, arop_full_summary_iters,
summarize_ops, summarize_names,
to_aggregate, m.action_prob_op,
m.input_tensors, scope_name=scope_name)
m.summary_ops = {summary_mode: s_ops}
def visit_count_fc(visit_count, last_visit, embed_neurons, wt_decay, fc_dropout):
with tf.variable_scope('embed_visit_count'):
visit_count = tf.reshape(visit_count, shape=[-1])
last_visit = tf.reshape(last_visit, shape=[-1])
visit_count = tf.clip_by_value(visit_count, clip_value_min=-1,
clip_value_max=15)
last_visit = tf.clip_by_value(last_visit, clip_value_min=-1,
clip_value_max=15)
visit_count = tf.one_hot(visit_count, depth=16, axis=1, dtype=tf.float32,
on_value=10., off_value=0.)
last_visit = tf.one_hot(last_visit, depth=16, axis=1, dtype=tf.float32,
on_value=10., off_value=0.)
f = tf.concat_v2([visit_count, last_visit], 1)
x, _ = tf_utils.fc_network(
f, neurons=embed_neurons, wt_decay=wt_decay, name='visit_count_embed',
offset=0, batch_norm_param=None, dropout_ratio=fc_dropout,
is_training=is_training)
return x
def lstm_setup(name, x, batch_size, is_single_step, lstm_dim, lstm_out,
num_steps, state_input_op):
# returns state_name, state_init_op, updated_state_op, out_op
with tf.name_scope('reshape_'+name):
sh = x.get_shape().as_list()
x = tf.reshape(x, shape=[batch_size, -1, sh[-1]])
with tf.variable_scope(name) as varscope:
cell = tf.contrib.rnn.LSTMCell(
num_units=lstm_dim, forget_bias=1.0, state_is_tuple=False,
num_proj=lstm_out, use_peepholes=True,
initializer=tf.random_uniform_initializer(-0.01, 0.01, seed=0),
cell_clip=None, proj_clip=None)
sh = [batch_size, 1, lstm_dim+lstm_out]
state_init_op = tf.constant(0., dtype=tf.float32, shape=sh)
fn = lambda ns: lstm_online(cell, ns, x, state_input_op, varscope)
out_op, updated_state_op = tf.cond(is_single_step, lambda: fn(1), lambda:
fn(num_steps))
return name, state_init_op, updated_state_op, out_op
def combine_setup(name, combine_type, embed_img, embed_goal, num_img_neuorons=None,
num_goal_neurons=None):
with tf.name_scope(name + '_' + combine_type):
if combine_type == 'add':
# Simple concat features from goal and image
out = embed_img + embed_goal
elif combine_type == 'multiply':
# Multiply things together
re_embed_img = tf.reshape(
embed_img, shape=[-1, num_img_neuorons / num_goal_neurons,
num_goal_neurons])
re_embed_goal = tf.reshape(embed_goal, shape=[-1, num_goal_neurons, 1])
x = tf.matmul(re_embed_img, re_embed_goal, transpose_a=False, transpose_b=False)
out = slim.flatten(x)
elif combine_type == 'none' or combine_type == 'imgonly':
out = embed_img
elif combine_type == 'goalonly':
out = embed_goal
else:
logging.fatal('Undefined combine_type: %s', combine_type)
return out
def preprocess_egomotion(locs, thetas):
with tf.name_scope('pre_ego'):
pre_ego = tf.concat_v2([locs, tf.sin(thetas), tf.cos(thetas)], 2)
sh = pre_ego.get_shape().as_list()
pre_ego = tf.reshape(pre_ego, [-1, sh[-1]])
return pre_ego
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
# Set up the model.
tf.set_random_seed(args.solver.seed)
task_params = args.navtask.task_params
num_steps = task_params.num_steps
num_goals = task_params.num_goals
num_actions = task_params.num_actions
num_actions_ = num_actions
n_views = task_params.n_views
batch_norm_is_training_op = \
tf.placeholder_with_default(batch_norm_is_training, shape=[],
name='batch_norm_is_training_op')
# Setup the inputs
m.input_tensors = {}
lstm_states = []; lstm_state_dims = [];
state_names = []; updated_state_ops = []; init_state_ops = [];
if args.arch.lstm_output:
lstm_states += ['lstm_output']
lstm_state_dims += [args.arch.lstm_output_dim+task_params.num_actions]
if args.arch.lstm_ego:
lstm_states += ['lstm_ego']
lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out]
lstm_states += ['lstm_img']
lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
elif args.arch.lstm_img:
# An LSTM only on the image
lstm_states += ['lstm_img']
lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
else:
# No LSTMs involved here.
None
m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \
_inputs(task_params, lstm_states, lstm_state_dims)
with tf.name_scope('check_size'):
is_single_step = tf.equal(tf.unstack(tf.shape(m.input_tensors['step']['imgs']),
num=6)[1], 1)
images_reshaped = tf.reshape(m.input_tensors['step']['imgs'],
shape=[-1, task_params.img_height, task_params.img_width,
task_params.img_channels], name='re_image')
rel_goal_loc_reshaped = tf.reshape(m.input_tensors['step']['rel_goal_loc'],
shape=[-1, task_params.rel_goal_loc_dim], name='re_rel_goal_loc')
x, vars_ = get_repr_from_image(
images_reshaped, task_params.modalities, task_params.data_augment,
args.arch.encoder, args.solver.freeze_conv, args.solver.wt_decay,
is_training)
# Reshape into nice things so that these can be accumulated over time steps
# for faster backprop.
sh_before = x.get_shape().as_list()
m.encoder_output = tf.reshape(
x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:])
x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:])
# Add a layer to reduce dimensions for a fc layer.
if args.arch.dim_reduce_neurons > 0:
ks = 1; neurons = args.arch.dim_reduce_neurons;
init_var = np.sqrt(2.0/(ks**2)/neurons)
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.conv_feat = slim.conv2d(
x, neurons, kernel_size=ks, stride=1, normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_param, padding='SAME', scope='dim_reduce',
weights_regularizer=slim.l2_regularizer(args.solver.wt_decay),
weights_initializer=tf.random_normal_initializer(stddev=init_var))
reshape_conv_feat = slim.flatten(m.conv_feat)
sh = reshape_conv_feat.get_shape().as_list()
m.reshape_conv_feat = tf.reshape(reshape_conv_feat,
shape=[-1, sh[1]*n_views])
# Restore these from a checkpoint.
if args.solver.pretrained_path is not None:
m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
vars_)
else:
m.init_fn = None
# Hit the goal_location with a bunch of fully connected layers, to embed it
# into some space.
with tf.variable_scope('embed_goal'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_goal, _ = tf_utils.fc_network(
rel_goal_loc_reshaped, neurons=args.arch.goal_embed_neurons,
wt_decay=args.solver.wt_decay, name='goal_embed', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
is_training=is_training)
if args.arch.embed_goal_for_state:
with tf.variable_scope('embed_goal_for_state'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_goal_for_state, _ = tf_utils.fc_network(
m.input_tensors['common']['rel_goal_loc_at_start'][:,0,:],
neurons=args.arch.goal_embed_neurons, wt_decay=args.solver.wt_decay,
name='goal_embed', offset=0, batch_norm_param=batch_norm_param,
dropout_ratio=args.arch.fc_dropout, is_training=is_training)
# Hit the goal_location with a bunch of fully connected layers, to embed it
# into some space.
with tf.variable_scope('embed_img'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_img, _ = tf_utils.fc_network(
m.reshape_conv_feat, neurons=args.arch.img_embed_neurons,
wt_decay=args.solver.wt_decay, name='img_embed', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
is_training=is_training)
# For lstm_ego, and lstm_image, embed the ego motion, accumulate it into an
# LSTM, combine with image features and accumulate those in an LSTM. Finally
# combine what you get from the image LSTM with the goal to output an action.
if args.arch.lstm_ego:
ego_reshaped = preprocess_egomotion(m.input_tensors['step']['incremental_locs'],
m.input_tensors['step']['incremental_thetas'])
with tf.variable_scope('embed_ego'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
m.embed_ego, _ = tf_utils.fc_network(
ego_reshaped, neurons=args.arch.ego_embed_neurons,
wt_decay=args.solver.wt_decay, name='ego_embed', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
is_training=is_training)
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step,
args.arch.lstm_ego_dim, args.arch.lstm_ego_out, num_steps*num_goals,
m.input_tensors['step']['lstm_ego'])
state_names += [state_name]
init_state_ops += [state_init_op]
updated_state_ops += [updated_state_op]
# Combine the output with the vision features.
m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego,
m.embed_img, out_op,
args.arch.img_embed_neurons[-1],
args.arch.lstm_ego_out)
# LSTM on these vision features.
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step,
args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
m.input_tensors['step']['lstm_img'])
state_names += [state_name]
init_state_ops += [state_init_op]
updated_state_ops += [updated_state_op]
m.img_for_goal = out_op
num_img_for_goal_neurons = args.arch.lstm_img_out
elif args.arch.lstm_img:
# LSTM on just the image features.
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
'lstm_img', m.embed_img, task_params.batch_size, is_single_step,
args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
m.input_tensors['step']['lstm_img'])
state_names += [state_name]
init_state_ops += [state_init_op]
updated_state_ops += [updated_state_op]
m.img_for_goal = out_op
num_img_for_goal_neurons = args.arch.lstm_img_out
else:
m.img_for_goal = m.embed_img
num_img_for_goal_neurons = args.arch.img_embed_neurons[-1]
if args.arch.use_visit_count:
m.embed_visit_count = visit_count_fc(
m.input_tensors['step']['visit_count'],
m.input_tensors['step']['last_visit'], args.arch.goal_embed_neurons,
args.solver.wt_decay, args.arch.fc_dropout, is_training=is_training)
m.embed_goal = m.embed_goal + m.embed_visit_count
m.combined_f = combine_setup('img_goal', args.arch.combine_type,
m.img_for_goal, m.embed_goal,
num_img_for_goal_neurons,
args.arch.goal_embed_neurons[-1])
# LSTM on the combined representation.
if args.arch.lstm_output:
name = 'lstm_output'
# A few fully connected layers here.
with tf.variable_scope('action_pred'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
x, _ = tf_utils.fc_network(
m.combined_f, neurons=args.arch.pred_neurons,
wt_decay=args.solver.wt_decay, name='pred', offset=0,
batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout)
if args.arch.lstm_output_init_state_from_goal:
# Use the goal embedding to initialize the LSTM state.
# UGLY CLUGGY HACK: if this is doing computation for a single time step
# then this will not involve back prop, so we can use the state input from
# the feed dict, otherwise we compute the state representation from the
# goal and feed that in. Necessary for using goal location to generate the
# state representation.
m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state, dim=1)
state_op = tf.cond(is_single_step, lambda: m.input_tensors['step'][name],
lambda: m.embed_goal_for_state)
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
name, x, task_params.batch_size, is_single_step,
args.arch.lstm_output_dim,
num_actions_,
num_steps*num_goals, state_op)
init_state_ops += [m.embed_goal_for_state]
else:
state_op = m.input_tensors['step'][name]
state_name, state_init_op, updated_state_op, out_op = lstm_setup(
name, x, task_params.batch_size, is_single_step,
args.arch.lstm_output_dim,
num_actions_, num_steps*num_goals, state_op)
init_state_ops += [state_init_op]
state_names += [state_name]
updated_state_ops += [updated_state_op]
out_op = tf.reshape(out_op, shape=[-1, num_actions_])
if num_actions_ > num_actions:
m.action_logits_op = out_op[:,:num_actions]
m.baseline_op = out_op[:,num_actions:]
else:
m.action_logits_op = out_op
m.baseline_op = None
m.action_prob_op = tf.nn.softmax(m.action_logits_op)
else:
# A few fully connected layers here.
with tf.variable_scope('action_pred'):
batch_norm_param = args.arch.batch_norm_param
batch_norm_param['is_training'] = batch_norm_is_training_op
out_op, _ = tf_utils.fc_network(
m.combined_f, neurons=args.arch.pred_neurons,
wt_decay=args.solver.wt_decay, name='pred', offset=0,
num_pred=num_actions_,
batch_norm_param=batch_norm_param,
dropout_ratio=args.arch.fc_dropout, is_training=is_training)
if num_actions_ > num_actions:
m.action_logits_op = out_op[:,:num_actions]
m.baseline_op = out_op[:,num_actions:]
else:
m.action_logits_op = out_op
m.baseline_op = None
m.action_prob_op = tf.nn.softmax(m.action_logits_op)
m.train_ops = {}
m.train_ops['step'] = m.action_prob_op
m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
m.input_tensors['common']['goal_loc'],
m.input_tensors['common']['rel_goal_loc_at_start']]
m.train_ops['state_names'] = state_names
m.train_ops['init_state'] = init_state_ops
m.train_ops['updated_state'] = updated_state_ops
m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op
# Flat list of ops which cache the step data.
m.train_ops['step_data_cache'] = [tf.no_op()]
if args.solver.freeze_conv:
m.train_ops['step_data_cache'] = [m.encoder_output]
else:
m.train_ops['step_data_cache'] = []
ewma_decay = 0.99 if is_training else 0.0
weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32,
name='weight')
m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
compute_losses_multi_or(
m.action_logits_op, m.input_tensors['train']['action'],
weights=weight, num_actions=num_actions,
data_loss_wt=args.solver.data_loss_wt,
reg_loss_wt=args.solver.reg_loss_wt, ewma_decay=ewma_decay)
if args.solver.freeze_conv:
vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_))
else:
vars_to_optimize = None
m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
m.sync_optimizer = tf_utils.setup_training(
m.total_loss_op,
args.solver.initial_learning_rate,
args.solver.steps_per_decay,
args.solver.learning_rate_decay,
args.solver.momentum,
args.solver.max_steps,
args.solver.sync,
args.solver.adjust_lr_sync,
args.solver.num_workers,
args.solver.task,
vars_to_optimize=vars_to_optimize,
clip_gradient_norm=args.solver.clip_gradient_norm,
typ=args.solver.typ, momentum2=args.solver.momentum2,
adam_eps=args.solver.adam_eps)
if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k,
m.global_step_op)
elif args.arch.sample_gt_prob_type == 'zero':
m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
step = int(args.arch.sample_gt_prob_type.split('_')[1])
m.sample_gt_prob_op = tf_utils.step_gt_prob(
step, m.input_tensors['step']['step_number'][0,0,0])
m.sample_action_type = args.arch.action_sample_type
m.sample_action_combine_type = args.arch.action_sample_combine_type
_add_summaries(m, summary_mode, args.summary.arop_full_summary_iters)
m.init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
write_version=tf.train.SaverDef.V2)
return m
# Image Compression with Neural Networks
# Compression with Neural Networks
This is a [TensorFlow](http://www.tensorflow.org/) model for compressing and
decompressing images using an already trained Residual GRU model as descibed
in [Full Resolution Image Compression with Recurrent Neural Networks]
(https://arxiv.org/abs/1608.05148). Please consult the paper for more details
on the architecture and compression results.
This is a [TensorFlow](http://www.tensorflow.org/) model repo containing
research on compression with neural networks. This repo currently contains
code for the following papers:
This code will allow you to perform the lossy compression on an model
already trained on compression. This code doesn't not currently contain the
Entropy Coding portions of our paper.
## Prerequisites
The only software requirements for running the encoder and decoder is having
Tensorflow installed. You will also need to [download]
(http://download.tensorflow.org/models/compression_residual_gru-2016-08-23.tar.gz)
and extract the model residual_gru.pb.
If you want to generate the perceptual similarity under MS-SSIM, you will also
need to [Install SciPy](https://www.scipy.org/install.html).
## Encoding
The Residual GRU network is fully convolutional, but requires the images
height and width in pixels by a multiple of 32. There is an image in this folder
called example.png that is 768x1024 if one is needed for testing. We also
rely on TensorFlow's built in decoding ops, which support only PNG and JPEG at
time of release.
To encode an image, simply run the following command:
`python encoder.py --input_image=/your/image/here.png
--output_codes=output_codes.npz --iteration=15
--model=/path/to/model/residual_gru.pb
`
The iteration parameter specifies the lossy-quality to target for compression.
The quality can be [0-15], where 0 corresponds to a target of 1/8 (bits per
pixel) bpp and every increment results in an additional 1/8 bpp.
| Iteration | BPP | Compression Ratio |
|---: |---: |---: |
|0 | 0.125 | 192:1|
|1 | 0.250 | 96:1|
|2 | 0.375 | 64:1|
|3 | 0.500 | 48:1|
|4 | 0.625 | 38.4:1|
|5 | 0.750 | 32:1|
|6 | 0.875 | 27.4:1|
|7 | 1.000 | 24:1|
|8 | 1.125 | 21.3:1|
|9 | 1.250 | 19.2:1|
|10 | 1.375 | 17.4:1|
|11 | 1.500 | 16:1|
|12 | 1.625 | 14.7:1|
|13 | 1.750 | 13.7:1|
|14 | 1.875 | 12.8:1|
|15 | 2.000 | 12:1|
The output_codes file contains the numpy shape and a flattened, bit-packed
array of the codes. These can be inspected in python by using numpy.load().
## Decoding
After generating codes for an image, the lossy reconstructions for that image
can be done as follows:
`python decoder.py --input_codes=codes.npz --output_directory=/tmp/decoded/
--model=residual_gru.pb`
The output_directory will contain images decoded at each quality level.
## Comparing Similarity
One of our primary metrics for comparing how similar two images are
is MS-SSIM.
To generate these metrics on your images you can run:
`python msssim.py --original_image=/path/to/your/image.png
--compared_image=/tmp/decoded/image_15.png`
## Results
CSV results containing the post-entropy bitrates and MS-SSIM over Kodak can
are available for reference. Each row of the CSV represents each of the Kodak
images in their dataset number (1-24). Each column of the CSV represents each
iteration of the model (1-16).
[Post Entropy Bitrates](https://storage.googleapis.com/compression-ml/residual_gru_results/bitrate.csv)
[MS-SSIM](https://storage.googleapis.com/compression-ml/residual_gru_results/msssim.csv)
## FAQ
#### How do I train my own compression network?
We currently don't provide the code to build and train a compression
graph from scratch.
#### I get an InvalidArgumentError: Incompatible shapes.
This is usually due to the fact that our network only supports images that are
both height and width divisible by 32 pixel. Try padding your images to 32
pixel boundaries.
[Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148)
## Organization
[Image Encoder](image_encoder/): Encoding and decoding images into their binary representation.
[Entropy Coder](entropy_coder/): Lossless compression of the binary representation.
## Contact Info
Model repository maintained by Nick Johnston ([nickj-google](https://github.com/nickj-google)).
# Neural net based entropy coding
This is a [TensorFlow](http://www.tensorflow.org/) model for additional
lossless compression of bitstreams generated by neural net based image
encoders as described in
[https://arxiv.org/abs/1703.10114](https://arxiv.org/abs/1703.10114).
To be more specific, the entropy coder aims at compressing further binary
codes which have a 3D tensor structure with:
* the first two dimensions of the tensors corresponding to the height and
the width of the binary codes,
* the last dimension being the depth of the codes. The last dimension can be
sliced into N groups of K, where each additional group is used by the image
decoder to add more details to the reconstructed image.
The code in this directory only contains the underlying code probability model
but does not perform the actual compression using arithmetic coding.
The code probability model is enough to compute the theoretical compression
ratio.
## Prerequisites
The only software requirements for running the encoder and decoder is having
Tensorflow installed.
You will also need to add the top level source directory of the entropy coder
to your `PYTHONPATH`, for example:
`export PYTHONPATH=${PYTHONPATH}:/tmp/models/compression`
## Training the entropy coder
### Synthetic dataset
If you do not have a training dataset, there is a simple code generative model
that you can use to generate a dataset and play with the entropy coder.
The generative model is located under dataset/gen\_synthetic\_dataset.py. Note
that this simple generative model is not going to give good results on real
images as it is not supposed to be close to the statistics of the binary
representation of encoded images. Consider it as a toy dataset, no more, no
less.
To generate a synthetic dataset with 20000 samples:
`mkdir -p /tmp/dataset`
`python ./dataset/gen_synthetic_dataset.py --dataset_dir=/tmp/dataset/
--count=20000`
Note that the generator has not been optimized at all, generating the synthetic
dataset is currently pretty slow.
### Training
If you just want to play with the entropy coder trainer, here is the command
line that can be used to train the entropy coder on the synthetic dataset:
`mkdir -p /tmp/entropy_coder_train`
`python ./core/entropy_coder_train.py --task=0
--train_dir=/tmp/entropy_coder_train/
--model=progressive
--model_config=./configs/synthetic/model_config.json
--train_config=./configs/synthetic/train_config.json
--input_config=./configs/synthetic/input_config.json
`
Training is configured using 3 files formatted using JSON:
* One file is used to configure the underlying entropy coder model.
Currently, only the *progressive* model is supported.
This model takes 2 mandatory parameters and an optional one:
* `layer_depth`: the number of bits per layer (a.k.a. iteration).
Background: the image decoder takes each layer to add more detail
to the image.
* `layer_count`: the maximum number of layers that should be supported
by the model. This should be equal or greater than the maximum number
of layers in the input binary codes.
* `coded_layer_count`: This can be used to consider only partial codes,
keeping only the first `coded_layer_count` layers and ignoring the
remaining layers. If left empty, the binary codes are left unchanged.
* One file to configure the training, including the learning rate, ...
The meaning of the parameters are pretty straightforward. Note that this
file is only used during training and is not needed during inference.
* One file to specify the input dataset to use during training.
The dataset is formatted using tf.RecordIO.
## Inference: file size after entropy coding.
### Using a synthetic sample
Here is the command line to generate a single synthetic sample formatted
in the same way as what is provided by the image encoder:
`python ./dataset/gen_synthetic_single.py
--sample_filename=/tmp/dataset/sample_0000.npz`
To actually compute the additional compression ratio using the entropy coder
trained in the previous step:
`python ./core/entropy_coder_single.py
--model=progressive
--model_config=./configs/synthetic/model_config.json
--input_codes=/tmp/dataset/sample_0000.npz
--checkpoint=/tmp/entropy_coder_train/model.ckpt-209078`
where the checkpoint number should be adjusted accordingly.
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,10 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Import rnn_cell python ops for backward compatibility."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""Import and register all the entropy coder models."""
raise ImportError("This module is deprecated. Use tf.contrib.rnn instead.")
# pylint: disable=unused-import
from entropy_coder.progressive import progressive
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Basic test of all registered models."""
import tensorflow as tf
# pylint: disable=unused-import
import all_models
# pylint: enable=unused-import
from entropy_coder.model import model_factory
class AllModelsTest(tf.test.TestCase):
def testBuildModelForTraining(self):
factory = model_factory.GetModelRegistry()
model_names = factory.GetAvailableModels()
for m in model_names:
tf.reset_default_graph()
global_step = tf.Variable(tf.zeros([], dtype=tf.int64),
trainable=False,
name='global_step')
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
batch_size = 3
height = 40
width = 20
depth = 5
binary_codes = tf.placeholder(dtype=tf.float32,
shape=[batch_size, height, width, depth])
# Create a model with the default configuration.
print('Creating model: {}'.format(m))
model = factory.CreateModel(m)
model.Initialize(global_step,
optimizer,
model.GetConfigStringForUnitTest())
self.assertTrue(model.loss is None, 'model: {}'.format(m))
self.assertTrue(model.train_op is None, 'model: {}'.format(m))
self.assertTrue(model.average_code_length is None, 'model: {}'.format(m))
# Build the Tensorflow graph corresponding to the model.
model.BuildGraph(binary_codes)
self.assertTrue(model.loss is not None, 'model: {}'.format(m))
self.assertTrue(model.average_code_length is not None,
'model: {}'.format(m))
if model.train_op is None:
print('Model {} is not trainable'.format(m))
if __name__ == '__main__':
tf.test.main()
{
"data": "/tmp/dataset/synthetic_dataset",
"unique_code_size": true
}
{
"batch_size": 4,
"learning_rate": 0.1,
"decay_rate": 0.9,
"samples_per_decay": 20000
}
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Load binary codes stored as tf.Example in a TFRecord table."""
import tensorflow as tf
def ReadFirstCode(dataset):
"""Read the first example from a binary code RecordIO table."""
for record in tf.python_io.tf_record_iterator(dataset):
tf_example = tf.train.Example()
tf_example.ParseFromString(record)
break
return tf_example
def LoadBinaryCode(input_config, batch_size):
"""Load a batch of binary codes from a tf.Example dataset.
Args:
input_config: An InputConfig proto containing the input configuration.
batch_size: Output batch size of examples.
Returns:
A batched tensor of binary codes.
"""
data = input_config.data
# TODO: Possibly use multiple files (instead of just one).
file_list = [data]
filename_queue = tf.train.string_input_producer(file_list,
capacity=4)
reader = tf.TFRecordReader()
_, values = reader.read(filename_queue)
serialized_example = tf.reshape(values, shape=[1])
serialized_features = {
'code_shape': tf.FixedLenFeature([3],
dtype=tf.int64),
'code': tf.VarLenFeature(tf.float32),
}
example = tf.parse_example(serialized_example, serialized_features)
# 3D shape: height x width x binary_code_depth
z = example['code_shape']
code_shape = tf.reshape(tf.cast(z, tf.int32), [3])
# Un-flatten the binary codes.
code = tf.reshape(tf.sparse_tensor_to_dense(example['code']), code_shape)
queue_size = 10
queue = tf.PaddingFIFOQueue(
queue_size + 3 * batch_size,
dtypes=[code.dtype],
shapes=[[None, None, None]])
enqueue_op = queue.enqueue([code])
dequeue_code = queue.dequeue_many(batch_size)
queue_runner = tf.train.queue_runner.QueueRunner(queue, [enqueue_op])
tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, queue_runner)
return dequeue_code
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions used in both train and inference."""
import json
import os.path
import tensorflow as tf
def GetConfigString(config_file):
config_string = ''
if config_file is not None:
config_string = open(config_file).read()
return config_string
class InputConfig(object):
def __init__(self, config_string):
config = json.loads(config_string)
self.data = config["data"]
self.unique_code_size = config["unique_code_size"]
class TrainConfig(object):
def __init__(self, config_string):
config = json.loads(config_string)
self.batch_size = config["batch_size"]
self.learning_rate = config["learning_rate"]
self.decay_rate = config["decay_rate"]
self.samples_per_decay = config["samples_per_decay"]
def SaveConfig(directory, filename, config_string):
path = os.path.join(directory, filename)
with tf.gfile.Open(path, mode='w') as f:
f.write(config_string)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Compute the additional compression ratio after entropy coding."""
import io
import os
import numpy as np
import tensorflow as tf
import config_helper
# pylint: disable=unused-import
from entropy_coder.all_models import all_models
# pylint: enable=unused-import
from entropy_coder.model import model_factory
# Checkpoint used to restore the model parameters.
tf.app.flags.DEFINE_string('checkpoint', None,
"""Model checkpoint.""")
# Model selection and configuration.
tf.app.flags.DEFINE_string('model', None, """Underlying encoder model.""")
tf.app.flags.DEFINE_string('model_config', None,
"""Model config protobuf given as text file.""")
# File holding the binary codes.
tf.flags.DEFINE_string('input_codes', None, 'Location of binary code file.')
FLAGS = tf.flags.FLAGS
def main(_):
if (FLAGS.input_codes is None or FLAGS.model is None):
print ('\nUsage: python entropy_coder_single.py --model=progressive '
'--model_config=model_config.json'
'--iteration=15\n\n')
return
#if FLAGS.iteration < -1 or FLAGS.iteration > 15:
# print ('\n--iteration must be between 0 and 15 inclusive, or -1 to infer '
# 'from file.\n')
# return
#iteration = FLAGS.iteration
if not tf.gfile.Exists(FLAGS.input_codes):
print '\nInput codes not found.\n'
return
with tf.gfile.FastGFile(FLAGS.input_codes, 'rb') as code_file:
contents = code_file.read()
loaded_codes = np.load(io.BytesIO(contents))
assert ['codes', 'shape'] not in loaded_codes.files
loaded_shape = loaded_codes['shape']
loaded_array = loaded_codes['codes']
# Unpack and recover code shapes.
unpacked_codes = np.reshape(np.unpackbits(loaded_array)
[:np.prod(loaded_shape)],
loaded_shape)
numpy_int_codes = unpacked_codes.transpose([1, 2, 3, 0, 4])
numpy_int_codes = numpy_int_codes.reshape([numpy_int_codes.shape[0],
numpy_int_codes.shape[1],
numpy_int_codes.shape[2],
-1])
numpy_codes = numpy_int_codes.astype(np.float32) * 2.0 - 1.0
with tf.Graph().as_default() as graph:
# TF tensor to hold the binary codes to losslessly compress.
batch_size = 1
codes = tf.placeholder(tf.float32, shape=numpy_codes.shape)
# Create the entropy coder model.
global_step = None
optimizer = None
model = model_factory.GetModelRegistry().CreateModel(FLAGS.model)
model_config_string = config_helper.GetConfigString(FLAGS.model_config)
model.Initialize(global_step, optimizer, model_config_string)
model.BuildGraph(codes)
saver = tf.train.Saver(sharded=True, keep_checkpoint_every_n_hours=12.0)
with tf.Session(graph=graph) as sess:
# Initialize local variables.
sess.run(tf.local_variables_initializer())
# Restore model variables.
saver.restore(sess, FLAGS.checkpoint)
tf_tensors = {
'code_length': model.average_code_length
}
feed_dict = {codes: numpy_codes}
np_tensors = sess.run(tf_tensors, feed_dict=feed_dict)
print('Additional compression ratio: {}'.format(
np_tensors['code_length']))
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train an entropy coder model."""
import time
import tensorflow as tf
import code_loader
import config_helper
# pylint: disable=unused-import
from entropy_coder.all_models import all_models
# pylint: enable=unused-import
from entropy_coder.model import model_factory
FLAGS = tf.app.flags.FLAGS
# Hardware resources configuration.
tf.app.flags.DEFINE_string('master', '',
"""Name of the TensorFlow master to use.""")
tf.app.flags.DEFINE_string('train_dir', None,
"""Directory where to write event logs.""")
tf.app.flags.DEFINE_integer('task', None,
"""Task id of the replica running the training.""")
tf.app.flags.DEFINE_integer('ps_tasks', 0, """Number of tasks in the ps job.
If 0 no ps job is used.""")
# Model selection and configuration.
tf.app.flags.DEFINE_string('model', None, """Underlying encoder model.""")
tf.app.flags.DEFINE_string('model_config', None,
"""Model config protobuf given as text file.""")
# Training data and parameters configuration.
tf.app.flags.DEFINE_string('input_config', None,
"""Path to the training input config file.""")
tf.app.flags.DEFINE_string('train_config', None,
"""Path to the training experiment config file.""")
def train():
if FLAGS.train_dir is None:
raise ValueError('Parameter train_dir must be provided')
if FLAGS.task is None:
raise ValueError('Parameter task must be provided')
if FLAGS.model is None:
raise ValueError('Parameter model must be provided')
input_config_string = config_helper.GetConfigString(FLAGS.input_config)
input_config = config_helper.InputConfig(input_config_string)
# Training parameters.
train_config_string = config_helper.GetConfigString(FLAGS.train_config)
train_config = config_helper.TrainConfig(train_config_string)
batch_size = train_config.batch_size
initial_learning_rate = train_config.learning_rate
decay_rate = train_config.decay_rate
samples_per_decay = train_config.samples_per_decay
# Parameters for learning-rate decay.
# The formula is decay_rate ** floor(steps / decay_steps).
decay_steps = samples_per_decay / batch_size
decay_steps = max(decay_steps, 1)
first_code = code_loader.ReadFirstCode(input_config.data)
first_code_height = (
first_code.features.feature['code_shape'].int64_list.value[0])
first_code_width = (
first_code.features.feature['code_shape'].int64_list.value[1])
max_bit_depth = (
first_code.features.feature['code_shape'].int64_list.value[2])
print('Maximum code depth: {}'.format(max_bit_depth))
with tf.Graph().as_default():
ps_ops = ["Variable", "VariableV2", "AutoReloadVariable", "VarHandleOp"]
with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks,
ps_ops=ps_ops)):
codes = code_loader.LoadBinaryCode(
input_config=input_config,
batch_size=batch_size)
if input_config.unique_code_size:
print('Input code size: {} x {}'.format(first_code_height,
first_code_width))
codes.set_shape(
[batch_size, first_code_height, first_code_width, max_bit_depth])
else:
codes.set_shape([batch_size, None, None, max_bit_depth])
codes_effective_shape = tf.shape(codes)
global_step = tf.contrib.framework.create_global_step()
# Apply learning-rate decay.
learning_rate = tf.train.exponential_decay(
learning_rate=initial_learning_rate,
global_step=global_step,
decay_steps=decay_steps,
decay_rate=decay_rate,
staircase=True)
tf.summary.scalar('Learning Rate', learning_rate)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
epsilon=1.0)
# Create the entropy coder model.
model = model_factory.GetModelRegistry().CreateModel(FLAGS.model)
model_config_string = config_helper.GetConfigString(FLAGS.model_config)
model.Initialize(global_step, optimizer, model_config_string)
model.BuildGraph(codes)
summary_op = tf.summary.merge_all()
# Verify that the model can actually be trained.
if model.train_op is None:
raise ValueError('Input model {} is not trainable'.format(FLAGS.model))
# We disable the summary thread run by Supervisor class by passing
# summary_op=None. We still pass save_summaries_secs because it is used by
# the global step counter thread.
is_chief = (FLAGS.task == 0)
sv = tf.train.Supervisor(logdir=FLAGS.train_dir,
is_chief=is_chief,
global_step=global_step,
# saver=model.saver,
summary_op=None,
save_summaries_secs=120,
save_model_secs=600,
recovery_wait_secs=30)
sess = sv.PrepareSession(FLAGS.master)
sv.StartQueueRunners(sess)
step = sess.run(global_step)
print('Trainer initial step: {}.'.format(step))
# Once everything has been setup properly, save the configs.
if is_chief:
config_helper.SaveConfig(FLAGS.train_dir, 'input_config.json',
input_config_string)
config_helper.SaveConfig(FLAGS.train_dir, 'model_config.json',
model_config_string)
config_helper.SaveConfig(FLAGS.train_dir, 'train_config.json',
train_config_string)
# Train the model.
next_summary_time = time.time()
while not sv.ShouldStop():
feed_dict = None
# Once in a while, update the summaries on the chief worker.
if is_chief and next_summary_time < time.time():
summary_str = sess.run(summary_op, feed_dict=feed_dict)
sv.SummaryComputed(sess, summary_str)
next_summary_time = time.time() + sv.save_summaries_secs
else:
tf_tensors = {
'train': model.train_op,
'code_length': model.average_code_length
}
np_tensors = sess.run(tf_tensors, feed_dict=feed_dict)
print np_tensors['code_length']
sv.Stop()
def main(argv=None): # pylint: disable=unused-argument
train()
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generate a synthetic dataset."""
import os
import numpy as np
import tensorflow as tf
import synthetic_model
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'dataset_dir', None,
"""Directory where to write the dataset and the configs.""")
tf.app.flags.DEFINE_integer(
'count', 1000,
"""Number of samples to generate.""")
def int64_feature(values):
"""Returns a TF-Feature of int64s.
Args:
values: A scalar or list of values.
Returns:
A TF-Feature.
"""
if not isinstance(values, (tuple, list)):
values = [values]
return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
def float_feature(values):
"""Returns a TF-Feature of floats.
Args:
values: A scalar of list of values.
Returns:
A TF-Feature.
"""
if not isinstance(values, (tuple, list)):
values = [values]
return tf.train.Feature(float_list=tf.train.FloatList(value=values))
def AddToTFRecord(code, tfrecord_writer):
example = tf.train.Example(features=tf.train.Features(feature={
'code_shape': int64_feature(code.shape),
'code': float_feature(code.flatten().tolist()),
}))
tfrecord_writer.write(example.SerializeToString())
def GenerateDataset(filename, count, code_shape):
with tf.python_io.TFRecordWriter(filename) as tfrecord_writer:
for _ in xrange(count):
code = synthetic_model.GenerateSingleCode(code_shape)
# Convert {0,1} codes to {-1,+1} codes.
code = 2.0 * code - 1.0
AddToTFRecord(code, tfrecord_writer)
def main(argv=None): # pylint: disable=unused-argument
GenerateDataset(os.path.join(FLAGS.dataset_dir + '/synthetic_dataset'),
FLAGS.count,
[35, 48, 8])
if __name__ == '__main__':
tf.app.run()
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generate a single synthetic sample."""
import io
import os
import numpy as np
import tensorflow as tf
import synthetic_model
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
'sample_filename', None,
"""Output file to store the generated binary code.""")
def GenerateSample(filename, code_shape, layer_depth):
# {0, +1} binary codes.
# No conversion since the output file is expected to store
# codes using {0, +1} codes (and not {-1, +1}).
code = synthetic_model.GenerateSingleCode(code_shape)
code = np.round(code)
# Reformat the code so as to be compatible with what is generated
# by the image encoder.
# The image encoder generates a tensor of size:
# iteration_count x batch_size x height x width x iteration_depth.
# Here: batch_size = 1
if code_shape[-1] % layer_depth != 0:
raise ValueError('Number of layers is not an integer')
height = code_shape[0]
width = code_shape[1]
code = code.reshape([1, height, width, -1, layer_depth])
code = np.transpose(code, [3, 0, 1, 2, 4])
int_codes = code.astype(np.int8)
exported_codes = np.packbits(int_codes.reshape(-1))
output = io.BytesIO()
np.savez_compressed(output, shape=int_codes.shape, codes=exported_codes)
with tf.gfile.FastGFile(filename, 'wb') as code_file:
code_file.write(output.getvalue())
def main(argv=None): # pylint: disable=unused-argument
# Note: the height and the width is different from the training dataset.
# The main purpose is to show that the entropy coder model is fully
# convolutional and can be used on any image size.
layer_depth = 2
GenerateSample(FLAGS.sample_filename, [31, 36, 8], layer_depth)
if __name__ == '__main__':
tf.app.run()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment