Unverified commit 80178fc6 authored by Mark Omernick, committed by GitHub

Merge pull request #4153 from terryykoo/master

Export @195097388.
parents a84e1ef9 edea2b67
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorFlow ops for maximum spanning tree problems."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import dragnn.python.load_mst_cc_impl
from dragnn.mst.ops import gen_mst_ops
from dragnn.python import digraph_ops
from syntaxnet.util import check
# Re-export the generated MST op.
maximum_spanning_tree = gen_mst_ops.maximum_spanning_tree
@tf.RegisterGradient("MaximumSpanningTree")
def maximum_spanning_tree_gradient(mst_op, d_loss_d_max_scores, *_):
"""Returns a subgradient of the MaximumSpanningTree op.
Note that MaximumSpanningTree is only differentiable w.r.t. its |scores| input
and its |max_scores| output.
Args:
mst_op: The MaximumSpanningTree op being differentiated.
d_loss_d_max_scores: [B] vector where entry b is the gradient of the network
loss w.r.t. entry b of the |max_scores| output of the
|mst_op|.
*_: The gradients w.r.t. the other outputs; ignored.
Returns:
1. None, since the op is not differentiable w.r.t. its |num_nodes| input.
2. [B,M,M] tensor where entry b,t,s is a subgradient of the network loss
w.r.t. entry b,t,s of the |scores| input, with the same dtype as
|d_loss_d_max_scores|.
"""
dtype = d_loss_d_max_scores.dtype.base_dtype
check.NotNone(dtype)
argmax_sources_bxm = mst_op.outputs[1]
input_dim = tf.shape(argmax_sources_bxm)[1] # M in the docstring
# The one-hot argmax is a subgradient of max. Convert the batch of maximal
# spanning trees into 0/1 indicators, then scale them by the relevant output
# gradients from |d_loss_d_max_scores|. Note that |d_loss_d_max_scores| must
# be reshaped in order for it to broadcast across the batch dimension.
indicators_bxmxm = tf.one_hot(argmax_sources_bxm, input_dim, dtype=dtype)
d_loss_d_max_scores_bx1 = tf.expand_dims(d_loss_d_max_scores, -1)
d_loss_d_max_scores_bx1x1 = tf.expand_dims(d_loss_d_max_scores_bx1, -1)
d_loss_d_scores_bxmxm = indicators_bxmxm * d_loss_d_max_scores_bx1x1
return None, d_loss_d_scores_bxmxm
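# Worked example (illustrative; the values match testMaximumSpanningTreeGradient
# in mst_ops_test.py): for a batch item with argmax_sources = [3, 0, 1, 3] and
# d_loss_d_max_scores = 3, the one-hot indicators select the arcs of the
# maximal tree, so the returned subgradient for that item is
#   [[0, 0, 0, 3],
#    [3, 0, 0, 0],
#    [0, 3, 0, 0],
#    [0, 0, 0, 3]]
# i.e., row t carries the upstream gradient in the column of node t's source.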
def log_partition_function(num_nodes,
scores,
forest=False,
max_dynamic_range=None):
r"""Returns the log of the sum-of-product of spanning trees or forests.
Computing the sum-of-product in the log domain reduces the chance of overflow
or underflow, and ML techniques (e.g., CRF loss functions) typically require
the log partition function anyway. For similar reasons, the scores input is
assumed to be specified in the log domain.
The partition function is calculated via application of the Matrix-Tree
theorem; see the following for details:
https://en.wikipedia.org/wiki/Kirchhoff%27s_theorem
http://www.aclweb.org/anthology/D/D07/D07-1015.pdf
Computing the gradient of the log partition function requires inverting the
Laplacian matrix. Numerical issues may occur if the Laplacian is singular or
nearly-so. (Intuitively, the Laplacian will be close to singular when the
input scores strongly favor invalid structures such as cycles). In the EMNLP
paper, we alleviated the numerical issues by clipping the difference between
the minimum and maximum score for each node to 20 (in the log domain). The
|max_dynamic_range| argument can be used for this purpose.
TODO(googleuser): Try improving the condition number of the Laplacian matrix
directly, instead of using the indirect approach above. For example, one
could add c*I to the Laplacian (i.e., Tikhonov regularization).
Args:
num_nodes: [B] vector of graph sizes per batch item.
scores: [B,M,M] tensor of padded batched arc and root scores, in the format
used by the maximum_spanning_tree() op. Padding values must be finite.
forest: If true, sum over spanning forests instead of trees.
max_dynamic_range: If specified, incoming scores for each node are clipped
to at most this far from the maximum such score (in the log domain).
Returns:
[B] vector Z of log partition function values, where
Z[b] = log(
\sum_{tree spanning batch item b}
score(root_of(tree)) \prod_{arc in tree} score(arc))
"""
orig_dtype = scores.dtype.base_dtype
scores_bxmxm = tf.to_double(scores) # use doubles to reduce under/overflow
shape_bxmxm = tf.shape(scores_bxmxm)
batch_size = shape_bxmxm[0]
max_nodes = shape_bxmxm[1]
total_nodes = batch_size * max_nodes
# To eliminate overflow, we locally normalize the scores. Specifically, for
# each node we divide its incoming arc scores and root selection score by the
# maximum such score. Since each node in a tree must select exactly one of
# these scores (i.e., it is either a root or has exactly one incoming arc),
# the local normalization factors are identical for all trees and can thus be
# factored out of the sum over trees.
#
# More concretely, we find the maximum per node, divide all scores for that
# node by the maximum, and then find the partition function of the normalized
# scores. Then we recover the un-normalized partition function by multiplying
# the per-node maxima back in. This final step is performed in the log domain
# to avoid overflow.
#
# Note that underflow is still possible, but unlikely as long as the scores
# are close to feasible (i.e., there is not too much mass on non-trees). The
# |max_dynamic_range| argument can be used to mitigate this.
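# Worked equation (illustrative): if m_t is the maximum incoming score of node
# t and src_T(t) is the source that tree T selects for node t, then
#   log score(T) = \sum_t s[t, src_T(t)]
#                = \sum_t m_t + \sum_t (s[t, src_T(t)] - m_t)
# so the first sum factors out of log \sum_T score(T), and the remaining sum
# involves only locally-normalized (non-positive) scores.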
# Finding the maximum incoming score is difficult, because the batch padding
# may contain arbitrary values. We restrict the maximization to valid arcs
# using tf.unsorted_segment_max() with a specially-constructed set of IDs.
_, valid_tokens_bxm = digraph_ops.ValidArcAndTokenMasks(
num_nodes, max_nodes, dtype=tf.int32)
# Create a tensor of "target IDs". In each row of each sub-matrix, the
# positions of valid source tokens are filled with the 1-origin index of that
# row in the entire batch, and zero elsewhere. For example, given a batch
# with num_nodes=[2, 3] we might have
# [[[1, 1, 0],
# [2, 2, 0],
# [3, 3, 0]],
# [[4, 4, 4],
# [5, 5, 5],
# [6, 6, 6]]]
#
# TODO(googleuser): The dynamic masking is pretty awkward. Find an op that does
# this (I looked, but maybe not hard enough), or write a custom op for this.
valid_tokens_bx1xm = tf.expand_dims(valid_tokens_bxm, 1)
valid_sources_bxmxm = tf.tile(valid_tokens_bx1xm, [1, max_nodes, 1])
sequence_bm = 1 + tf.range(total_nodes, dtype=tf.int32)
sequence_bxmx1 = tf.reshape(sequence_bm, [batch_size, max_nodes, 1])
target_ids_bxmxm = valid_sources_bxmxm * sequence_bxmx1
max_scores_bm1 = tf.unsorted_segment_max(scores_bxmxm, target_ids_bxmxm,
total_nodes + 1)
max_scores_bm = max_scores_bm1[1:] # ID 0 corresponds to padding
# Similar to above, we need to sum over the valid tokens. We analogously use
# tf.unsorted_segment_sum() with a specially-constructed set of "batch IDs".
sequence_b = 1 + tf.range(batch_size, dtype=tf.int32)
sequence_bx1 = tf.expand_dims(sequence_b, 1)
batch_ids_bxm = valid_tokens_bxm * sequence_bx1
batch_ids_bm = tf.reshape(batch_ids_bxm, [-1])
log_normalization_factor_b1 = tf.unsorted_segment_sum(
max_scores_bm, batch_ids_bm, batch_size + 1)
log_normalization_factor_b = log_normalization_factor_b1[1:]
# Locally-normalize and optionally clip the scores.
max_scores_bxmx1 = tf.reshape(max_scores_bm, [batch_size, max_nodes, 1])
scores_bxmxm -= max_scores_bxmx1
if max_dynamic_range is not None:
# After normalization, the scores are non-positive with max=0, so the
# |max_dynamic_range| can be applied directly.
#
# PyLint thinks "-max_dynamic_range" is invalid because it defaults to None.
scores_bxmxm = tf.maximum(scores_bxmxm, -max_dynamic_range)
scores_bxmxm = tf.exp(scores_bxmxm)
# Apply the Matrix-Tree theorem.
exp_normalized_laplacian_bxmxm = digraph_ops.LaplacianMatrix(
num_nodes, scores_bxmxm, forest=forest)
log_normalized_partition_function_b = tf.log(
tf.matrix_determinant(exp_normalized_laplacian_bxmxm))
# Reapply the normalization factor that was divided out.
log_partition_function_b = (
log_normalized_partition_function_b + log_normalization_factor_b)
return tf.cast(log_partition_function_b, orig_dtype)
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for maximum spanning tree ops."""
import math
import numpy as np
import tensorflow as tf
from dragnn.python import mst_ops
class MstOpsTest(tf.test.TestCase):
"""Testing rig."""
def testMaximumSpanningTree(self):
"""Tests that the MST op can recover a simple tree."""
with self.test_session() as session:
# The first batch element prefers 3 as root, then 3->0->1->2, for a total
# score of 4+2+1=7. The second batch element is smaller and has reversed
# scores, so 0 is root and 0->2->1.
num_nodes = tf.constant([4, 3], tf.int32)
scores = tf.constant([[[0, 0, 0, 0],
[1, 0, 0, 0],
[1, 2, 0, 0],
[1, 2, 3, 4]],
[[4, 3, 2, 9],
[0, 0, 2, 9],
[0, 0, 0, 9],
[9, 9, 9, 9]]], tf.int32) # pyformat: disable
mst_outputs = mst_ops.maximum_spanning_tree(
num_nodes, scores, forest=False)
max_scores, argmax_sources = session.run(mst_outputs)
tf.logging.info('\nmax_scores=%s\nargmax_sources=\n%s', max_scores,
argmax_sources)
self.assertAllEqual(max_scores, [7, 6])
self.assertAllEqual(argmax_sources, [[3, 0, 1, 3],
[0, 2, 0, -1]]) # pyformat: disable
def testMaximumSpanningTreeGradient(self):
"""Tests the MST max score gradient."""
with self.test_session() as session:
num_nodes = tf.constant([4, 3], tf.int32)
scores = tf.constant([[[0, 0, 0, 0],
[1, 0, 0, 0],
[1, 2, 0, 0],
[1, 2, 3, 4]],
[[4, 3, 2, 9],
[0, 0, 2, 9],
[0, 0, 0, 9],
[9, 9, 9, 9]]], tf.int32) # pyformat: disable
mst_ops.maximum_spanning_tree(num_nodes, scores, forest=False, name='MST')
mst_op = session.graph.get_operation_by_name('MST')
d_loss_d_max_scores = tf.constant([3, 7], tf.float32)
d_loss_d_num_nodes, d_loss_d_scores = (
mst_ops.maximum_spanning_tree_gradient(mst_op, d_loss_d_max_scores))
# The num_nodes input is non-differentiable.
self.assertTrue(d_loss_d_num_nodes is None)
tf.logging.info('\nd_loss_d_scores=\n%s', d_loss_d_scores.eval())
self.assertAllEqual(d_loss_d_scores.eval(),
[[[0, 0, 0, 3],
[3, 0, 0, 0],
[0, 3, 0, 0],
[0, 0, 0, 3]],
[[7, 0, 0, 0],
[0, 0, 7, 0],
[7, 0, 0, 0],
[0, 0, 0, 0]]]) # pyformat: disable
def testMaximumSpanningTreeGradientError(self):
"""Numerically validates the max score gradient."""
with self.test_session():
# The maximum-spanning-tree-score function, as a max of linear functions,
# is piecewise-linear (i.e., faceted). The numerical gradient estimate
# may be inaccurate if the epsilon ball used for the estimate crosses an
# edge from one facet to another. To avoid spurious errors, we manually
# set the sample point so the epsilon ball fits in a facet. Or in other
# words, we set the scores so there is a non-trivial margin between the
# best and second-best trees.
scores_raw = [[[0, 0, 0, 0],
[1, 0, 0, 0],
[1, 2, 0, 0],
[1, 2, 3, 4]],
[[4, 3, 2, 9],
[0, 0, 2, 9],
[0, 0, 0, 9],
[9, 9, 9, 9]]] # pyformat: disable
# Use 64-bit floats to reduce numerical error.
scores = tf.constant(scores_raw, tf.float64)
init_scores = np.array(scores_raw)
num_nodes = tf.constant([4, 3], tf.int32)
max_scores = mst_ops.maximum_spanning_tree(
num_nodes, scores, forest=False)[0]
gradient_error = tf.test.compute_gradient_error(
scores, [2, 4, 4], max_scores, [2], init_scores)
tf.logging.info('gradient_error=%s', gradient_error)
self.assertLessEqual(gradient_error, 1e-7)
def testLogPartitionFunctionOneTree(self):
"""Tests the log partition function with one feasible tree with score 1."""
with self.test_session():
for forest in [False, True]:
# Each score matrix supports exactly one tree with score=1*1*1, and
# the rest with score=0. Thus the log partition function will be 1.0
# in each case.
pad = 12345.6
scores = tf.constant([[[ 1, pad, pad],
[pad, pad, pad],
[pad, pad, pad]],
[[ 1, 0, pad],
[ 1, 0, pad],
[pad, pad, pad]],
[[ 1, 0, 0],
[ 1, 0, 0],
[ 0, 1, 0]]],
tf.float64) # pyformat: disable
scores = tf.log(scores)
num_nodes = tf.constant([1, 2, 3], tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
self.assertAlmostEqual(tf.exp(log_partition_functions[0]).eval(), 1.0)
self.assertAlmostEqual(tf.exp(log_partition_functions[1]).eval(), 1.0)
self.assertAlmostEqual(tf.exp(log_partition_functions[2]).eval(), 1.0)
def testLogPartitionFunctionOneTreeScaled(self):
"""Tests the log partition function with one feasible tree."""
with self.test_session():
for forest in [False, True]:
# Each score matrix supports exactly one tree with varying score, and
# the rest with score=0. Thus the log partition function will equal
# the score of that single tree in each case.
pad = 12345.6
scores = tf.constant([[[ 2, pad, pad],
[pad, pad, pad],
[pad, pad, pad]],
[[ 3, 0, pad],
[ 5, 0, pad],
[pad, pad, pad]],
[[ 7, 0, 0],
[ 11, 0, 0],
[ 0, 13, 0]]],
tf.float64) # pyformat: disable
scores = tf.log(scores)
num_nodes = tf.constant([1, 2, 3], tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
self.assertAlmostEqual(tf.exp(log_partition_functions[0]).eval(), 2.0)
self.assertAlmostEqual(
tf.exp(log_partition_functions[1]).eval(), 3.0 * 5.0)
self.assertAlmostEqual(
tf.exp(log_partition_functions[2]).eval(), 7.0 * 11.0 * 13.0)
def testLogPartitionFunctionTwoTreesScaled(self):
"""Tests the log partition function with two feasible trees."""
with self.test_session():
for forest in [False, True]:
# Each score matrix supports exactly two trees with varying score, and
# the rest with score=0. Thus the log partition function will equal
# the sum of scores of those two trees in each case.
pad = 12345.6
scores = tf.constant([[[ 2, 0, 0, pad],
[ 3, 0, 0, pad],
[ 5, 7, 0, pad],
[pad, pad, pad, pad]],
[[ 0, 11, 0, 13],
[ 0, 17, 0, 0],
[ 0, 19, 0, 0],
[ 0, 23, 0, 0]]],
tf.float64) # pyformat: disable
scores = tf.log(scores)
num_nodes = tf.constant([3, 4], tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
self.assertAlmostEqual(
tf.exp(log_partition_functions[0]).eval(),
2.0 * 3.0 * 5.0 + 2.0 * 3.0 * 7.0)
self.assertAlmostEqual(
tf.exp(log_partition_functions[1]).eval(),
11.0 * 17.0 * 19.0 * 23.0 + 13.0 * 17.0 * 19.0 * 23.0)
def testLogPartitionFunctionInfeasible(self):
"""Tests the log partition function on infeasible scores."""
with self.test_session():
for forest in [False, True]:
# The scores form cycles of various sizes. Note that one can compute
# the partition function for infeasible scores---it's the gradient that
# may be impacted by numerical error.
pad = 12345.6
scores = tf.constant([[[ 0, 1, pad, pad],
[ 1, 0, pad, pad],
[pad, pad, pad, pad],
[pad, pad, pad, pad]],
[[ 0, 1, 0, pad],
[ 0, 0, 1, pad],
[ 1, 0, 0, pad],
[pad, pad, pad, pad]],
[[ 0, 1, 0, 0],
[ 0, 0, 1, 0],
[ 0, 0, 0, 1],
[ 1, 0, 0, 0]]],
tf.float64) # pyformat: disable
scores = tf.log(scores)
num_nodes = tf.constant([2, 3, 4], tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
self.assertAlmostEqual(tf.exp(log_partition_functions[0]).eval(), 0.0)
self.assertAlmostEqual(tf.exp(log_partition_functions[1]).eval(), 0.0)
self.assertAlmostEqual(tf.exp(log_partition_functions[2]).eval(), 0.0)
def testLogPartitionFunctionAllTrees(self):
"""Tests the log partition function with all trees feasible."""
with self.test_session():
for forest in [False, True]:
# The scores allow all trees. Using Cayley's formula, the
# number of directed spanning trees and forests in a complete
# digraph of n nodes is n^{n-1} and (n+1)^{n-1}, respectively.
# https://en.wikipedia.org/wiki/Cayley%27s_formula
scores = tf.zeros([10, 10, 10], tf.float64) # = 1 in log domain
num_nodes = tf.range(1, 11, dtype=tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
base_offset = 1 if forest else 0 # n+1 for forest, n for tree
for size in range(1, 11):
self.assertAlmostEqual(log_partition_functions[size - 1].eval(),
(size - 1) * math.log(size + base_offset))
def testLogPartitionFunctionWithVeryHighValues(self):
"""Tests the overflow protection in the log partition function."""
with self.test_session():
for forest in [False, True]:
# Set the scores to very high values to test overflow protection.
scores = 1000 * tf.ones([10, 10, 10], tf.float64)
num_nodes = tf.range(1, 11, dtype=tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
base_offset = 1 if forest else 0 # n+1 for forest, n for tree
for size in range(1, 11):
self.assertAlmostEqual(
log_partition_functions[size - 1].eval(),
(size - 1) * math.log(size + base_offset) + size * 1000)
def testLogPartitionFunctionWithVeryLowValues(self):
"""Tests the underflow protection in the log partition function."""
with self.test_session():
for forest in [False, True]:
# Set the scores to very low values to test underflow protection.
scores = -1000 * tf.ones([10, 10, 10], tf.float64)
num_nodes = tf.range(1, 11, dtype=tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
base_offset = 1 if forest else 0 # n+1 for forest, n for tree
for size in range(1, 11):
self.assertAlmostEqual(
log_partition_functions[size - 1].eval(),
(size - 1) * math.log(size + base_offset) - size * 1000)
def testLogPartitionFunctionGradientError(self):
"""Validates the log partition function gradient."""
with self.test_session():
for forest in [False, True]:
# To avoid numerical issues, provide score matrices that are weighted
# towards feasible trees or forests.
scores_raw = [[[0, 0, 0, 0],
[1, 0, 0, 0],
[1, 2, 0, 0],
[1, 2, 3, 4]],
[[4, 3, 2, 9],
[0, 0, 2, 9],
[0, 0, 0, 9],
[9, 9, 9, 9]]] # pyformat: disable
scores = tf.constant(scores_raw, tf.float64)
init_scores = np.array(scores_raw)
num_nodes = tf.constant([4, 3], tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
gradient_error = tf.test.compute_gradient_error(
scores, [2, 4, 4], log_partition_functions, [2], init_scores)
tf.logging.info('forest=%s gradient_error=%s', forest, gradient_error)
self.assertLessEqual(gradient_error, 1e-7)
def testLogPartitionFunctionGradientErrorFailsIfInfeasible(self):
"""Tests that the partition function gradient fails on infeasible scores."""
with self.test_session():
for forest in [False, True]:
# The scores form cycles of various sizes.
pad = 12345.6
scores_raw = [[[ 0, 1, pad, pad],
[ 1, 0, pad, pad],
[pad, pad, pad, pad],
[pad, pad, pad, pad]],
[[ 0, 1, 0, pad],
[ 0, 0, 1, pad],
[ 1, 0, 0, pad],
[pad, pad, pad, pad]],
[[ 0, 1, 0, 0],
[ 0, 0, 1, 0],
[ 0, 0, 0, 1],
[ 1, 0, 0, 0]]] # pyformat: disable
scores = tf.log(scores_raw)
init_scores = np.log(np.array(scores_raw))
num_nodes = tf.constant([2, 3, 4], tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest)
with self.assertRaises(Exception):
tf.test.compute_gradient_error(
scores, [3, 4, 4], log_partition_functions, [3], init_scores)
def testLogPartitionFunctionGradientErrorOkIfInfeasibleWithClipping(self):
"""Tests that the log partition function gradient is OK after clipping."""
with self.test_session():
for forest in [False, True]:
# The scores form cycles of various sizes.
pad = 12345.6
scores_raw = [[[ 0, 1, pad, pad],
[ 1, 0, pad, pad],
[pad, pad, pad, pad],
[pad, pad, pad, pad]],
[[ 0, 1, 0, pad],
[ 0, 0, 1, pad],
[ 1, 0, 0, pad],
[pad, pad, pad, pad]],
[[ 0, 1, 0, 0],
[ 0, 0, 1, 0],
[ 0, 0, 0, 1],
[ 1, 0, 0, 0]]] # pyformat: disable
scores = tf.log(scores_raw)
init_scores = np.log(np.array(scores_raw))
num_nodes = tf.constant([2, 3, 4], tf.int32)
log_partition_functions = mst_ops.log_partition_function(
num_nodes, scores, forest=forest, max_dynamic_range=10)
gradient_error = tf.test.compute_gradient_error(
scores, [3, 4, 4], log_partition_functions, [3], init_scores)
tf.logging.info('forest=%s gradient_error=%s', forest, gradient_error)
# Clipping makes the gradient computable, but substantial numerical error
# remains, hence the loose tolerance.
self.assertLessEqual(gradient_error, 1e-3)
if __name__ == '__main__':
tf.test.main()
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DRAGNN wrappers for the MST solver."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from dragnn.python import mst_ops
from dragnn.python import network_units
from syntaxnet.util import check
class MstSolverNetwork(network_units.NetworkUnitInterface):
"""Network unit that performs MST prediction with structured loss.
Parameters:
forest: If true, solve for a spanning forest instead of a spanning tree.
loss: The loss function for training. Select from
softmax: Default unstructured softmax (prediction is still structured).
m3n: Max-Margin Markov Networks loss.
crf_max_dynamic_range: Max dynamic range for the log partition function.
Links:
lengths: [B, 1] sequence lengths per batch item.
scores: [B * N, N] matrix of padded batched arc scores.
Layers:
lengths: [B] sequence lengths per batch item.
scores: [B, N, N] tensor of padded batched arc scores.
logits: [B * N, N] matrix of padded batched arc scores.
arcs: [B * N, N] matrix of padded batched 0/1 indicators for MST arcs.
"""
def __init__(self, component):
"""Initializes layers.
Args:
component: Parent ComponentBuilderBase object.
"""
layers = [
network_units.Layer(self, 'lengths', -1),
network_units.Layer(self, 'scores', -1),
network_units.Layer(self, 'logits', -1),
network_units.Layer(self, 'arcs', -1),
]
super(MstSolverNetwork, self).__init__(component, init_layers=layers)
self._attrs = network_units.get_attrs_with_defaults(
component.spec.network_unit.parameters,
defaults={
'forest': False,
'loss': 'softmax',
'crf_max_dynamic_range': 20,
})
check.Eq(
len(self._fixed_feature_dims.items()), 0, 'Expected no fixed features')
check.Eq(
len(self._linked_feature_dims.items()), 2,
'Expected two linked features')
check.In('lengths', self._linked_feature_dims,
'Missing required linked feature')
check.In('scores', self._linked_feature_dims,
'Missing required linked feature')
def create(self,
fixed_embeddings,
linked_embeddings,
context_tensor_arrays,
attention_tensor,
during_training,
stride=None):
"""Forwards the lengths and scores."""
check.NotNone(stride, 'MstSolverNetwork requires stride')
lengths = network_units.lookup_named_tensor('lengths', linked_embeddings)
lengths_b = tf.to_int32(tf.squeeze(lengths.tensor, [1]))
scores = network_units.lookup_named_tensor('scores', linked_embeddings)
scores_bnxn = scores.tensor
max_length = tf.shape(scores_bnxn)[1]
scores_bxnxn = tf.reshape(scores_bnxn, [stride, max_length, max_length])
_, argmax_sources_bxn = mst_ops.maximum_spanning_tree(
forest=self._attrs['forest'], num_nodes=lengths_b, scores=scores_bxnxn)
argmax_sources_bn = tf.reshape(argmax_sources_bxn, [-1])
arcs_bnxn = tf.one_hot(argmax_sources_bn, max_length, dtype=tf.float32)
return [lengths_b, scores_bxnxn, scores_bnxn, arcs_bnxn]
def get_logits(self, network_tensors):
return network_tensors[self.get_layer_index('logits')]
def get_bulk_predictions(self, stride, network_tensors):
return network_tensors[self.get_layer_index('arcs')]
def compute_bulk_loss(self, stride, network_tensors, gold):
"""See base class."""
if self._attrs['loss'] == 'softmax':
return (None, None, None) # fall back to default bulk softmax
lengths_b, scores_bxnxn, _, arcs_bnxn = network_tensors
max_length = tf.shape(scores_bxnxn)[2]
arcs_bxnxn = tf.reshape(arcs_bnxn, [stride, max_length, max_length])
gold_bxn = tf.reshape(gold, [stride, max_length])
gold_bxnxn = tf.one_hot(gold_bxn, max_length, dtype=tf.float32)
loss = self._compute_loss(lengths_b, scores_bxnxn, gold_bxnxn)
correct = tf.reduce_sum(tf.to_int32(arcs_bxnxn * gold_bxnxn))
total = tf.reduce_sum(lengths_b)
return loss, correct, total
def _compute_loss(self, lengths, scores, gold):
"""Computes the configured structured loss for a batch.
Args:
lengths: [B] sequence lengths per batch item.
scores: [B, N, N] tensor of padded batched arc scores.
gold: [B, N, N] tensor of 0/1 indicators for gold arcs.
Returns:
Scalar sum of losses across the batch.
"""
# Dispatch to one of the _compute_*_loss() methods.
method_name = '_compute_%s_loss' % self._attrs['loss']
loss_b = getattr(self, method_name)(lengths, scores, gold)
return tf.reduce_sum(loss_b)
def _compute_m3n_loss(self, lengths, scores, gold):
"""Computes the M3N-style structured hinge loss for a batch."""
# Perform Hamming-loss-augmented inference.
gold_scores_b = tf.reduce_sum(scores * gold, axis=[1, 2])
hamming_loss_bxnxn = 1 - gold
scores_bxnxn = scores + hamming_loss_bxnxn
max_scores_b, _ = mst_ops.maximum_spanning_tree(
num_nodes=lengths, scores=scores_bxnxn, forest=self._attrs['forest'])
return max_scores_b - gold_scores_b
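# Worked equation (illustrative): because |gold| is a 0/1 arc indicator and
# each node selects exactly one source, adding (1 - gold) to the scores adds
# +1 for every non-gold selection, so the value computed above is
#   loss_b = max_T [score(T) + Hamming(T, gold)] - score(gold)
# which is zero only when the gold tree beats every alternative by a margin of
# at least its Hamming distance.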
def _compute_crf_loss(self, lengths, scores, gold):
"""Computes the negative CRF log-probability for a batch."""
# The |scores| are assumed to be in the log domain.
log_gold_scores_b = tf.reduce_sum(scores * gold, axis=[1, 2])
log_partition_functions_b = mst_ops.log_partition_function(
num_nodes=lengths,
scores=scores,
forest=self._attrs['forest'],
max_dynamic_range=self._attrs['crf_max_dynamic_range'])
return log_partition_functions_b - log_gold_scores_b # negative log-prob
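# Worked equation (illustrative): the value above is the negative CRF
# log-likelihood of the gold tree,
#   loss_b = log Z_b - log score(gold_b) = -log P(gold_b | scores_b)
# where Z_b is computed by mst_ops.log_partition_function().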
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DRAGNN wrappers for the MST solver."""
import math
import tensorflow as tf
from google.protobuf import text_format
from dragnn.protos import spec_pb2
from dragnn.python import mst_units
from dragnn.python import network_units
_MASTER_SPEC = r"""
component {
name: 'test'
linked_feature {
name: 'lengths'
size: 1
embedding_dim: -1
fml: 'input.focus'
source_translator: 'identity'
source_component: 'previous'
source_layer: 'lengths'
}
linked_feature {
name: 'scores'
size: 1
embedding_dim: -1
fml: 'input.focus'
source_translator: 'identity'
source_component: 'previous'
source_layer: 'scores'
}
}
"""
class MockNetwork(object):
def get_layer_size(self, unused_name):
return -1
class MockComponent(object):
def __init__(self, master, component_spec):
self.master = master
self.spec = component_spec
self.name = component_spec.name
self.beam_size = 1
self.num_actions = -1
self.network = MockNetwork()
class MockMaster(object):
def __init__(self, build_runtime_graph=False):
self.spec = spec_pb2.MasterSpec()
text_format.Parse(_MASTER_SPEC, self.spec)
self.hyperparams = spec_pb2.GridPoint()
self.lookup_component = {
'previous': MockComponent(self, spec_pb2.ComponentSpec())
}
self.build_runtime_graph = build_runtime_graph
class MstSolverNetworkTest(tf.test.TestCase):
def setUp(self):
# Clear the graph and all existing variables. Otherwise, variables created
# in different tests may collide with each other.
tf.reset_default_graph()
def testCreate(self):
with self.test_session():
master = MockMaster()
component = MockComponent(master, master.spec.component[0])
component.network = mst_units.MstSolverNetwork(component)
stride = 1
lengths = tf.constant([[3]], dtype=tf.int64)
scores = tf.constant([[1.0, 0.5, 0.5],
[2.0, 0.5, 0.5],
[0.5, 3.0, 0.5]],
dtype=tf.float32) # pyformat: disable
linked_embeddings = [
network_units.NamedTensor(lengths, 'lengths'),
network_units.NamedTensor(scores, 'scores')
]
network_tensors = component.network.create([], linked_embeddings, [],
None, False, stride)
self.assertAllEqual(network_tensors[0].eval(), [3])
self.assertAllEqual(network_tensors[1].eval(),
[[[1.0, 0.5, 0.5],
[2.0, 0.5, 0.5],
[0.5, 3.0, 0.5]]]) # pyformat: disable
self.assertAllEqual(network_tensors[2].eval(),
[[1.0, 0.5, 0.5],
[2.0, 0.5, 0.5],
[0.5, 3.0, 0.5]]) # pyformat: disable
self.assertAllEqual(network_tensors[3].eval(),
[[1.0, 0.0, 0.0],
[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0]]) # pyformat: disable
def testGetBulkPredictions(self):
with self.test_session():
master = MockMaster()
component = MockComponent(master, master.spec.component[0])
component.network = mst_units.MstSolverNetwork(component)
stride = 2
lengths = tf.constant([[2], [3]], dtype=tf.int64)
pad = -12345.6
scores = tf.constant([[1.0, 2.0, pad],
[1.8, 2.0, pad],
[pad, pad, pad],
[3.8, 4.0, 3.9],
[3.9, 3.8, 4.0],
[3.8, 0.9, 4.0]],
dtype=tf.float32) # pyformat: disable
linked_embeddings = [
network_units.NamedTensor(lengths, 'lengths'),
network_units.NamedTensor(scores, 'scores')
]
network_tensors = component.network.create([], linked_embeddings, [],
None, False, stride)
predictions = component.network.get_bulk_predictions(
stride, network_tensors)
self.assertAllEqual(predictions.eval(),
[[0.0, 1.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0],
[0.0, 0.0, 1.0]]) # pyformat: disable
def testComputeBulkLossM3n(self):
with self.test_session():
master = MockMaster()
component = MockComponent(master, master.spec.component[0])
component.spec.network_unit.parameters['loss'] = 'm3n'
component.network = mst_units.MstSolverNetwork(component)
stride = 2
lengths = tf.constant([[2], [3]], dtype=tf.int64)
# Note that these scores are large enough to overcome the +1 hamming loss
# terms in the M3N loss. Therefore, the score matrix determines the tree
# that is used to compute the M3N loss.
pad = -12345.6
scores = tf.constant([[0.5, 2.0, pad],
[0.5, 2.0, pad],
[pad, pad, pad],
[2.5, 4.0, 2.5],
[2.5, 2.5, 4.0],
[2.5, 2.5, 4.0]],
dtype=tf.float32) # pyformat: disable
# For the first tree, the gold and scores agree on one arc (that index 1
# is a root), and for the second tree, the gold and scores agree on none
# of the arcs. Therefore, we expect +1 and +3 for the first and second
# trees in the M3N loss.
gold = tf.constant([0, 1, -1, 0, 0, 1], tf.int32)
first_gold_score = 0.5 + 2.0
second_gold_score = 2.5 + 2.5 + 2.5
first_tree_correct = 1
second_tree_correct = 0
first_tree_loss = 2 * 2.0 + 2 - first_tree_correct - first_gold_score
second_tree_loss = 3 * 4.0 + 3 - second_tree_correct - second_gold_score
linked_embeddings = [
network_units.NamedTensor(lengths, 'lengths'),
network_units.NamedTensor(scores, 'scores')
]
network_tensors = component.network.create([], linked_embeddings, [],
None, False, stride)
cost, correct, total = component.network.compute_bulk_loss(
stride, network_tensors, gold)
self.assertEqual(cost.eval(), first_tree_loss + second_tree_loss)
self.assertEqual(correct.eval(), first_tree_correct + second_tree_correct)
self.assertEqual(total.eval(), 2 + 3)
def testComputeBulkLossCrf(self):
with self.test_session():
master = MockMaster()
component = MockComponent(master, master.spec.component[0])
component.spec.network_unit.parameters['loss'] = 'crf'
component.network = mst_units.MstSolverNetwork(component)
stride = 2
lengths = tf.constant([[2], [3]], dtype=tf.int64)
# These scores have 2.0 (in the log domain) on the gold arcs and 1.0
# elsewhere.
pad = -12345.6
one = math.log(1.0)
two = math.log(2.0)
scores = tf.constant([[one, two, pad],
[one, two, pad],
[pad, pad, pad],
[one, two, one],
[one, one, two],
[one, one, two]],
dtype=tf.float32) # pyformat: disable
gold = tf.constant([1, 1, -1, 1, 2, 2], tf.int32)
first_partition_function = (
2.0 * 2.0 + # 0 -> 1 (gold)
1.0 * 1.0) # 1 -> 0
first_loss = -math.log(2.0 * 2.0 / first_partition_function)
second_partition_function = (
2.0 * 2.0 * 2.0 + # 0 -> 1 -> 2 (gold)
1.0 * 1.0 * 1.0 + # 2 -> 1 -> 0
1.0 * 1.0 * 1.0 + # 0 -> 2 -> 1
2.0 * 1.0 * 1.0 + # 1 -> 2 -> 0
2.0 * 1.0 * 1.0 + # 1 -> 0 -> 2
2.0 * 1.0 * 1.0 + # 2 -> 0 -> 1
2.0 * 2.0 * 1.0 + # {0, 1} -> 2
2.0 * 1.0 * 1.0 + # {0, 2} -> 1
1.0 * 1.0 * 1.0) # {1, 2} -> 0
second_loss = -math.log(2.0 * 2.0 * 2.0 / second_partition_function)
linked_embeddings = [
network_units.NamedTensor(lengths, 'lengths'),
network_units.NamedTensor(scores, 'scores')
]
network_tensors = component.network.create([], linked_embeddings, [],
None, False, stride)
cost, correct, total = component.network.compute_bulk_loss(
stride, network_tensors, gold)
self.assertAlmostEqual(cost.eval(), first_loss + second_loss)
self.assertEqual(correct.eval(), 2 + 3)
self.assertEqual(total.eval(), 2 + 3)
if __name__ == '__main__':
tf.test.main()
@@ -22,7 +22,6 @@ import abc
import numpy as np
from six.moves import xrange
import tensorflow as tf
from tensorflow.python.ops import nn
from tensorflow.python.ops import tensor_array_ops as ta
@@ -76,11 +75,13 @@ class StoredActivations(object):
check.NotNone(dim, 'Dim is required for bulk tensor')
self._bulk_tensor = tensor
with tf.name_scope('convert_to_dyn'):
tensor = tf.reshape(tensor, [stride, -1, dim])
tensor = tf.transpose(tensor, perm=[1, 0, 2])
pad = tf.zeros([1, stride, dim], dtype=tensor.dtype)
self._array_tensor = tf.concat([pad, tensor], 0)
if dim >= 0:
# These operations will fail if |dim| is negative.
with tf.name_scope('convert_to_dyn'):
tensor = tf.reshape(tensor, [stride, -1, dim])
tensor = tf.transpose(tensor, perm=[1, 0, 2])
pad = tf.zeros([1, stride, dim], dtype=tensor.dtype)
self._array_tensor = tf.concat([pad, tensor], 0)
if array is not None:
check.IsNone(tensor, 'Cannot initialize from both tensor and array')
@@ -130,7 +131,8 @@ def add_embeddings(channel_id, feature_spec, seed=None):
check.Gt(feature_spec.embedding_dim, 0,
'Embeddings requested for non-embedded feature: %s' % feature_spec)
name = fixed_embeddings_name(channel_id)
shape = [feature_spec.vocabulary_size + 1, feature_spec.embedding_dim]
row_num = feature_spec.vocabulary_size + 1
shape = [row_num, feature_spec.embedding_dim]
if feature_spec.HasField('pretrained_embedding_matrix'):
if len(feature_spec.pretrained_embedding_matrix.part) > 1:
raise RuntimeError('pretrained_embedding_matrix resource contains '
@@ -143,9 +145,9 @@ def add_embeddings(channel_id, feature_spec, seed=None):
embeddings = syntaxnet_ops.word_embedding_initializer(
vectors=feature_spec.pretrained_embedding_matrix.part[0].file_pattern,
vocabulary=feature_spec.vocab.part[0].file_pattern,
override_num_embeddings=row_num,
num_special_embeddings=1,
embedding_init=1.0,
embedding_init=0.0, # zero out rows with no pretrained values
seed=seed1,
seed2=seed2)
return tf.get_variable(
@@ -183,7 +185,57 @@ def embedding_lookup(embedding_matrix, indices, ids, weights, size):
return embeddings
def fixed_feature_lookup(component, state, channel_id, stride):
def apply_feature_id_dropout(ids, weights, channel):
"""Randomly perturbs a vector of feature IDs.
Args:
ids: Vector of feature IDs.
weights: Vector of feature weights.
channel: FixedFeatureChannel that extracted the |ids|.
Returns:
Copy of |ids| and |weights| where each ID is randomly replaced with
|channel.dropout_id|, according to the probabilities in
|channel.dropout_keep_probability|. The weights of dropped features are
set to zero if |channel.dropout_id| equals |channel.vocabulary_size|.
"""
check.Gt(
len(channel.dropout_keep_probability), 0,
'Channel {} dropout_keep_probability is empty'.format(channel.name))
check.Le(
len(channel.dropout_keep_probability), channel.vocabulary_size,
'Channel {} dropout_keep_probability is too long'.format(channel.name))
# Channel fields, converted from proto to constant tensor.
dropout_id = tf.constant(
channel.dropout_id, name='dropout_id', dtype=tf.int64)
dropout_keep_probabilities = tf.constant(
list(channel.dropout_keep_probability),
name='dropout_keep_probability',
dtype=tf.float32,
shape=[channel.vocabulary_size])
# The keep probabilities for the current batch of feature IDs.
keep_probabilities = tf.gather(dropout_keep_probabilities, ids)
# Draw random values and determine which IDs should be kept.
shape = tf.shape(ids)
noise = tf.random_uniform(shape) # \in [0,1)^d
should_keep = noise < keep_probabilities
# Replace dropped IDs with the specified replacement ID.
dropout_ids = tf.fill(shape, dropout_id)
new_ids = tf.where(should_keep, ids, dropout_ids)
if channel.dropout_id == channel.vocabulary_size:
# Replace weights of dropped IDs with 0.
zeros = tf.zeros(shape, dtype=tf.float32)
new_weights = tf.where(should_keep, weights, zeros)
else:
new_weights = weights
return new_ids, new_weights
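# Illustrative sketch (hypothetical numbers): with
# dropout_keep_probability = [0.5, 1.0], vocabulary_size = 2, and
# dropout_id = 2, an extracted ID of 0 is replaced by 2 with probability 0.5
# (and, since dropout_id == vocabulary_size, its weight is zeroed), while an
# extracted ID of 1 is always kept.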
def fixed_feature_lookup(component, state, channel_id, stride, during_training):
"""Looks up fixed features and passes them through embeddings.
Embedding vectors may be scaled by weights if the features specify it.
@@ -193,6 +245,8 @@ def fixed_feature_lookup(component, state, channel_id, stride):
state: MasterState object for the live ComputeSession.
channel_id: int id of the fixed feature to look up.
stride: int Tensor of current batch * beam size.
during_training: True if this is being called from a training code path.
This controls, e.g., the use of feature ID dropout.
Returns:
NamedTensor object containing the embedding vectors.
@@ -200,13 +254,35 @@ def fixed_feature_lookup(component, state, channel_id, stride):
feature_spec = component.spec.fixed_feature[channel_id]
check.Gt(feature_spec.embedding_dim, 0,
'Embeddings requested for non-embedded feature: %s' % feature_spec)
embedding_matrix = component.get_variable(fixed_embeddings_name(channel_id))
if feature_spec.is_constant:
embedding_matrix = tf.get_variable(fixed_embeddings_name(channel_id))
else:
embedding_matrix = component.get_variable(fixed_embeddings_name(channel_id))
with tf.op_scope([embedding_matrix], 'fixed_embedding_' + feature_spec.name):
indices, ids, weights = dragnn_ops.extract_fixed_features(
state.handle, component=component.name, channel_id=channel_id)
size = stride * feature_spec.size
embeddings = embedding_lookup(embedding_matrix, indices, ids, weights, size)
if during_training and feature_spec.dropout_id >= 0:
ids, weights = apply_feature_id_dropout(ids, weights, feature_spec)
if component.master.build_runtime_graph:
# To simplify integration with NN compilers, assume that each feature in
# the channel extracts exactly one ID and no weights.
# TODO(googleuser): Relax this restriction?
embeddings = []
for index in range(feature_spec.size):
feature_id = component.add_cell_input(
tf.int32, [1], 'fixed_channel_{}_index_{}_ids'.format(
channel_id, index))
embeddings.append(tf.gather(embedding_matrix, feature_id))
embeddings = tf.concat(embeddings, 1)
else:
size = stride * feature_spec.size
embeddings = embedding_lookup(embedding_matrix, indices, ids, weights,
size)
dim = feature_spec.size * feature_spec.embedding_dim
return NamedTensor(
tf.reshape(embeddings, [-1, dim]), feature_spec.name, dim=dim)
@@ -368,12 +444,16 @@ def convert_network_state_tensorarray(tensorarray):
return tf.reshape(tensor, [-1, tf.shape(tensor)[2]])
def pass_through_embedding_matrix(act_block, embedding_matrix, step_idx):
def pass_through_embedding_matrix(component, channel_id, size, act_block,
embedding_matrix, step_idx):
"""Passes the activations through the embedding_matrix.
Takes care to handle out of bounds lookups.
Args:
component: Component that produced the linked features.
channel_id: Channel that produced the linked features.
size: Number of linked embeddings in the channel.
act_block: matrix of activations.
embedding_matrix: matrix of weights.
step_idx: vector containing step indices, with -1 indicating out of bounds.
@@ -383,14 +463,36 @@ def pass_through_embedding_matrix(act_block, embedding_matrix, step_idx):
"""
# Indicator vector for out of bounds lookups.
step_idx_mask = tf.expand_dims(tf.equal(step_idx, -1), -1)
step_idx_mask = tf.to_float(step_idx_mask)
if component.master.build_runtime_graph:
step_idx_mask = component.add_cell_input(
step_idx_mask.dtype, [size, 1],
'linked_channel_{}_out_of_bounds'.format(channel_id))
# Pad the last column of the activation vectors with the indicator.
act_block = tf.concat([act_block, tf.to_float(step_idx_mask)], 1)
act_block = tf.concat([act_block, step_idx_mask], 1)
return tf.matmul(act_block, embedding_matrix)
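# Note (illustrative): concatenating the indicator column means
# |embedding_matrix| must have one more input row than the activation width;
# that extra row is added to the product exactly for out-of-bounds steps, so
# it acts as a learned "missing activation" embedding.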
def lookup_named_tensor_or_none(name, named_tensors):
"""Retrieves a NamedTensor by name, or None if it doesn't exist.
Args:
name: Name of the tensor to retrieve.
named_tensors: List of NamedTensor objects to search.
Returns:
The NamedTensor in |named_tensors| with the |name| or None.
"""
for named_tensor in named_tensors:
if named_tensor.name == name:
return named_tensor
return None
def lookup_named_tensor(name, named_tensors):
"""Retrieves a NamedTensor by name.
"""Retrieves a NamedTensor by name, raising KeyError if it doesn't exist.
Args:
name: Name of the tensor to retrieve.
@@ -402,11 +504,11 @@ def lookup_named_tensor(name, named_tensors):
Raises:
KeyError: If the |name| is not found among the |named_tensors|.
"""
for named_tensor in named_tensors:
if named_tensor.name == name:
return named_tensor
raise KeyError('Name "%s" not found in named tensors: %s' % (name,
named_tensors))
result = lookup_named_tensor_or_none(name, named_tensors)
if result is None:
raise KeyError('Name "%s" not found in named tensors: %s' % (name,
named_tensors))
return result
def activation_lookup_recurrent(component, state, channel_id, source_array,
......@@ -417,9 +519,9 @@ def activation_lookup_recurrent(component, state, channel_id, source_array,
not passed through (i.e. multiplied by) an embedding matrix.
Args:
component: Component object in which to look up the fixed features.
component: Component object in which to look up the linked features.
state: MasterState object for the live ComputeSession.
channel_id: int id of the fixed feature to look up.
channel_id: int id of the linked feature to look up.
source_array: TensorArray from which to fetch feature vectors, expected to
have size [steps + 1] elements of shape [stride, D] each.
source_layer_size: int length of feature vectors before embedding.
@@ -459,11 +561,17 @@
act_block = tf.gather(act_block, flat_idx)
act_block = tf.reshape(act_block, [-1, source_layer_size])
if component.master.build_runtime_graph:
act_block = component.add_cell_input(act_block.dtype, [
feature_spec.size, source_layer_size
], 'linked_channel_{}_activations'.format(channel_id))
if feature_spec.embedding_dim != -1:
embedding_matrix = component.get_variable(
linked_embeddings_name(channel_id))
act_block = pass_through_embedding_matrix(act_block, embedding_matrix,
step_idx)
act_block = pass_through_embedding_matrix(component, channel_id,
feature_spec.size, act_block,
embedding_matrix, step_idx)
dim = feature_spec.size * feature_spec.embedding_dim
else:
# If embedding_dim is -1, just output concatenation of activations.
@@ -481,9 +589,9 @@ def activation_lookup_other(component, state, channel_id, source_tensor,
not passed through (i.e. multiplied by) an embedding matrix.
Args:
component: Component object in which to look up the fixed features.
component: Component object in which to look up the linked features.
state: MasterState object for the live ComputeSession.
channel_id: int id of the fixed feature to look up.
channel_id: int id of the linked feature to look up.
source_tensor: Tensor from which to fetch feature vectors. Expected to
have shape [steps + 1, stride, D].
source_layer_size: int length of feature vectors before embedding (D). It
@@ -508,11 +616,17 @@
act_block = tf.gather_nd(source_tensor, indices)
act_block = tf.reshape(act_block, [-1, source_layer_size])
if component.master.build_runtime_graph:
act_block = component.add_cell_input(act_block.dtype, [
feature_spec.size, source_layer_size
], 'linked_channel_{}_activations'.format(channel_id))
if feature_spec.embedding_dim != -1:
embedding_matrix = component.get_variable(
linked_embeddings_name(channel_id))
act_block = pass_through_embedding_matrix(act_block, embedding_matrix,
step_idx)
act_block = pass_through_embedding_matrix(component, channel_id,
feature_spec.size, act_block,
embedding_matrix, step_idx)
dim = feature_spec.size * feature_spec.embedding_dim
else:
# If embedding_dim is -1, just output concatenation of activations.
@@ -629,7 +743,7 @@ class Layer(object):
Returns:
TensorArray object
"""
check.Gt(self.dim, 0, 'Cannot create array when dimension is dynamic')
check.Ge(self.dim, 0, 'Cannot create array when dimension is dynamic')
tensor_array = ta.TensorArray(
dtype=tf.float32,
size=0,
@@ -671,7 +785,19 @@ def get_attrs_with_defaults(parameters, defaults):
return attrs
def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
def maybe_make_dropout_mask(shape, keep_prob):
"""Returns a reusable dropout mask, or None if dropout would not occur."""
if keep_prob >= 1.0:
return None
return tf.nn.dropout(tf.ones(shape, dtype=tf.float32), keep_prob)
def maybe_apply_dropout(inputs,
keep_prob,
per_sequence,
stride=None,
dropout_mask=None,
name=None):
"""Applies dropout, if so configured, to an input tensor.
The input may be rank 2 or 3 depending on whether the stride (i.e., batch
@@ -682,20 +808,27 @@ def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
keep_prob: Scalar probability of keeping each input element. If >= 1.0, no
dropout is performed.
per_sequence: If true, sample the dropout mask once per sequence, instead of
once per step. Requires |stride| when true.
stride: Scalar batch size. Optional if |per_sequence| is false.
once per step. Either |stride| or |dropout_mask| must be set when true.
stride: Scalar batch size. Optional if |per_sequence| is false, or if
|dropout_mask| is provided.
dropout_mask: Precomputed dropout mask to apply to the |inputs|; must be
broadcastable to |inputs|. Optional if |per_sequence| is false, or if
|stride| is provided.
name: Optional name for the dropout operation, if dropout is applied.
Returns:
[stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
shape of |inputs|, containing the masked or original inputs, depending on
whether dropout was actually performed.
"""
if keep_prob >= 1.0:
return inputs
if not per_sequence:
return tf.nn.dropout(inputs, keep_prob)
return tf.nn.dropout(inputs, keep_prob, name=name)
if dropout_mask is not None:
return tf.multiply(inputs, dropout_mask, name=name)
# We only check the dims if we are applying per-sequence dropout
check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')
@@ -713,7 +846,7 @@ def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
# Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
# to all steps for a particular sequence.
noise_shape = [stride, 1, dim]
masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape)
masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape, name=name)
# If needed, flatten out the batch dimension in the return value.
return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd
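# Illustrative usage sketch (hypothetical; the names are examples, following
# this file's conventions): a network can sample one mask per sequence in
# pre_create() and reuse it at every step of the transition loop:
#   def pre_create(self, stride):
#     self._dropout_mask = maybe_make_dropout_mask(
#         [stride, 1, self._hidden_dim], self._dropout_keep_prob)
#   ...
#   hidden = maybe_apply_dropout(hidden_bxnxd, self._dropout_keep_prob,
#                                per_sequence=True,
#                                dropout_mask=self._dropout_mask)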
@@ -749,6 +882,7 @@ class NetworkUnitInterface(object):
"""
self._component = component
self._params = []
self._derived_params = []
self._layers = init_layers if init_layers else []
self._regularized_weights = []
self._context_layers = init_context_layers if init_context_layers else []
@@ -764,7 +898,10 @@ class NetworkUnitInterface(object):
check.Gt(spec.size, 0, 'Invalid fixed feature size')
if spec.embedding_dim > 0:
fixed_dim = spec.embedding_dim
self._params.append(add_embeddings(channel_id, spec))
if spec.is_constant:
add_embeddings(channel_id, spec)
else:
self._params.append(add_embeddings(channel_id, spec))
else:
fixed_dim = 1 # assume feature ID extraction; only one ID per step
self._fixed_feature_dims[spec.name] = spec.size * fixed_dim
@@ -802,8 +939,8 @@ class NetworkUnitInterface(object):
self._concatenated_input_dim = -1
else:
self._concatenated_input_dim = sum(input_dims)
tf.logging.info('component %s concat_input_dim %s', component.name,
self._concatenated_input_dim)
tf.logging.debug('component %s concat_input_dim %s', component.name,
self._concatenated_input_dim)
# Allocate attention parameters.
if self._component.spec.attention_component:
@@ -845,6 +982,19 @@ class NetworkUnitInterface(object):
[attention_hidden_layer_size, component.num_actions],
initializer=tf.random_normal_initializer(stddev=1e-4)))
def pre_create(self, stride):
"""Prepares this network for inputs of the given stride.
This will be called before entering the main transition loop and calling
create(). Networks can use this to pre-compute values that are reused in
the main transition loop. Note that this may be called multiple times;
e.g., once for the training graph, and again for the inference graph.
Args:
stride: Scalar batch_size * beam_size.
"""
pass
@abc.abstractmethod
def create(self,
fixed_embeddings,
@@ -878,6 +1028,18 @@ class NetworkUnitInterface(object):
def params(self):
return self._params
@property
def derived_params(self):
"""Gets the list of derived parameters.
Derived parameters are similar to `params`, but reformatted slightly
(because doing so is easier in Python).
Returns:
List of zero-argument getters, each of which return a tensor when called.
"""
return self._derived_params
@property
def regularized_weights(self):
return self._regularized_weights
@@ -919,6 +1081,38 @@ class NetworkUnitInterface(object):
"""
raise NotImplementedError()
def get_bulk_predictions(self, stride, network_tensors):
"""Returns custom bulk predictions, if supported.
The returned predictions will be used to advance the batch of states, like
logits. For example, a network may perform structured prediction, and then
return 0/1 indicators of the jointly-predicted annotations. The difference
between this and get_logits() is that this is only used at inference time.
Args:
stride: Scalar stride for segmenting bulk tensors.
network_tensors: List of tensors as returned by create().
Returns:
[stride * steps, dim] matrix of predictions, or None if not supported.
"""
del stride, network_tensors
return None
def compute_bulk_loss(self, stride, network_tensors, gold):
"""Returns a custom bulk training loss, if supported.
Args:
stride: Scalar stride for segmenting bulk tensors.
network_tensors: List of tensors as returned by create().
gold: [stride * steps] vector of gold actions.
Returns:
Tuple of (loss, correct, total), or (None, None, None) if not supported.
"""
del stride, network_tensors, gold
return (None, None, None)
def get_l2_regularized_weights(self):
"""Gets the weights that need to be regularized."""
return self.regularized_weights
@@ -1026,6 +1220,12 @@ class FeedForwardNetwork(NetworkUnitInterface):
(https://arxiv.org/abs/1512.05287).
dropout_all_layers (False): If true, apply dropout to the input of all
hidden layers, instead of just applying it to the network input.
initialize_bias_zero (False): If true, initialize bias vectors to 0.
Otherwise, they are initialized to a small constant value.
initialize_softmax_zero (False): If true, initialize softmax weights to 0.
Otherwise, they are initialized to small random values.
initialize_hidden_orthogonal (False): If true, initialize hidden weights
orthogonally. Otherwise, they are initialized to small random values.
Hyperparameters used:
dropout_rate: The probability that an input is not dropped. Only used
@@ -1041,9 +1241,25 @@
'nonlinearity': 'relu',
'dropout_keep_prob': -1.0,
'dropout_per_sequence': False,
'dropout_all_layers': False
'dropout_all_layers': False,
'initialize_bias_zero': False,
'initialize_softmax_zero': False,
'initialize_hidden_orthogonal': False,
})
def _make_bias_initializer():
return (tf.zeros_initializer() if self._attrs['initialize_bias_zero'] else
tf.constant_initializer(0.2, dtype=tf.float32))
def _make_softmax_initializer():
return (tf.zeros_initializer() if self._attrs['initialize_softmax_zero']
else tf.random_normal_initializer(stddev=1e-4))
def _make_hidden_initializer():
return (tf.orthogonal_initializer()
if self._attrs['initialize_hidden_orthogonal'] else
tf.random_normal_initializer(stddev=1e-4))
# Initialize the hidden layer sizes before running the base initializer, as
# the base initializer may need to know the size of the hidden layer for
# recurrent connections.
@@ -1084,13 +1300,13 @@
for index, hidden_layer_size in enumerate(self._hidden_layer_sizes):
weights = tf.get_variable(
'weights_%d' % index, [last_layer_dim, hidden_layer_size],
initializer=tf.random_normal_initializer(stddev=1e-4))
initializer=_make_hidden_initializer())
self._params.append(weights)
if index > 0 or self._layer_norm_hidden is None:
self._params.append(
tf.get_variable(
'bias_%d' % index, [hidden_layer_size],
initializer=tf.constant_initializer(0.2, dtype=tf.float32)))
initializer=_make_bias_initializer()))
self._weights.append(weights)
self._layers.append(
@@ -1108,7 +1324,7 @@
self._params.append(
tf.get_variable(
'weights_softmax', [last_layer_dim, component.num_actions],
initializer=tf.random_normal_initializer(stddev=1e-4)))
initializer=_make_softmax_initializer()))
self._params.append(
tf.get_variable(
'bias_softmax', [component.num_actions],
@@ -1199,67 +1415,133 @@
class LSTMNetwork(NetworkUnitInterface):
"""Implementation of action LSTM style network."""
"""Implementation of action LSTM style network.
Note that this is not a vanilla LSTM: it adds peephole connections and couples
the input and forget gates.
This implementation treats linked features called lstm_h and lstm_c specially.
Instead of treating them as normal linked features, it uses them as the
previous LSTM states. This allows having a single LSTM component actually
consist of several LSTMs, or to have a tree-shaped LSTM.
"""
def __init__(self, component):
"""Initializes LSTM parameters.
Args:
component: parent ComponentBuilderBase object.
Parameters used to construct the network:
hidden_layer_sizes: In spite of its name, a single int indicating the
number of hidden units in each hidden layer.
factored_hidden_dim: If positive, the weight matrix is factored into a
product of two matrices with this inner dimension.
omit_logits (False): Whether to elide the logits layer.
initialize_bias_zero (False): If true, initialize bias vectors to 0.
Otherwise, they are initialized to small random values.
initialize_softmax_zero (False): If true, initialize softmax weights to 0.
Otherwise, they are initialized to small random values.
initialize_hidden_orthogonal (False): If true, initialize hidden weights
orthogonally. Otherwise, they are initialized to small random values.
input_dropout_rate (-1.0): Keep probability for inputs. If negative, fall
back to the |dropout_rate| hyperparameter.
recurrent_dropout_rate (-1.0): Keep probability for recurrences. If
negative, fall back to the |recurrent_dropout_rate| hyperparameter.
dropout_per_sequence (False): If true, sample the dropout mask once per
sequence, instead of once per step. See Gal and Ghahramani
(https://arxiv.org/abs/1512.05287).
"""
assert component.num_actions > 0, 'Component num actions must be positive.'
self._attrs = get_attrs_with_defaults(
component.spec.network_unit.parameters,
defaults={
'hidden_layer_sizes': -1, # NB: a single dim, not a list
'factored_hidden_dim': -1,
'omit_logits': False,
'initialize_bias_zero': False,
'initialize_softmax_zero': False,
'initialize_hidden_orthogonal': False,
'input_dropout_rate': -1.0,
'recurrent_dropout_rate': -1.0,
'dropout_per_sequence': False,
})
def _make_bias_initializer():
return (tf.zeros_initializer() if self._attrs['initialize_bias_zero'] else
tf.random_normal_initializer(stddev=1e-4))
def _make_softmax_initializer():
return (tf.zeros_initializer() if self._attrs['initialize_softmax_zero']
else tf.random_normal_initializer(stddev=1e-4))
self._hidden_layer_sizes = self._attrs['hidden_layer_sizes']
self._factored_hidden_dim = self._attrs['factored_hidden_dim']
self._compute_logits = not self._attrs['omit_logits']
self._dropout_per_sequence = self._attrs['dropout_per_sequence']
self._input_dropout_rate = self._attrs['input_dropout_rate']
if self._input_dropout_rate < 0.0:
self._input_dropout_rate = component.master.hyperparams.dropout_rate
self._recurrent_dropout_rate = self._attrs['recurrent_dropout_rate']
if self._recurrent_dropout_rate < 0.0:
self._recurrent_dropout_rate = (
component.master.hyperparams.recurrent_dropout_rate)
if self._recurrent_dropout_rate < 0.0:
self._recurrent_dropout_rate = component.master.hyperparams.dropout_rate
tf.logging.info('[%s] dropout: input=%s recurrent=%s per_sequence=%s',
component.name, self._input_dropout_rate,
self._recurrent_dropout_rate, self._dropout_per_sequence)
super(LSTMNetwork, self).__init__(component)
self._layer_input_dim = self._concatenated_input_dim
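    # lstm_h and lstm_c are consumed as recurrent state rather than as part of
    # the concatenated step input, so exclude their dimensions from the input
    # dimension computed below.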
if self._layer_input_dim > 1:
for skipped_link in ['lstm_h', 'lstm_c']:
if skipped_link in self._linked_feature_dims:
self._layer_input_dim -= self._linked_feature_dims[skipped_link]
self._input_dropout_mask = None
self._recurrent_dropout_mask = None
self._context_layers = []
self._create_hidden_weights(
'x2i', [self._layer_input_dim, self._hidden_layer_sizes])
self._create_hidden_weights(
'h2i', [self._hidden_layer_sizes, self._hidden_layer_sizes])
self._create_hidden_weights(
'c2i', [self._hidden_layer_sizes, self._hidden_layer_sizes])
self._params.append(
tf.get_variable(
'bi', [self._hidden_layer_sizes],
initializer=_make_bias_initializer()))
self._create_hidden_weights(
'x2o', [self._layer_input_dim, self._hidden_layer_sizes])
self._create_hidden_weights(
'h2o', [self._hidden_layer_sizes, self._hidden_layer_sizes])
self._create_hidden_weights(
'c2o', [self._hidden_layer_sizes, self._hidden_layer_sizes])
self._params.append(
tf.get_variable(
'bo', [self._hidden_layer_sizes],
initializer=_make_bias_initializer()))
self._create_hidden_weights(
'x2c', [self._layer_input_dim, self._hidden_layer_sizes])
self._create_hidden_weights(
'h2c', [self._hidden_layer_sizes, self._hidden_layer_sizes])
self._params.append(
tf.get_variable(
'bc', [self._hidden_layer_sizes],
initializer=_make_bias_initializer()))
# Add runtime hooks for combined matrices.
self._derived_params.append(self._get_x_to_ico)
self._derived_params.append(self._get_h_to_ico)
self._derived_params.append(self._get_ico_bias)
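    # With these concatenated parameters, a runtime can in principle compute
    # all three gate pre-activations with one matmul each for x and h (against
    # the [D, 3H] and [H, 3H] matrices) plus one [3H] bias add, instead of
    # issuing separate products per gate.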
lstm_h_layer = Layer(component, name='lstm_h', dim=self._hidden_layer_sizes)
lstm_c_layer = Layer(component, name='lstm_c', dim=self._hidden_layer_sizes)
......@@ -1272,18 +1554,92 @@ class LSTMNetwork(NetworkUnitInterface):
self._layers.append(
Layer(component, name='layer_0', dim=self._hidden_layer_sizes))
if self._compute_logits:
self.params.append(
tf.get_variable(
'weights_softmax',
[self._hidden_layer_sizes, component.num_actions],
initializer=_make_softmax_initializer()))
self.params.append(
tf.get_variable(
'bias_softmax', [component.num_actions],
initializer=tf.zeros_initializer()))
self._layers.append(
Layer(component, name='logits', dim=component.num_actions))
def _get_variable_name_prefix(self):
"""Returns the prefix for variable names."""
# The bias variables are always present; infer the prefix from one of them.
bi = self._component.get_variable('bi')
tokens = bi.op.name.split('/')
while tokens.pop() != 'bi':
pass # remove the last 'bi' and everything after it
return '/'.join(tokens) + '/'
def _get_x_to_ico(self):
# TODO(googleuser): Export the factored representation, if available.
x2i = self._multiply_hidden_weights(tf.eye(self._layer_input_dim), 'x2i')
x2c = self._multiply_hidden_weights(tf.eye(self._layer_input_dim), 'x2c')
x2o = self._multiply_hidden_weights(tf.eye(self._layer_input_dim), 'x2o')
prefix = self._get_variable_name_prefix()
with tf.name_scope(None):
return tf.concat([x2i, x2c, x2o], axis=1, name=prefix + 'x_to_ico')
def _get_h_to_ico(self):
# TODO(googleuser): Export the factored representation, if available.
h2i = self._multiply_hidden_weights(tf.eye(self._hidden_layer_sizes), 'h2i')
h2c = self._multiply_hidden_weights(tf.eye(self._hidden_layer_sizes), 'h2c')
h2o = self._multiply_hidden_weights(tf.eye(self._hidden_layer_sizes), 'h2o')
prefix = self._get_variable_name_prefix()
with tf.name_scope(None):
return tf.concat([h2i, h2c, h2o], axis=1, name=prefix + 'h_to_ico')
def _get_ico_bias(self):
bi = self._component.get_variable('bi')
bc = self._component.get_variable('bc')
bo = self._component.get_variable('bo')
prefix = self._get_variable_name_prefix()
with tf.name_scope(None):
return tf.concat([bi, bc, bo], axis=0, name=prefix + 'ico_bias')
def _create_hidden_weights(self, name, shape):
"""Creates params for hidden weight matrix of the given shape."""
check.Eq(len(shape), 2, 'Hidden weights %s must be a matrix' % name)
def _initializer():
return (tf.orthogonal_initializer()
if self._attrs['initialize_hidden_orthogonal'] else
tf.random_normal_initializer(stddev=1e-4))
if self._factored_hidden_dim > 0:
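      # Low-rank factorization: a [m, n] weight matrix is parameterized as the
      # product of [m, d] and [d, n] matrices, where d = factored_hidden_dim,
      # reducing the parameter count from m*n to d*(m+n).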
self._params.append(
tf.get_variable(
'%s_in' % name, [shape[0], self._factored_hidden_dim],
initializer=_initializer()))
self._params.append(
tf.get_variable(
'%s_out' % name, [self._factored_hidden_dim, shape[1]],
initializer=_initializer()))
else:
self._params.append(
tf.get_variable(name, shape, initializer=_initializer()))
def _multiply_hidden_weights(self, inputs, name):
"""Multiplies the inputs with the named hidden weight matrix."""
if self._factored_hidden_dim > 0:
inputs = tf.matmul(inputs, self._component.get_variable('%s_in' % name))
return tf.matmul(inputs, self._component.get_variable('%s_out' % name))
else:
return tf.matmul(inputs, self._component.get_variable(name))
def pre_create(self, stride):
"""Refreshes the dropout masks, if applicable."""
if self._dropout_per_sequence:
self._input_dropout_mask = maybe_make_dropout_mask(
[stride, self._layer_input_dim], self._input_dropout_rate)
self._recurrent_dropout_mask = maybe_make_dropout_mask(
[stride, self._hidden_layer_sizes], self._recurrent_dropout_rate)
def create(self,
fixed_embeddings,
......@@ -1293,51 +1649,84 @@ class LSTMNetwork(NetworkUnitInterface):
during_training,
stride=None):
"""See base class."""
# context_tensor_arrays[0] is lstm_h
# context_tensor_arrays[1] is lstm_c
assert len(context_tensor_arrays) == 2
length = context_tensor_arrays[0].size()
# Get the (possibly averaged) biases to execute the network.
bi = self._component.get_variable('bi')
bo = self._component.get_variable('bo')
bc = self._component.get_variable('bc')
if self._compute_logits:
weights_softmax = self._component.get_variable('weights_softmax')
bias_softmax = self._component.get_variable('bias_softmax')
i_h_tm1 = lookup_named_tensor_or_none('lstm_h', linked_embeddings)
h_from_linked = False
if i_h_tm1 is not None:
h_from_linked = True
i_h_tm1 = i_h_tm1.tensor
i_c_tm1 = lookup_named_tensor_or_none('lstm_c', linked_embeddings)
c_from_linked = False
if i_c_tm1 is not None:
c_from_linked = True
i_c_tm1 = i_c_tm1.tensor
    # i_h_tm1, i_c_tm1 = h_{t-1}, c_{t-1}
    if i_h_tm1 is None:
      i_h_tm1 = context_tensor_arrays[0].read(length - 1)
    if i_c_tm1 is None:
      i_c_tm1 = context_tensor_arrays[1].read(length - 1)
# Add hard-coded recurrent inputs to the exported cell.
if self._component.master.build_runtime_graph:
shape = [1, self._hidden_layer_sizes]
if not c_from_linked:
i_c_tm1 = self._component.add_cell_input(i_c_tm1.dtype, shape, 'lstm_c',
'TYPE_RECURRENT')
if not h_from_linked:
i_h_tm1 = self._component.add_cell_input(i_h_tm1.dtype, shape, 'lstm_h',
'TYPE_RECURRENT')
# Remove 'lstm_h' and 'lstm_c' from linked_embeddings, since they are used
# in a special way.
linked_embeddings = [
x for x in linked_embeddings if x.name not in ['lstm_h', 'lstm_c']
]
# label c and h inputs
i_c_tm1 = tf.identity(i_c_tm1, name='lstm_c_in')
i_h_tm1 = tf.identity(i_h_tm1, name='lstm_h_in')
input_tensor = get_input_tensor(fixed_embeddings, linked_embeddings)
# label the feature input (for debugging purposes)
input_tensor = tf.identity(input_tensor, name='input_tensor')
# apply dropout according to http://arxiv.org/pdf/1409.2329v5.pdf
if during_training:
input_tensor = maybe_apply_dropout(
input_tensor,
self._input_dropout_rate,
self._dropout_per_sequence,
dropout_mask=self._input_dropout_mask)
# input -- i_t = sigmoid(affine(x_t, h_{t-1}, c_{t-1}))
# Note peephole connection to previous cell state.
i_ait = (
self._multiply_hidden_weights(input_tensor, 'x2i') +
self._multiply_hidden_weights(i_h_tm1, 'h2i') +
self._multiply_hidden_weights(i_c_tm1, 'c2i') + bi)
i_it = tf.sigmoid(i_ait)
# forget -- f_t = 1 - i_t
# Note coupling with input gate.
i_ft = tf.ones([1, 1]) - i_it
# write memory cell -- tanh(affine(x_t, h_{t-1}))
i_awt = (
self._multiply_hidden_weights(input_tensor, 'x2c') +
self._multiply_hidden_weights(i_h_tm1, 'h2c') + bc)
i_wt = tf.tanh(i_awt)
# c_t = f_t \odot c_{t-1} + i_t \odot tanh(affine(x_t, h_{t-1}))
......@@ -1345,8 +1734,11 @@ class LSTMNetwork(NetworkUnitInterface):
    ct = tf.add(
        tf.multiply(i_it, i_wt), tf.multiply(i_ft, i_c_tm1), name='lstm_c')
# output -- o_t = sigmoid(affine(x_t, h_{t-1}, c_t))
# Note peephole connection to current cell state.
i_aot = (
self._multiply_hidden_weights(input_tensor, 'x2o') +
self._multiply_hidden_weights(ct, 'c2o') +
self._multiply_hidden_weights(i_h_tm1, 'h2o') + bo)
i_ot = tf.sigmoid(i_aot)
......@@ -1354,27 +1746,35 @@ class LSTMNetwork(NetworkUnitInterface):
ph_t = tf.tanh(ct)
ht = tf.multiply(i_ot, ph_t, name='lstm_h')
if during_training:
ht = maybe_apply_dropout(
ht,
self._recurrent_dropout_rate,
self._dropout_per_sequence,
dropout_mask=self._recurrent_dropout_mask,
name='lstm_h_dropout')
h = tf.identity(ht, name='layer_0')
    # tensors will be consistent with the layers:
    # [lstm_h, lstm_c, layer_0, (optional) logits]
    tensors = [ht, ct, h]
    if self._compute_logits:
      logits = tf.nn.xw_plus_b(ht, weights_softmax, bias_softmax)
      if self._component.spec.attention_component:
        logits += self.attention(ht, attention_tensor)
      logits = tf.identity(logits, name='logits')
      tensors.append(logits)
return tensors
def get_layer_size(self, layer_name):
assert layer_name in {
'layer_0', 'lstm_h', 'lstm_c'
}, 'Can only retrieve from first hidden layer, lstm_h or lstm_c.'
return self._hidden_layer_sizes
def get_logits(self, network_tensors):
......@@ -1846,10 +2246,9 @@ class PairwiseConvNetwork(NetworkUnitInterface):
self._widths, self._dropout, self._bias_init, self._initialization
])
if not all(param_lengths[0] == param_len for param_len in param_lengths):
raise RuntimeError('Unmatched widths/dropout/bias_init/initialization: ' +
'%d/%d/%d/%d' % (param_lengths[0], param_lengths[1],
param_lengths[2], param_lengths[3]))
self._depths.extend(map(int, parameters['depths'].split(',')))
if len(self._depths) != len(self._widths) + 1:
......@@ -1866,9 +2265,8 @@ class PairwiseConvNetwork(NetworkUnitInterface):
self._num_labels = self._depths[-1]
if parameters['activation_layers']:
self._activation_layers = set(
map(int, parameters['activation_layers'].split(',')))
else:
self._activation_layers = set(range(self._num_layers - 1))
......@@ -1876,7 +2274,7 @@ class PairwiseConvNetwork(NetworkUnitInterface):
for i, width in enumerate(self._widths):
if self._activation == 'glu' and i in self._activation_layers:
self._kernel_shapes.append(
[width, width, self._depths[i], 2 * self._depths[i + 1]])
else:
self._kernel_shapes.append(
[width, width, self._depths[i], self._depths[i + 1]])
......@@ -1910,7 +2308,8 @@ class PairwiseConvNetwork(NetworkUnitInterface):
del context_tensor_arrays, attention_tensor # Unused.
# TODO(googleuser): Normalize the arguments to create(). 'stride'
# is unused by the recurrent network units, while 'context_tensor_arrays'
    # and 'attention_tensor_array' is unused by bulk network units.
if stride is None:
raise ValueError("PairwiseConvNetwork needs 'stride'")
......@@ -1926,8 +2325,9 @@ class PairwiseConvNetwork(NetworkUnitInterface):
sources_shape = tf.shape(source_tokens)
targets_shape = tf.shape(target_tokens)
num_steps = sources_shape[1]
with tf.control_dependencies([
tf.assert_equal(num_steps, targets_shape[2], name='num_steps_mismatch')
]):
arg1 = tf.tile(source_tokens, tf.stack([1, 1, num_steps, 1]))
arg2 = tf.tile(target_tokens, tf.stack([1, num_steps, 1, 1]))
conv = tf.concat([arg1, arg2], 3)
......@@ -1935,10 +2335,10 @@ class PairwiseConvNetwork(NetworkUnitInterface):
with tf.variable_scope('conv%d' % i, reuse=True) as scope:
if during_training:
conv = maybe_apply_dropout(conv, self._dropout[i], False)
conv = tf.nn.conv2d(
conv,
self._component.get_variable('weights'), [1, 1, 1, 1],
padding='SAME')
conv = tf.nn.bias_add(conv, self._component.get_variable('biases'))
if i in self._activation_layers:
conv = self._activation_fn(conv, name=scope.name)
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for network_units."""
......@@ -26,8 +25,6 @@ from tensorflow.python.platform import googletest
from dragnn.protos import spec_pb2
from dragnn.python import network_units
FLAGS = tf.app.flags.FLAGS
class NetworkUnitsConverterTest(test_util.TensorFlowTestCase):
......@@ -61,6 +58,7 @@ class MockComponent(object):
self.spec = component_spec
self.name = component_spec.name
self.beam_size = 1
self.num_actions = 45
self._attrs = {}
def attr(self, name):
......@@ -72,12 +70,13 @@ class MockComponent(object):
class MockMaster(object):
def __init__(self, build_runtime_graph=False):
self.spec = spec_pb2.MasterSpec()
self.hyperparams = spec_pb2.GridPoint()
self.lookup_component = {
'previous': MockComponent(self, spec_pb2.ComponentSpec())
}
self.build_runtime_graph = build_runtime_graph
class MockNetwork(object):
......@@ -167,6 +166,164 @@ class GetAttrsWithDefaultsTest(test_util.TensorFlowTestCase):
_assert_attr_is_true('TRUE')
class LstmNetworkTest(test_util.TensorFlowTestCase):
test_spec_1 = """
component {
name: 'bi_lstm'
backend { registered_name: 'TestComponent' }
fixed_feature {
name: 'words'
fml: 'words'
size: 1
embedding_dim: 32
vocabulary_size: 1079813,
}
network_unit {
registered_name: 'LSTMNetwork'
parameters {
key: "hidden_layer_sizes"
value: "128"
}
}
}
"""
test_spec_linked = """
component {
name: 'bi_lstm'
backend { registered_name: 'TestComponent' }
fixed_feature {
name: 'words'
fml: 'words'
size: 1
embedding_dim: 32
vocabulary_size: 1079813,
}
linked_feature {
name: 'lstm_h'
fml: 'bias(0)'
embedding_dim: -1
size: 1
source_component: 'bi_lstm'
source_translator: 'history'
source_layer: 'lstm_h'
}
linked_feature {
name: 'lstm_c'
fml: 'bias(0)'
embedding_dim: -1
size: 1
source_component: 'bi_lstm'
source_translator: 'history'
source_layer: 'lstm_c'
}
network_unit {
registered_name: 'LSTMNetwork'
parameters {
key: "hidden_layer_sizes"
value: "128"
}
}
}
"""
def setUp(self):
# Clear the graph and all existing variables. Otherwise, variables created
# in different tests may collide with each other.
tf.reset_default_graph()
def construct_lstm_network_unit(self, master):
"""Helper to construct a LSTMNetwork. Doesn't call create() yet."""
component = MockComponent(master, master.spec.component[0])
with tf.variable_scope('bi_lstm'):
lstm_network_unit = network_units.LSTMNetwork(component)
return lstm_network_unit
def get_context_tensor_arrays(self, lstm_network_unit):
context_tensor_arrays = []
for context_layer in lstm_network_unit.context_layers:
context_tensor_arrays.append(context_layer.create_array(1))
return context_tensor_arrays
def fixed_word_embeddings(self):
"""Helper for returning fixed embeddings, for 1 word feature."""
words_tensor = tf.constant([[1.0] * 32], dtype=tf.float32)
return [network_units.NamedTensor(words_tensor, 'words')]
def testCanCreate(self):
"""Smoke test that the create() function doesn't raise errors."""
master = MockMaster()
master.spec = spec_pb2.MasterSpec()
text_format.Parse(self.test_spec_1, master.spec)
lstm_network_unit = self.construct_lstm_network_unit(master)
with tf.variable_scope('bi_lstm', reuse=True):
lstm_network_unit.create(
self.fixed_word_embeddings(), [],
self.get_context_tensor_arrays(lstm_network_unit), None, True)
def testCanCreateLinked(self):
"""Smoke test that the create() function doesn't raise errors."""
master = MockMaster()
master.spec = spec_pb2.MasterSpec()
text_format.Parse(self.test_spec_linked, master.spec)
lstm_network_unit = self.construct_lstm_network_unit(master)
with tf.variable_scope('bi_lstm', reuse=True):
lstm_network_unit.create(
self.fixed_word_embeddings(), [],
self.get_context_tensor_arrays(lstm_network_unit), None, True)
  def testRuntimeConcatenatedMatrices(self):
"""Test generation of concatenated matrices."""
# TODO(googleuser): Make MockComponent support runtime graph generation.
master = MockMaster(build_runtime_graph=False)
master.spec = spec_pb2.MasterSpec()
text_format.Parse(self.test_spec_1, master.spec)
lstm_network_unit = self.construct_lstm_network_unit(master)
with tf.variable_scope('bi_lstm', reuse=True):
lstm_network_unit.create(
self.fixed_word_embeddings(), [],
self.get_context_tensor_arrays(lstm_network_unit), None, False)
x_to_ico = lstm_network_unit.derived_params[0]()
h_to_ico = lstm_network_unit.derived_params[1]()
ico_bias = lstm_network_unit.derived_params[2]()
# Should be the word dimension (32) to 3x the hidden dimension (128).
self.assertEqual(x_to_ico.shape, (32, 384))
self.assertEqual(x_to_ico.op.name, 'bi_lstm/x_to_ico')
# Should be the hidden dimension (128) to 3x the hidden dimension (128).
self.assertEqual(h_to_ico.shape, (128, 384))
self.assertEqual(h_to_ico.op.name, 'bi_lstm/h_to_ico')
# Should be equal to the hidden dimension (128) times 3.
self.assertEqual(ico_bias.shape, (384,))
self.assertEqual(ico_bias.op.name, 'bi_lstm/ico_bias')
  def testRuntimeConcatenatedMatricesLinked(self):
"""Test generation of concatenated matrices."""
# TODO(googleuser): Make MockComponent support runtime graph generation.
master = MockMaster(build_runtime_graph=False)
master.spec = spec_pb2.MasterSpec()
text_format.Parse(self.test_spec_linked, master.spec)
lstm_network_unit = self.construct_lstm_network_unit(master)
with tf.variable_scope('bi_lstm', reuse=True):
lstm_network_unit.create(
self.fixed_word_embeddings(), [],
self.get_context_tensor_arrays(lstm_network_unit), None, False)
x_to_ico = lstm_network_unit.derived_params[0]()
h_to_ico = lstm_network_unit.derived_params[1]()
ico_bias = lstm_network_unit.derived_params[2]()
# Should be the word dimension (32) to 3x the hidden dimension (128).
self.assertEqual(x_to_ico.shape, (32, 384))
# Should be the hidden dimension (128) to 3x the hidden dimension (128).
self.assertEqual(h_to_ico.shape, (128, 384))
# Should be equal to the hidden dimension (128) times 3.
self.assertEqual(ico_bias.shape, (384,))
class GatherNetworkTest(test_util.TensorFlowTestCase):
def setUp(self):
......@@ -214,12 +371,30 @@ class GatherNetworkTest(test_util.TensorFlowTestCase):
network = network_units.GatherNetwork(self._component)
# Construct a batch of two items with 3 and 2 steps, respectively.
indices = tf.constant(
[
# item 1
[1],
[2],
[0],
# item 2
[-1],
[0],
[-1]
],
dtype=tf.int64)
features = tf.constant(
[
# item 1
[1.0, 1.5],
[2.0, 2.5],
[3.0, 3.5],
# item 2
[4.0, 4.5],
[5.0, 5.5],
[6.0, 6.5]
],
dtype=tf.float32)
fixed_embeddings = []
linked_embeddings = [
......@@ -233,13 +408,16 @@ class GatherNetworkTest(test_util.TensorFlowTestCase):
gathered = outputs[0]
# Zeros will be substituted for index -1.
self.assertAllEqual(
gathered.eval(),
[
[2.0, 2.5], # gathered from 1
[3.0, 3.5], # gathered from 2
[1.0, 1.5], # gathered from 0
[0.0, 0.0], # gathered from -1
[4.0, 4.5], # gathered from 0
[0.0, 0.0] # gathered from -1
])
def testTrainablePadding(self):
self._component.spec.network_unit.parameters['trainable_padding'] = 'true'
......@@ -248,12 +426,30 @@ class GatherNetworkTest(test_util.TensorFlowTestCase):
network = network_units.GatherNetwork(self._component)
# Construct a batch of two items with 3 and 2 steps, respectively.
indices = tf.constant(
[
# item 1
[1],
[2],
[0],
# item 2
[-1],
[0],
[-1]
],
dtype=tf.int64)
features = tf.constant(
[
# item 1
[1.0, 1.5],
[2.0, 2.5],
[3.0, 3.5],
# item 2
[4.0, 4.5],
[5.0, 5.5],
[6.0, 6.5]
],
dtype=tf.float32)
fixed_embeddings = []
linked_embeddings = [
......@@ -299,8 +495,8 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
"""
with tf.Graph().as_default(), self.test_session() as session:
np.random.seed(4)
tensor = network_units.add_var_initialized(
'tensor', shape, 'identity', divisor=divisor, stddev=std)
session.run(tf.global_variables_initializer())
actual = session.run(tensor)
self.assertAllClose(actual, expected, 1e-8, 1e-8)
......@@ -345,13 +541,13 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
divisor = 3.
std = 1e-3
shape = (6, 3)
    m = divisor / shape[-1]
    expected = [[m, 4.99951362e-04, -9.95908980e-04],
                [m, -4.18301526e-04, -1.58457726e-03],
                [-6.47706795e-04, m, 3.32250027e-04],
                [-1.14747661e-03, m, -8.79869258e-05],
                [4.25072387e-04, 3.32253141e-04, m],
                [3.50997143e-04, -6.06887275e-04, m]]
self.IdentityInitializerHelper(shape, expected, divisor, std)
def testIdentityInitializerNonSquareRank2FirstDimSmaller(self):
......@@ -368,14 +564,14 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
std = 1e-3
shape = (2, 2, 6)
m = divisor / shape[-1]
    expected = [[[5.05617063e-05, 4.99951362e-04, -9.95908980e-04,
                  6.93598529e-04, -4.18301526e-04, -1.58457726e-03],
                 [-6.47706795e-04, 5.98575163e-04, 3.32250027e-04,
                  -1.14747661e-03, 6.18669670e-04, -8.79869258e-05]],
                [[m, m, m, 3.50997143e-04, -6.06887275e-04, 1.54697930e-03],
                 [7.23341596e-04, 4.61355667e-05, -9.82991653e-04, m, m, m]]]
self.IdentityInitializerHelper(shape, expected, divisor, std)
def testIdentityInitializerNonSquareRank4(self):
......@@ -383,40 +579,110 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
std = 1e-3
shape = (2, 3, 2, 8)
m = divisor / float(shape[-1])
    expected = [
        [[[5.05617063e-05, 4.99951362e-04, -9.95908980e-04, 6.93598529e-04,
           -4.18301526e-04, -1.58457726e-03, -6.47706795e-04, 5.98575163e-04],
          [3.32250027e-04, -1.14747661e-03, 6.18669670e-04, -8.79869258e-05,
           4.25072387e-04, 3.32253141e-04, -1.15681626e-03, 3.50997143e-04]],
         [[-6.06887275e-04, 1.54697930e-03, 7.23341596e-04, 4.61355667e-05,
           -9.82991653e-04, 5.44327377e-05, 1.59892938e-04, -1.20894820e-03],
          [2.22336012e-03, 3.94295203e-04, 1.69235771e-03, -1.11281220e-03,
           1.63574750e-03, -1.36096554e-03, -6.51225855e-04, 5.42451337e-04]],
         [[4.80062481e-05, -2.35807360e-03, -1.10558409e-03, 8.37836356e-04,
           2.08787085e-03, 9.14840959e-04, -2.76203355e-04, 7.96511886e-04],
          [-1.14379858e-03, 5.09919773e-04, -1.34746032e-03, -9.36010019e-06,
           -1.30704633e-04, 8.02086608e-04, -3.02963977e-04, 1.20200263e-03]]],
        [[[-1.96745284e-04, 8.36528721e-04, 7.86602264e-04, -1.84087583e-03,
           3.75474883e-05, 3.59280530e-05, -7.78739923e-04, 1.79410708e-04],
          [-1.45553437e-03, 5.56185201e-04, 5.09778853e-04, 3.00445536e-04,
           2.47658417e-03, 3.52343399e-04, 6.74710027e-05, -7.32264714e-04]],
         [[m, m, m, m, 1.58469542e-04, 1.99008291e-03, 1.16418756e-03,
           2.42660157e-04],
          [1.37992005e-03, -5.45587063e-05, 7.95233937e-04, 1.90899627e-05,
           m, m, m, m]],
         [[-1.09712186e-03, -5.28196048e-04, -2.37977528e-03, -6.07683673e-04,
           -1.07529014e-03, 2.02240516e-03, -5.64875314e-04, -1.54292909e-03],
          [8.70841788e-04, -1.75210531e-04, 4.86030076e-05, 1.88646198e-04,
           2.09313483e-04, -3.74444906e-04, 9.54698597e-04, 5.23247640e-04]]]]
self.IdentityInitializerHelper(shape, expected, divisor, std)
class FeatureIdDropoutTest(test_util.TensorFlowTestCase):
def setUp(self):
# Clear the graph and all existing variables. Otherwise, variables created
# in different tests may collide with each other.
tf.reset_default_graph()
def testApplyFeatureIdDropout(self):
channel = spec_pb2.FixedFeatureChannel()
text_format.Parse("""
vocabulary_size: 10
dropout_id: 8
dropout_keep_probability: [0.0, 0.25, 0.5, 0.75, 1.0]
""", channel)
with tf.Graph().as_default(), self.test_session():
with tf.variable_scope('test_scope'):
ids = tf.constant([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=tf.int64)
weights = tf.constant([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=tf.float32)
tensors = network_units.apply_feature_id_dropout(ids, weights, channel)
perturbed_ids = tensors[0].eval()
tf.logging.info('perturbed_ids = %s', perturbed_ids)
# Given the dropout_keep_probability values specified above:
# * ID 0 is never kept.
# * IDs 1-3 are randomly kept with varying probability.
# * IDs 4-9 are always kept.
# To avoid non-determinism, we only check for specific feature IDs at
# the extremes (never/always kept). Behavior in between the extremes
# should interpolate between the two extremes.
self.assertEqual(perturbed_ids[0], channel.dropout_id)
self.assertTrue(perturbed_ids[1] in (1, channel.dropout_id))
self.assertTrue(perturbed_ids[2] in (2, channel.dropout_id))
self.assertTrue(perturbed_ids[3] in (3, channel.dropout_id))
self.assertAllEqual(perturbed_ids[4:], [4, 5, 6, 7, 8, 9])
def testApplyFeatureIdDropoutSkip(self):
channel = spec_pb2.FixedFeatureChannel()
text_format.Parse("""
vocabulary_size: 2
dropout_id: 2
dropout_keep_probability: [0.0, 1.0]
""", channel)
with tf.Graph().as_default(), self.test_session():
with tf.variable_scope('test_scope'):
ids = tf.constant([0, 1], dtype=tf.int64)
weights = tf.constant([1, 1], dtype=tf.float32)
tensors = network_units.apply_feature_id_dropout(ids, weights, channel)
perturbed_ids, perturbed_weights = tensors[0].eval(), tensors[1].eval()
tf.logging.info('perturbed_ids = %s', perturbed_ids)
tf.logging.info('perturbed_weights = %s', perturbed_weights)
# Given the dropout_keep_probability values specified above:
      # * ID 0 is never kept; its weight is set to 0.
      # * ID 1 is always kept.
# To avoid non-determinism, we only check for specific feature IDs at
# the extremes (never/always kept).
self.assertEqual(perturbed_ids[0], channel.dropout_id)
self.assertEqual(perturbed_weights[0], 0)
self.assertEqual(perturbed_ids[1], 1)
self.assertEqual(perturbed_weights[1], 1)
if __name__ == '__main__':
googletest.main()
component {
name: "convnet"
transition_system {
registered_name: "shift-only"
parameters {
key: "parser_skip_deterministic"
value: "false"
}
}
resource {
name: "lexifuse-repository"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexifuse-repository/repository"
file_format: "repository"
record_format: "entity"
}
}
resource {
name: "brain-parser-model"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/brain-parser-model"
file_format: "model"
record_format: ""
}
}
resource {
name: "transition-system-data"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/transition-system-data"
file_format: "model"
record_format: ""
}
}
resource {
name: "words-embedding-input"
part {
file_pattern: "/readahead/512M/cns/lg-d/home/saft/corpora/word-embeddings/en/word2vec/1billion/word2vec-embedding-bi-true-32.sst"
file_format: "sstable"
record_format: "dist_belief.TokenEmbedding"
}
}
resource {
name: "words-vocab-input"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/vocab"
file_format: "text"
record_format: ""
}
}
resource {
name: "component-builder-module"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.component-builder-module/module-spec"
file_format: "pbtxt"
record_format: ""
}
}
fixed_feature {
name: "char_ngram"
fml: "input.token.lexifuse-char-ngram"
embedding_dim: 16
vocabulary_size: 16500
size: 1
predicate_map: "hashed"
}
fixed_feature {
name: "words"
fml: "input.word"
embedding_dim: 32
vocabulary_size: 39395
size: 1
predicate_map: "hashed"
}
network_unit {
registered_name: "IdentityNetwork"
}
backend {
registered_name: "ParserComponent"
}
num_actions: 1
attention_component: ""
component_builder {
registered_name: "components.common.dragnn.python.conv_component.ConvComponentBuilder"
parameters {
key: "depths"
value: "48,128"
}
parameters {
key: "output_dims"
value: "45"
}
parameters {
key: "widths"
value: "7"
}
}
training_beam_size: 1
inference_beam_size: 1
}
component {
name: "tagger"
transition_system {
registered_name: "tagger"
parameters {
key: "parser_skip_deterministic"
value: "false"
}
}
resource {
name: "tag-map"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexicon/tag-map"
file_format: "text"
record_format: ""
}
}
resource {
name: "lexifuse-repository"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexifuse-repository/repository"
file_format: "repository"
record_format: "entity"
}
}
resource {
name: "brain-parser-model"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.model-init/brain-parser-model"
file_format: "model"
record_format: ""
}
}
resource {
name: "transition-system-data"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.model-init/transition-system-data"
file_format: "model"
record_format: ""
}
}
resource {
name: "component-builder-module"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.component-builder-module/module-spec"
file_format: "pbtxt"
record_format: ""
}
}
linked_feature {
name: "convnet"
fml: "input.focus"
embedding_dim: -1
size: 1
source_component: "convnet"
source_translator: "identity"
source_layer: "conv0_logits"
}
network_unit {
registered_name: "IdentityNetwork"
}
backend {
registered_name: "ParserComponent"
}
num_actions: 45
attention_component: ""
component_builder {
registered_name: "bulk_component.BulkAnnotatorComponentBuilder"
}
training_beam_size: 1
inference_beam_size: 1
}
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for supporting the DRAGNN runtime from the TF side."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import re
import tensorflow as tf
from dragnn.python import network_units
from syntaxnet.util import check
def add_hooks(component, cell_subgraph_spec):
"""Adds "hook" nodes to the graph, for use by the runtime.
The runtime hook nodes are not on the path to any required output, and will
not be called when running TF-based DRAGNN. As long as the TF graph is not
pruned, however, the DRAGNN runtime can call them.
Runtime hook nodes can perform any TF computation. Possible uses include:
* Applying stable names to existing tensors (e.g., via tf.identity()).
* Converting variable data from a TF-friendly or training-friendly format
into a runtime-friendly format.
NB: There are several restrictions on the context in which this function is
called. In brief, call ComponentBuilderBase._add_runtime_hooks() at the top
of each ComponentBuilderSubclass.build_*() method. In detail, this:
* Must be called in the variable scope of the |component|, so variable
references in component.get_variable() work.
* Must be called, possibly transitively, from one of the |component|'s
build_*() methods, so MasterBuilder.read_from_avg is set properly for
component.get_variable().
* Must not be called from within a tf.while_loop(), or the hook nodes will
not work. In particular, NetworkUnitInterface.create() is called from a
tf.while_loop() in DynamicComponentBuilder.
Args:
component: Component for which to add hooks.
cell_subgraph_spec: CellSubgraphSpec for which to add hooks.
"""
for channel_id, feature_spec in enumerate(component.spec.linked_feature):
if feature_spec.embedding_dim != -1:
_add_hooks_for_linked_embedding_matrix(component, channel_id)
for channel_id, feature_spec in enumerate(component.spec.fixed_feature):
if feature_spec.embedding_dim != -1:
_add_hooks_for_fixed_embedding_matrix(component, channel_id)
for params in component.network.params:
_add_hooks_for_trainable_params(component, params)
for parameter_getter in component.network.derived_params:
_add_hooks_for_derived_parameter(parameter_getter)
_add_hook_node(
tf.constant(cell_subgraph_spec.SerializeToString(), tf.string),
'{}/EXPORT/CellSubgraphSpec'.format(component.name))
def _blocked_and_dtype_transformations(tensor):
"""Yields variants of a tensor, for standard blocking/dtype variants.
Args:
tensor (tf.Tensor): Input tensor.
Yields:
(modified_tensor, suffix) pairs, where `modified_tensor` is a transformed
version of the input, and `suffix` is a string like "/blocked32".
"""
for blocking_level in (32, 48):
blocked = make_padded_blocked_matrix(tensor, blocking_level)
bfloat16_blocked = tf.to_bfloat16(bfloat16_permutation(blocked))
yield blocked, '/blocked{}'.format(blocking_level)
yield bfloat16_blocked, '/blocked{}/bfloat16'.format(blocking_level)
def _add_hooks_for_linked_embedding_matrix(component, channel_id):
"""Adds runtime hooks for a linked embedding matrix.
The computation performed by network_units.pass_through_embedding_matrix() is
equivalent to the following:
for i in range(stride):
if step_idx[i] == -1:
outputs[i,:] = out_of_bounds_vector
else:
outputs[i,:] = tf.matmul(act_block[i,:], weight_matrix)
The implementation uses clever arithmetic to do this in one matmul per batch.
Specifically, the weight_matrix is extended with the out_of_bounds_vector and
each activation vector is extended with a 0/1 out-of-bounds indicator. Then,
multiplying the two suffices, assuming that act_block[i,:] is set to zero for
out-of-bounds links.
While this works well for training and high-throughput batched computation, it
isn't the best for the runtime:
* Appending a 0/1 indicator to the input activation vector requires a copy.
Ideally, we could use the input activation vector by reference alone.
    * In order to access the |out_of_bounds_vector| as a contiguous array,
the runtime must load the linked embedding matrix in row-major format,
which may not be the fastest format for arithmetic.
* The dimensions of the extended-by-1 matrix and vector are likely to be
pessimal. Most dimensions are specified as 2^n, and adding one element
produces maximal padding on the trailing elements, which in turn wastes
memory, reduces cache utilization, etc.
Therefore, in the runtime we split the linked embedding matrix into a separate
weight matrix and out-of-bounds vector.
Args:
component: Component for which to add hooks.
channel_id: Linked embedding channel for which to add hooks.
"""
var_name = network_units.linked_embeddings_name(channel_id)
extended_matrix = component.get_variable(var_name)
extended_num_rows = tf.shape(extended_matrix)[0]
matrix, vector = tf.split(extended_matrix, [extended_num_rows - 1, 1], 0)
transposed = tf.transpose(matrix)
hook_name = functools.partial(_get_hook_name, component, var_name)
_add_hook_node(matrix, hook_name('/weights'))
_add_hook_node(transposed, hook_name('/weights/transposed'))
# Add blocked versions of the matrix and its transpose.
for blocked, blocked_suffix in _blocked_and_dtype_transformations(matrix):
blocked_name = hook_name('/weights/matrix' + blocked_suffix)
_add_hook_node(blocked, blocked_name)
for blocked, blocked_suffix in _blocked_and_dtype_transformations(transposed):
blocked_name = hook_name('/weights/transposed' + blocked_suffix)
_add_hook_node(blocked, blocked_name)
# Add shape and out-of-bounds information.
_add_hook_node(tf.shape(transposed), hook_name('/weights/transposed/shape'))
_add_hook_node(vector, _get_hook_name(component, var_name, '/out_of_bounds'))
def _add_hooks_for_fixed_embedding_matrix(component, channel_id):
"""Adds runtime hooks for a fixed embedding matrix.
The hooks remove the last row from the embedding matrix. The extra row was
probably intended for out-of-vocabulary items, but those are handled in the
feature system and the extra row is never used.
Args:
component: Component for which to add hooks.
channel_id: Fixed embedding channel for which to add hooks.
"""
var_name = network_units.fixed_embeddings_name(channel_id)
extended_matrix = component.get_variable(var_name)
extended_num_rows = tf.shape(extended_matrix)[0]
matrix = tf.slice(extended_matrix, [0, 0], [extended_num_rows - 1, -1])
# TODO(googleuser): If the extra row is removed from the variable itself, remove
# the tf.slice() and point the hook directly at the variable.
_add_hook_node(matrix, _get_hook_name(component, var_name, '/trimmed'))
def _add_hooks_for_derived_parameter(getter):
"""Adds hooks for derived parameters.
Derived parameters are typically slight format modifications of regular
parameters, exposed because doing the computation in Python is more convenient
than as VariableStore wrappers.
Args:
getter: Function which, when called, will return the derived tensor.
"""
parameter = getter()
full_name = parameter.op.name
def _hook_name(base_name):
"""Returns a hook node name constructed from a base name."""
return full_name + base_name
if parameter.shape.ndims != 2:
tf.logging.info('Not adding matrix hooks for derived parameter %s',
full_name)
return
_add_hook_node(tf.transpose(parameter), _hook_name('/transposed'))
for blocked, blocked_suffix in _blocked_and_dtype_transformations(parameter):
_add_hook_node(blocked, _hook_name('/matrix' + blocked_suffix))
def _add_hooks_for_trainable_params(component, params):
"""Adds runtime hooks for a variable of trainable parameters.
Ignores parameters that are not statically-deducible as matrices.
Args:
component: Component for which to add hooks.
params: Variable for which to add hooks.
"""
full_name = params.op.name
matrix = component.get_variable(var_params=params)
# Only add hooks for tensors that are statically-deducible as matrices.
if params.shape.ndims != 2:
tf.logging.info('Not adding hooks for trainable params %s', full_name)
return
# Infer the suffix to append to variable names, if any, based on whether the
# possibly-averaged |matrix| is named differently than the |params|.
suffix = re.sub('^' + re.escape(full_name), '', matrix.op.name)
check.Ne(suffix, matrix.op.name,
'Failed to find suffix for params %s' % full_name)
def _hook_name(base_name):
"""Returns a hook node name constructed from a base name."""
return full_name + base_name + suffix
# Add the matrix and its transpose.
transposed = tf.transpose(matrix)
_add_hook_node(matrix, _hook_name('/matrix'))
_add_hook_node(transposed, _hook_name('/transposed'))
# Add blocked versions of the matrix and its transpose.
for blocked, blocked_suffix in _blocked_and_dtype_transformations(matrix):
_add_hook_node(blocked, _hook_name('/matrix' + blocked_suffix))
for blocked, blocked_suffix in _blocked_and_dtype_transformations(transposed):
_add_hook_node(blocked, _hook_name('/transposed' + blocked_suffix))
# Also add hooks for the original shapes, which are obscured by padding.
_add_hook_node(tf.shape(matrix), _hook_name('/matrix/shape'))
_add_hook_node(tf.shape(transposed), _hook_name('/transposed/shape'))
def make_padded_blocked_matrix(matrix, block_size):
"""Converts a matrix to padded column-blocked format.
For example, given a [64,127] matrix and block_size=16, this function returns
an [8,64,16] tensor where the 8 inner sub-matrices, when concatenated left to
right, re-constitute the original matrix. Note that the 8th sub-matrix has a
final column of padding.
Args:
matrix: The matrix to convert.
block_size: The number of columns per block.
Returns:
Padded column-blocked matrix.
"""
shape = tf.shape(matrix)
num_rows = shape[0]
num_columns = shape[1]
# Compute the amount of padding and resulting number of blocks.
last_block_size = num_columns % block_size
padding_size = (block_size - last_block_size) % block_size
num_blocks = (num_columns + padding_size) // block_size
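  # E.g., for the [64,127] matrix and block_size=16 from the docstring:
  # last_block_size = 15, padding_size = 1, and num_blocks = 8.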
# Somehow the obvious approach based on tf.split() and tf.stack() doesn't work
# (seems that the number of splits needs to be statically-known), but this
# alternative based on tf.transpose() and tf.reshape() does. Continuing the
# example from the docstring...
padded = tf.pad(matrix, [[0, 0], [0, padding_size]]) # [64,127] => [64,128]
transposed = tf.transpose(padded) # => [128,64]
blocked = tf.reshape(transposed, [num_blocks, block_size,
num_rows]) # => [8,16,64]
return tf.transpose(blocked, [0, 2, 1]) # => [8,64,16]
def bfloat16_permutation(tensor):
"""Permutes values in the last dimension of a tensor.
This permutation is used so that we can directly use unpacklo/unpackhi AVX2
instructions on the matrix coefficients. These unpacking instructions
effectively permute the data. See FastUnpackPermutation() and
AvxFloatVecArray::Load(const TruncatedFloat16 *) in avx_vector_array.h for
more details.
Args:
tensor: Blocked matrix, the result of make_padded_blocked_matrix(). Must
have its last dimension a multiple of 16.
Returns:
Permuted matrix, suitable for calling tf.to_bfloat16() on. For testing
convenience we don't do so in this method.
Raises:
ValueError: If the matrix's block dimension is not a multiple of 16.
"""
orig_shape = tensor.shape
if tensor.shape[-1] % 16 != 0:
raise ValueError('Bad block dimension, must be divisible by 16')
permutation = [0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15]
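  # E.g., with a last dimension of 16, the columns are reordered so that the
  # two 8-element halves are interleaved in groups of four, matching the
  # layout produced by the AVX2 unpack instructions mentioned above.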
indices = tf.constant(
[16 * (i // 16) + permutation[i % 16] for i in xrange(orig_shape[-1])])
return tf.gather(tensor, indices, axis=len(orig_shape) - 1)
def _get_hook_name(component, variable_name, suffix):
"""Builds the name of a hook node.
Specifically, the name of the hook node is:
<component.name>/<variable_name><suffix><remainder>
where <remainder> is whatever follows <variable_name> in the name of the op
that produces the named variable. Recall that component.get_variable() may
return either the original variable or its moving average. These might have
names like:
foo_component/bar_variable
foo_component/bar_variable/ExponentialMovingAverage
In the examples above, the <remainder> is "" for the original variable and
"/ExponentialMovingAverage" for its moving average. Calling this function
with suffix="/baz_suffix" in either case would add hook nodes named:
foo_component/bar_variable/baz_suffix
foo_component/bar_variable/baz_suffix/ExponentialMovingAverage
Note that the suffix is inserted after the variable name, not necessarily at
the end of the entire op name.
Args:
component: Component that the hook node belongs to.
variable_name: Variable that the hook node name is based on.
suffix: Suffix to append to the variable name.
Returns:
Name of the hook node.
"""
variable = component.get_variable(variable_name)
full_name = variable.op.name
prefix = component.name + '/' + variable_name
hook_name = re.sub('^' + re.escape(prefix), prefix + suffix, full_name)
# If re.sub() did not match anything, it returns the unmodified input (i.e.,
# |full_name|). Enforce that some change was made.
check.Ne(
full_name, hook_name,
'Failed to match expected variable prefix "{}" in variable "{}"'.format(
prefix, full_name))
return hook_name
def _add_hook_node(tensor, fully_qualified_name):
"""Adds a hook node that outputs a tensor with a fully-qualified name."""
# Since the name is fully-qualified, insert the hook node into the top-level
# name scope.
with tf.name_scope(None):
tf.identity(tensor, name=fully_qualified_name)
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the runtime support utils."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from dragnn.protos import export_pb2
from dragnn.protos import spec_pb2
from dragnn.python import network_units
from dragnn.python import runtime_support
class MockNetwork(object):
"""Mock for tests."""
def __init__(self):
self.params = [
tf.get_variable('rank2', [64, 127], tf.float32),
tf.get_variable('rank3', [64, 127, 250], tf.float32)
]
self.derived_params = [
self._fake_derived_vector, self._fake_derived_parameter
]
def _fake_derived_vector(self):
value = tf.constant([1, 2, 3], dtype=tf.float32)
with tf.name_scope(None):
return tf.identity(value, name='derived/vector')
def _fake_derived_parameter(self):
# Use absolute scoping to put the derived parameter in the same namespace.
base_name = self.params[0].op.name.rsplit('/', 1)[0]
with tf.name_scope(None):
return tf.concat(
[self.params[0], self.params[0]],
axis=0,
name='{}/derived'.format(base_name))
class MockComponent(object):
"""Mock for tests."""
def __init__(self):
self.name = 'test_component'
self.spec = spec_pb2.ComponentSpec()
with tf.variable_scope(self.name):
self.network = MockNetwork()
def get_variable(self, var_name=None, var_params=None):
if var_name:
return tf.get_variable(var_name)
else:
return var_params
class RuntimeSupportTest(tf.test.TestCase):
"""Testing rig."""
def testAddLinkedHooks(self):
component = MockComponent()
link0 = component.spec.linked_feature.add()
link1 = component.spec.linked_feature.add()
link0.embedding_dim = -1 # direct link
link1.embedding_dim = 32 # transformed link
link0_matrix_name = network_units.linked_embeddings_name(0)
link1_matrix_name = network_units.linked_embeddings_name(1)
with self.test_session() as session:
graph = session.graph
# Create linked embedding matrices. Only channel 1 uses one.
with tf.variable_scope(component.name):
tf.get_variable(link1_matrix_name, shape=[64 + 1, 32], dtype=tf.float32)
# Add hooks. This should ignore channel 0 and add hooks for channel 1.
with tf.variable_scope(component.name, reuse=True):
runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())
# Check that no hooks were added for channel 0.
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/weights:0'.format(component.name, link0_matrix_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name('{}/{}/weights/transposed:0'.format(
component.name, link0_matrix_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name('{}/{}/weights/transposed/shape:0'.format(
component.name, link0_matrix_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name('{}/{}/weights/transposed/blocked32:0'.format(
component.name, link0_matrix_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name('{}/{}/weights/transposed/blocked48:0'.format(
component.name, link0_matrix_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/out_of_bounds:0'.format(component.name, link0_matrix_name))
# Get the hooks added for channel 1.
weights = graph.get_tensor_by_name(
'{}/{}/weights:0'.format(component.name, link1_matrix_name))
transposed = graph.get_tensor_by_name('{}/{}/weights/transposed:0'.format(
component.name, link1_matrix_name))
transposed_shape = graph.get_tensor_by_name(
'{}/{}/weights/transposed/shape:0'.format(component.name,
link1_matrix_name))
transposed32 = graph.get_tensor_by_name(
'{}/{}/weights/transposed/blocked32:0'.format(component.name,
link1_matrix_name))
transposed48 = graph.get_tensor_by_name(
'{}/{}/weights/transposed/blocked48:0'.format(component.name,
link1_matrix_name))
out_of_bounds = graph.get_tensor_by_name(
'{}/{}/out_of_bounds:0'.format(component.name, link1_matrix_name))
# Check dimensions of the hooks.
tf.global_variables_initializer().run()
self.assertAllEqual(tf.shape(weights).eval(), [64, 32])
self.assertAllEqual(tf.shape(transposed).eval(), [32, 64])
self.assertAllEqual(transposed_shape.eval(), [32, 64])
self.assertAllEqual(tf.shape(transposed32).eval(), [2, 32, 32])
self.assertAllEqual(tf.shape(transposed48).eval(), [2, 32, 48])
self.assertAllEqual(tf.shape(out_of_bounds).eval(), [1, 32])
def testAddFixedHooks(self):
component = MockComponent()
fixed0 = component.spec.fixed_feature.add()
fixed1 = component.spec.fixed_feature.add()
fixed0.embedding_dim = -1
fixed1.embedding_dim = 32
fixed0.vocabulary_size = 100
fixed1.vocabulary_size = 1000
fixed0_matrix_name = network_units.fixed_embeddings_name(0)
fixed1_matrix_name = network_units.fixed_embeddings_name(1)
with self.test_session() as session:
graph = session.graph
# Create fixed embedding matrices. Only channel 1 uses one.
with tf.variable_scope(component.name):
tf.get_variable(
fixed1_matrix_name, shape=[1000 + 1, 32], dtype=tf.float32)
# Add hooks. This should ignore channel 0 and add hooks for channel 1.
with tf.variable_scope(component.name, reuse=True):
runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())
# Check that no hooks were added for channel 0.
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/trimmed:0'.format(component.name, fixed0_matrix_name))
# Get the hooks added for channel 1.
trimmed = graph.get_tensor_by_name(
'{}/{}/trimmed:0'.format(component.name, fixed1_matrix_name))
# Check dimensions of the hooks.
tf.global_variables_initializer().run()
self.assertAllEqual(tf.shape(trimmed).eval(), [1000, 32])
def testAddParamsHooks(self):
component = MockComponent()
rank2_name = 'rank2'
rank3_name = 'rank3'
with self.test_session() as session:
graph = session.graph
# Add hooks. This should add hooks for all rank-2 params.
with tf.variable_scope(component.name, reuse=True):
runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())
# Check that no hooks were added for the rank-3 params.
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/matrix:0'.format(component.name, rank3_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/transposed:0'.format(component.name, rank3_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/matrix/blocked32:0'.format(component.name, rank3_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/matrix/blocked48:0'.format(component.name, rank3_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/transposed/blocked32:0'.format(component.name, rank3_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/transposed/blocked48:0'.format(component.name, rank3_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/matrix/shape:0'.format(component.name, rank3_name))
with self.assertRaises(KeyError):
graph.get_tensor_by_name(
'{}/{}/transposed/shape:0'.format(component.name, rank3_name))
# Get the hooks added for each variable.
matrix = graph.get_tensor_by_name(
'{}/{}/matrix:0'.format(component.name, rank2_name))
transposed = graph.get_tensor_by_name(
'{}/{}/transposed:0'.format(component.name, rank2_name))
matrix32 = graph.get_tensor_by_name(
'{}/{}/matrix/blocked32:0'.format(component.name, rank2_name))
matrix48 = graph.get_tensor_by_name(
'{}/{}/matrix/blocked48:0'.format(component.name, rank2_name))
transposed32 = graph.get_tensor_by_name(
'{}/{}/transposed/blocked32:0'.format(component.name, rank2_name))
transposed48 = graph.get_tensor_by_name(
'{}/{}/transposed/blocked48:0'.format(component.name, rank2_name))
matrix_shape = graph.get_tensor_by_name(
'{}/{}/matrix/shape:0'.format(component.name, rank2_name))
transposed_shape = graph.get_tensor_by_name(
'{}/{}/transposed/shape:0'.format(component.name, rank2_name))
# Check dimensions of the hooks.
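      # Blocked hooks split the column dimension into blocks of 32 or 48,
      # zero-padding the final block, so 127 columns become 4 blocks of 32
      # or 3 blocks of 48.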
tf.global_variables_initializer().run()
self.assertAllEqual(tf.shape(matrix).eval(), [64, 127])
self.assertAllEqual(tf.shape(transposed).eval(), [127, 64])
self.assertAllEqual(matrix_shape.eval(), [64, 127])
self.assertAllEqual(transposed_shape.eval(), [127, 64])
self.assertAllEqual(tf.shape(matrix32).eval(), [4, 64, 32])
self.assertAllEqual(tf.shape(matrix48).eval(), [3, 64, 48])
self.assertAllEqual(tf.shape(transposed32).eval(), [2, 127, 32])
self.assertAllEqual(tf.shape(transposed48).eval(), [2, 127, 48])
def testAddDerivedParamHooks(self):
component = MockComponent()
derived_name = 'derived'
with self.test_session() as session:
graph = session.graph
# Add hooks.
with tf.variable_scope(component.name, reuse=True):
runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())
session.run(tf.global_variables_initializer())
# Get hooks for the derived vector.
vector = graph.get_tensor_by_name('derived/vector:0')
self.assertEqual(vector.shape, (3,))
# Get the hooks for the derived variable.
matrix = graph.get_tensor_by_name(
'{}/{}/matrix/blocked32:0'.format(component.name, derived_name))
self.assertAllEqual(tf.shape(matrix).eval(), [4, 128, 32])
# Check the bfloat16 version. It should have the same shape.
bfloat16_matrix = graph.get_tensor_by_name(
'{}/{}/matrix/blocked32/bfloat16:0'.format(component.name,
derived_name))
self.assertAllEqual(tf.shape(bfloat16_matrix).eval(), [4, 128, 32])
def testMakePaddedBlockedMatrix(self):
with self.test_session():
matrix = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15],
[16, 17, 18, 19, 20]]
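      # With block size 2, the 4x5 matrix splits into three 4x2 column
      # blocks; the final block is padded with zeros to full width.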
expected_blocked = [[[1, 2], [6, 7], [11, 12],
[16, 17]], [[3, 4], [8, 9], [13, 14], [18, 19]],
[[5, 0], [10, 0], [15, 0], [20, 0]]]
matrix = tf.constant(matrix, tf.float32)
actual_blocked = runtime_support.make_padded_blocked_matrix(matrix, 2)
self.assertAllEqual(actual_blocked.eval(), expected_blocked)
def testBfloat16Permutation(self):
with self.test_session():
matrix = [list(range(16))]
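      # Within each run of 16 values, the permutation keeps the first and
      # last groups of four in place and swaps the two middle groups.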
expected_permuted = [[
0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15
]]
matrix = tf.constant(matrix, tf.float32)
actual_permuted = runtime_support.bfloat16_permutation(matrix)
self.assertAllEqual(actual_permuted.eval(), expected_permuted)
def testLargerBfloat16Permutation(self):
with self.test_session() as session:
matrix = tf.random_uniform((3, 4, 32))
permuted = runtime_support.bfloat16_permutation(matrix)
matrix, actual_permuted = session.run([matrix, permuted])
      # Spot-check representative items: within each aligned run of 16 values,
      # the permutation fixes the first and last groups of four and swaps the
      # two middle groups.
self.assertEqual(matrix[0, 0, 0], actual_permuted[0, 0, 0])
self.assertEqual(matrix[0, 0, 1], actual_permuted[0, 0, 1])
self.assertEqual(matrix[1, 1, 16], actual_permuted[1, 1, 16])
self.assertEqual(matrix[2, 0, 4], actual_permuted[2, 0, 8])
self.assertEqual(matrix[2, 0, 5], actual_permuted[2, 0, 9])
self.assertEqual(matrix[2, 1, 8], actual_permuted[2, 1, 4])
self.assertEqual(matrix[2, 1, 8 + 16], actual_permuted[2, 1, 4 + 16])
def testAddCellSubgraphSpecHook(self):
component = MockComponent()
cell = export_pb2.CellSubgraphSpec()
cell.input.add(
name='feature',
tensor='feature_tensor',
type=export_pb2.CellSubgraphSpec.Input.TYPE_FEATURE)
cell.input.add(
name='recurrent',
tensor='recurrent_tensor',
type=export_pb2.CellSubgraphSpec.Input.TYPE_RECURRENT)
cell.output.add(name='layer_0', tensor='layer_0_tensor')
cell.output.add(name='logits', tensor='logits_tensor')
with self.test_session() as session:
graph = session.graph
# Add hooks for the cell constructed above.
with tf.variable_scope(component.name, reuse=True):
runtime_support.add_hooks(component, cell)
# Get the hook containing the wire-format proto.
cell_wire_format = graph.get_tensor_by_name(
'{}/EXPORT/CellSubgraphSpec:0'.format(component.name))
# Check that the hook matches the cell.
tf.global_variables_initializer().run()
self.assertEqual(cell_wire_format.eval(), cell.SerializeToString())
if __name__ == '__main__':
tf.test.main()
......@@ -16,30 +16,19 @@
import os
import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest
from dragnn.python import dragnn_ops
from dragnn.python import sentence_io
from syntaxnet import sentence_pb2
FLAGS = tf.app.flags.FLAGS
def setUpModule():
if not hasattr(FLAGS, 'test_srcdir'):
FLAGS.test_srcdir = ''
if not hasattr(FLAGS, 'test_tmpdir'):
FLAGS.test_tmpdir = tf.test.get_temp_dir()
from syntaxnet import test_flags
class ConllSentenceReaderTest(test_util.TensorFlowTestCase):
class ConllSentenceReaderTest(tf.test.TestCase):
def setUp(self):
# This dataset contains 54 sentences.
self.filepath = os.path.join(
FLAGS.test_srcdir,
test_flags.source_root(),
'syntaxnet/testdata/mini-training-set')
self.batch_size = 20
......@@ -82,4 +71,4 @@ class ConllSentenceReaderTest(test_util.TensorFlowTestCase):
if __name__ == '__main__':
googletest.main()
tf.test.main()
......@@ -15,7 +15,6 @@
"""Utils for building DRAGNN specs."""
from six.moves import xrange
import tensorflow as tf
from dragnn.protos import spec_pb2
......@@ -110,7 +109,9 @@ class ComponentSpecBuilder(object):
if transition_spec.registered_name == 'arc-standard':
return 'shift-reduce-step'
if transition_spec.registered_name in ('shift-only', 'tagger'):
if transition_spec.registered_name in ('shift-only', 'tagger', 'morpher',
'lm-transitions', 'dependency-label',
'category'):
if 'left_to_right' in transition_spec.parameters:
if transition_spec.parameters['left_to_right'] == 'false':
return 'reverse-token'
......
......@@ -27,15 +27,6 @@ from dragnn.python import spec_builder
from syntaxnet import parser_trainer
FLAGS = tf.app.flags.FLAGS
def setUpModule():
if not hasattr(FLAGS, 'test_srcdir'):
FLAGS.test_srcdir = ''
if not hasattr(FLAGS, 'test_tmpdir'):
FLAGS.test_tmpdir = tf.test.get_temp_dir()
class SpecBuilderTest(tf.test.TestCase):
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions to build DRAGNN MasterSpecs and schedule model training.
Provides functions to finish a MasterSpec, building required lexicons for it and
......@@ -23,13 +22,12 @@ import random
import tensorflow as tf
from six.moves import xrange
from tensorflow.core.framework.summary_pb2 import Summary
from tensorflow.python.framework import errors
from tensorflow.python.platform import gfile
flags = tf.app.flags
FLAGS = flags.FLAGS
from syntaxnet.util import check
def calculate_component_accuracies(eval_res_values):
......@@ -59,7 +57,9 @@ def annotate_dataset(sess, annotator, eval_corpus):
end = min(start + batch_size, len(eval_corpus))
serialized_annotations = sess.run(
annotator['annotations'],
feed_dict={annotator['input_batch']: eval_corpus[start:end]})
feed_dict={
annotator['input_batch']: eval_corpus[start:end]
})
assert len(serialized_annotations) == end - start
processed.extend(serialized_annotations)
tf.logging.info('Done. Produced %d annotations', len(processed))
......@@ -81,16 +81,60 @@ def get_summary_writer(tensorboard_dir):
return summary_writer
def generate_target_per_step_schedule(pretrain_steps, train_steps):
"""Generates a sampled training schedule.
Arguments:
    pretrain_steps: List of the number of pre-training steps for each target.
    train_steps: List of the number of sampled training steps for each target.
  Returns:
    Python list of length sum(pretrain_steps + train_steps) containing the
    target index to train at each step.
"""
check.Eq(len(pretrain_steps), len(train_steps))
  # Arbitrary seed to make sure the returned schedule is deterministic.
random.seed(0x31337)
tf.logging.info('Determining the training schedule...')
target_per_step = []
for target_idx in xrange(len(pretrain_steps)):
target_per_step += [target_idx] * pretrain_steps[target_idx]
train_steps = list(train_steps)
while sum(train_steps) > 0:
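    # Draw a step uniformly at random from the remaining steps, so each target
    # is sampled in proportion to its remaining step count.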
step = random.randint(0, sum(train_steps) - 1)
cumulative_steps = 0
for target_idx in xrange(len(train_steps)):
cumulative_steps += train_steps[target_idx]
if step < cumulative_steps:
break
assert train_steps[target_idx] > 0
train_steps[target_idx] -= 1
target_per_step.append(target_idx)
tf.logging.info('Training schedule defined!')
return target_per_step
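# For illustration: generate_target_per_step_schedule([1, 0], [2, 2]) first
# emits the pre-training step for target 0, then randomly interleaves the four
# remaining training steps between targets 0 and 1 (deterministically, given
# the fixed seed above).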
def run_training_step(sess, trainer, train_corpus, batch_size):
"""Runs a single iteration of train_op on a randomly sampled batch."""
batch = random.sample(train_corpus, batch_size)
sess.run(trainer['run'], feed_dict={trainer['input_batch']: batch})
def run_training(sess, trainers, annotator, evaluator, pretrain_steps,
train_steps, train_corpus, eval_corpus, eval_gold,
batch_size, summary_writer, report_every, saver,
checkpoint_filename, checkpoint_stats=None):
def run_training(sess,
trainers,
annotator,
evaluator,
pretrain_steps,
train_steps,
train_corpus,
eval_corpus,
eval_gold,
batch_size,
summary_writer,
report_every,
saver,
checkpoint_filename,
checkpoint_stats=None):
"""Runs multi-task DRAGNN training on a single corpus.
Arguments:
......@@ -117,30 +161,15 @@ def run_training(sess, trainers, annotator, evaluator, pretrain_steps,
checkpoint_filename: File to save checkpoints to.
checkpoint_stats: Stats of checkpoint.
"""
random.seed(0x31337)
if not checkpoint_stats:
checkpoint_stats = [0] * (len(train_steps) + 1)
tf.logging.info('Determining the training schedule...')
target_for_step = []
for target_idx in xrange(len(pretrain_steps)):
target_for_step += [target_idx] * pretrain_steps[target_idx]
while sum(train_steps) > 0:
step = random.randint(0, sum(train_steps) - 1)
cumulative_steps = 0
for target_idx in xrange(len(train_steps)):
cumulative_steps += train_steps[target_idx]
if step < cumulative_steps:
break
assert train_steps[target_idx] > 0
train_steps[target_idx] -= 1
target_for_step.append(target_idx)
tf.logging.info('Training schedule defined!')
target_per_step = generate_target_per_step_schedule(pretrain_steps,
train_steps)
best_eval_metric = -1.0
tf.logging.info('Starting training...')
actual_step = sum(checkpoint_stats[1:])
for step, target_idx in enumerate(target_for_step):
for step, target_idx in enumerate(target_per_step):
run_training_step(sess, trainers[target_idx], train_corpus, batch_size)
checkpoint_stats[target_idx + 1] += 1
if step % 100 == 0:
......
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dragnn.python.trainer_lib."""
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest
from dragnn.python import trainer_lib
class TrainerLibTest(test_util.TensorFlowTestCase):
def testImmutabilityOfArguments(self):
"""Tests that training schedule generation does not change its arguments."""
pretrain_steps = [1, 2, 3]
train_steps = [5, 5, 5]
trainer_lib.generate_target_per_step_schedule(pretrain_steps, train_steps)
self.assertEqual(pretrain_steps, [1, 2, 3])
self.assertEqual(train_steps, [5, 5, 5])
def testTrainingScheduleGenerationAndDeterminism(self):
"""Non-trivial schedule, check generation and determinism."""
pretrain_steps = [1, 2, 3]
train_steps = [5, 5, 5]
generated_schedule = trainer_lib.generate_target_per_step_schedule(
pretrain_steps, train_steps)
expected_schedule = [
0, 1, 1, 2, 2, 2, 1, 0, 2, 1, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
]
self.assertEqual(generated_schedule, expected_schedule)
def testNoPretrainSteps(self):
"""Edge case, 1 target, no pretrain."""
generated_schedule = trainer_lib.generate_target_per_step_schedule([0],
[10])
expected_schedule = [0] * 10
self.assertEqual(generated_schedule, expected_schedule)
def testNoTrainSteps(self):
"""Edge case, 1 target, only pretrain."""
generated_schedule = trainer_lib.generate_target_per_step_schedule([10],
[0])
expected_schedule = [0] * 10
self.assertEqual(generated_schedule, expected_schedule)
if __name__ == '__main__':
googletest.main()
......@@ -330,7 +330,7 @@ class LayerNormBasicLSTMNetwork(BaseLSTMNetwork):
def _cell_closure(scope):
"""Applies the LSTM cell to the current inputs and state."""
return cell(input_tensor, state, scope)
return cell(input_tensor, state, scope=scope)
unused_h, state = self._apply_with_captured_variables(_cell_closure)
......
# -*- Python -*-
# Given a source file, generate a test name.
# e.g. "common_runtime/direct_session_test.cc" becomes
# "common_runtime_direct_session_test"
def src_to_test_name(src):
return src.replace("/", "_").split(".")[0]
# Return the options to use for a C++ library or binary build.
# Uses the ":optmode" config_setting to pick the options.
load(
"@org_tensorflow//tensorflow/core:platform/default/build_config_root.bzl",
"tf_cuda_tests_tags",
"tf_sycl_tests_tags",
)
load(
"@local_config_cuda//cuda:build_defs.bzl",
"if_cuda",
"cuda_default_copts"
)
# List of proto files for android builds
def tf_android_core_proto_sources(core_proto_sources_relative):
return ["@org_tensorflow//tensorflow/core:" + p
for p in core_proto_sources_relative]
# Returns the list of pb.h and proto.h headers that are generated for
# tf_android_core_proto_sources().
def tf_android_core_proto_headers(core_proto_sources_relative):
return (["@org_tensorflow//tensorflow/core/" + p.replace(".proto", ".pb.h")
for p in core_proto_sources_relative] +
["@org_tensorflow//tensorflow/core/" + p.replace(".proto", ".proto.h")
for p in core_proto_sources_relative])
def if_android_arm(a):
return select({
"@org_tensorflow//tensorflow:android_arm": a,
"//conditions:default": [],
})
def if_android_arm64(a):
return select({
"@org_tensorflow//tensorflow:android_arm64": a,
"//conditions:default": [],
})
def if_not_android(a):
return select({
"@org_tensorflow//tensorflow:android": [],
"//conditions:default": a,
})
def if_android(a):
return select({
"@org_tensorflow//tensorflow:android": a,
"//conditions:default": [],
})
def if_ios(a):
return select({
"@org_tensorflow//tensorflow:ios": a,
"//conditions:default": [],
})
def if_mobile(a):
return select({
"@org_tensorflow//tensorflow:android": a,
"@org_tensorflow//tensorflow:ios": a,
"//conditions:default": [],
})
def if_not_mobile(a):
return select({
"@org_tensorflow//tensorflow:android": [],
"@org_tensorflow//tensorflow:ios": [],
"//conditions:default": a,
})
def if_not_windows(a):
return select({
"@org_tensorflow//tensorflow:windows": [],
"//conditions:default": a,
})
def if_x86(a):
return select({
"@org_tensorflow//tensorflow:linux_x86_64": a,
"@org_tensorflow//tensorflow:windows": a,
"//conditions:default": [],
})
def tf_copts():
return (["-DEIGEN_AVOID_STL_ARRAY",
"-Iexternal/gemmlowp",
"-Wno-sign-compare",
"-fno-exceptions",] +
if_cuda(["-DGOOGLE_CUDA=1"]) +
if_android_arm(["-mfpu=neon"]) +
select({
"@org_tensorflow//tensorflow:android": [
"-std=c++11",
"-DTF_LEAN_BINARY",
"-O2",
],
"@org_tensorflow//tensorflow:darwin": [],
"@org_tensorflow//tensorflow:windows": [
"/DLANG_CXX11",
"/D__VERSION__=\\\"MSVC\\\"",
"/DPLATFORM_WINDOWS",
"/DEIGEN_HAS_C99_MATH",
"/DTENSORFLOW_USE_EIGEN_THREADPOOL",
],
"@org_tensorflow//tensorflow:ios": ["-std=c++11"],
"//conditions:default": ["-pthread"]}))
def tf_opts_nortti_if_android():
return if_android([
"-fno-rtti",
"-DGOOGLE_PROTOBUF_NO_RTTI",
"-DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER",
])
# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate a library for each file.
def tf_gen_op_libs(op_lib_names, deps=None):
# Make library out of each op so it can also be used to generate wrappers
# for various languages.
if not deps:
deps = []
for n in op_lib_names:
native.cc_library(name=n + "_op_lib",
copts=tf_copts(),
srcs=["ops/" + n + ".cc"],
deps=deps + ["@org_tensorflow//tensorflow/core:framework"],
visibility=["//visibility:public"],
alwayslink=1,
linkstatic=1,)
def tf_gen_op_wrapper_cc(name, out_ops_file, pkg="",
op_gen="@org_tensorflow//tensorflow/cc:cc_op_gen_main",
deps=None,
override_file=None,
include_internal_ops=0):
# Construct an op generator binary for these ops.
tool = out_ops_file + "_gen_cc"
if deps == None:
deps = [pkg + ":" + name + "_op_lib"]
native.cc_binary(
name = tool,
copts = tf_copts(),
linkopts = ["-lm"],
linkstatic = 1, # Faster to link this one-time-use binary dynamically
deps = [op_gen] + deps
)
if override_file == None:
srcs = []
override_arg = ","
else:
srcs = [override_file]
override_arg = "$(location " + override_file + ")"
native.genrule(
name=name + "_genrule",
outs=[out_ops_file + ".h", out_ops_file + ".cc",
out_ops_file + "_internal.h", out_ops_file + "_internal.cc"],
srcs=srcs,
tools=[":" + tool],
cmd=("$(location :" + tool + ") $(location :" + out_ops_file + ".h) " +
"$(location :" + out_ops_file + ".cc) " + override_arg + " " +
str(include_internal_ops)))
# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate individual C++ .cc and .h
# files for each of the ops files mentioned, and then generate a
# single cc_library called "name" that combines all the
# generated C++ code.
#
# For example, for:
# tf_gen_op_wrappers_cc("tf_ops_lib", [ "array_ops", "math_ops" ])
#
# This will ultimately generate ops/* files and a library like:
#
# cc_library(name = "tf_ops_lib",
# srcs = [ "ops/array_ops.cc",
# "ops/math_ops.cc" ],
# hdrs = [ "ops/array_ops.h",
# "ops/math_ops.h" ],
# deps = [ ... ])
#
# Plus a private library for the "hidden" ops.
# cc_library(name = "tf_ops_lib_internal",
# srcs = [ "ops/array_ops_internal.cc",
# "ops/math_ops_internal.cc" ],
# hdrs = [ "ops/array_ops_internal.h",
# "ops/math_ops_internal.h" ],
# deps = [ ... ])
# TODO(googleuser): Cleaner approach for hidden ops.
def tf_gen_op_wrappers_cc(name,
op_lib_names=[],
other_srcs=[],
other_hdrs=[],
pkg="",
deps=[
"@org_tensorflow//tensorflow/cc:ops",
"@org_tensorflow//tensorflow/cc:scope",
"@org_tensorflow//tensorflow/cc:const_op",
],
op_gen="@org_tensorflow//tensorflow/cc:cc_op_gen_main",
override_file=None,
include_internal_ops=0,
visibility=None):
subsrcs = other_srcs
subhdrs = other_hdrs
internalsrcs = []
internalhdrs = []
for n in op_lib_names:
tf_gen_op_wrapper_cc(
n, "ops/" + n, pkg=pkg, op_gen=op_gen, override_file=override_file,
include_internal_ops=include_internal_ops)
subsrcs += ["ops/" + n + ".cc"]
subhdrs += ["ops/" + n + ".h"]
internalsrcs += ["ops/" + n + "_internal.cc"]
internalhdrs += ["ops/" + n + "_internal.h"]
native.cc_library(name=name,
srcs=subsrcs,
hdrs=subhdrs,
deps=deps + if_not_android([
"@org_tensorflow//tensorflow/core:core_cpu",
"@org_tensorflow//tensorflow/core:framework",
"@org_tensorflow//tensorflow/core:lib",
"@org_tensorflow//tensorflow/core:protos_all_cc",
]) + if_android([
"@org_tensorflow//tensorflow/core:android_tensorflow_lib",
]),
copts=tf_copts(),
alwayslink=1,
visibility=visibility)
native.cc_library(name=name + "_internal",
srcs=internalsrcs,
hdrs=internalhdrs,
deps=deps + if_not_android([
"@org_tensorflow//tensorflow/core:core_cpu",
"@org_tensorflow//tensorflow/core:framework",
"@org_tensorflow//tensorflow/core:lib",
"@org_tensorflow//tensorflow/core:protos_all_cc",
]) + if_android([
"@org_tensorflow//tensorflow/core:android_tensorflow_lib",
]),
copts=tf_copts(),
alwayslink=1,
visibility=["@org_tensorflow//tensorflow:internal"])
# Invoke this rule in .../tensorflow/python to build the wrapper library.
def tf_gen_op_wrapper_py(name, out=None, hidden=None, visibility=None, deps=[],
require_shape_functions=False, hidden_file=None,
generated_target_name=None):
# Construct a cc_binary containing the specified ops.
tool_name = "gen_" + name + "_py_wrappers_cc"
if not deps:
deps = ["@org_tensorflow//tensorflow/core:" + name + "_op_lib"]
native.cc_binary(
name = tool_name,
linkopts = ["-lm"],
copts = tf_copts(),
linkstatic = 1, # Faster to link this one-time-use binary dynamically
deps = (["@org_tensorflow//tensorflow/core:framework",
"@org_tensorflow//tensorflow/python:python_op_gen_main"] + deps),
visibility = ["@org_tensorflow//tensorflow:internal"],
)
# Invoke the previous cc_binary to generate a python file.
if not out:
out = "ops/gen_" + name + ".py"
if hidden:
# `hidden` is a list of op names to be hidden in the generated module.
native.genrule(
name=name + "_pygenrule",
outs=[out],
tools=[tool_name],
cmd=("$(location " + tool_name + ") " + ",".join(hidden)
+ " " + ("1" if require_shape_functions else "0") + " > $@"))
elif hidden_file:
# `hidden_file` is file containing a list of op names to be hidden in the
# generated module.
native.genrule(
name=name + "_pygenrule",
outs=[out],
srcs=[hidden_file],
tools=[tool_name],
cmd=("$(location " + tool_name + ") @$(location "
+ hidden_file + ") " + ("1" if require_shape_functions else "0")
+ " > $@"))
else:
# No ops should be hidden in the generated module.
native.genrule(
name=name + "_pygenrule",
outs=[out],
tools=[tool_name],
cmd=("$(location " + tool_name + ") "
+ ("1" if require_shape_functions else "0") + " > $@"))
# Make a py_library out of the generated python file.
if not generated_target_name:
generated_target_name = name
native.py_library(name=generated_target_name,
srcs=[out],
srcs_version="PY2AND3",
visibility=visibility,
deps=[
"@org_tensorflow//tensorflow/python:framework_for_generated_wrappers",
],)
# Define a bazel macro that creates cc_test for tensorflow.
# TODO(googleuser): we need to enable this to work around the hidden symbol
# __cudaRegisterFatBinary error. Needs more investigation.
def tf_cc_test(name, srcs, deps, linkstatic=0, tags=[], data=[], size="medium",
suffix="", args=None, linkopts=[]):
native.cc_test(name="%s%s" % (name, suffix),
srcs=srcs,
size=size,
args=args,
copts=tf_copts(),
data=data,
deps=deps,
linkopts=["-lpthread", "-lm"] + linkopts,
linkstatic=linkstatic,
tags=tags)
# Part of the testing process requires a distinguishable name for the build
# rules that involve a GPU, even if otherwise identical to the base rule.
def tf_cc_test_gpu(name, srcs, deps, linkstatic=0, tags=[], data=[],
size="medium", suffix="", args=None):
tf_cc_test(name, srcs, deps, linkstatic=linkstatic, tags=tags, data=data,
size=size, suffix=suffix, args=args)
def tf_cuda_cc_test(name, srcs=[], deps=[], tags=[], data=[], size="medium",
linkstatic=0, args=[], linkopts=[]):
tf_cc_test(name=name,
srcs=srcs,
deps=deps,
tags=tags + ["manual"],
data=data,
size=size,
linkstatic=linkstatic,
linkopts=linkopts,
args=args)
tf_cc_test(name=name,
srcs=srcs,
suffix="_gpu",
deps=deps + if_cuda(["@org_tensorflow//tensorflow/core:gpu_runtime"]),
linkstatic=if_cuda(1, 0),
tags=tags + tf_cuda_tests_tags(),
data=data,
size=size,
linkopts=linkopts,
args=args)
# Create a cc_test for each of the tensorflow tests listed in "tests"
def tf_cc_tests(srcs, deps, name='', linkstatic=0, tags=[], size="medium",
args=None, linkopts=[]):
for src in srcs:
tf_cc_test(
name=src_to_test_name(src),
srcs=[src],
deps=deps,
linkstatic=linkstatic,
tags=tags,
size=size,
args=args,
linkopts=linkopts)
def tf_cc_tests_gpu(srcs, deps, name='', linkstatic=0, tags=[], size="medium",
args=None):
  tf_cc_tests(srcs, deps, linkstatic=linkstatic, tags=tags, size=size,
              args=args)
def tf_cuda_cc_tests(srcs, deps, name='', tags=[], size="medium", linkstatic=0,
args=None, linkopts=[]):
for src in srcs:
tf_cuda_cc_test(
name=src_to_test_name(src),
srcs=[src],
deps=deps,
tags=tags,
size=size,
linkstatic=linkstatic,
args=args,
linkopts=linkopts)
def _cuda_copts():
"""Gets the appropriate set of copts for (maybe) CUDA compilation.
If we're doing CUDA compilation, returns copts for our particular CUDA
compiler. If we're not doing CUDA compilation, returns an empty list.
"""
return cuda_default_copts() + select({
"//conditions:default": [],
"@local_config_cuda//cuda:using_nvcc": (
[
"-nvcc_options=relaxed-constexpr",
"-nvcc_options=ftz=true",
]
),
"@local_config_cuda//cuda:using_clang": (
[
"-fcuda-flush-denormals-to-zero",
]
),
})
# Build defs for TensorFlow kernels
# When this target is built using --config=cuda, a cc_library is built
# that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
# libraries needed by GPU kernels.
def tf_gpu_kernel_library(srcs, copts=[], cuda_copts=[], deps=[], hdrs=[],
**kwargs):
copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
native.cc_library(
srcs = srcs,
hdrs = hdrs,
copts = copts,
deps = deps + if_cuda([
"@org_tensorflow//tensorflow/core:cuda",
"@org_tensorflow//tensorflow/core:gpu_lib",
]),
alwayslink=1,
**kwargs)
def tf_cuda_library(deps=None, cuda_deps=None, copts=None, **kwargs):
"""Generate a cc_library with a conditional set of CUDA dependencies.
When the library is built with --config=cuda:
- both deps and cuda_deps are used as dependencies
- the cuda runtime is added as a dependency (if necessary)
- The library additionally passes -DGOOGLE_CUDA=1 to the list of copts
Args:
- cuda_deps: BUILD dependencies which will be linked if and only if:
'--config=cuda' is passed to the bazel command line.
- deps: dependencies which will always be linked.
- copts: copts always passed to the cc_library.
- kwargs: Any other argument to cc_library.
"""
if not deps:
deps = []
if not cuda_deps:
cuda_deps = []
if not copts:
copts = []
native.cc_library(
deps = deps + if_cuda(cuda_deps + [
"@org_tensorflow//tensorflow/core:cuda",
"@local_config_cuda//cuda:cuda_headers"
]),
copts = copts + if_cuda(["-DGOOGLE_CUDA=1"]),
**kwargs)
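# For illustration only (hypothetical target names), a library whose CUDA
# dependencies are linked solely under --config=cuda might be declared as:
#
#   tf_cuda_library(
#       name = "my_kernel_lib",
#       srcs = ["my_kernel.cc"],
#       deps = ["@org_tensorflow//tensorflow/core:framework"],
#       cuda_deps = ["@org_tensorflow//tensorflow/core:gpu_lib"],
#   )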
def tf_kernel_library(name, prefix=None, srcs=None, gpu_srcs=None, hdrs=None,
deps=None, alwayslink=1, copts=tf_copts(), **kwargs):
"""A rule to build a TensorFlow OpKernel.
May either specify srcs/hdrs or prefix. Similar to tf_cuda_library,
but with alwayslink=1 by default. If prefix is specified:
* prefix*.cc (except *.cu.cc) is added to srcs
* prefix*.h (except *.cu.h) is added to hdrs
* prefix*.cu.cc and prefix*.h (including *.cu.h) are added to gpu_srcs.
  In all cases, test files are excluded.
For example, with prefix = "cast_op",
* srcs = ["cast_op.cc"]
* hdrs = ["cast_op.h"]
* gpu_srcs = ["cast_op_gpu.cu.cc", "cast_op.h"]
* "cast_op_test.cc" is excluded
With prefix = "cwise_op"
* srcs = ["cwise_op_abs.cc", ..., "cwise_op_tanh.cc"],
* hdrs = ["cwise_ops.h", "cwise_ops_common.h"],
* gpu_srcs = ["cwise_op_gpu_abs.cu.cc", ..., "cwise_op_gpu_tanh.cu.cc",
"cwise_ops.h", "cwise_ops_common.h",
"cwise_ops_gpu_common.cu.h"]
* "cwise_ops_test.cc" is excluded
"""
if not srcs:
srcs = []
if not hdrs:
hdrs = []
if not deps:
deps = []
if prefix:
if native.glob([prefix + "*.cu.cc"], exclude = ["*test*"]):
if not gpu_srcs:
gpu_srcs = []
gpu_srcs = gpu_srcs + native.glob([prefix + "*.cu.cc", prefix + "*.h"],
exclude = ["*test*"])
srcs = srcs + native.glob([prefix + "*.cc"],
exclude = ["*test*", "*.cu.cc"])
hdrs = hdrs + native.glob([prefix + "*.h"], exclude = ["*test*", "*.cu.h"])
cuda_deps = ["@org_tensorflow//tensorflow/core:gpu_lib"]
if gpu_srcs:
for gpu_src in gpu_srcs:
if gpu_src.endswith(".cc") and not gpu_src.endswith(".cu.cc"):
fail("{} not allowed in gpu_srcs. .cc sources must end with .cu.cc".format(gpu_src))
tf_gpu_kernel_library(
name = name + "_gpu",
srcs = gpu_srcs,
deps = deps,
**kwargs)
cuda_deps.extend([":" + name + "_gpu"])
tf_cuda_library(
name = name,
srcs = srcs,
hdrs = hdrs,
copts = copts,
cuda_deps = cuda_deps,
linkstatic = 1, # Needed since alwayslink is broken in bazel b/27630669
alwayslink = alwayslink,
deps = deps,
**kwargs)
# Bazel rules for building swig files.
def _py_wrap_cc_impl(ctx):
srcs = ctx.files.srcs
if len(srcs) != 1:
fail("Exactly one SWIG source file label must be specified.", "srcs")
module_name = ctx.attr.module_name
src = ctx.files.srcs[0]
inputs = depset([src])
inputs += ctx.files.swig_includes
for dep in ctx.attr.deps:
inputs += dep.cc.transitive_headers
inputs += ctx.files._swiglib
inputs += ctx.files.toolchain_deps
swig_include_dirs = depset(_get_repository_roots(ctx, inputs))
swig_include_dirs += sorted([f.dirname for f in ctx.files._swiglib])
args = ["-c++",
"-python",
"-module", module_name,
"-o", ctx.outputs.cc_out.path,
"-outdir", ctx.outputs.py_out.dirname]
args += ["-l" + f.path for f in ctx.files.swig_includes]
args += ["-I" + i for i in swig_include_dirs]
args += [src.path]
outputs = [ctx.outputs.cc_out,
ctx.outputs.py_out]
ctx.action(executable=ctx.executable._swig,
arguments=args,
inputs=list(inputs),
outputs=outputs,
mnemonic="PythonSwig",
progress_message="SWIGing " + src.path)
return struct(files=depset(outputs))
_py_wrap_cc = rule(
attrs = {
"srcs": attr.label_list(
mandatory = True,
allow_files = True,
),
"swig_includes": attr.label_list(
cfg = "data",
allow_files = True,
),
"deps": attr.label_list(
allow_files = True,
providers = ["cc"],
),
"toolchain_deps": attr.label_list(
allow_files = True,
),
"module_name": attr.string(mandatory = True),
"py_module_name": attr.string(mandatory = True),
"_swig": attr.label(
default = Label("@swig//:swig"),
executable = True,
cfg = "host",
),
"_swiglib": attr.label(
default = Label("@swig//:templates"),
allow_files = True,
),
},
outputs = {
"cc_out": "%{module_name}.cc",
"py_out": "%{py_module_name}.py",
},
implementation = _py_wrap_cc_impl,
)
def _get_repository_roots(ctx, files):
"""Returns abnormal root directories under which files reside.
When running a ctx.action, source files within the main repository are all
relative to the current directory; however, files that are generated or exist
in remote repositories will have their root directory be a subdirectory,
e.g. bazel-out/local-fastbuild/genfiles/external/jpeg_archive. This function
returns the set of these devious directories, ranked and sorted by popularity
in order to hopefully minimize the number of I/O system calls within the
compiler, because includes have quadratic complexity.
"""
result = {}
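  # Popularity counts are stored negated so that the sort at the end ranks
  # the most frequently seen roots first.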
for f in files:
root = f.root.path
if root:
if root not in result:
result[root] = 0
result[root] -= 1
work = f.owner.workspace_root
if work:
if root:
root += "/"
root += work
if root:
if root not in result:
result[root] = 0
result[root] -= 1
return [k for v, k in sorted([(v, k) for k, v in result.items()])]
# Bazel rule for collecting the header files that a target depends on.
def _transitive_hdrs_impl(ctx):
outputs = depset()
for dep in ctx.attr.deps:
outputs += dep.cc.transitive_headers
return struct(files=outputs)
_transitive_hdrs = rule(
attrs = {
"deps": attr.label_list(
allow_files = True,
providers = ["cc"],
),
},
implementation = _transitive_hdrs_impl,
)
def transitive_hdrs(name, deps=[], **kwargs):
_transitive_hdrs(name=name + "_gather",
deps=deps)
native.filegroup(name=name,
srcs=[":" + name + "_gather"])
# Create a header only library that includes all the headers exported by
# the libraries in deps.
def cc_header_only_library(name, deps=[], **kwargs):
_transitive_hdrs(name=name + "_gather",
deps=deps)
native.cc_library(name=name,
hdrs=[":" + name + "_gather"],
**kwargs)
def tf_custom_op_library_additional_deps():
return [
"@protobuf_archive//:protobuf",
"//third_party/eigen3",
"@org_tensorflow//tensorflow/core:framework_headers_lib",
]
# Traverse the dependency graph along the "deps" attribute of the
# target and return a struct with one field called 'tf_collected_deps'.
# tf_collected_deps will be the union of the deps of the current target
# and the tf_collected_deps of the dependencies of this target.
def _collect_deps_aspect_impl(target, ctx):
alldeps = depset()
if hasattr(ctx.rule.attr, "deps"):
for dep in ctx.rule.attr.deps:
alldeps = alldeps | depset([dep.label])
if hasattr(dep, "tf_collected_deps"):
alldeps = alldeps | dep.tf_collected_deps
return struct(tf_collected_deps=alldeps)
collect_deps_aspect = aspect(
implementation=_collect_deps_aspect_impl,
attr_aspects=["deps"])
def _dep_label(dep):
label = dep.label
return label.package + ":" + label.name
# This rule checks that the transitive dependencies of targets listed
# in the 'deps' attribute don't depend on the targets listed in
# the 'disallowed_deps' attribute.
def _check_deps_impl(ctx):
disallowed_deps = ctx.attr.disallowed_deps
for input_dep in ctx.attr.deps:
if not hasattr(input_dep, "tf_collected_deps"):
continue
for dep in input_dep.tf_collected_deps:
for disallowed_dep in disallowed_deps:
if dep == disallowed_dep.label:
fail(_dep_label(input_dep) + " cannot depend on " +
_dep_label(disallowed_dep))
return struct()
check_deps = rule(
_check_deps_impl,
attrs = {
"deps": attr.label_list(
aspects=[collect_deps_aspect],
mandatory = True,
allow_files = True
),
"disallowed_deps": attr.label_list(
mandatory = True,
allow_files = True
)},
)
# Helper to build a dynamic library (.so) from the sources containing
# implementations of custom ops and kernels.
def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
cuda_deps = [
"@org_tensorflow//tensorflow/core:stream_executor_headers_lib",
"@local_config_cuda//cuda:cudart_static",
]
deps = deps + tf_custom_op_library_additional_deps()
if gpu_srcs:
basename = name.split(".")[0]
native.cc_library(
name = basename + "_gpu",
srcs = gpu_srcs,
copts = _cuda_copts(),
deps = deps + if_cuda(cuda_deps))
cuda_deps.extend([":" + basename + "_gpu"])
check_deps(name=name+"_check_deps",
deps=deps + if_cuda(cuda_deps),
disallowed_deps=["@org_tensorflow//tensorflow/core:framework",
"@org_tensorflow//tensorflow/core:lib"])
native.cc_binary(name=name,
srcs=srcs,
deps=deps + if_cuda(cuda_deps),
data=[name + "_check_deps"],
copts=tf_copts(),
linkshared=1,
linkopts = select({
"//conditions:default": [
"-lm",
],
"@org_tensorflow//tensorflow:darwin": [],
}),
)
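# For illustration only (hypothetical file names), a custom op shared library
# could be built as:
#
#   tf_custom_op_library(
#       name = "my_custom_op.so",
#       srcs = ["my_custom_op.cc"],
#       gpu_srcs = ["my_custom_op.cu.cc"],
#   )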
def tf_extension_linkopts():
return [] # No extension link opts
def tf_extension_copts():
return [] # No extension c opts
def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
module_name = name.split("/")[-1]
# Convert a rule name such as foo/bar/baz to foo/bar/_baz.so
# and use that as the name for the rule producing the .so file.
cc_library_name = "/".join(name.split("/")[:-1] + ["_" + module_name + ".so"])
cc_library_pyd_name = "/".join(name.split("/")[:-1] + ["_" + module_name + ".pyd"])
extra_deps = []
_py_wrap_cc(name=name + "_py_wrap",
srcs=srcs,
swig_includes=swig_includes,
deps=deps + extra_deps,
toolchain_deps=["//tools/defaults:crosstool"],
module_name=module_name,
py_module_name=name)
extra_linkopts = select({
"@local_config_cuda//cuda:darwin": [
"-Wl,-exported_symbols_list",
"@org_tensorflow//tensorflow:tf_exported_symbols.lds"
],
"@org_tensorflow//tensorflow:windows": [
],
"//conditions:default": [
"-Wl,--version-script",
"@org_tensorflow//tensorflow:tf_version_script.lds"
]})
extra_deps += select({
"@local_config_cuda//cuda:darwin": [
"@org_tensorflow//tensorflow:tf_exported_symbols.lds"
],
"@org_tensorflow//tensorflow:windows": [
],
"//conditions:default": [
"@org_tensorflow//tensorflow:tf_version_script.lds"
]
})
native.cc_binary(
name=cc_library_name,
srcs=[module_name + ".cc"],
copts=(copts + ["-Wno-self-assign",
"-Wno-sign-compare",
"-Wno-write-strings"]
+ tf_extension_copts()),
linkopts=tf_extension_linkopts() + extra_linkopts,
linkstatic=1,
linkshared=1,
deps=deps + extra_deps)
native.genrule(
name = "gen_" + cc_library_pyd_name,
srcs = [":" + cc_library_name],
outs = [cc_library_pyd_name],
cmd = "cp $< $@",
)
native.py_library(name=name,
srcs=[":" + name + ".py"],
srcs_version="PY2AND3",
data=select({
"@org_tensorflow//tensorflow:windows": [":" + cc_library_pyd_name],
"//conditions:default": [":" + cc_library_name],
}))
def py_test(deps=[], **kwargs):
native.py_test(
deps=select({
"//conditions:default" : deps,
"@org_tensorflow//tensorflow:no_tensorflow_py_deps" : []
}),
**kwargs)
def tf_py_test(name, srcs, size="medium", data=[], main=None, args=[],
tags=[], shard_count=1, additional_deps=[], flaky=0):
native.py_test(
name=name,
size=size,
srcs=srcs,
main=main,
args=args,
tags=tags,
visibility=["@org_tensorflow//tensorflow:internal"],
shard_count=shard_count,
data=data,
deps=select({
"//conditions:default" : [
"@org_tensorflow//tensorflow/python:extra_py_tests_deps",
"@org_tensorflow//tensorflow/python:gradient_checker",
] + additional_deps,
"@org_tensorflow//tensorflow:no_tensorflow_py_deps" : []
}),
flaky=flaky,
srcs_version="PY2AND3")
def cuda_py_test(name, srcs, size="medium", data=[], main=None, args=[],
shard_count=1, additional_deps=[], tags=[], flaky=0):
test_tags = tags + tf_cuda_tests_tags()
tf_py_test(name=name,
size=size,
srcs=srcs,
data=data,
main=main,
args=args,
tags=test_tags,
shard_count=shard_count,
additional_deps=additional_deps,
flaky=flaky)
def sycl_py_test(name, srcs, size="medium", data=[], main=None, args=[],
shard_count=1, additional_deps=[], tags=[], flaky=0):
test_tags = tags + tf_sycl_tests_tags()
tf_py_test(name=name,
size=size,
srcs=srcs,
data=data,
main=main,
args=args,
tags=test_tags,
shard_count=shard_count,
additional_deps=additional_deps,
flaky=flaky)
def py_tests(name,
srcs,
size="medium",
additional_deps=[],
data=[],
tags=[],
shard_count=1,
prefix=""):
for src in srcs:
test_name = src.split("/")[-1].split(".")[0]
if prefix:
test_name = "%s_%s" % (prefix, test_name)
tf_py_test(name=test_name,
size=size,
srcs=[src],
main=src,
tags=tags,
shard_count=shard_count,
data=data,
additional_deps=additional_deps)
def cuda_py_tests(name, srcs, size="medium", additional_deps=[], data=[],
shard_count=1, tags=[], prefix=""):
test_tags = tags + tf_cuda_tests_tags()
py_tests(name=name, size=size, srcs=srcs, additional_deps=additional_deps,
data=data, tags=test_tags, shard_count=shard_count,prefix=prefix)
# Creates a genrule named <name> for running tools/proto_text's generator to
# make the proto_text functions, for the protos passed in <srcs>.
#
# Return a struct with fields (hdrs, srcs) containing the names of the
# generated files.
def tf_generate_proto_text_sources(name, srcs_relative_dir, srcs):
out_hdrs = ([p.replace(".proto", ".pb_text.h") for p in srcs] +
[p.replace(".proto", ".pb_text-impl.h") for p in srcs])
out_srcs = [p.replace(".proto", ".pb_text.cc") for p in srcs]
native.genrule(
name = name,
srcs = srcs + ["@org_tensorflow//tensorflow/tools/proto_text:placeholder.txt"],
outs = out_hdrs + out_srcs,
cmd = "$(location //tensorflow/tools/proto_text:gen_proto_text_functions) " +
"$(@D) " + srcs_relative_dir + " $(SRCS)",
tools = ["@org_tensorflow//tensorflow/tools/proto_text:gen_proto_text_functions"],
)
return struct(hdrs=out_hdrs, srcs=out_srcs)
def tf_genrule_cmd_append_to_srcs(to_append):
return ("cat $(SRCS) > $(@) && " +
"echo >> $(@) && " +
"echo " + to_append + " >> $(@)")
def tf_version_info_genrule():
native.genrule(
name = "version_info_gen",
srcs = [
"@org_tensorflow//tensorflow/tools/git:gen/spec.json",
"@org_tensorflow//tensorflow/tools/git:gen/head",
"@org_tensorflow//tensorflow/tools/git:gen/branch_ref",
],
outs = ["util/version_info.cc"],
cmd = "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\"",
local = 1,
tools = ["@org_tensorflow//tensorflow/tools/git:gen_git_source.py"],
)
def cc_library_with_android_deps(deps, android_deps=[],
common_deps=[], **kwargs):
deps = if_not_android(deps) + if_android(android_deps) + common_deps
native.cc_library(deps=deps, **kwargs)
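# For illustration only (hypothetical targets), selecting platform-specific
# dependencies:
#
#   cc_library_with_android_deps(
#       name = "runtime",
#       srcs = ["runtime.cc"],
#       deps = ["//base:desktop_impl"],
#       android_deps = ["//base:android_impl"],
#       common_deps = ["//base:common"],
#   )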
......@@ -9,9 +9,10 @@ py_binary(
name = "conll_checkpoint_converter",
srcs = ["conll_checkpoint_converter.py"],
deps = [
"//dragnn/protos:spec_py_pb2",
"//dragnn/protos:spec_pb2_py",
"//dragnn/python:dragnn_model_saver_lib",
"//dragnn/python:spec_builder",
"@absl_py//absl/flags",
"@org_tensorflow//tensorflow:tensorflow_py",
"@org_tensorflow//tensorflow/core:protos_all_py",
],
......@@ -28,6 +29,7 @@ py_binary(
":components",
"//dragnn/python:evaluation",
"//dragnn/python:spec_builder",
"@absl_py//absl/flags",
],
)
......@@ -43,6 +45,7 @@ py_binary(
"//dragnn/python:dragnn_ops",
"//dragnn/python:evaluation",
"//dragnn/python:spec_builder",
"@absl_py//absl/flags",
],
)
......@@ -58,6 +61,7 @@ py_binary(
"//dragnn/python:dragnn_ops",
"//dragnn/python:evaluation",
"//dragnn/python:spec_builder",
"@absl_py//absl/flags",
],
)
......@@ -73,6 +77,7 @@ py_binary(
"//dragnn/python:dragnn_ops",
"//dragnn/python:evaluation",
"//dragnn/python:spec_builder",
"@absl_py//absl/flags",
],
)
......@@ -86,7 +91,8 @@ py_binary(
"//dragnn/python:lexicon",
"//dragnn/python:spec_builder",
"//dragnn/python:trainer_lib",
"//syntaxnet:task_spec_py_pb2",
"//syntaxnet:task_spec_pb2_py",
"@absl_py//absl/flags",
],
)
......@@ -100,7 +106,9 @@ py_binary(
"//dragnn/python:lexicon",
"//dragnn/python:spec_builder",
"//dragnn/python:trainer_lib",
"//syntaxnet:task_spec_py_pb2",
"//syntaxnet:task_spec_pb2_py",
"@absl_py//absl:app",
"@absl_py//absl/flags",
],
)
......@@ -110,13 +118,14 @@ py_binary(
deps = [
"//dragnn/core:dragnn_bulk_ops",
"//dragnn/core:dragnn_ops",
"//dragnn/protos:spec_py_pb2",
"//dragnn/protos:spec_pb2_py",
"//dragnn/python:evaluation",
"//dragnn/python:graph_builder",
"//dragnn/python:sentence_io",
"//dragnn/python:spec_builder",
"//dragnn/python:trainer_lib",
"//syntaxnet:parser_ops",
"@absl_py//absl/flags",
"@org_tensorflow//tensorflow:tensorflow_py",
"@org_tensorflow//tensorflow/core:protos_all_py",
],
......@@ -128,7 +137,7 @@ py_binary(
deps = [
"//dragnn/core:dragnn_bulk_ops",
"//dragnn/core:dragnn_ops",
"//dragnn/protos:spec_py_pb2",
"//dragnn/protos:spec_pb2_py",
"//dragnn/python:dragnn_ops",
"//dragnn/python:evaluation",
"//dragnn/python:graph_builder",
......@@ -136,9 +145,11 @@ py_binary(
"//dragnn/python:spec_builder",
"//dragnn/python:trainer_lib",
"//syntaxnet:parser_ops",
"//syntaxnet:sentence_py_pb2",
"//syntaxnet:task_spec_py_pb2",
"//syntaxnet:sentence_pb2_py",
"//syntaxnet:task_spec_pb2_py",
"//syntaxnet/util:check",
"@absl_py//absl:app",
"@absl_py//absl/flags",
"@org_tensorflow//tensorflow:tensorflow_py",
"@org_tensorflow//tensorflow/core:protos_all_py",
],
......@@ -190,11 +201,11 @@ py_library(
deps = [
"//dragnn/core:dragnn_bulk_ops",
"//dragnn/core:dragnn_ops",
"//dragnn/protos:spec_py_pb2",
"//dragnn/protos:spec_pb2_py",
"//dragnn/python:graph_builder",
"//dragnn/python:sentence_io",
"//syntaxnet:parser_ops",
"//syntaxnet:sentence_py_pb2",
"//syntaxnet:sentence_pb2_py",
"@org_tensorflow//tensorflow:tensorflow_py",
"@org_tensorflow//tensorflow/core:protos_all_py",
],
......@@ -215,6 +226,6 @@ py_library(
"//dragnn/python:spec_builder",
"//dragnn/python:trainer_lib",
"//dragnn/python:visualization",
"//syntaxnet:task_spec_py_pb2",
"//syntaxnet:task_spec_pb2_py",
],
)
......@@ -25,6 +25,7 @@ from __future__ import division
from __future__ import print_function
import os
from absl import flags
import tensorflow as tf
from google.protobuf import text_format
......@@ -32,7 +33,6 @@ from dragnn.protos import spec_pb2
from dragnn.python import dragnn_model_saver_lib as saver_lib
from dragnn.python import spec_builder
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('master_spec', None, 'Path to task context with '
......