ModelZoo / ResNet50_tensorflow — commit 80178fc6 (unverified)

Authored May 11, 2018 by Mark Omernick; committed by GitHub on May 11, 2018.
Parents: a84e1ef9, edea2b67

Merge pull request #4153 from terryykoo/master

Export @195097388.
Changes: 145 changed files in the commit; showing 20 changed files with 2756 additions and 1409 deletions (+2756, -1409).
research/syntaxnet/dragnn/python/mst_ops.py                          +197    -0
research/syntaxnet/dragnn/python/mst_ops_test.py                     +391    -0
research/syntaxnet/dragnn/python/mst_units.py                        +164    -0
research/syntaxnet/dragnn/python/mst_units_test.py                   +261    -0
research/syntaxnet/dragnn/python/network_units.py                    +555  -155
research/syntaxnet/dragnn/python/network_units_test.py               +337   -71
research/syntaxnet/dragnn/python/perf_test_data/master-spec            +0  -171
research/syntaxnet/dragnn/python/perf_test_data/params                 +0    -0
research/syntaxnet/dragnn/python/perf_test_data/sample_docs.pickle     +0    -0
research/syntaxnet/dragnn/python/runtime_support.py                  +363    -0
research/syntaxnet/dragnn/python/runtime_support_test.py             +341    -0
research/syntaxnet/dragnn/python/sentence_io_test.py                   +4   -15
research/syntaxnet/dragnn/python/spec_builder.py                       +3    -2
research/syntaxnet/dragnn/python/spec_builder_test.py                  +0    -9
research/syntaxnet/dragnn/python/trainer_lib.py                       +56   -27
research/syntaxnet/dragnn/python/trainer_lib_test.py                  +61    -0
research/syntaxnet/dragnn/python/wrapped_units.py                      +1    -1
research/syntaxnet/dragnn/tensorflow_ops.bzl                           +0  -947
research/syntaxnet/dragnn/tools/BUILD                                 +21   -10
research/syntaxnet/dragnn/tools/conll_checkpoint_converter.py          +1    -1
research/syntaxnet/dragnn/python/mst_ops.py (new file, 0 → 100644)
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""TensorFlow ops for maximum spanning tree problems."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import dragnn.python.load_mst_cc_impl
from dragnn.mst.ops import gen_mst_ops
from dragnn.python import digraph_ops
from syntaxnet.util import check

# Re-export the generated MST op.
maximum_spanning_tree = gen_mst_ops.maximum_spanning_tree


@tf.RegisterGradient("MaximumSpanningTree")
def maximum_spanning_tree_gradient(mst_op, d_loss_d_max_scores, *_):
  """Returns a subgradient of the MaximumSpanningTree op.

  Note that MaximumSpanningTree is only differentiable w.r.t. its |scores| input
  and its |max_scores| output.

  Args:
    mst_op: The MaximumSpanningTree op being differentiated.
    d_loss_d_max_scores: [B] vector where entry b is the gradient of the network
        loss w.r.t. entry b of the |max_scores| output of the |mst_op|.
    *_: The gradients w.r.t. the other outputs; ignored.

  Returns:
    1. None, since the op is not differentiable w.r.t. its |num_nodes| input.
    2. [B,M,M] tensor where entry b,t,s is a subgradient of the network loss
       w.r.t. entry b,t,s of the |scores| input, with the same dtype as
       |d_loss_d_max_scores|.
  """
  dtype = d_loss_d_max_scores.dtype.base_dtype
  check.NotNone(dtype)

  argmax_sources_bxm = mst_op.outputs[1]
  input_dim = tf.shape(argmax_sources_bxm)[1]  # M in the docstring

  # The one-hot argmax is a subgradient of max.  Convert the batch of maximal
  # spanning trees into 0/1 indicators, then scale them by the relevant output
  # gradients from |d_loss_d_max_scores|.  Note that |d_loss_d_max_scores| must
  # be reshaped in order for it to broadcast across the batch dimension.
  indicators_bxmxm = tf.one_hot(argmax_sources_bxm, input_dim, dtype=dtype)
  d_loss_d_max_scores_bx1 = tf.expand_dims(d_loss_d_max_scores, -1)
  d_loss_d_max_scores_bx1x1 = tf.expand_dims(d_loss_d_max_scores_bx1, -1)
  d_loss_d_scores_bxmxm = indicators_bxmxm * d_loss_d_max_scores_bx1x1
  return None, d_loss_d_scores_bxmxm


def log_partition_function(num_nodes, scores, forest=False,
                           max_dynamic_range=None):
  r"""Returns the log of the sum-of-product of spanning trees or forests.

  Computing the sum-of-product in the log domain reduces the chance of overflow
  or underflow, and ML techniques (e.g., CRF loss functions) typically require
  the log partition function anyways.  For similar reasons, the scores input is
  assumed to be specified in the log domain.

  The partition function is calculated via application of the Matrix-Tree
  theorem; see the following for details:
    https://en.wikipedia.org/wiki/Kirchhoff%27s_theorem
    http://www.aclweb.org/anthology/D/D07/D07-1015.pdf

  Computing the gradient of the log partition function requires inverting the
  Laplacian matrix.  Numerical issues may occur if the Laplacian is singular or
  nearly so.  (Intuitively, the Laplacian will be close to singular when the
  input scores strongly favor invalid structures such as cycles.)  In the EMNLP
  paper, we alleviated the numerical issues by clipping the difference between
  the minimum and maximum score for each node to 20 (in the log domain).  The
  |max_dynamic_range| argument can be used for this purpose.

  TODO(googleuser): Try improving the condition number of the Laplacian matrix
  directly, instead of using the indirect approach above.  For example, one
  could add c*I to the Laplacian (i.e., Tikhonov regularization).

  Args:
    num_nodes: [B] vector of graph sizes per batch item.
    scores: [B,M,M] tensor of padded batched arc and root scores, in the format
        used by the maximum_spanning_tree() op.  Padding values must be finite.
    forest: If true, sum over spanning forests instead of trees.
    max_dynamic_range: If specified, incoming scores for each node are clipped
        to at most this far from the maximum such score (in the log domain).

  Returns:
    [B] vector Z of log partition function values, where
      Z[b] = log(
          \sum_{tree spanning batch item b}
              score(root_of(tree)) \prod_{arc in tree} score(arc))
  """
  orig_dtype = scores.dtype.base_dtype
  scores_bxmxm = tf.to_double(scores)  # use doubles to reduce under/overflow
  shape_bxmxm = tf.shape(scores_bxmxm)
  batch_size = shape_bxmxm[0]
  max_nodes = shape_bxmxm[1]
  total_nodes = batch_size * max_nodes

  # To eliminate overflow, we locally normalize the scores.  Specifically, for
  # each node we divide its incoming arc scores and root selection score by the
  # maximum such score.  Since each node in a tree must select exactly one of
  # these scores (i.e., it is either a root or has exactly one incoming arc),
  # the local normalization factors are identical for all trees and can thus be
  # factored out of the sum over trees.
  #
  # More concretely, we find the maximum per node, divide all scores for that
  # node by the maximum, and then find the partition function of the normalized
  # scores.  Then we recover the un-normalized partition function by multiplying
  # the per-node maxima back in.  This final step is performed in the log domain
  # to avoid overflow.
  #
  # Note that underflow is still possible, but unlikely as long as the scores
  # are close to feasible (i.e., there is not too much mass on non-trees).  The
  # |max_dynamic_range| argument can be used to mitigate this.

  # Finding the maximum incoming score is difficult, because the batch padding
  # may contain arbitrary values.  We restrict the maximization to valid arcs
  # using tf.unsorted_segment_max() with a specially-constructed set of IDs.
  _, valid_tokens_bxm = digraph_ops.ValidArcAndTokenMasks(
      num_nodes, max_nodes, dtype=tf.int32)

  # Create a tensor of "target IDs".  In each row of each sub-matrix, the
  # positions of valid source tokens are filled with the 1-origin index of that
  # row in the entire batch, and zero elsewhere.  For example, given a batch
  # with num_nodes=[2, 3] we might have
  #   [[[1, 1, 0],
  #     [2, 2, 0],
  #     [3, 3, 0]],
  #    [[4, 4, 4],
  #     [5, 5, 5],
  #     [6, 6, 6]]]
  #
  # TODO(googleuser): The dynamic masking is pretty awkward.  Find an op that
  # does this (I looked, but maybe not hard enough), or write a custom op for
  # this.
  valid_tokens_bx1xm = tf.expand_dims(valid_tokens_bxm, 1)
  valid_sources_bxmxm = tf.tile(valid_tokens_bx1xm, [1, max_nodes, 1])
  sequence_bm = 1 + tf.range(total_nodes, dtype=tf.int32)
  sequence_bxmx1 = tf.reshape(sequence_bm, [batch_size, max_nodes, 1])
  target_ids_bxmxm = valid_sources_bxmxm * sequence_bxmx1

  max_scores_bm1 = tf.unsorted_segment_max(scores_bxmxm, target_ids_bxmxm,
                                           total_nodes + 1)
  max_scores_bm = max_scores_bm1[1:]  # ID 0 corresponds to padding

  # Similar to above, we need to sum over the valid tokens.  We analogously use
  # tf.unsorted_segment_sum() with a specially-constructed set of "batch IDs".
  sequence_b = 1 + tf.range(batch_size, dtype=tf.int32)
  sequence_bx1 = tf.expand_dims(sequence_b, 1)
  batch_ids_bxm = valid_tokens_bxm * sequence_bx1
  batch_ids_bm = tf.reshape(batch_ids_bxm, [-1])
  log_normalization_factor_b1 = tf.unsorted_segment_sum(
      max_scores_bm, batch_ids_bm, batch_size + 1)
  log_normalization_factor_b = log_normalization_factor_b1[1:]

  # Locally-normalize and optionally clip the scores.
  max_scores_bxmx1 = tf.reshape(max_scores_bm, [batch_size, max_nodes, 1])
  scores_bxmxm -= max_scores_bxmx1
  if max_dynamic_range is not None:
    # After normalization, the scores are non-positive with max=0, so the
    # |max_dynamic_range| can be applied directly.
    #
    # PyLint thinks "-max_dynamic_range" is invalid because it defaults to None.
    scores_bxmxm = tf.maximum(scores_bxmxm, -max_dynamic_range)
  scores_bxmxm = tf.exp(scores_bxmxm)

  # Apply the Matrix-Tree theorem.
  exp_normalized_laplacian_bxmxm = digraph_ops.LaplacianMatrix(
      num_nodes, scores_bxmxm, forest=forest)
  log_normalized_partition_function_b = tf.log(
      tf.matrix_determinant(exp_normalized_laplacian_bxmxm))

  # Reapply the normalization factor that was divided out.
  log_partition_function_b = (
      log_normalized_partition_function_b + log_normalization_factor_b)
  return tf.cast(log_partition_function_b, orig_dtype)
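The gradient registered above reduces to scattering the upstream gradient along the argmax arcs. As a rough, illustrative sketch only (not part of the commit), the same construction can be restated in NumPy, using the inputs and expected outputs from testMaximumSpanningTreeGradient in the test file below:

import numpy as np

# [B, M] argmax sources from the MST op; -1 marks padded positions.
argmax_sources = np.array([[3, 0, 1, 3],
                           [0, 2, 0, -1]])
# [B] gradient of the loss w.r.t. each batch item's max score.
d_loss_d_max_scores = np.array([3.0, 7.0])

num_nodes = argmax_sources.shape[1]
# One-hot indicators over sources; -1 (padding) yields an all-zero row,
# mirroring tf.one_hot's treatment of negative indices.
indicators = (argmax_sources[..., None] == np.arange(num_nodes)).astype(float)
# Broadcast the per-batch-item gradient over each [M, M] indicator matrix.
d_loss_d_scores = indicators * d_loss_d_max_scores[:, None, None]
# d_loss_d_scores matches the expected value in testMaximumSpanningTreeGradient.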
research/syntaxnet/dragnn/python/mst_ops_test.py (new file, 0 → 100644)
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for maximum spanning tree ops."""

import math

import numpy as np
import tensorflow as tf

from dragnn.python import mst_ops


class MstOpsTest(tf.test.TestCase):
  """Testing rig."""

  def testMaximumSpanningTree(self):
    """Tests that the MST op can recover a simple tree."""
    with self.test_session() as session:
      # The first batch element prefers 3 as root, then 3->0->1->2, for a total
      # score of 4+2+1=7.  The second batch element is smaller and has reversed
      # scores, so 0 is root and 0->2->1.
      num_nodes = tf.constant([4, 3], tf.int32)
      scores = tf.constant([[[0, 0, 0, 0],
                             [1, 0, 0, 0],
                             [1, 2, 0, 0],
                             [1, 2, 3, 4]],
                            [[4, 3, 2, 9],
                             [0, 0, 2, 9],
                             [0, 0, 0, 9],
                             [9, 9, 9, 9]]], tf.int32)  # pyformat: disable

      mst_outputs = mst_ops.maximum_spanning_tree(
          num_nodes, scores, forest=False)
      max_scores, argmax_sources = session.run(mst_outputs)
      tf.logging.info('\nmax_scores=%s\nargmax_sources=\n%s',
                      max_scores, argmax_sources)

      self.assertAllEqual(max_scores, [7, 6])
      self.assertAllEqual(argmax_sources, [[3, 0, 1, 3],
                                           [0, 2, 0, -1]])  # pyformat: disable

  def testMaximumSpanningTreeGradient(self):
    """Tests the MST max score gradient."""
    with self.test_session() as session:
      num_nodes = tf.constant([4, 3], tf.int32)
      scores = tf.constant([[[0, 0, 0, 0],
                             [1, 0, 0, 0],
                             [1, 2, 0, 0],
                             [1, 2, 3, 4]],
                            [[4, 3, 2, 9],
                             [0, 0, 2, 9],
                             [0, 0, 0, 9],
                             [9, 9, 9, 9]]], tf.int32)  # pyformat: disable

      mst_ops.maximum_spanning_tree(num_nodes, scores, forest=False, name='MST')
      mst_op = session.graph.get_operation_by_name('MST')

      d_loss_d_max_scores = tf.constant([3, 7], tf.float32)
      d_loss_d_num_nodes, d_loss_d_scores = (
          mst_ops.maximum_spanning_tree_gradient(mst_op, d_loss_d_max_scores))

      # The num_nodes input is non-differentiable.
      self.assertTrue(d_loss_d_num_nodes is None)
      tf.logging.info('\nd_loss_d_scores=\n%s', d_loss_d_scores.eval())

      self.assertAllEqual(d_loss_d_scores.eval(),
                          [[[0, 0, 0, 3],
                            [3, 0, 0, 0],
                            [0, 3, 0, 0],
                            [0, 0, 0, 3]],
                           [[7, 0, 0, 0],
                            [0, 0, 7, 0],
                            [7, 0, 0, 0],
                            [0, 0, 0, 0]]])  # pyformat: disable

  def testMaximumSpanningTreeGradientError(self):
    """Numerically validates the max score gradient."""
    with self.test_session():
      # The maximum-spanning-tree-score function, as a max of linear functions,
      # is piecewise-linear (i.e., faceted).  The numerical gradient estimate
      # may be inaccurate if the epsilon ball used for the estimate crosses an
      # edge from one facet to another.  To avoid spurious errors, we manually
      # set the sample point so the epsilon ball fits in a facet.  Or in other
      # words, we set the scores so there is a non-trivial margin between the
      # best and second-best trees.
      scores_raw = [[[0, 0, 0, 0],
                     [1, 0, 0, 0],
                     [1, 2, 0, 0],
                     [1, 2, 3, 4]],
                    [[4, 3, 2, 9],
                     [0, 0, 2, 9],
                     [0, 0, 0, 9],
                     [9, 9, 9, 9]]]  # pyformat: disable

      # Use 64-bit floats to reduce numerical error.
      scores = tf.constant(scores_raw, tf.float64)
      init_scores = np.array(scores_raw)

      num_nodes = tf.constant([4, 3], tf.int32)
      max_scores = mst_ops.maximum_spanning_tree(
          num_nodes, scores, forest=False)[0]

      gradient_error = tf.test.compute_gradient_error(
          scores, [2, 4, 4], max_scores, [2], init_scores)
      tf.logging.info('gradient_error=%s', gradient_error)

  def testLogPartitionFunctionOneTree(self):
    """Tests the log partition function with one feasible tree with score 1."""
    with self.test_session():
      for forest in [False, True]:
        # Each score matrix supports exactly one tree with score=1*1*1, and
        # the rest with score=0.  Thus the log partition function will be 1.0
        # in each case.
        pad = 12345.6
        scores = tf.constant([[[1, pad, pad],
                               [pad, pad, pad],
                               [pad, pad, pad]],
                              [[1, 0, pad],
                               [1, 0, pad],
                               [pad, pad, pad]],
                              [[1, 0, 0],
                               [1, 0, 0],
                               [0, 1, 0]]], tf.float64)  # pyformat: disable
        scores = tf.log(scores)

        num_nodes = tf.constant([1, 2, 3], tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        self.assertAlmostEqual(tf.exp(log_partition_functions[0]).eval(), 1.0)
        self.assertAlmostEqual(tf.exp(log_partition_functions[1]).eval(), 1.0)
        self.assertAlmostEqual(tf.exp(log_partition_functions[2]).eval(), 1.0)

  def testLogPartitionFunctionOneTreeScaled(self):
    """Tests the log partition function with one feasible tree."""
    with self.test_session():
      for forest in [False, True]:
        # Each score matrix supports exactly one tree with varying score, and
        # the rest with score=0.  Thus the log partition function will equal
        # the score of that single tree in each case.
        pad = 12345.6
        scores = tf.constant([[[2, pad, pad],
                               [pad, pad, pad],
                               [pad, pad, pad]],
                              [[3, 0, pad],
                               [5, 0, pad],
                               [pad, pad, pad]],
                              [[7, 0, 0],
                               [11, 0, 0],
                               [0, 13, 0]]], tf.float64)  # pyformat: disable
        scores = tf.log(scores)

        num_nodes = tf.constant([1, 2, 3], tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        self.assertAlmostEqual(tf.exp(log_partition_functions[0]).eval(), 2.0)
        self.assertAlmostEqual(tf.exp(log_partition_functions[1]).eval(),
                               3.0 * 5.0)
        self.assertAlmostEqual(tf.exp(log_partition_functions[2]).eval(),
                               7.0 * 11.0 * 13.0)

  def testLogPartitionFunctionTwoTreesScaled(self):
    """Tests the log partition function with two feasible trees."""
    with self.test_session():
      for forest in [False, True]:
        # Each score matrix supports exactly two trees with varying score, and
        # the rest with score=0.  Thus the log partition function will equal
        # the sum of scores of those two trees in each case.
        pad = 12345.6
        scores = tf.constant([[[2, 0, 0, pad],
                               [3, 0, 0, pad],
                               [5, 7, 0, pad],
                               [pad, pad, pad, pad]],
                              [[0, 11, 0, 13],
                               [0, 17, 0, 0],
                               [0, 19, 0, 0],
                               [0, 23, 0, 0]]], tf.float64)  # pyformat: disable
        scores = tf.log(scores)

        num_nodes = tf.constant([3, 4], tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        self.assertAlmostEqual(tf.exp(log_partition_functions[0]).eval(),
                               2.0 * 3.0 * 5.0 + 2.0 * 3.0 * 7.0)
        self.assertAlmostEqual(tf.exp(log_partition_functions[1]).eval(),
                               11.0 * 17.0 * 19.0 * 23.0 +
                               13.0 * 17.0 * 19.0 * 23.0)

  def testLogPartitionFunctionInfeasible(self):
    """Tests the log partition function on infeasible scores."""
    with self.test_session():
      for forest in [False, True]:
        # The scores form cycles of various sizes.  Note that one can compute
        # the partition function for infeasible scores---it's the gradient that
        # may be impacted by numerical error.
        pad = 12345.6
        scores = tf.constant([[[0, 1, pad, pad],
                               [1, 0, pad, pad],
                               [pad, pad, pad, pad],
                               [pad, pad, pad, pad]],
                              [[0, 1, 0, pad],
                               [0, 0, 1, pad],
                               [1, 0, 0, pad],
                               [pad, pad, pad, pad]],
                              [[0, 1, 0, 0],
                               [0, 0, 1, 0],
                               [0, 0, 0, 1],
                               [1, 0, 0, 0]]], tf.float64)  # pyformat: disable
        scores = tf.log(scores)

        num_nodes = tf.constant([2, 3, 4], tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        self.assertAlmostEqual(tf.exp(log_partition_functions[0]).eval(), 0.0)
        self.assertAlmostEqual(tf.exp(log_partition_functions[1]).eval(), 0.0)
        self.assertAlmostEqual(tf.exp(log_partition_functions[2]).eval(), 0.0)

  def testLogPartitionFunctionAllTrees(self):
    """Tests the log partition function with all trees feasible."""
    with self.test_session():
      for forest in [False, True]:
        # The scores allow all trees.  Using Cayley's formula, the
        # number of directed spanning trees and forests in a complete
        # digraph of n nodes is n^{n-1} and (n+1)^{n-1}, respectively.
        # https://en.wikipedia.org/wiki/Cayley%27s_formula
        scores = tf.zeros([10, 10, 10], tf.float64)  # = 1 in log domain
        num_nodes = tf.range(1, 11, dtype=tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        base_offset = 1 if forest else 0  # n+1 for forest, n for tree
        for size in range(1, 11):
          self.assertAlmostEqual(log_partition_functions[size - 1].eval(),
                                 (size - 1) * math.log(size + base_offset))

  def testLogPartitionFunctionWithVeryHighValues(self):
    """Tests the overflow protection in the log partition function."""
    with self.test_session():
      for forest in [False, True]:
        # Set the scores to very high values to test overflow protection.
        scores = 1000 * tf.ones([10, 10, 10], tf.float64)
        num_nodes = tf.range(1, 11, dtype=tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        base_offset = 1 if forest else 0  # n+1 for forest, n for tree
        for size in range(1, 11):
          self.assertAlmostEqual(log_partition_functions[size - 1].eval(),
                                 (size - 1) * math.log(size + base_offset) +
                                 size * 1000)

  def testLogPartitionFunctionWithVeryLowValues(self):
    """Tests the underflow protection in the log partition function."""
    with self.test_session():
      for forest in [False, True]:
        # Set the scores to very low values to test underflow protection.
        scores = -1000 * tf.ones([10, 10, 10], tf.float64)
        num_nodes = tf.range(1, 11, dtype=tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        base_offset = 1 if forest else 0  # n+1 for forest, n for tree
        for size in range(1, 11):
          self.assertAlmostEqual(log_partition_functions[size - 1].eval(),
                                 (size - 1) * math.log(size + base_offset) -
                                 size * 1000)

  def testLogPartitionFunctionGradientError(self):
    """Validates the log partition function gradient."""
    with self.test_session():
      for forest in [False, True]:
        # To avoid numerical issues, provide score matrices that are weighted
        # towards feasible trees or forests.
        scores_raw = [[[0, 0, 0, 0],
                       [1, 0, 0, 0],
                       [1, 2, 0, 0],
                       [1, 2, 3, 4]],
                      [[4, 3, 2, 9],
                       [0, 0, 2, 9],
                       [0, 0, 0, 9],
                       [9, 9, 9, 9]]]  # pyformat: disable

        scores = tf.constant(scores_raw, tf.float64)
        init_scores = np.array(scores_raw)

        num_nodes = tf.constant([4, 3], tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        gradient_error = tf.test.compute_gradient_error(
            scores, [2, 4, 4], log_partition_functions, [2], init_scores)
        tf.logging.info('forest=%s gradient_error=%s', forest, gradient_error)

        self.assertLessEqual(gradient_error, 1e-7)

  def testLogPartitionFunctionGradientErrorFailsIfInfeasible(self):
    """Tests that the partition function gradient fails on infeasible scores."""
    with self.test_session():
      for forest in [False, True]:
        # The scores form cycles of various sizes.
        pad = 12345.6
        scores_raw = [[[0, 1, pad, pad],
                       [1, 0, pad, pad],
                       [pad, pad, pad, pad],
                       [pad, pad, pad, pad]],
                      [[0, 1, 0, pad],
                       [0, 0, 1, pad],
                       [1, 0, 0, pad],
                       [pad, pad, pad, pad]],
                      [[0, 1, 0, 0],
                       [0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [1, 0, 0, 0]]]  # pyformat: disable

        scores = tf.log(scores_raw)
        init_scores = np.log(np.array(scores_raw))

        num_nodes = tf.constant([2, 3, 4], tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest)

        with self.assertRaises(Exception):
          tf.test.compute_gradient_error(
              scores, [3, 4, 4], log_partition_functions, [3], init_scores)

  def testLogPartitionFunctionGradientErrorOkIfInfeasibleWithClipping(self):
    """Tests that the log partition function gradient is OK after clipping."""
    with self.test_session():
      for forest in [False, True]:
        # The scores form cycles of various sizes.
        pad = 12345.6
        scores_raw = [[[0, 1, pad, pad],
                       [1, 0, pad, pad],
                       [pad, pad, pad, pad],
                       [pad, pad, pad, pad]],
                      [[0, 1, 0, pad],
                       [0, 0, 1, pad],
                       [1, 0, 0, pad],
                       [pad, pad, pad, pad]],
                      [[0, 1, 0, 0],
                       [0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [1, 0, 0, 0]]]  # pyformat: disable

        scores = tf.log(scores_raw)
        init_scores = np.log(np.array(scores_raw))

        num_nodes = tf.constant([2, 3, 4], tf.int32)
        log_partition_functions = mst_ops.log_partition_function(
            num_nodes, scores, forest=forest, max_dynamic_range=10)

        gradient_error = tf.test.compute_gradient_error(
            scores, [3, 4, 4], log_partition_functions, [3], init_scores)
        tf.logging.info('forest=%s gradient_error=%s', forest, gradient_error)

        # There's still a lot of error.
        self.assertLessEqual(gradient_error, 1e-3)


if __name__ == '__main__':
  tf.test.main()
research/syntaxnet/dragnn/python/mst_units.py (new file, 0 → 100644)
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""DRAGNN wrappers for the MST solver."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from dragnn.python import mst_ops
from dragnn.python import network_units
from syntaxnet.util import check


class MstSolverNetwork(network_units.NetworkUnitInterface):
  """Network unit that performs MST prediction with structured loss.

  Parameters:
    forest: If true, solve for a spanning forest instead of a spanning tree.
    loss: The loss function for training.  Select from
        softmax: Default unstructured softmax (prediction is still structured).
        m3n: Max-Margin Markov Networks loss.
    crf_max_dynamic_range: Max dynamic range for the log partition function.

  Links:
    lengths: [B, 1] sequence lengths per batch item.
    scores: [B * N, N] matrix of padded batched arc scores.

  Layers:
    lengths: [B] sequence lengths per batch item.
    scores: [B, N, N] tensor of padded batched arc scores.
    logits: [B * N, N] matrix of padded batched arc scores.
    arcs: [B * N, N] matrix of padded batched 0/1 indicators for MST arcs.
  """

  def __init__(self, component):
    """Initializes layers.

    Args:
      component: Parent ComponentBuilderBase object.
    """
    layers = [
        network_units.Layer(self, 'lengths', -1),
        network_units.Layer(self, 'scores', -1),
        network_units.Layer(self, 'logits', -1),
        network_units.Layer(self, 'arcs', -1),
    ]
    super(MstSolverNetwork, self).__init__(component, init_layers=layers)

    self._attrs = network_units.get_attrs_with_defaults(
        component.spec.network_unit.parameters,
        defaults={
            'forest': False,
            'loss': 'softmax',
            'crf_max_dynamic_range': 20,
        })

    check.Eq(len(self._fixed_feature_dims.items()), 0,
             'Expected no fixed features')
    check.Eq(len(self._linked_feature_dims.items()), 2,
             'Expected two linked features')
    check.In('lengths', self._linked_feature_dims,
             'Missing required linked feature')
    check.In('scores', self._linked_feature_dims,
             'Missing required linked feature')

  def create(self,
             fixed_embeddings,
             linked_embeddings,
             context_tensor_arrays,
             attention_tensor,
             during_training,
             stride=None):
    """Forwards the lengths and scores."""
    check.NotNone(stride, 'MstSolverNetwork requires stride')

    lengths = network_units.lookup_named_tensor('lengths', linked_embeddings)
    lengths_b = tf.to_int32(tf.squeeze(lengths.tensor, [1]))

    scores = network_units.lookup_named_tensor('scores', linked_embeddings)
    scores_bnxn = scores.tensor
    max_length = tf.shape(scores_bnxn)[1]
    scores_bxnxn = tf.reshape(scores_bnxn, [stride, max_length, max_length])

    _, argmax_sources_bxn = mst_ops.maximum_spanning_tree(
        forest=self._attrs['forest'], num_nodes=lengths_b, scores=scores_bxnxn)
    argmax_sources_bn = tf.reshape(argmax_sources_bxn, [-1])
    arcs_bnxn = tf.one_hot(argmax_sources_bn, max_length, dtype=tf.float32)

    return [lengths_b, scores_bxnxn, scores_bnxn, arcs_bnxn]

  def get_logits(self, network_tensors):
    return network_tensors[self.get_layer_index('logits')]

  def get_bulk_predictions(self, stride, network_tensors):
    return network_tensors[self.get_layer_index('arcs')]

  def compute_bulk_loss(self, stride, network_tensors, gold):
    """See base class."""
    if self._attrs['loss'] == 'softmax':
      return (None, None, None)  # fall back to default bulk softmax

    lengths_b, scores_bxnxn, _, arcs_bnxn = network_tensors
    max_length = tf.shape(scores_bxnxn)[2]
    arcs_bxnxn = tf.reshape(arcs_bnxn, [stride, max_length, max_length])
    gold_bxn = tf.reshape(gold, [stride, max_length])
    gold_bxnxn = tf.one_hot(gold_bxn, max_length, dtype=tf.float32)

    loss = self._compute_loss(lengths_b, scores_bxnxn, gold_bxnxn)
    correct = tf.reduce_sum(tf.to_int32(arcs_bxnxn * gold_bxnxn))
    total = tf.reduce_sum(lengths_b)
    return loss, correct, total

  def _compute_loss(self, lengths, scores, gold):
    """Computes the configured structured loss for a batch.

    Args:
      lengths: [B] sequence lengths per batch item.
      scores: [B, N, N] tensor of padded batched arc scores.
      gold: [B, N, N] tensor of 0/1 indicators for gold arcs.

    Returns:
      Scalar sum of losses across the batch.
    """
    # Dispatch to one of the _compute_*_loss() methods.
    method_name = '_compute_%s_loss' % self._attrs['loss']
    loss_b = getattr(self, method_name)(lengths, scores, gold)
    return tf.reduce_sum(loss_b)

  def _compute_m3n_loss(self, lengths, scores, gold):
    """Computes the M3N-style structured hinge loss for a batch."""
    # Perform hamming-loss-augmented inference.
    gold_scores_b = tf.reduce_sum(scores * gold, axis=[1, 2])
    hamming_loss_bxnxn = 1 - gold
    scores_bxnxn = scores + hamming_loss_bxnxn
    max_scores_b, _ = mst_ops.maximum_spanning_tree(
        num_nodes=lengths, scores=scores_bxnxn, forest=self._attrs['forest'])
    return max_scores_b - gold_scores_b

  def _compute_crf_loss(self, lengths, scores, gold):
    """Computes the negative CRF log-probability for a batch."""
    # The |scores| are assumed to be in the log domain.
    log_gold_scores_b = tf.reduce_sum(scores * gold, axis=[1, 2])
    log_partition_functions_b = mst_ops.log_partition_function(
        num_nodes=lengths,
        scores=scores,
        forest=self._attrs['forest'],
        max_dynamic_range=self._attrs['crf_max_dynamic_range'])
    return log_partition_functions_b - log_gold_scores_b  # negative log-prob
research/syntaxnet/dragnn/python/mst_units_test.py (new file, 0 → 100644)
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for DRAGNN wrappers for the MST solver."""

import math

import tensorflow as tf

from google.protobuf import text_format
from dragnn.protos import spec_pb2
from dragnn.python import mst_units
from dragnn.python import network_units

_MASTER_SPEC = r"""
component {
  name: 'test'
  linked_feature {
    name: 'lengths'
    size: 1
    embedding_dim: -1
    fml: 'input.focus'
    source_translator: 'identity'
    source_component: 'previous'
    source_layer: 'lengths'
  }
  linked_feature {
    name: 'scores'
    size: 1
    embedding_dim: -1
    fml: 'input.focus'
    source_translator: 'identity'
    source_component: 'previous'
    source_layer: 'scores'
  }
}
"""


class MockNetwork(object):

  def get_layer_size(self, unused_name):
    return -1


class MockComponent(object):

  def __init__(self, master, component_spec):
    self.master = master
    self.spec = component_spec
    self.name = component_spec.name
    self.beam_size = 1
    self.num_actions = -1
    self.network = MockNetwork()


class MockMaster(object):

  def __init__(self, build_runtime_graph=False):
    self.spec = spec_pb2.MasterSpec()
    text_format.Parse(_MASTER_SPEC, self.spec)
    self.hyperparams = spec_pb2.GridPoint()
    self.lookup_component = {
        'previous': MockComponent(self, spec_pb2.ComponentSpec())
    }
    self.build_runtime_graph = build_runtime_graph


class MstSolverNetworkTest(tf.test.TestCase):

  def setUp(self):
    # Clear the graph and all existing variables.  Otherwise, variables created
    # in different tests may collide with each other.
    tf.reset_default_graph()

  def testCreate(self):
    with self.test_session():
      master = MockMaster()
      component = MockComponent(master, master.spec.component[0])
      component.network = mst_units.MstSolverNetwork(component)

      stride = 1
      lengths = tf.constant([[3]], dtype=tf.int64)
      scores = tf.constant([[1.0, 0.5, 0.5],
                            [2.0, 0.5, 0.5],
                            [0.5, 3.0, 0.5]],
                           dtype=tf.float32)  # pyformat: disable
      linked_embeddings = [
          network_units.NamedTensor(lengths, 'lengths'),
          network_units.NamedTensor(scores, 'scores')
      ]

      network_tensors = component.network.create([], linked_embeddings, [],
                                                 None, False, stride)

      self.assertAllEqual(network_tensors[0].eval(), [3])
      self.assertAllEqual(network_tensors[1].eval(),
                          [[[1.0, 0.5, 0.5],
                            [2.0, 0.5, 0.5],
                            [0.5, 3.0, 0.5]]])  # pyformat: disable
      self.assertAllEqual(network_tensors[2].eval(),
                          [[1.0, 0.5, 0.5],
                           [2.0, 0.5, 0.5],
                           [0.5, 3.0, 0.5]])  # pyformat: disable
      self.assertAllEqual(network_tensors[3].eval(),
                          [[1.0, 0.0, 0.0],
                           [1.0, 0.0, 0.0],
                           [0.0, 1.0, 0.0]])  # pyformat: disable

  def testGetBulkPredictions(self):
    with self.test_session():
      master = MockMaster()
      component = MockComponent(master, master.spec.component[0])
      component.network = mst_units.MstSolverNetwork(component)

      stride = 2
      lengths = tf.constant([[2], [3]], dtype=tf.int64)
      pad = -12345.6
      scores = tf.constant([[1.0, 2.0, pad],
                            [1.8, 2.0, pad],
                            [pad, pad, pad],
                            [3.8, 4.0, 3.9],
                            [3.9, 3.8, 4.0],
                            [3.8, 0.9, 4.0]],
                           dtype=tf.float32)  # pyformat: disable
      linked_embeddings = [
          network_units.NamedTensor(lengths, 'lengths'),
          network_units.NamedTensor(scores, 'scores')
      ]

      network_tensors = component.network.create([], linked_embeddings, [],
                                                 None, False, stride)
      predictions = component.network.get_bulk_predictions(
          stride, network_tensors)

      self.assertAllEqual(predictions.eval(),
                          [[0.0, 1.0, 0.0],
                           [0.0, 1.0, 0.0],
                           [0.0, 0.0, 0.0],
                           [0.0, 1.0, 0.0],
                           [0.0, 0.0, 1.0],
                           [0.0, 0.0, 1.0]])  # pyformat: disable

  def testComputeBulkLossM3n(self):
    with self.test_session():
      master = MockMaster()
      component = MockComponent(master, master.spec.component[0])
      component.spec.network_unit.parameters['loss'] = 'm3n'
      component.network = mst_units.MstSolverNetwork(component)

      stride = 2
      lengths = tf.constant([[2], [3]], dtype=tf.int64)

      # Note that these scores are large enough to overcome the +1 hamming loss
      # terms in the M3N loss.  Therefore, the score matrix determines the tree
      # that is used to compute the M3N loss.
      pad = -12345.6
      scores = tf.constant([[0.5, 2.0, pad],
                            [0.5, 2.0, pad],
                            [pad, pad, pad],
                            [2.5, 4.0, 2.5],
                            [2.5, 2.5, 4.0],
                            [2.5, 2.5, 4.0]],
                           dtype=tf.float32)  # pyformat: disable

      # For the first tree, the gold and scores agree on one arc (that index 1
      # is a root), and for the second tree, the gold and scores agree on none
      # of the arcs.  Therefore, we expect +1 and +3 for the first and second
      # trees in the M3N loss.
      gold = tf.constant([0, 1, -1, 0, 0, 1], tf.int32)

      first_gold_score = 0.5 + 2.0
      second_gold_score = 2.5 + 2.5 + 2.5
      first_tree_correct = 1
      second_tree_correct = 0
      first_tree_loss = 2 * 2.0 + 2 - first_tree_correct - first_gold_score
      second_tree_loss = 3 * 4.0 + 3 - second_tree_correct - second_gold_score

      linked_embeddings = [
          network_units.NamedTensor(lengths, 'lengths'),
          network_units.NamedTensor(scores, 'scores')
      ]
      network_tensors = component.network.create([], linked_embeddings, [],
                                                 None, False, stride)
      cost, correct, total = component.network.compute_bulk_loss(
          stride, network_tensors, gold)

      self.assertEqual(cost.eval(), first_tree_loss + second_tree_loss)
      self.assertEqual(correct.eval(), first_tree_correct + second_tree_correct)
      self.assertEqual(total.eval(), 2 + 3)

  def testComputeBulkLossCrf(self):
    with self.test_session():
      master = MockMaster()
      component = MockComponent(master, master.spec.component[0])
      component.spec.network_unit.parameters['loss'] = 'crf'
      component.network = mst_units.MstSolverNetwork(component)

      stride = 2
      lengths = tf.constant([[2], [3]], dtype=tf.int64)

      # These scores have 2.0 (in the log domain) on the gold arcs and 1.0
      # elsewhere.
      pad = -12345.6
      one = math.log(1.0)
      two = math.log(2.0)
      scores = tf.constant([[one, two, pad],
                            [one, two, pad],
                            [pad, pad, pad],
                            [one, two, one],
                            [one, one, two],
                            [one, one, two]],
                           dtype=tf.float32)  # pyformat: disable
      gold = tf.constant([1, 1, -1, 1, 2, 2], tf.int32)

      first_partition_function = (2.0 * 2.0 +  # 0 -> 1 (gold)
                                  1.0 * 1.0)   # 1 -> 0
      first_loss = -math.log(2.0 * 2.0 / first_partition_function)

      second_partition_function = (2.0 * 2.0 * 2.0 +  # 0 -> 1 -> 2 (gold)
                                   1.0 * 1.0 * 1.0 +  # 2 -> 1 -> 0
                                   1.0 * 1.0 * 1.0 +  # 0 -> 2 -> 1
                                   2.0 * 1.0 * 1.0 +  # 1 -> 2 -> 0
                                   2.0 * 1.0 * 1.0 +  # 1 -> 0 -> 2
                                   2.0 * 1.0 * 1.0 +  # 2 -> 0 -> 1
                                   2.0 * 2.0 * 1.0 +  # {0, 1} -> 2
                                   2.0 * 1.0 * 1.0 +  # {0, 2} -> 1
                                   1.0 * 1.0 * 1.0)   # {1, 2} -> 0
      second_loss = -math.log(2.0 * 2.0 * 2.0 / second_partition_function)

      linked_embeddings = [
          network_units.NamedTensor(lengths, 'lengths'),
          network_units.NamedTensor(scores, 'scores')
      ]
      network_tensors = component.network.create([], linked_embeddings, [],
                                                 None, False, stride)
      cost, correct, total = component.network.compute_bulk_loss(
          stride, network_tensors, gold)

      self.assertAlmostEqual(cost.eval(), first_loss + second_loss)
      self.assertEqual(correct.eval(), 2 + 3)
      self.assertEqual(total.eval(), 2 + 3)


if __name__ == '__main__':
  tf.test.main()
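For reference, the CRF loss checked in testComputeBulkLossCrf above can be reproduced by hand for the two-token batch item, since only two spanning trees exist there. A small illustrative calculation (not part of the commit) of the quantity _compute_crf_loss() returns, i.e. log Z minus the log score of the gold tree:

import math

# Gold arcs score 2.0 and all other arcs 1.0 (before taking logs).
log_gold = math.log(2.0) + math.log(2.0)  # token 1 is root, arc 1 -> 0
partition = 2.0 * 2.0 + 1.0 * 1.0         # gold tree + (token 0 is root, 0 -> 1)
crf_loss = math.log(partition) - log_gold  # = -log(4/5), roughly 0.223
print(crf_loss)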
research/syntaxnet/dragnn/python/network_units.py (modified)
@@ -22,7 +22,6 @@ import abc
 import numpy as np
 from six.moves import xrange
 import tensorflow as tf
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import tensor_array_ops as ta

@@ -76,11 +75,13 @@ class StoredActivations(object):
       check.NotNone(dim, 'Dim is required for bulk tensor')
       self._bulk_tensor = tensor
-      with tf.name_scope('convert_to_dyn'):
-        tensor = tf.reshape(tensor, [stride, -1, dim])
-        tensor = tf.transpose(tensor, perm=[1, 0, 2])
-        pad = tf.zeros([1, stride, dim], dtype=tensor.dtype)
-        self._array_tensor = tf.concat([pad, tensor], 0)
+      if dim >= 0:
+        # These operations will fail if |dim| is negative.
+        with tf.name_scope('convert_to_dyn'):
+          tensor = tf.reshape(tensor, [stride, -1, dim])
+          tensor = tf.transpose(tensor, perm=[1, 0, 2])
+          pad = tf.zeros([1, stride, dim], dtype=tensor.dtype)
+          self._array_tensor = tf.concat([pad, tensor], 0)

     if array is not None:
       check.IsNone(tensor, 'Cannot initialize from both tensor and array')

@@ -130,7 +131,8 @@ def add_embeddings(channel_id, feature_spec, seed=None):
   check.Gt(feature_spec.embedding_dim, 0,
            'Embeddings requested for non-embedded feature: %s' % feature_spec)
   name = fixed_embeddings_name(channel_id)
-  shape = [feature_spec.vocabulary_size + 1, feature_spec.embedding_dim]
+  row_num = feature_spec.vocabulary_size + 1
+  shape = [row_num, feature_spec.embedding_dim]
   if feature_spec.HasField('pretrained_embedding_matrix'):
     if len(feature_spec.pretrained_embedding_matrix.part) > 1:
       raise RuntimeError('pretrained_embedding_matrix resource contains '

@@ -143,9 +145,9 @@ def add_embeddings(channel_id, feature_spec, seed=None):
     embeddings = syntaxnet_ops.word_embedding_initializer(
         vectors=feature_spec.pretrained_embedding_matrix.part[0].file_pattern,
         vocabulary=feature_spec.vocab.part[0].file_pattern,
+        override_num_embeddings=row_num,
         num_special_embeddings=1,
-        embedding_init=1.0,
+        embedding_init=0.0,  # zero out rows with no pretrained values
         seed=seed1,
         seed2=seed2)
     return tf.get_variable(

@@ -183,7 +185,57 @@ def embedding_lookup(embedding_matrix, indices, ids, weights, size):
   return embeddings


-def fixed_feature_lookup(component, state, channel_id, stride):
+def apply_feature_id_dropout(ids, weights, channel):
+  """Randomly perturbs a vector of feature IDs.
+
+  Args:
+    ids: Vector of feature IDs.
+    weights: Vector of feature weights.
+    channel: FixedFeatureChannel that extracted the |ids|.
+
+  Returns:
+    Copy of |ids| and |weights| where each ID is randomly replaced with
+    |channel.dropout_id|, according to the probabilities in
+    |channel.dropout_keep_probabilities|.  The weights of dropped features are
+    set to zero if |channel.dropout_id| equals |channel.vocabulary_size|.
+  """
+  check.Gt(
+      len(channel.dropout_keep_probability), 0,
+      'Channel {} dropout_keep_probability is empty'.format(channel.name))
+  check.Le(
+      len(channel.dropout_keep_probability), channel.vocabulary_size,
+      'Channel {} dropout_keep_probability is too long'.format(channel.name))
+
+  # Channel fields, converted from proto to constant tensor.
+  dropout_id = tf.constant(
+      channel.dropout_id, name='dropout_id', dtype=tf.int64)
+  dropout_keep_probabilities = tf.constant(
+      list(channel.dropout_keep_probability),
+      name='dropout_keep_probability',
+      dtype=tf.float32,
+      shape=[channel.vocabulary_size])
+
+  # The keep probabilities for the current batch of feature IDs.
+  keep_probabilities = tf.gather(dropout_keep_probabilities, ids)
+
+  # Draw random values and determine which IDs should be kept.
+  shape = tf.shape(ids)
+  noise = tf.random_uniform(shape)  # \in [0,1)^d
+  should_keep = noise < keep_probabilities
+
+  # Replace dropped IDs with the specified replacement ID.
+  dropout_ids = tf.fill(shape, dropout_id)
+  new_ids = tf.where(should_keep, ids, dropout_ids)
+
+  if channel.dropout_id == channel.vocabulary_size:
+    # Replace weights of dropped IDs with 0.
+    zeros = tf.zeros(shape, dtype=tf.float32)
+    new_weights = tf.where(should_keep, weights, zeros)
+  else:
+    new_weights = weights
+
+  return new_ids, new_weights
+
+
+def fixed_feature_lookup(component, state, channel_id, stride,
+                         during_training):
   """Looks up fixed features and passes them through embeddings.

   Embedding vectors may be scaled by weights if the features specify it.

@@ -193,6 +245,8 @@ def fixed_feature_lookup(component, state, channel_id, stride):
     state: MasterState object for the live ComputeSession.
     channel_id: int id of the fixed feature to look up.
     stride: int Tensor of current batch * beam size.
+    during_training: True if this is being called from a training code path.
+      This controls, e.g., the use of feature ID dropout.

   Returns:
     NamedTensor object containing the embedding vectors.

@@ -200,13 +254,35 @@ def fixed_feature_lookup(component, state, channel_id, stride):
   feature_spec = component.spec.fixed_feature[channel_id]
   check.Gt(feature_spec.embedding_dim, 0,
            'Embeddings requested for non-embedded feature: %s' % feature_spec)
-  embedding_matrix = component.get_variable(fixed_embeddings_name(channel_id))
+  if feature_spec.is_constant:
+    embedding_matrix = tf.get_variable(fixed_embeddings_name(channel_id))
+  else:
+    embedding_matrix = component.get_variable(fixed_embeddings_name(channel_id))

   with tf.op_scope([embedding_matrix], 'fixed_embedding_' + feature_spec.name):
     indices, ids, weights = dragnn_ops.extract_fixed_features(
         state.handle, component=component.name, channel_id=channel_id)
-    size = stride * feature_spec.size
-    embeddings = embedding_lookup(embedding_matrix, indices, ids, weights, size)
+
+    if during_training and feature_spec.dropout_id >= 0:
+      ids, weights = apply_feature_id_dropout(ids, weights, feature_spec)
+
+    if component.master.build_runtime_graph:
+      # To simplify integration with NN compilers, assume that each feature in
+      # the channel extracts exactly one ID and no weights.
+      # TODO(googleuser): Relax this restriction?
+      embeddings = []
+      for index in range(feature_spec.size):
+        feature_id = component.add_cell_input(
+            tf.int32, [1],
+            'fixed_channel_{}_index_{}_ids'.format(channel_id, index))
+        embeddings.append(tf.gather(embedding_matrix, feature_id))
+      embeddings = tf.concat(embeddings, 1)
+    else:
+      size = stride * feature_spec.size
+      embeddings = embedding_lookup(embedding_matrix, indices, ids, weights,
+                                    size)
+
     dim = feature_spec.size * feature_spec.embedding_dim
     return NamedTensor(
         tf.reshape(embeddings, [-1, dim]), feature_spec.name, dim=dim)

@@ -368,12 +444,16 @@ def convert_network_state_tensorarray(tensorarray):
   return tf.reshape(tensor, [-1, tf.shape(tensor)[2]])


-def pass_through_embedding_matrix(act_block, embedding_matrix, step_idx):
+def pass_through_embedding_matrix(component, channel_id, size, act_block,
+                                  embedding_matrix, step_idx):
   """Passes the activations through the embedding_matrix.

   Takes care to handle out of bounds lookups.

   Args:
+    component: Component that produced the linked features.
+    channel_id: Channel that produced the linked features.
+    size: Number of linked embeddings in the channel.
     act_block: matrix of activations.
     embedding_matrix: matrix of weights.
     step_idx: vector containing step indices, with -1 indicating out of bounds.

@@ -383,14 +463,36 @@ def pass_through_embedding_matrix(act_block, embedding_matrix, step_idx):
   """
   # Indicator vector for out of bounds lookups.
   step_idx_mask = tf.expand_dims(tf.equal(step_idx, -1), -1)
+  step_idx_mask = tf.to_float(step_idx_mask)
+  if component.master.build_runtime_graph:
+    step_idx_mask = component.add_cell_input(
+        step_idx_mask.dtype, [size, 1],
+        'linked_channel_{}_out_of_bounds'.format(channel_id))

   # Pad the last column of the activation vectors with the indicator.
-  act_block = tf.concat([act_block, tf.to_float(step_idx_mask)], 1)
+  act_block = tf.concat([act_block, step_idx_mask], 1)
   return tf.matmul(act_block, embedding_matrix)


+def lookup_named_tensor_or_none(name, named_tensors):
+  """Retrieves a NamedTensor by name, or None if it doesn't exist.
+
+  Args:
+    name: Name of the tensor to retrieve.
+    named_tensors: List of NamedTensor objects to search.
+
+  Returns:
+    The NamedTensor in |named_tensors| with the |name| or None.
+  """
+  for named_tensor in named_tensors:
+    if named_tensor.name == name:
+      return named_tensor
+  return None
+
+
 def lookup_named_tensor(name, named_tensors):
-  """Retrieves a NamedTensor by name.
+  """Retrieves a NamedTensor by name, raising KeyError if it doesn't exist.

   Args:
     name: Name of the tensor to retrieve.

@@ -402,11 +504,11 @@ def lookup_named_tensor(name, named_tensors):
   Raises:
     KeyError: If the |name| is not found among the |named_tensors|.
   """
-  for named_tensor in named_tensors:
-    if named_tensor.name == name:
-      return named_tensor
-  raise KeyError('Name "%s" not found in named tensors: %s' %
-                 (name, named_tensors))
+  result = lookup_named_tensor_or_none(name, named_tensors)
+  if result is None:
+    raise KeyError('Name "%s" not found in named tensors: %s' %
+                   (name, named_tensors))
+  return result


 def activation_lookup_recurrent(component, state, channel_id, source_array,

@@ -417,9 +519,9 @@ def activation_lookup_recurrent(component, state, channel_id, source_array,
       not passed through (i.e. multiplied by) an embedding matrix.

   Args:
-    component: Component object in which to look up the fixed features.
+    component: Component object in which to look up the linked features.
     state: MasterState object for the live ComputeSession.
-    channel_id: int id of the fixed feature to look up.
+    channel_id: int id of the linked feature to look up.
     source_array: TensorArray from which to fetch feature vectors, expected to
       have size [steps + 1] elements of shape [stride, D] each.
     source_layer_size: int length of feature vectors before embedding.

@@ -459,11 +561,17 @@ def activation_lookup_recurrent(component, state, channel_id, source_array,
     act_block = tf.gather(act_block, flat_idx)
     act_block = tf.reshape(act_block, [-1, source_layer_size])

+    if component.master.build_runtime_graph:
+      act_block = component.add_cell_input(
+          act_block.dtype, [feature_spec.size, source_layer_size],
+          'linked_channel_{}_activations'.format(channel_id))
+
     if feature_spec.embedding_dim != -1:
       embedding_matrix = component.get_variable(
           linked_embeddings_name(channel_id))
-      act_block = pass_through_embedding_matrix(act_block, embedding_matrix,
-                                                step_idx)
+      act_block = pass_through_embedding_matrix(component, channel_id,
+                                                feature_spec.size, act_block,
+                                                embedding_matrix, step_idx)
       dim = feature_spec.size * feature_spec.embedding_dim
     else:
       # If embedding_dim is -1, just output concatenation of activations.

@@ -481,9 +589,9 @@ def activation_lookup_other(component, state, channel_id, source_tensor,
       not passed through (i.e. multiplied by) an embedding matrix.

   Args:
-    component: Component object in which to look up the fixed features.
+    component: Component object in which to look up the linked features.
     state: MasterState object for the live ComputeSession.
-    channel_id: int id of the fixed feature to look up.
+    channel_id: int id of the linked feature to look up.
     source_tensor: Tensor from which to fetch feature vectors. Expected to have
       have shape [steps + 1, stride, D].
     source_layer_size: int length of feature vectors before embedding (D). It

@@ -508,11 +616,17 @@ def activation_lookup_other(component, state, channel_id, source_tensor,
     act_block = tf.gather_nd(source_tensor, indices)
     act_block = tf.reshape(act_block, [-1, source_layer_size])

+    if component.master.build_runtime_graph:
+      act_block = component.add_cell_input(
+          act_block.dtype, [feature_spec.size, source_layer_size],
+          'linked_channel_{}_activations'.format(channel_id))
+
     if feature_spec.embedding_dim != -1:
       embedding_matrix = component.get_variable(
           linked_embeddings_name(channel_id))
-      act_block = pass_through_embedding_matrix(act_block, embedding_matrix,
-                                                step_idx)
+      act_block = pass_through_embedding_matrix(component, channel_id,
+                                                feature_spec.size, act_block,
+                                                embedding_matrix, step_idx)
       dim = feature_spec.size * feature_spec.embedding_dim
     else:
       # If embedding_dim is -1, just output concatenation of activations.

@@ -629,7 +743,7 @@ class Layer(object):
     Returns:
       TensorArray object
     """
-    check.Gt(self.dim, 0, 'Cannot create array when dimension is dynamic')
+    check.Ge(self.dim, 0, 'Cannot create array when dimension is dynamic')
     tensor_array = ta.TensorArray(dtype=tf.float32,
                                   size=0,

@@ -671,7 +785,19 @@ def get_attrs_with_defaults(parameters, defaults):
   return attrs


-def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
+def maybe_make_dropout_mask(shape, keep_prob):
+  """Returns a reusable dropout mask, or None if dropout would not occur."""
+  if keep_prob >= 1.0:
+    return None
+  return tf.nn.dropout(tf.ones(shape, dtype=tf.float32), keep_prob)
+
+
+def maybe_apply_dropout(inputs,
+                        keep_prob,
+                        per_sequence,
+                        stride=None,
+                        dropout_mask=None,
+                        name=None):
   """Applies dropout, if so configured, to an input tensor.

   The input may be rank 2 or 3 depending on whether the stride (i.e., batch

@@ -682,20 +808,27 @@ def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
     keep_prob: Scalar probability of keeping each input element.  If >= 1.0, no
       dropout is performed.
     per_sequence: If true, sample the dropout mask once per sequence, instead of
-      once per step.  Requires |stride| when true.
-    stride: Scalar batch size.  Optional if |per_sequence| is false.
+      once per step.  Either |stride| or |dropout_mask| must be set when true.
+    stride: Scalar batch size.  Optional if |per_sequence| is false, or if
+      |dropout_mask| is provided.
+    dropout_mask: Precomputed dropout mask to apply to the |inputs|; must be
+      broadcastable to |inputs|.  Optional if |per_sequence| is false, or if
+      |stride| is provided.
+    name: Optional name for the dropout operation, if dropout is applied.

   Returns:
     [stride * num_steps, dim] or [stride, num_steps, dim] tensor, matching the
     shape of |inputs|, containing the masked or original inputs, depending on
     whether dropout was actually performed.
   """
   if keep_prob >= 1.0:
     return inputs

   if not per_sequence:
-    return tf.nn.dropout(inputs, keep_prob)
+    return tf.nn.dropout(inputs, keep_prob, name=name)
+
+  if dropout_mask is not None:
+    return tf.multiply(inputs, dropout_mask, name=name)

   # We only check the dims if we are applying per-sequence dropout
   check.Ge(inputs.get_shape().ndims, 2, 'inputs must be rank 2 or 3')

@@ -713,7 +846,7 @@ def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
   # Replace |num_steps| with 1 in |noise_shape|, so the dropout mask broadcasts
   # to all steps for a particular sequence.
   noise_shape = [stride, 1, dim]
-  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape)
+  masked_sxnxd = tf.nn.dropout(inputs_sxnxd, keep_prob, noise_shape, name=name)

   # If needed, flatten out the batch dimension in the return value.
   return tf.reshape(masked_sxnxd, [-1, dim]) if flat else masked_sxnxd

@@ -749,6 +882,7 @@ class NetworkUnitInterface(object):
     """
     self._component = component
     self._params = []
+    self._derived_params = []
     self._layers = init_layers if init_layers else []
     self._regularized_weights = []
     self._context_layers = init_context_layers if init_context_layers else []

@@ -764,7 +898,10 @@ class NetworkUnitInterface(object):
         check.Gt(spec.size, 0, 'Invalid fixed feature size')
         if spec.embedding_dim > 0:
           fixed_dim = spec.embedding_dim
-          self._params.append(add_embeddings(channel_id, spec))
+          if spec.is_constant:
+            add_embeddings(channel_id, spec)
+          else:
+            self._params.append(add_embeddings(channel_id, spec))
         else:
           fixed_dim = 1  # assume feature ID extraction; only one ID per step
         self._fixed_feature_dims[spec.name] = spec.size * fixed_dim

@@ -802,8 +939,8 @@ class NetworkUnitInterface(object):
       self._concatenated_input_dim = -1
     else:
       self._concatenated_input_dim = sum(input_dims)
-    tf.logging.info('component %s concat_input_dim %s', component.name,
-                    self._concatenated_input_dim)
+    tf.logging.debug('component %s concat_input_dim %s', component.name,
+                     self._concatenated_input_dim)

     # Allocate attention parameters.
     if self._component.spec.attention_component:

@@ -845,6 +982,19 @@ class NetworkUnitInterface(object):
           [attention_hidden_layer_size, component.num_actions],
           initializer=tf.random_normal_initializer(stddev=1e-4)))

+  def pre_create(self, stride):
+    """Prepares this network for inputs of the given stride.
+
+    This will be called before entering the main transition loop and calling
+    create().  Networks can use this to pre-compute values that are reused in
+    the main transition loop.  Note that this may be called multiple times;
+    e.g., once for the training graph, and again for the inference graph.
+
+    Args:
+      stride: Scalar batch_size * beam_size.
+    """
+    pass
+
   @abc.abstractmethod
   def create(self,
              fixed_embeddings,

@@ -878,6 +1028,18 @@ class NetworkUnitInterface(object):
   def params(self):
     return self._params

+  @property
+  def derived_params(self):
+    """Gets the list of derived parameters.
+
+    Derived parameters are similar to `params`, but reformatted slightly
+    (because doing so is easier in Python).
+
+    Returns:
+      List of zero-argument getters, each of which return a tensor when called.
+    """
+    return self._derived_params
+
   @property
   def regularized_weights(self):
     return self._regularized_weights

@@ -919,6 +1081,38 @@ class NetworkUnitInterface(object):
     """
     raise NotImplementedError()

+  def get_bulk_predictions(self, stride, network_tensors):
+    """Returns custom bulk predictions, if supported.
+
+    The returned predictions will be used to advance the batch of states, like
+    logits.  For example, a network may perform structured prediction, and then
+    return 0/1 indicators of the jointly-predicted annotations.  The difference
+    between this and get_logits() is that this is only used at inference time.
+
+    Args:
+      stride: Scalar stride for segmenting bulk tensors.
+      network_tensors: List of tensors as returned by create().
+
+    Returns:
+      [stride * steps, dim] matrix of predictions, or None if not supported.
+    """
+    del stride, network_tensors
+    return None
+
+  def compute_bulk_loss(self, stride, network_tensors, gold):
+    """Returns a custom bulk training loss, if supported.
+
+    Args:
+      stride: Scalar stride for segmenting bulk tensors.
+      network_tensors: List of tensors as returned by create().
+      gold: [stride * steps] vector of gold actions.
+
+    Returns:
+      Tuple of (loss, correct, total), or (None, None, None) if not supported.
+    """
+    del stride, network_tensors, gold
+    return (None, None, None)
+
   def get_l2_regularized_weights(self):
     """Gets the weights that need to be regularized."""
     return self.regularized_weights

@@ -1026,6 +1220,12 @@ class FeedForwardNetwork(NetworkUnitInterface):
       (https://arxiv.org/abs/1512.05287).
     dropout_all_layers (False): If true, apply dropout to the input of all
       hidden layers, instead of just applying it to the network input.
+    initialize_bias_zero (False): If true, initialize bias vectors to 0.
+      Otherwise, they are initialized to a small constant value.
+    initialize_softmax_zero (False): If true, initialize softmax weights to 0.
+      Otherwise, they are initialized to small random values.
+    initialize_hidden_orthogonal (False): If true, initialize hidden weights
+      orthogonally.  Otherwise, they are initialized to small random values.

   Hyperparameters used:
     dropout_rate: The probability that an input is not dropped.  Only used

@@ -1041,9 +1241,25 @@ class FeedForwardNetwork(NetworkUnitInterface):
             'nonlinearity': 'relu',
             'dropout_keep_prob': -1.0,
             'dropout_per_sequence': False,
-            'dropout_all_layers': False
+            'dropout_all_layers': False,
+            'initialize_bias_zero': False,
+            'initialize_softmax_zero': False,
+            'initialize_hidden_orthogonal': False,
         })

+    def _make_bias_initializer():
+      return (tf.
zeros_initializer
()
if
self
.
_attrs
[
'initialize_bias_zero'
]
else
tf
.
constant_initializer
(
0.2
,
dtype
=
tf
.
float32
))
def
_make_softmax_initializer
():
return
(
tf
.
zeros_initializer
()
if
self
.
_attrs
[
'initialize_softmax_zero'
]
else
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
def
_make_hidden_initializer
():
return
(
tf
.
orthogonal_initializer
()
if
self
.
_attrs
[
'initialize_hidden_orthogonal'
]
else
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
# Initialize the hidden layer sizes before running the base initializer, as
# the base initializer may need to know the size of the hidden layer for
# recurrent connections.
...
...
@@ -1084,13 +1300,13 @@ class FeedForwardNetwork(NetworkUnitInterface):
for
index
,
hidden_layer_size
in
enumerate
(
self
.
_hidden_layer_sizes
):
weights
=
tf
.
get_variable
(
'weights_%d'
%
index
,
[
last_layer_dim
,
hidden_layer_size
],
initializer
=
tf
.
random_normal
_initializer
(
stddev
=
1e-4
))
initializer
=
_make_hidden
_initializer
())
self
.
_params
.
append
(
weights
)
if
index
>
0
or
self
.
_layer_norm_hidden
is
None
:
self
.
_params
.
append
(
tf
.
get_variable
(
'bias_%d'
%
index
,
[
hidden_layer_size
],
initializer
=
tf
.
constant
_initializer
(
0.2
,
dtype
=
tf
.
float32
)))
initializer
=
_make_bias
_initializer
()))
self
.
_weights
.
append
(
weights
)
self
.
_layers
.
append
(
...
...
@@ -1108,7 +1324,7 @@ class FeedForwardNetwork(NetworkUnitInterface):
self
.
_params
.
append
(
tf
.
get_variable
(
'weights_softmax'
,
[
last_layer_dim
,
component
.
num_actions
],
initializer
=
tf
.
random_nor
ma
l
_initializer
(
stddev
=
1e-4
)))
initializer
=
_make_soft
ma
x
_initializer
()))
self
.
_params
.
append
(
tf
.
get_variable
(
'bias_softmax'
,
[
component
.
num_actions
],
...
...
@@ -1199,67 +1415,133 @@ class FeedForwardNetwork(NetworkUnitInterface):
class
LSTMNetwork
(
NetworkUnitInterface
):
"""Implementation of action LSTM style network."""
"""Implementation of action LSTM style network.
Note that this is not a vanilla LSTM: it adds peephole connections and couples
the input and forget gates.
This implementation treats linked features called lstm_h and lstm_c specially.
Instead of treating them as normal linked features, it uses them as the
previous LSTM states. This allows having a single LSTM component actually
consist of several LSTMs, or to have a tree-shaped LSTM.
"""
def
__init__
(
self
,
component
):
"""Initializes LSTM parameters.
Args:
component: parent ComponentBuilderBase object.
Parameters used to construct the network:
hidden_layer_sizes: In spite of its name, a single int indicating the
number of hidden units in each hidden layer.
factored_hidden_dim: If positive, the weight matrix is factored into a
product of two matrices with this inner dimension.
omit_logits (False): Whether to elide the logits layer.
initialize_bias_zero (False): If true, initialize bias vectors to 0.
Otherwise, they are initialized to small random values.
initialize_softmax_zero (False): If true, initialize softmax weights to 0.
Otherwise, they are initialized to small random values.
initialize_hidden_orthogonal (False): If true, initialize hidden weights
orthogonally. Otherwise, they are initialized to small random values.
input_dropout_rate (-1.0): Keep probability for inputs. If negative, fall
back to the |dropout_rate| hyperparameter.
recurrent_dropout_rate (-1.0): Keep probability for recurrences. If
negative, fall back to the |recurrent_dropout_rate| hyperparameter.
dropout_per_sequence (False): If true, sample the dropout mask once per
sequence, instead of once per step. See Gal and Ghahramani
(https://arxiv.org/abs/1512.05287).
"""
assert
component
.
num_actions
>
0
,
'Component num actions must be positive.'
network_unit_spec
=
component
.
spec
.
network_unit
self
.
_hidden_layer_sizes
=
(
int
)(
network_unit_spec
.
parameters
[
'hidden_layer_sizes'
])
self
.
_attrs
=
get_attrs_with_defaults
(
component
.
spec
.
network_unit
.
parameters
,
defaults
=
{
'hidden_layer_sizes'
:
-
1
,
# NB: a single dim, not a list
'factored_hidden_dim'
:
-
1
,
'omit_logits'
:
False
,
'initialize_bias_zero'
:
False
,
'initialize_softmax_zero'
:
False
,
'initialize_hidden_orthogonal'
:
False
,
'input_dropout_rate'
:
-
1.0
,
'recurrent_dropout_rate'
:
-
1.0
,
'dropout_per_sequence'
:
False
,
})
def
_make_bias_initializer
():
return
(
tf
.
zeros_initializer
()
if
self
.
_attrs
[
'initialize_bias_zero'
]
else
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_input_dropout_rate
=
component
.
master
.
hyperparams
.
dropout_rate
self
.
_recurrent_dropout_rate
=
(
component
.
master
.
hyperparams
.
recurrent_dropout_rate
)
def
_make_softmax_initializer
():
return
(
tf
.
zeros_initializer
()
if
self
.
_attrs
[
'initialize_softmax_zero'
]
else
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_hidden_layer_sizes
=
self
.
_attrs
[
'hidden_layer_sizes'
]
self
.
_factored_hidden_dim
=
self
.
_attrs
[
'factored_hidden_dim'
]
self
.
_compute_logits
=
not
self
.
_attrs
[
'omit_logits'
]
self
.
_dropout_per_sequence
=
self
.
_attrs
[
'dropout_per_sequence'
]
self
.
_input_dropout_rate
=
self
.
_attrs
[
'input_dropout_rate'
]
if
self
.
_input_dropout_rate
<
0.0
:
self
.
_input_dropout_rate
=
component
.
master
.
hyperparams
.
dropout_rate
self
.
_recurrent_dropout_rate
=
self
.
_attrs
[
'recurrent_dropout_rate'
]
if
self
.
_recurrent_dropout_rate
<
0.0
:
self
.
_recurrent_dropout_rate
=
(
component
.
master
.
hyperparams
.
recurrent_dropout_rate
)
if
self
.
_recurrent_dropout_rate
<
0.0
:
self
.
_recurrent_dropout_rate
=
component
.
master
.
hyperparams
.
dropout_rate
tf
.
logging
.
info
(
'[%s] dropout: input=%s recurrent=%s per_sequence=%s'
,
component
.
name
,
self
.
_input_dropout_rate
,
self
.
_recurrent_dropout_rate
,
self
.
_dropout_per_sequence
)
super
(
LSTMNetwork
,
self
).
__init__
(
component
)
layer_input_dim
=
self
.
_concatenated_input_dim
self
.
_layer_input_dim
=
self
.
_concatenated_input_dim
if
self
.
_layer_input_dim
>
1
:
for
skipped_link
in
[
'lstm_h'
,
'lstm_c'
]:
if
skipped_link
in
self
.
_linked_feature_dims
:
self
.
_layer_input_dim
-=
self
.
_linked_feature_dims
[
skipped_link
]
self
.
_input_dropout_mask
=
None
self
.
_recurrent_dropout_mask
=
None
self
.
_context_layers
=
[]
# TODO(googleuser): should we choose different initilizer,
# e.g. truncated_normal_initializer?
self
.
_x2i
=
tf
.
get_variable
(
'x2i'
,
[
layer_input_dim
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_h2i
=
tf
.
get_variable
(
'h2i'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_c2i
=
tf
.
get_variable
(
'c2i'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_bi
=
tf
.
get_variable
(
'bi'
,
[
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_x2o
=
tf
.
get_variable
(
'x2o'
,
[
layer_input_dim
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_h2o
=
tf
.
get_variable
(
'h2o'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_c2o
=
tf
.
get_variable
(
'c2o'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_bo
=
tf
.
get_variable
(
'bo'
,
[
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_x2c
=
tf
.
get_variable
(
'x2c'
,
[
layer_input_dim
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_h2c
=
tf
.
get_variable
(
'h2c'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_bc
=
tf
.
get_variable
(
'bc'
,
[
self
.
_hidden_layer_sizes
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
self
.
_params
.
extend
([
self
.
_x2i
,
self
.
_h2i
,
self
.
_c2i
,
self
.
_bi
,
self
.
_x2o
,
self
.
_h2o
,
self
.
_c2o
,
self
.
_bo
,
self
.
_x2c
,
self
.
_h2c
,
self
.
_bc
])
self
.
_create_hidden_weights
(
'x2i'
,
[
self
.
_layer_input_dim
,
self
.
_hidden_layer_sizes
])
self
.
_create_hidden_weights
(
'h2i'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
])
self
.
_create_hidden_weights
(
'c2i'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
])
self
.
_params
.
append
(
tf
.
get_variable
(
'bi'
,
[
self
.
_hidden_layer_sizes
],
initializer
=
_make_bias_initializer
()))
self
.
_create_hidden_weights
(
'x2o'
,
[
self
.
_layer_input_dim
,
self
.
_hidden_layer_sizes
])
self
.
_create_hidden_weights
(
'h2o'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
])
self
.
_create_hidden_weights
(
'c2o'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
])
self
.
_params
.
append
(
tf
.
get_variable
(
'bo'
,
[
self
.
_hidden_layer_sizes
],
initializer
=
_make_bias_initializer
()))
self
.
_create_hidden_weights
(
'x2c'
,
[
self
.
_layer_input_dim
,
self
.
_hidden_layer_sizes
])
self
.
_create_hidden_weights
(
'h2c'
,
[
self
.
_hidden_layer_sizes
,
self
.
_hidden_layer_sizes
])
self
.
_params
.
append
(
tf
.
get_variable
(
'bc'
,
[
self
.
_hidden_layer_sizes
],
initializer
=
_make_bias_initializer
()))
# Add runtime hooks for combined matrices.
self
.
_derived_params
.
append
(
self
.
_get_x_to_ico
)
self
.
_derived_params
.
append
(
self
.
_get_h_to_ico
)
self
.
_derived_params
.
append
(
self
.
_get_ico_bias
)
lstm_h_layer
=
Layer
(
component
,
name
=
'lstm_h'
,
dim
=
self
.
_hidden_layer_sizes
)
lstm_c_layer
=
Layer
(
component
,
name
=
'lstm_c'
,
dim
=
self
.
_hidden_layer_sizes
)
...
...
@@ -1272,18 +1554,92 @@ class LSTMNetwork(NetworkUnitInterface):
self
.
_layers
.
append
(
Layer
(
component
,
name
=
'layer_0'
,
dim
=
self
.
_hidden_layer_sizes
))
self
.
params
.
append
(
tf
.
get_variable
(
'weights_softmax'
,
[
self
.
_hidden_layer_sizes
,
component
.
num_actions
],
initializer
=
tf
.
random_normal_initializer
(
stddev
=
1e-4
)))
self
.
params
.
append
(
tf
.
get_variable
(
'bias_softmax'
,
[
component
.
num_actions
],
initializer
=
tf
.
zeros_initializer
()))
if
self
.
_compute_logits
:
self
.
params
.
append
(
tf
.
get_variable
(
'weights_softmax'
,
[
self
.
_hidden_layer_sizes
,
component
.
num_actions
],
initializer
=
_make_softmax_initializer
()))
self
.
params
.
append
(
tf
.
get_variable
(
'bias_softmax'
,
[
component
.
num_actions
],
initializer
=
tf
.
zeros_initializer
()))
self
.
_layers
.
append
(
Layer
(
component
,
name
=
'logits'
,
dim
=
component
.
num_actions
))
self
.
_layers
.
append
(
Layer
(
component
,
name
=
'logits'
,
dim
=
component
.
num_actions
))
def
_get_variable_name_prefix
(
self
):
"""Returns the prefix for variable names."""
# The bias variables are always present; infer the prefix from one of them.
bi
=
self
.
_component
.
get_variable
(
'bi'
)
tokens
=
bi
.
op
.
name
.
split
(
'/'
)
while
tokens
.
pop
()
!=
'bi'
:
pass
# remove the last 'bi' and everything after it
return
'/'
.
join
(
tokens
)
+
'/'
def
_get_x_to_ico
(
self
):
# TODO(googleuser): Export the factored representation, if available.
x2i
=
self
.
_multiply_hidden_weights
(
tf
.
eye
(
self
.
_layer_input_dim
),
'x2i'
)
x2c
=
self
.
_multiply_hidden_weights
(
tf
.
eye
(
self
.
_layer_input_dim
),
'x2c'
)
x2o
=
self
.
_multiply_hidden_weights
(
tf
.
eye
(
self
.
_layer_input_dim
),
'x2o'
)
prefix
=
self
.
_get_variable_name_prefix
()
with
tf
.
name_scope
(
None
):
return
tf
.
concat
([
x2i
,
x2c
,
x2o
],
axis
=
1
,
name
=
prefix
+
'x_to_ico'
)
def
_get_h_to_ico
(
self
):
# TODO(googleuser): Export the factored representation, if available.
h2i
=
self
.
_multiply_hidden_weights
(
tf
.
eye
(
self
.
_hidden_layer_sizes
),
'h2i'
)
h2c
=
self
.
_multiply_hidden_weights
(
tf
.
eye
(
self
.
_hidden_layer_sizes
),
'h2c'
)
h2o
=
self
.
_multiply_hidden_weights
(
tf
.
eye
(
self
.
_hidden_layer_sizes
),
'h2o'
)
prefix
=
self
.
_get_variable_name_prefix
()
with
tf
.
name_scope
(
None
):
return
tf
.
concat
([
h2i
,
h2c
,
h2o
],
axis
=
1
,
name
=
prefix
+
'h_to_ico'
)
def
_get_ico_bias
(
self
):
bi
=
self
.
_component
.
get_variable
(
'bi'
)
bc
=
self
.
_component
.
get_variable
(
'bc'
)
bo
=
self
.
_component
.
get_variable
(
'bo'
)
prefix
=
self
.
_get_variable_name_prefix
()
with
tf
.
name_scope
(
None
):
return
tf
.
concat
([
bi
,
bc
,
bo
],
axis
=
0
,
name
=
prefix
+
'ico_bias'
)
def
_create_hidden_weights
(
self
,
name
,
shape
):
"""Creates params for hidden weight matrix of the given shape."""
check
.
Eq
(
len
(
shape
),
2
,
'Hidden weights %s must be a matrix'
%
name
)
def
_initializer
():
return
(
tf
.
orthogonal_initializer
()
if
self
.
_attrs
[
'initialize_hidden_orthogonal'
]
else
tf
.
random_normal_initializer
(
stddev
=
1e-4
))
if
self
.
_factored_hidden_dim
>
0
:
self
.
_params
.
append
(
tf
.
get_variable
(
'%s_in'
%
name
,
[
shape
[
0
],
self
.
_factored_hidden_dim
],
initializer
=
_initializer
()))
self
.
_params
.
append
(
tf
.
get_variable
(
'%s_out'
%
name
,
[
self
.
_factored_hidden_dim
,
shape
[
1
]],
initializer
=
_initializer
()))
else
:
self
.
_params
.
append
(
tf
.
get_variable
(
name
,
shape
,
initializer
=
_initializer
()))
def
_multiply_hidden_weights
(
self
,
inputs
,
name
):
"""Multiplies the inputs with the named hidden weight matrix."""
if
self
.
_factored_hidden_dim
>
0
:
inputs
=
tf
.
matmul
(
inputs
,
self
.
_component
.
get_variable
(
'%s_in'
%
name
))
return
tf
.
matmul
(
inputs
,
self
.
_component
.
get_variable
(
'%s_out'
%
name
))
else
:
return
tf
.
matmul
(
inputs
,
self
.
_component
.
get_variable
(
name
))
def
pre_create
(
self
,
stride
):
"""Refreshes the dropout masks, if applicable."""
if
self
.
_dropout_per_sequence
:
self
.
_input_dropout_mask
=
maybe_make_dropout_mask
(
[
stride
,
self
.
_layer_input_dim
],
self
.
_input_dropout_rate
)
self
.
_recurrent_dropout_mask
=
maybe_make_dropout_mask
(
[
stride
,
self
.
_hidden_layer_sizes
],
self
.
_recurrent_dropout_rate
)
def
create
(
self
,
fixed_embeddings
,
...
...
@@ -1293,51 +1649,84 @@ class LSTMNetwork(NetworkUnitInterface):
during_training
,
stride
=
None
):
"""See base class."""
input_tensor
=
get_input_tensor
(
fixed_embeddings
,
linked_embeddings
)
# context_tensor_arrays[0] is lstm_h
# context_tensor_arrays[1] is lstm_c
assert
len
(
context_tensor_arrays
)
==
2
length
=
context_tensor_arrays
[
0
].
size
()
# Get the (possibly averaged) parameters to execute the network.
x2i
=
self
.
_component
.
get_variable
(
'x2i'
)
h2i
=
self
.
_component
.
get_variable
(
'h2i'
)
c2i
=
self
.
_component
.
get_variable
(
'c2i'
)
# Get the (possibly averaged) biases to execute the network.
bi
=
self
.
_component
.
get_variable
(
'bi'
)
x2o
=
self
.
_component
.
get_variable
(
'x2o'
)
h2o
=
self
.
_component
.
get_variable
(
'h2o'
)
c2o
=
self
.
_component
.
get_variable
(
'c2o'
)
bo
=
self
.
_component
.
get_variable
(
'bo'
)
x2c
=
self
.
_component
.
get_variable
(
'x2c'
)
h2c
=
self
.
_component
.
get_variable
(
'h2c'
)
bc
=
self
.
_component
.
get_variable
(
'bc'
)
if
self
.
_compute_logits
:
weights_softmax
=
self
.
_component
.
get_variable
(
'weights_softmax'
)
bias_softmax
=
self
.
_component
.
get_variable
(
'bias_softmax'
)
i_h_tm1
=
lookup_named_tensor_or_none
(
'lstm_h'
,
linked_embeddings
)
h_from_linked
=
False
if
i_h_tm1
is
not
None
:
h_from_linked
=
True
i_h_tm1
=
i_h_tm1
.
tensor
i_c_tm1
=
lookup_named_tensor_or_none
(
'lstm_c'
,
linked_embeddings
)
c_from_linked
=
False
if
i_c_tm1
is
not
None
:
c_from_linked
=
True
i_c_tm1
=
i_c_tm1
.
tensor
# i_h_tm1, i_c_tm1 = h_{t-1}, c_{t-1} and label c and h inputs
if
i_h_tm1
is
None
:
i_h_tm1
=
context_tensor_arrays
[
0
].
read
(
length
-
1
)
if
i_c_tm1
is
None
:
i_c_tm1
=
context_tensor_arrays
[
1
].
read
(
length
-
1
)
i_h_tm1
=
tf
.
identity
(
i_h_tm1
,
name
=
'lstm_h_in'
)
i_c_tm1
=
tf
.
identity
(
i_c_tm1
,
name
=
'lstm_c_in'
)
# i_h_tm1, i_c_tm1 = h_{t-1}, c_{t-1}
i_h_tm1
=
context_tensor_arrays
[
0
].
read
(
length
-
1
)
i_c_tm1
=
context_tensor_arrays
[
1
].
read
(
length
-
1
)
# Add hard-coded recurrent inputs to the exported cell.
if
self
.
_component
.
master
.
build_runtime_graph
:
shape
=
[
1
,
self
.
_hidden_layer_sizes
]
if
not
c_from_linked
:
i_c_tm1
=
self
.
_component
.
add_cell_input
(
i_c_tm1
.
dtype
,
shape
,
'lstm_c'
,
'TYPE_RECURRENT'
)
if
not
h_from_linked
:
i_h_tm1
=
self
.
_component
.
add_cell_input
(
i_h_tm1
.
dtype
,
shape
,
'lstm_h'
,
'TYPE_RECURRENT'
)
# Remove 'lstm_h' and 'lstm_c' from linked_embeddings, since they are used
# in a special way.
linked_embeddings
=
[
x
for
x
in
linked_embeddings
if
x
.
name
not
in
[
'lstm_h'
,
'lstm_c'
]
]
# label c and h inputs
i_c_tm1
=
tf
.
identity
(
i_c_tm1
,
name
=
'lstm_c_in'
)
i_h_tm1
=
tf
.
identity
(
i_h_tm1
,
name
=
'lstm_h_in'
)
input_tensor
=
get_input_tensor
(
fixed_embeddings
,
linked_embeddings
)
# label the feature input (for debugging purposes)
input_tensor
=
tf
.
identity
(
input_tensor
,
name
=
'input_tensor'
)
# apply dropout according to http://arxiv.org/pdf/1409.2329v5.pdf
if
during_training
and
self
.
_input_dropout_rate
<
1
:
input_tensor
=
tf
.
nn
.
dropout
(
input_tensor
,
self
.
_input_dropout_rate
)
if
during_training
:
input_tensor
=
maybe_apply_dropout
(
input_tensor
,
self
.
_input_dropout_rate
,
self
.
_dropout_per_sequence
,
dropout_mask
=
self
.
_input_dropout_mask
)
# input -- i_t = sigmoid(affine(x_t, h_{t-1}, c_{t-1}))
i_ait
=
tf
.
matmul
(
input_tensor
,
x2i
)
+
tf
.
matmul
(
i_h_tm1
,
h2i
)
+
tf
.
matmul
(
i_c_tm1
,
c2i
)
+
bi
# Note peephole connection to previous cell state.
i_ait
=
(
self
.
_multiply_hidden_weights
(
input_tensor
,
'x2i'
)
+
self
.
_multiply_hidden_weights
(
i_h_tm1
,
'h2i'
)
+
self
.
_multiply_hidden_weights
(
i_c_tm1
,
'c2i'
)
+
bi
)
i_it
=
tf
.
sigmoid
(
i_ait
)
# forget -- f_t = 1 - i_t
# Note coupling with input gate.
i_ft
=
tf
.
ones
([
1
,
1
])
-
i_it
# write memory cell -- tanh(affine(x_t, h_{t-1}))
i_awt
=
tf
.
matmul
(
input_tensor
,
x2c
)
+
tf
.
matmul
(
i_h_tm1
,
h2c
)
+
bc
i_awt
=
(
self
.
_multiply_hidden_weights
(
input_tensor
,
'x2c'
)
+
self
.
_multiply_hidden_weights
(
i_h_tm1
,
'h2c'
)
+
bc
)
i_wt
=
tf
.
tanh
(
i_awt
)
# c_t = f_t \odot c_{t-1} + i_t \odot tanh(affine(x_t, h_{t-1}))
...
...
@@ -1345,8 +1734,11 @@ class LSTMNetwork(NetworkUnitInterface):
tf
.
multiply
(
i_it
,
i_wt
),
tf
.
multiply
(
i_ft
,
i_c_tm1
),
name
=
'lstm_c'
)
# output -- o_t = sigmoid(affine(x_t, h_{t-1}, c_t))
i_aot
=
tf
.
matmul
(
input_tensor
,
x2o
)
+
tf
.
matmul
(
ct
,
c2o
)
+
tf
.
matmul
(
i_h_tm1
,
h2o
)
+
bo
# Note peephole connection to current cell state.
i_aot
=
(
self
.
_multiply_hidden_weights
(
input_tensor
,
'x2o'
)
+
self
.
_multiply_hidden_weights
(
ct
,
'c2o'
)
+
self
.
_multiply_hidden_weights
(
i_h_tm1
,
'h2o'
)
+
bo
)
i_ot
=
tf
.
sigmoid
(
i_aot
)
...
...
@@ -1354,27 +1746,35 @@ class LSTMNetwork(NetworkUnitInterface):
ph_t
=
tf
.
tanh
(
ct
)
ht
=
tf
.
multiply
(
i_ot
,
ph_t
,
name
=
'lstm_h'
)
if
during_training
and
self
.
_recurrent_dropout_rate
<
1
:
ht
=
tf
.
nn
.
dropout
(
ht
,
self
.
_recurrent_dropout_rate
,
name
=
'lstm_h_dropout'
)
if
during_training
:
ht
=
maybe_apply_dropout
(
ht
,
self
.
_recurrent_dropout_rate
,
self
.
_dropout_per_sequence
,
dropout_mask
=
self
.
_recurrent_dropout_mask
,
name
=
'lstm_h_dropout'
)
h
=
tf
.
identity
(
ht
,
name
=
'layer_0'
)
logits
=
tf
.
nn
.
xw_plus_b
(
ht
,
tf
.
get_variable
(
'weights_softmax'
),
tf
.
get_variable
(
'bias_softmax'
))
# tensors will be consistent with the layers:
# [lstm_h, lstm_c, layer_0, (optional) logits]
tensors
=
[
ht
,
ct
,
h
]
if
self
.
_component
.
spec
.
attention_component
:
logits
+=
self
.
attention
(
ht
,
attention_tensor
)
if
self
.
_compute_logits
:
logits
=
tf
.
nn
.
xw_plus_b
(
ht
,
weights_softmax
,
bias_softmax
)
if
self
.
_component
.
spec
.
attention_component
:
logits
+=
self
.
attention
(
ht
,
attention_tensor
)
logits
=
tf
.
identity
(
logits
,
name
=
'logits'
)
tensors
.
append
(
logits
)
logits
=
tf
.
identity
(
logits
,
name
=
'logits'
)
# tensors will be consistent with the layers:
# [lstm_h, lstm_c, layer_0, logits]
tensors
=
[
ht
,
ct
,
h
,
logits
]
return
tensors
def
get_layer_size
(
self
,
layer_name
):
assert
layer_name
==
'layer_0'
,
'Can only retrieve from first hidden layer.'
assert
layer_name
in
{
'layer_0'
,
'lstm_h'
,
'lstm_c'
},
'Can only retrieve from first hidden layer, lstm_h or lstm_c.'
return
self
.
_hidden_layer_sizes
def
get_logits
(
self
,
network_tensors
):
...
...
@@ -1846,10 +2246,9 @@ class PairwiseConvNetwork(NetworkUnitInterface):
self
.
_widths
,
self
.
_dropout
,
self
.
_bias_init
,
self
.
_initialization
])
if
not
all
(
param_lengths
[
0
]
==
param_len
for
param_len
in
param_lengths
):
raise
RuntimeError
(
'Unmatched widths/dropout/bias_init/initialization: '
+
'%d/%d/%d/%d'
%
(
param_lengths
[
0
],
param_lengths
[
1
],
param_lengths
[
2
],
param_lengths
[
3
]))
raise
RuntimeError
(
'Unmatched widths/dropout/bias_init/initialization: '
+
'%d/%d/%d/%d'
%
(
param_lengths
[
0
],
param_lengths
[
1
],
param_lengths
[
2
],
param_lengths
[
3
]))
self
.
_depths
.
extend
(
map
(
int
,
parameters
[
'depths'
].
split
(
','
)))
if
len
(
self
.
_depths
)
!=
len
(
self
.
_widths
)
+
1
:
...
...
@@ -1866,9 +2265,8 @@ class PairwiseConvNetwork(NetworkUnitInterface):
self
.
_num_labels
=
self
.
_depths
[
-
1
]
if
parameters
[
'activation_layers'
]:
self
.
_activation_layers
=
set
(
map
(
int
,
parameters
[
'activation_layers'
].
split
(
','
)))
self
.
_activation_layers
=
set
(
map
(
int
,
parameters
[
'activation_layers'
].
split
(
','
)))
else
:
self
.
_activation_layers
=
set
(
range
(
self
.
_num_layers
-
1
))
...
...
@@ -1876,7 +2274,7 @@ class PairwiseConvNetwork(NetworkUnitInterface):
for
i
,
width
in
enumerate
(
self
.
_widths
):
if
self
.
_activation
==
'glu'
and
i
in
self
.
_activation_layers
:
self
.
_kernel_shapes
.
append
(
[
width
,
width
,
self
.
_depths
[
i
],
2
*
self
.
_depths
[
i
+
1
]])
[
width
,
width
,
self
.
_depths
[
i
],
2
*
self
.
_depths
[
i
+
1
]])
else
:
self
.
_kernel_shapes
.
append
(
[
width
,
width
,
self
.
_depths
[
i
],
self
.
_depths
[
i
+
1
]])
...
...
@@ -1910,7 +2308,8 @@ class PairwiseConvNetwork(NetworkUnitInterface):
del
context_tensor_arrays
,
attention_tensor
# Unused.
# TODO(googleuser): Normalize the arguments to create(). 'stride'
# is unused by the recurrent network units, while 'context_tensor_arrays'
# and 'attenion_tensor_array' is unused by bulk network units. b/33587044
# and 'attenion_tensor_array' is unused by bulk network units.
if
stride
is
None
:
raise
ValueError
(
"PairwiseConvNetwork needs 'stride'"
)
...
...
@@ -1926,8 +2325,9 @@ class PairwiseConvNetwork(NetworkUnitInterface):
sources_shape
=
tf
.
shape
(
source_tokens
)
targets_shape
=
tf
.
shape
(
target_tokens
)
num_steps
=
sources_shape
[
1
]
with
tf
.
control_dependencies
([
tf
.
assert_equal
(
num_steps
,
targets_shape
[
2
],
name
=
'num_steps_mismatch'
)]):
with
tf
.
control_dependencies
([
tf
.
assert_equal
(
num_steps
,
targets_shape
[
2
],
name
=
'num_steps_mismatch'
)
]):
arg1
=
tf
.
tile
(
source_tokens
,
tf
.
stack
([
1
,
1
,
num_steps
,
1
]))
arg2
=
tf
.
tile
(
target_tokens
,
tf
.
stack
([
1
,
num_steps
,
1
,
1
]))
conv
=
tf
.
concat
([
arg1
,
arg2
],
3
)
...
...
@@ -1935,10 +2335,10 @@ class PairwiseConvNetwork(NetworkUnitInterface):
with
tf
.
variable_scope
(
'conv%d'
%
i
,
reuse
=
True
)
as
scope
:
if
during_training
:
conv
=
maybe_apply_dropout
(
conv
,
self
.
_dropout
[
i
],
False
)
conv
=
tf
.
nn
.
conv2d
(
conv
,
self
.
_component
.
get_variable
(
'weights'
)
,
[
1
,
1
,
1
,
1
],
padding
=
'SAME'
)
conv
=
tf
.
nn
.
conv2d
(
conv
,
self
.
_component
.
get_variable
(
'weights'
),
[
1
,
1
,
1
,
1
],
padding
=
'SAME'
)
conv
=
tf
.
nn
.
bias_add
(
conv
,
self
.
_component
.
get_variable
(
'biases'
))
if
i
in
self
.
_activation_layers
:
conv
=
self
.
_activation_fn
(
conv
,
name
=
scope
.
name
)
...
...
research/syntaxnet/dragnn/python/network_units_test.py
View file @
80178fc6
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for network_units."""
...
...
@@ -26,8 +25,6 @@ from tensorflow.python.platform import googletest
from
dragnn.protos
import
spec_pb2
from
dragnn.python
import
network_units
FLAGS
=
tf
.
app
.
flags
.
FLAGS
class
NetworkUnitsConverterTest
(
test_util
.
TensorFlowTestCase
):
...
...
@@ -61,6 +58,7 @@ class MockComponent(object):
self
.
spec
=
component_spec
self
.
name
=
component_spec
.
name
self
.
beam_size
=
1
self
.
num_actions
=
45
self
.
_attrs
=
{}
def
attr
(
self
,
name
):
...
...
@@ -72,12 +70,13 @@ class MockComponent(object):
class
MockMaster
(
object
):
def
__init__
(
self
):
def
__init__
(
self
,
build_runtime_graph
=
False
):
self
.
spec
=
spec_pb2
.
MasterSpec
()
self
.
hyperparams
=
spec_pb2
.
GridPoint
()
self
.
lookup_component
=
{
'previous'
:
MockComponent
(
self
,
spec_pb2
.
ComponentSpec
())
}
self
.
build_runtime_graph
=
build_runtime_graph
class
MockNetwork
(
object
):
...
...
@@ -167,6 +166,164 @@ class GetAttrsWithDefaultsTest(test_util.TensorFlowTestCase):
_assert_attr_is_true
(
'TRUE'
)
class
LstmNetworkTest
(
test_util
.
TensorFlowTestCase
):
test_spec_1
=
"""
component {
name: 'bi_lstm'
backend { registered_name: 'TestComponent' }
fixed_feature {
name: 'words'
fml: 'words'
size: 1
embedding_dim: 32
vocabulary_size: 1079813,
}
network_unit {
registered_name: 'LSTMNetwork'
parameters {
key: "hidden_layer_sizes"
value: "128"
}
}
}
"""
test_spec_linked
=
"""
component {
name: 'bi_lstm'
backend { registered_name: 'TestComponent' }
fixed_feature {
name: 'words'
fml: 'words'
size: 1
embedding_dim: 32
vocabulary_size: 1079813,
}
linked_feature {
name: 'lstm_h'
fml: 'bias(0)'
embedding_dim: -1
size: 1
source_component: 'bi_lstm'
source_translator: 'history'
source_layer: 'lstm_h'
}
linked_feature {
name: 'lstm_c'
fml: 'bias(0)'
embedding_dim: -1
size: 1
source_component: 'bi_lstm'
source_translator: 'history'
source_layer: 'lstm_c'
}
network_unit {
registered_name: 'LSTMNetwork'
parameters {
key: "hidden_layer_sizes"
value: "128"
}
}
}
"""
def
setUp
(
self
):
# Clear the graph and all existing variables. Otherwise, variables created
# in different tests may collide with each other.
tf
.
reset_default_graph
()
def
construct_lstm_network_unit
(
self
,
master
):
"""Helper to construct a LSTMNetwork. Doesn't call create() yet."""
component
=
MockComponent
(
master
,
master
.
spec
.
component
[
0
])
with
tf
.
variable_scope
(
'bi_lstm'
):
lstm_network_unit
=
network_units
.
LSTMNetwork
(
component
)
return
lstm_network_unit
def
get_context_tensor_arrays
(
self
,
lstm_network_unit
):
context_tensor_arrays
=
[]
for
context_layer
in
lstm_network_unit
.
context_layers
:
context_tensor_arrays
.
append
(
context_layer
.
create_array
(
1
))
return
context_tensor_arrays
def
fixed_word_embeddings
(
self
):
"""Helper for returning fixed embeddings, for 1 word feature."""
words_tensor
=
tf
.
constant
([[
1.0
]
*
32
],
dtype
=
tf
.
float32
)
return
[
network_units
.
NamedTensor
(
words_tensor
,
'words'
)]
def
testCanCreate
(
self
):
"""Smoke test that the create() function doesn't raise errors."""
master
=
MockMaster
()
master
.
spec
=
spec_pb2
.
MasterSpec
()
text_format
.
Parse
(
self
.
test_spec_1
,
master
.
spec
)
lstm_network_unit
=
self
.
construct_lstm_network_unit
(
master
)
with
tf
.
variable_scope
(
'bi_lstm'
,
reuse
=
True
):
lstm_network_unit
.
create
(
self
.
fixed_word_embeddings
(),
[],
self
.
get_context_tensor_arrays
(
lstm_network_unit
),
None
,
True
)
def
testCanCreateLinked
(
self
):
"""Smoke test that the create() function doesn't raise errors."""
master
=
MockMaster
()
master
.
spec
=
spec_pb2
.
MasterSpec
()
text_format
.
Parse
(
self
.
test_spec_linked
,
master
.
spec
)
lstm_network_unit
=
self
.
construct_lstm_network_unit
(
master
)
with
tf
.
variable_scope
(
'bi_lstm'
,
reuse
=
True
):
lstm_network_unit
.
create
(
self
.
fixed_word_embeddings
(),
[],
self
.
get_context_tensor_arrays
(
lstm_network_unit
),
None
,
True
)
def
testRuntimeConcatentatedMatrices
(
self
):
"""Test generation of concatenated matrices."""
# TODO(googleuser): Make MockComponent support runtime graph generation.
master
=
MockMaster
(
build_runtime_graph
=
False
)
master
.
spec
=
spec_pb2
.
MasterSpec
()
text_format
.
Parse
(
self
.
test_spec_1
,
master
.
spec
)
lstm_network_unit
=
self
.
construct_lstm_network_unit
(
master
)
with
tf
.
variable_scope
(
'bi_lstm'
,
reuse
=
True
):
lstm_network_unit
.
create
(
self
.
fixed_word_embeddings
(),
[],
self
.
get_context_tensor_arrays
(
lstm_network_unit
),
None
,
False
)
x_to_ico
=
lstm_network_unit
.
derived_params
[
0
]()
h_to_ico
=
lstm_network_unit
.
derived_params
[
1
]()
ico_bias
=
lstm_network_unit
.
derived_params
[
2
]()
# Should be the word dimension (32) to 3x the hidden dimension (128).
self
.
assertEqual
(
x_to_ico
.
shape
,
(
32
,
384
))
self
.
assertEqual
(
x_to_ico
.
op
.
name
,
'bi_lstm/x_to_ico'
)
# Should be the hidden dimension (128) to 3x the hidden dimension (128).
self
.
assertEqual
(
h_to_ico
.
shape
,
(
128
,
384
))
self
.
assertEqual
(
h_to_ico
.
op
.
name
,
'bi_lstm/h_to_ico'
)
# Should be equal to the hidden dimension (128) times 3.
self
.
assertEqual
(
ico_bias
.
shape
,
(
384
,))
self
.
assertEqual
(
ico_bias
.
op
.
name
,
'bi_lstm/ico_bias'
)
def
testRuntimeConcatentatedMatricesLinked
(
self
):
"""Test generation of concatenated matrices."""
# TODO(googleuser): Make MockComponent support runtime graph generation.
master
=
MockMaster
(
build_runtime_graph
=
False
)
master
.
spec
=
spec_pb2
.
MasterSpec
()
text_format
.
Parse
(
self
.
test_spec_linked
,
master
.
spec
)
lstm_network_unit
=
self
.
construct_lstm_network_unit
(
master
)
with
tf
.
variable_scope
(
'bi_lstm'
,
reuse
=
True
):
lstm_network_unit
.
create
(
self
.
fixed_word_embeddings
(),
[],
self
.
get_context_tensor_arrays
(
lstm_network_unit
),
None
,
False
)
x_to_ico
=
lstm_network_unit
.
derived_params
[
0
]()
h_to_ico
=
lstm_network_unit
.
derived_params
[
1
]()
ico_bias
=
lstm_network_unit
.
derived_params
[
2
]()
# Should be the word dimension (32) to 3x the hidden dimension (128).
self
.
assertEqual
(
x_to_ico
.
shape
,
(
32
,
384
))
# Should be the hidden dimension (128) to 3x the hidden dimension (128).
self
.
assertEqual
(
h_to_ico
.
shape
,
(
128
,
384
))
# Should be equal to the hidden dimension (128) times 3.
self
.
assertEqual
(
ico_bias
.
shape
,
(
384
,))
class
GatherNetworkTest
(
test_util
.
TensorFlowTestCase
):
def
setUp
(
self
):
...
...
@@ -214,12 +371,30 @@ class GatherNetworkTest(test_util.TensorFlowTestCase):
network
=
network_units
.
GatherNetwork
(
self
.
_component
)
# Construct a batch of two items with 3 and 2 steps, respectively.
indices
=
tf
.
constant
([[
1
],
[
2
],
[
0
],
# item 1
[
-
1
],
[
0
],
[
-
1
]],
# item 2
dtype
=
tf
.
int64
)
features
=
tf
.
constant
([[
1.0
,
1.5
],
[
2.0
,
2.5
],
[
3.0
,
3.5
],
# item 1
[
4.0
,
4.5
],
[
5.0
,
5.5
],
[
6.0
,
6.5
]],
# item 2
dtype
=
tf
.
float32
)
indices
=
tf
.
constant
(
[
# item 1
[
1
],
[
2
],
[
0
],
# item 2
[
-
1
],
[
0
],
[
-
1
]
],
dtype
=
tf
.
int64
)
features
=
tf
.
constant
(
[
# item 1
[
1.0
,
1.5
],
[
2.0
,
2.5
],
[
3.0
,
3.5
],
# item 2
[
4.0
,
4.5
],
[
5.0
,
5.5
],
[
6.0
,
6.5
]
],
dtype
=
tf
.
float32
)
fixed_embeddings
=
[]
linked_embeddings
=
[
...
...
@@ -233,13 +408,16 @@ class GatherNetworkTest(test_util.TensorFlowTestCase):
gathered
=
outputs
[
0
]
# Zeros will be substituted for index -1.
self
.
assertAllEqual
(
gathered
.
eval
(),
[[
2.0
,
2.5
],
# gathered from 1
[
3.0
,
3.5
],
# gathered from 2
[
1.0
,
1.5
],
# gathered from 0
[
0.0
,
0.0
],
# gathered from -1
[
4.0
,
4.5
],
# gathered from 0
[
0.0
,
0.0
]])
# gathered from -1
self
.
assertAllEqual
(
gathered
.
eval
(),
[
[
2.0
,
2.5
],
# gathered from 1
[
3.0
,
3.5
],
# gathered from 2
[
1.0
,
1.5
],
# gathered from 0
[
0.0
,
0.0
],
# gathered from -1
[
4.0
,
4.5
],
# gathered from 0
[
0.0
,
0.0
]
# gathered from -1
])
def
testTrainablePadding
(
self
):
self
.
_component
.
spec
.
network_unit
.
parameters
[
'trainable_padding'
]
=
'true'
...
...
@@ -248,12 +426,30 @@ class GatherNetworkTest(test_util.TensorFlowTestCase):
network
=
network_units
.
GatherNetwork
(
self
.
_component
)
# Construct a batch of two items with 3 and 2 steps, respectively.
indices
=
tf
.
constant
([[
1
],
[
2
],
[
0
],
# item 1
[
-
1
],
[
0
],
[
-
1
]],
# item 2
dtype
=
tf
.
int64
)
features
=
tf
.
constant
([[
1.0
,
1.5
],
[
2.0
,
2.5
],
[
3.0
,
3.5
],
# item 1
[
4.0
,
4.5
],
[
5.0
,
5.5
],
[
6.0
,
6.5
]],
# item 2
dtype
=
tf
.
float32
)
indices
=
tf
.
constant
(
[
# item 1
[
1
],
[
2
],
[
0
],
# item 2
[
-
1
],
[
0
],
[
-
1
]
],
dtype
=
tf
.
int64
)
features
=
tf
.
constant
(
[
# item 1
[
1.0
,
1.5
],
[
2.0
,
2.5
],
[
3.0
,
3.5
],
# item 2
[
4.0
,
4.5
],
[
5.0
,
5.5
],
[
6.0
,
6.5
]
],
dtype
=
tf
.
float32
)
fixed_embeddings
=
[]
linked_embeddings
=
[
...
...
@@ -299,8 +495,8 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
"""
with
tf
.
Graph
().
as_default
(),
self
.
test_session
()
as
session
:
np
.
random
.
seed
(
4
)
tensor
=
network_units
.
add_var_initialized
(
'tensor'
,
shape
,
'identity'
,
divisor
=
divisor
,
stddev
=
std
)
tensor
=
network_units
.
add_var_initialized
(
'tensor'
,
shape
,
'identity'
,
divisor
=
divisor
,
stddev
=
std
)
session
.
run
(
tf
.
global_variables_initializer
())
actual
=
session
.
run
(
tensor
)
self
.
assertAllClose
(
actual
,
expected
,
1e-8
,
1e-8
)
...
...
@@ -345,13 +541,13 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
divisor
=
3.
std
=
1e-3
shape
=
(
6
,
3
)
m
=
divisor
/
shape
[
-
1
]
expected
=
[[
m
,
4.99951362e-04
,
-
9.95908980e-04
],
[
m
,
-
4.18301526e-04
,
-
1.58457726e-03
],
[
-
6.47706795e-04
,
m
,
3.32250027e-04
],
[
-
1.14747661e-03
,
m
,
-
8.79869258e-05
],
[
4.25072387e-04
,
3.32253141e-04
,
m
],
[
3.50997143e-04
,
-
6.06887275e-04
,
m
]]
m
=
divisor
/
shape
[
-
1
]
expected
=
[[
m
,
4.99951362e-04
,
-
9.95908980e-04
],
[
m
,
-
4.18301526e-04
,
-
1.58457726e-03
],
[
-
6.47706795e-04
,
m
,
3.32250027e-04
],
[
-
1.14747661e-03
,
m
,
-
8.79869258e-05
],
[
4.25072387e-04
,
3.32253141e-04
,
m
],
[
3.50997143e-04
,
-
6.06887275e-04
,
m
]]
self
.
IdentityInitializerHelper
(
shape
,
expected
,
divisor
,
std
)
def
testIdentityInitializerNonSquareRank2FirstDimSmaller
(
self
):
...
...
@@ -368,14 +564,14 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
std
=
1e-3
shape
=
(
2
,
2
,
6
)
m
=
divisor
/
shape
[
-
1
]
expected
=
[[[
5.05617063e-05
,
4.99951362e-04
,
-
9.95908980e-04
,
6.93598529e-04
,
-
4.18301526e-04
,
-
1.58457726
e-0
3
]
,
[
-
6.47706795e-04
,
5.98575163e-04
,
3.32250027e-04
,
-
1.14747661e-03
,
6.18669670e-04
,
-
8.79869258e-05
]
],
[[
m
,
m
,
m
,
3.50997143e-04
,
-
6.06887275e-04
,
1.54697930e-03
],
[
7.23341596e-04
,
4.61355667
e-0
5
,
-
9.82991653
e-0
4
,
m
,
m
,
m
]]]
expected
=
[[[
5.05617063e-05
,
4.99951362e-04
,
-
9.95908980e-04
,
6.93598529
e-0
4
,
-
4.18301526e-04
,
-
1.58457726e-03
],
[
-
6.47706795e-04
,
5.98575163e-04
,
3.32250027e-04
,
-
1.14747661e-03
,
6.18669670e-04
,
-
8.79869258e-05
]],
[[
m
,
m
,
m
,
3.50997143e-04
,
-
6.06887275
e-0
4
,
1.54697930
e-0
3
]
,
[
7.23341596e-04
,
4.61355667e-05
,
-
9.82991653e-04
,
m
,
m
,
m
]]]
self
.
IdentityInitializerHelper
(
shape
,
expected
,
divisor
,
std
)
def
testIdentityInitializerNonSquareRank4
(
self
):
...
...
@@ -383,40 +579,110 @@ class IdentityInitializerTest(test_util.TensorFlowTestCase):
std
=
1e-3
shape
=
(
2
,
3
,
2
,
8
)
m
=
divisor
/
float
(
shape
[
-
1
])
expected
=
[
[[[
5.05617063e-05
,
4.99951362e-04
,
-
9.95908980e-04
,
6.93598529e-04
,
-
4.18301526e-04
,
-
1.58457726e-03
,
-
6.47706795e-04
,
5.98575163e-04
],
[
3.32250027e-04
,
-
1.14747661e-03
,
6.18669670e-04
,
-
8.79869258e-05
,
4.25072387e-04
,
3.32253141e-04
,
-
1.15681626e-03
,
3.50997143e-04
]],
[[
-
6.06887275e-04
,
1.54697930e-03
,
7.23341596e-04
,
4.61355667e-05
,
-
9.82991653e-04
,
5.44327377e-05
,
1.59892938e-04
,
-
1.20894820e-03
],
[
2.22336012e-03
,
3.94295203e-04
,
1.69235771e-03
,
-
1.11281220e-03
,
1.63574750e-03
,
-
1.36096554e-03
,
-
6.51225855e-04
,
5.42451337e-04
]],
[[
4.80062481e-05
,
-
2.35807360e-03
,
-
1.10558409e-03
,
8.37836356e-04
,
2.08787085e-03
,
9.14840959e-04
,
-
2.76203355e-04
,
7.96511886e-04
],
[
-
1.14379858e-03
,
5.09919773e-04
,
-
1.34746032e-03
,
-
9.36010019e-06
,
-
1.30704633e-04
,
8.02086608e-04
,
-
3.02963977e-04
,
1.20200263e-03
]]],
[[[
-
1.96745284e-04
,
8.36528721e-04
,
7.86602264e-04
,
-
1.84087583e-03
,
3.75474883e-05
,
3.59280530e-05
,
-
7.78739923e-04
,
1.79410708e-04
],
[
-
1.45553437e-03
,
5.56185201e-04
,
5.09778853e-04
,
3.00445536e-04
,
2.47658417e-03
,
3.52343399e-04
,
6.74710027e-05
,
-
7.32264714e-04
]],
[[
m
,
m
,
m
,
m
,
1.58469542e-04
,
1.99008291e-03
,
1.16418756e-03
,
2.42660157e-04
],
[
1.37992005e-03
,
-
5.45587063e-05
,
7.95233937e-04
,
1.90899627e-05
,
m
,
m
,
m
,
m
]],
[[
-
1.09712186e-03
,
-
5.28196048e-04
,
-
2.37977528e-03
,
-
6.07683673e-04
,
-
1.07529014e-03
,
2.02240516e-03
,
-
5.64875314e-04
,
-
1.54292909e-03
],
[
8.70841788e-04
,
-
1.75210531e-04
,
4.86030076e-05
,
1.88646198e-04
,
2.09313483e-04
,
-
3.74444906e-04
,
9.54698597e-04
,
5.23247640e-04
]]]
]
expected
=
[[[[
5.05617063e-05
,
4.99951362e-04
,
-
9.95908980e-04
,
6.93598529e-04
,
-
4.18301526e-04
,
-
1.58457726e-03
,
-
6.47706795e-04
,
5.98575163e-04
],
[
3.32250027e-04
,
-
1.14747661e-03
,
6.18669670e-04
,
-
8.79869258e-05
,
4.25072387e-04
,
3.32253141e-04
,
-
1.15681626e-03
,
3.50997143e-04
]],
[[
-
6.06887275e-04
,
1.54697930e-03
,
7.23341596e-04
,
4.61355667e-05
,
-
9.82991653e-04
,
5.44327377e-05
,
1.59892938e-04
,
-
1.20894820e-03
],
[
2.22336012e-03
,
3.94295203e-04
,
1.69235771e-03
,
-
1.11281220e-03
,
1.63574750e-03
,
-
1.36096554e-03
,
-
6.51225855e-04
,
5.42451337e-04
]],
[[
4.80062481e-05
,
-
2.35807360e-03
,
-
1.10558409e-03
,
8.37836356e-04
,
2.08787085e-03
,
9.14840959e-04
,
-
2.76203355e-04
,
7.96511886e-04
],
[
-
1.14379858e-03
,
5.09919773e-04
,
-
1.34746032e-03
,
-
9.36010019e-06
,
-
1.30704633e-04
,
8.02086608e-04
,
-
3.02963977e-04
,
1.20200263e-03
]]],
[[[
-
1.96745284e-04
,
8.36528721e-04
,
7.86602264e-04
,
-
1.84087583e-03
,
3.75474883e-05
,
3.59280530e-05
,
-
7.78739923e-04
,
1.79410708e-04
],
[
-
1.45553437e-03
,
5.56185201e-04
,
5.09778853e-04
,
3.00445536e-04
,
2.47658417e-03
,
3.52343399e-04
,
6.74710027e-05
,
-
7.32264714e-04
]],
[[
m
,
m
,
m
,
m
,
1.58469542e-04
,
1.99008291e-03
,
1.16418756e-03
,
2.42660157e-04
],
[
1.37992005e-03
,
-
5.45587063e-05
,
7.95233937e-04
,
1.90899627e-05
,
m
,
m
,
m
,
m
]],
[[
-
1.09712186e-03
,
-
5.28196048e-04
,
-
2.37977528e-03
,
-
6.07683673e-04
,
-
1.07529014e-03
,
2.02240516e-03
,
-
5.64875314e-04
,
-
1.54292909e-03
],
[
8.70841788e-04
,
-
1.75210531e-04
,
4.86030076e-05
,
1.88646198e-04
,
2.09313483e-04
,
-
3.74444906e-04
,
9.54698597e-04
,
5.23247640e-04
]]]]
self
.
IdentityInitializerHelper
(
shape
,
expected
,
divisor
,
std
)
class
FeatureIdDropoutTest
(
test_util
.
TensorFlowTestCase
):
def
setUp
(
self
):
# Clear the graph and all existing variables. Otherwise, variables created
# in different tests may collide with each other.
tf
.
reset_default_graph
()
def
testApplyFeatureIdDropout
(
self
):
channel
=
spec_pb2
.
FixedFeatureChannel
()
text_format
.
Parse
(
"""
vocabulary_size: 10
dropout_id: 8
dropout_keep_probability: [0.0, 0.25, 0.5, 0.75, 1.0]
"""
,
channel
)
with
tf
.
Graph
().
as_default
(),
self
.
test_session
():
with
tf
.
variable_scope
(
'test_scope'
):
ids
=
tf
.
constant
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
],
dtype
=
tf
.
int64
)
weights
=
tf
.
constant
([
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
],
dtype
=
tf
.
float32
)
tensors
=
network_units
.
apply_feature_id_dropout
(
ids
,
weights
,
channel
)
perturbed_ids
=
tensors
[
0
].
eval
()
tf
.
logging
.
info
(
'perturbed_ids = %s'
,
perturbed_ids
)
# Given the dropout_keep_probability values specified above:
# * ID 0 is never kept.
# * IDs 1-3 are randomly kept with varying probability.
# * IDs 4-9 are always kept.
# To avoid non-determinism, we only check for specific feature IDs at
# the extremes (never/always kept). Behavior in between the extremes
# should interpolate between the two extremes.
self
.
assertEqual
(
perturbed_ids
[
0
],
channel
.
dropout_id
)
self
.
assertTrue
(
perturbed_ids
[
1
]
in
(
1
,
channel
.
dropout_id
))
self
.
assertTrue
(
perturbed_ids
[
2
]
in
(
2
,
channel
.
dropout_id
))
self
.
assertTrue
(
perturbed_ids
[
3
]
in
(
3
,
channel
.
dropout_id
))
self
.
assertAllEqual
(
perturbed_ids
[
4
:],
[
4
,
5
,
6
,
7
,
8
,
9
])
def
testApplyFeatureIdDropoutSkip
(
self
):
channel
=
spec_pb2
.
FixedFeatureChannel
()
text_format
.
Parse
(
"""
vocabulary_size: 2
dropout_id: 2
dropout_keep_probability: [0.0, 1.0]
"""
,
channel
)
with
tf
.
Graph
().
as_default
(),
self
.
test_session
():
with
tf
.
variable_scope
(
'test_scope'
):
ids
=
tf
.
constant
([
0
,
1
],
dtype
=
tf
.
int64
)
weights
=
tf
.
constant
([
1
,
1
],
dtype
=
tf
.
float32
)
tensors
=
network_units
.
apply_feature_id_dropout
(
ids
,
weights
,
channel
)
perturbed_ids
,
perturbed_weights
=
tensors
[
0
].
eval
(),
tensors
[
1
].
eval
()
tf
.
logging
.
info
(
'perturbed_ids = %s'
,
perturbed_ids
)
tf
.
logging
.
info
(
'perturbed_weights = %s'
,
perturbed_weights
)
# Given the dropout_keep_probability values specified above:
# * ID 0 is never kept, its weight is set to 0.
# * IDs 1 are always kept.
# To avoid non-determinism, we only check for specific feature IDs at
# the extremes (never/always kept).
self
.
assertEqual
(
perturbed_ids
[
0
],
channel
.
dropout_id
)
self
.
assertEqual
(
perturbed_weights
[
0
],
0
)
self
.
assertEqual
(
perturbed_ids
[
1
],
1
)
self
.
assertEqual
(
perturbed_weights
[
1
],
1
)
if
__name__
==
'__main__'
:
googletest
.
main
()
research/syntaxnet/dragnn/python/perf_test_data/master-spec
deleted
100644 → 0
View file @
a84e1ef9
component {
name: "convnet"
transition_system {
registered_name: "shift-only"
parameters {
key: "parser_skip_deterministic"
value: "false"
}
}
resource {
name: "lexifuse-repository"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexifuse-repository/repository"
file_format: "repository"
record_format: "entity"
}
}
resource {
name: "brain-parser-model"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/brain-parser-model"
file_format: "model"
record_format: ""
}
}
resource {
name: "transition-system-data"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/transition-system-data"
file_format: "model"
record_format: ""
}
}
resource {
name: "words-embedding-input"
part {
file_pattern: "/readahead/512M/cns/lg-d/home/saft/corpora/word-embeddings/en/word2vec/1billion/word2vec-embedding-bi-true-32.sst"
file_format: "sstable"
record_format: "dist_belief.TokenEmbedding"
}
}
resource {
name: "words-vocab-input"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/vocab"
file_format: "text"
record_format: ""
}
}
resource {
name: "component-builder-module"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.component-builder-module/module-spec"
file_format: "pbtxt"
record_format: ""
}
}
fixed_feature {
name: "char_ngram"
fml: "input.token.lexifuse-char-ngram"
embedding_dim: 16
vocabulary_size: 16500
size: 1
predicate_map: "hashed"
}
fixed_feature {
name: "words"
fml: "input.word"
embedding_dim: 32
vocabulary_size: 39395
size: 1
predicate_map: "hashed"
}
network_unit {
registered_name: "IdentityNetwork"
}
backend {
registered_name: "ParserComponent"
}
num_actions: 1
attention_component: ""
component_builder {
registered_name: "components.common.dragnn.python.conv_component.ConvComponentBuilder"
parameters {
key: "depths"
value: "48,128"
}
parameters {
key: "output_dims"
value: "45"
}
parameters {
key: "widths"
value: "7"
}
}
training_beam_size: 1
inference_beam_size: 1
}
component {
name: "tagger"
transition_system {
registered_name: "tagger"
parameters {
key: "parser_skip_deterministic"
value: "false"
}
}
resource {
name: "tag-map"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexicon/tag-map"
file_format: "text"
record_format: ""
}
}
resource {
name: "lexifuse-repository"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexifuse-repository/repository"
file_format: "repository"
record_format: "entity"
}
}
resource {
name: "brain-parser-model"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.model-init/brain-parser-model"
file_format: "model"
record_format: ""
}
}
resource {
name: "transition-system-data"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.model-init/transition-system-data"
file_format: "model"
record_format: ""
}
}
resource {
name: "component-builder-module"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.component-builder-module/module-spec"
file_format: "pbtxt"
record_format: ""
}
}
linked_feature {
name: "convnet"
fml: "input.focus"
embedding_dim: -1
size: 1
source_component: "convnet"
source_translator: "identity"
source_layer: "conv0_logits"
}
network_unit {
registered_name: "IdentityNetwork"
}
backend {
registered_name: "ParserComponent"
}
num_actions: 45
attention_component: ""
component_builder {
registered_name: "bulk_component.BulkAnnotatorComponentBuilder"
}
training_beam_size: 1
inference_beam_size: 1
}
research/syntaxnet/dragnn/python/perf_test_data/params
deleted
100644 → 0
View file @
a84e1ef9
File deleted
research/syntaxnet/dragnn/python/perf_test_data/sample_docs.pickle
deleted
100644 → 0
View file @
a84e1ef9
File deleted
research/syntaxnet/dragnn/python/runtime_support.py
0 → 100644
View file @
80178fc6
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for supporting the DRAGNN runtime from the TF side."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
functools
import
re
import
tensorflow
as
tf
from
dragnn.python
import
network_units
from
syntaxnet.util
import
check
def
add_hooks
(
component
,
cell_subgraph_spec
):
"""Adds "hook" nodes to the graph, for use by the runtime.
The runtime hook nodes are not on the path to any required output, and will
not be called when running TF-based DRAGNN. As long as the TF graph is not
pruned, however, the DRAGNN runtime can call them.
Runtime hook nodes can perform any TF computation. Possible uses include:
* Applying stable names to existing tensors (e.g., via tf.identity()).
* Converting variable data from a TF-friendly or training-friendly format
into a runtime-friendly format.
NB: There are several restrictions on the context in which this function is
called. In brief, call ComponentBuilderBase._add_runtime_hooks() at the top
of each ComponentBuilderSubclass.build_*() method. In detail, this:
* Must be called in the variable scope of the |component|, so variable
references in component.get_variable() work.
* Must be called, possibly transitively, from one of the |component|'s
build_*() methods, so MasterBuilder.read_from_avg is set properly for
component.get_variable().
* Must not be called from within a tf.while_loop(), or the hook nodes will
not work. In particular, NetworkUnitInterface.create() is called from a
tf.while_loop() in DynamicComponentBuilder.
Args:
component: Component for which to add hooks.
cell_subgraph_spec: CellSubgraphSpec for which to add hooks.
"""
for
channel_id
,
feature_spec
in
enumerate
(
component
.
spec
.
linked_feature
):
if
feature_spec
.
embedding_dim
!=
-
1
:
_add_hooks_for_linked_embedding_matrix
(
component
,
channel_id
)
for
channel_id
,
feature_spec
in
enumerate
(
component
.
spec
.
fixed_feature
):
if
feature_spec
.
embedding_dim
!=
-
1
:
_add_hooks_for_fixed_embedding_matrix
(
component
,
channel_id
)
for
params
in
component
.
network
.
params
:
_add_hooks_for_trainable_params
(
component
,
params
)
for
parameter_getter
in
component
.
network
.
derived_params
:
_add_hooks_for_derived_parameter
(
parameter_getter
)
_add_hook_node
(
tf
.
constant
(
cell_subgraph_spec
.
SerializeToString
(),
tf
.
string
),
'{}/EXPORT/CellSubgraphSpec'
.
format
(
component
.
name
))
def
_blocked_and_dtype_transformations
(
tensor
):
"""Yields variants of a tensor, for standard blocking/dtype variants.
Args:
tensor (tf.Tensor): Input tensor.
Yields:
(modified_tensor, suffix) pairs, where `modified_tensor` is a transformed
version of the input, and `suffix` is a string like "/blocked32".
"""
for
blocking_level
in
(
32
,
48
):
blocked
=
make_padded_blocked_matrix
(
tensor
,
blocking_level
)
bfloat16_blocked
=
tf
.
to_bfloat16
(
bfloat16_permutation
(
blocked
))
yield
blocked
,
'/blocked{}'
.
format
(
blocking_level
)
yield
bfloat16_blocked
,
'/blocked{}/bfloat16'
.
format
(
blocking_level
)
def
_add_hooks_for_linked_embedding_matrix
(
component
,
channel_id
):
"""Adds runtime hooks for a linked embedding matrix.
The computation performed by network_units.pass_through_embedding_matrix() is
equivalent to the following:
for i in range(stride):
if step_idx[i] == -1:
outputs[i,:] = out_of_bounds_vector
else:
outputs[i,:] = tf.matmul(act_block[i,:], weight_matrix)
The implementation uses clever arithmetic to do this in one matmul per batch.
Specifically, the weight_matrix is extended with the out_of_bounds_vector and
each activation vector is extended with a 0/1 out-of-bounds indicator. Then,
multiplying the two suffices, assuming that act_block[i,:] is set to zero for
out-of-bounds links.
While this works well for training and high-throughput batched computation, it
isn't the best for the runtime:
* Appending a 0/1 indicator to the input activation vector requires a copy.
Ideally, we could use the input activation vector by reference alone.
* In order to access to the |out_of_bounds_vector| as a contiguous array,
the runtime must load the linked embedding matrix in row-major format,
which may not be the fastest format for arithmetic.
* The dimensions of the extended-by-1 matrix and vector are likely to be
pessimal. Most dimensions are specified as 2^n, and adding one element
produces maximal padding on the trailing elements, which in turn wastes
memory, reduces cache utilization, etc.
Therefore, in the runtime we split the linked embedding matrix into a separate
weight matrix and out-of-bounds vector.
Args:
component: Component for which to add hooks.
channel_id: Linked embedding channel for which to add hooks.
"""
var_name
=
network_units
.
linked_embeddings_name
(
channel_id
)
extended_matrix
=
component
.
get_variable
(
var_name
)
extended_num_rows
=
tf
.
shape
(
extended_matrix
)[
0
]
matrix
,
vector
=
tf
.
split
(
extended_matrix
,
[
extended_num_rows
-
1
,
1
],
0
)
transposed
=
tf
.
transpose
(
matrix
)
hook_name
=
functools
.
partial
(
_get_hook_name
,
component
,
var_name
)
_add_hook_node
(
matrix
,
hook_name
(
'/weights'
))
_add_hook_node
(
transposed
,
hook_name
(
'/weights/transposed'
))
# Add blocked versions of the matrix and its transpose.
for
blocked
,
blocked_suffix
in
_blocked_and_dtype_transformations
(
matrix
):
blocked_name
=
hook_name
(
'/weights/matrix'
+
blocked_suffix
)
_add_hook_node
(
blocked
,
blocked_name
)
for
blocked
,
blocked_suffix
in
_blocked_and_dtype_transformations
(
transposed
):
blocked_name
=
hook_name
(
'/weights/transposed'
+
blocked_suffix
)
_add_hook_node
(
blocked
,
blocked_name
)
# Add shape and out-of-bounds information.
_add_hook_node
(
tf
.
shape
(
transposed
),
hook_name
(
'/weights/transposed/shape'
))
_add_hook_node
(
vector
,
_get_hook_name
(
component
,
var_name
,
'/out_of_bounds'
))
def _add_hooks_for_fixed_embedding_matrix(component, channel_id):
  """Adds runtime hooks for a fixed embedding matrix.

  The hooks remove the last row from the embedding matrix.  The extra row was
  probably intended for out-of-vocabulary items, but those are handled in the
  feature system and the extra row is never used.

  Args:
    component: Component for which to add hooks.
    channel_id: Fixed embedding channel for which to add hooks.
  """
  var_name = network_units.fixed_embeddings_name(channel_id)
  extended_matrix = component.get_variable(var_name)
  extended_num_rows = tf.shape(extended_matrix)[0]
  matrix = tf.slice(extended_matrix, [0, 0], [extended_num_rows - 1, -1])

  # TODO(googleuser): If the extra row is removed from the variable itself,
  # remove the tf.slice() and point the hook directly at the variable.
  _add_hook_node(matrix, _get_hook_name(component, var_name, '/trimmed'))
def _add_hooks_for_derived_parameter(getter):
  """Adds hooks for derived parameters.

  Derived parameters are typically slight format modifications of regular
  parameters, exposed because doing the computation in Python is more
  convenient than as VariableStore wrappers.

  Args:
    getter: Function which, when called, will return the derived tensor.
  """
  parameter = getter()
  full_name = parameter.op.name

  def _hook_name(base_name):
    """Returns a hook node name constructed from a base name."""
    return full_name + base_name

  if parameter.shape.ndims != 2:
    tf.logging.info('Not adding matrix hooks for derived parameter %s',
                    full_name)
    return

  _add_hook_node(tf.transpose(parameter), _hook_name('/transposed'))
  for blocked, blocked_suffix in _blocked_and_dtype_transformations(parameter):
    _add_hook_node(blocked, _hook_name('/matrix' + blocked_suffix))
def _add_hooks_for_trainable_params(component, params):
  """Adds runtime hooks for a variable of trainable parameters.

  Ignores parameters that are not statically-deducible as matrices.

  Args:
    component: Component for which to add hooks.
    params: Variable for which to add hooks.
  """
  full_name = params.op.name
  matrix = component.get_variable(var_params=params)

  # Only add hooks for tensors that are statically-deducible as matrices.
  if params.shape.ndims != 2:
    tf.logging.info('Not adding hooks for trainable params %s', full_name)
    return

  # Infer the suffix to append to variable names, if any, based on whether the
  # possibly-averaged |matrix| is named differently than the |params|.
  suffix = re.sub('^' + re.escape(full_name), '', matrix.op.name)
  check.Ne(suffix, matrix.op.name,
           'Failed to find suffix for params %s' % full_name)

  def _hook_name(base_name):
    """Returns a hook node name constructed from a base name."""
    return full_name + base_name + suffix

  # Add the matrix and its transpose.
  transposed = tf.transpose(matrix)
  _add_hook_node(matrix, _hook_name('/matrix'))
  _add_hook_node(transposed, _hook_name('/transposed'))

  # Add blocked versions of the matrix and its transpose.
  for blocked, blocked_suffix in _blocked_and_dtype_transformations(matrix):
    _add_hook_node(blocked, _hook_name('/matrix' + blocked_suffix))
  for blocked, blocked_suffix in _blocked_and_dtype_transformations(transposed):
    _add_hook_node(blocked, _hook_name('/transposed' + blocked_suffix))

  # Also add hooks for the original shapes, which are obscured by padding.
  _add_hook_node(tf.shape(matrix), _hook_name('/matrix/shape'))
  _add_hook_node(tf.shape(transposed), _hook_name('/transposed/shape'))
def make_padded_blocked_matrix(matrix, block_size):
  """Converts a matrix to padded column-blocked format.

  For example, given a [64,127] matrix and block_size=16, this function returns
  an [8,64,16] tensor where the 8 inner sub-matrices, when concatenated left to
  right, re-constitute the original matrix.  Note that the 8th sub-matrix has a
  final column of padding.

  Args:
    matrix: The matrix to convert.
    block_size: The number of columns per block.

  Returns:
    Padded column-blocked matrix.
  """
  shape = tf.shape(matrix)
  num_rows = shape[0]
  num_columns = shape[1]

  # Compute the amount of padding and resulting number of blocks.
  last_block_size = num_columns % block_size
  padding_size = (block_size - last_block_size) % block_size
  num_blocks = (num_columns + padding_size) // block_size

  # Somehow the obvious approach based on tf.split() and tf.stack() doesn't
  # work (seems that the number of splits needs to be statically-known), but
  # this alternative based on tf.transpose() and tf.reshape() does.
  # Continuing the example from the docstring...
  padded = tf.pad(matrix, [[0, 0], [0, padding_size]])  # [64,127] => [64,128]
  transposed = tf.transpose(padded)  # => [128,64]
  blocked = tf.reshape(transposed, [num_blocks, block_size, num_rows])  # => [8,16,64]
  return tf.transpose(blocked, [0, 2, 1])  # => [8,64,16]
def bfloat16_permutation(tensor):
  """Permutes values in the last dimension of a tensor.

  This permutation is used so that we can directly use unpacklo/unpackhi AVX2
  instructions on the matrix coefficients.  These unpacking instructions
  effectively permute the data.  See FastUnpackPermutation() and
  AvxFloatVecArray::Load(const TruncatedFloat16 *) in avx_vector_array.h for
  more details.

  Args:
    tensor: Blocked matrix, the result of make_padded_blocked_matrix().  Must
        have its last dimension a multiple of 16.

  Returns:
    Permuted matrix, suitable for calling tf.to_bfloat16() on.  For testing
    convenience we don't do so in this method.

  Raises:
    ValueError: If the matrix's block dimension is not a multiple of 16.
  """
  orig_shape = tensor.shape
  if tensor.shape[-1] % 16 != 0:
    raise ValueError('Bad block dimension, must be divisible by 16')
  permutation = [0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15]
  indices = tf.constant(
      [16 * (i // 16) + permutation[i % 16] for i in xrange(orig_shape[-1])])
  return tf.gather(tensor, indices, axis=len(orig_shape) - 1)
def _get_hook_name(component, variable_name, suffix):
  """Builds the name of a hook node.

  Specifically, the name of the hook node is:

    <component.name>/<variable_name><suffix><remainder>

  where <remainder> is whatever follows <variable_name> in the name of the op
  that produces the named variable.  Recall that component.get_variable() may
  return either the original variable or its moving average.  These might have
  names like:

    foo_component/bar_variable
    foo_component/bar_variable/ExponentialMovingAverage

  In the examples above, the <remainder> is "" for the original variable and
  "/ExponentialMovingAverage" for its moving average.  Calling this function
  with suffix="/baz_suffix" in either case would add hook nodes named:

    foo_component/bar_variable/baz_suffix
    foo_component/bar_variable/baz_suffix/ExponentialMovingAverage

  Note that the suffix is inserted after the variable name, not necessarily at
  the end of the entire op name.

  Args:
    component: Component that the hook node belongs to.
    variable_name: Variable that the hook node name is based on.
    suffix: Suffix to append to the variable name.

  Returns:
    Name of the hook node.
  """
  variable = component.get_variable(variable_name)
  full_name = variable.op.name
  prefix = component.name + '/' + variable_name
  hook_name = re.sub('^' + re.escape(prefix), prefix + suffix, full_name)

  # If re.sub() did not match anything, it returns the unmodified input (i.e.,
  # |full_name|).  Enforce that some change was made.
  check.Ne(full_name, hook_name,
           'Failed to match expected variable prefix "{}" in variable "{}"'
           .format(prefix, full_name))
  return hook_name
def _add_hook_node(tensor, fully_qualified_name):
  """Adds a hook node that outputs a tensor with a fully-qualified name."""
  # Since the name is fully-qualified, insert the hook node into the top-level
  # name scope.
  with tf.name_scope(None):
    tf.identity(tensor, name=fully_qualified_name)
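A minimal numpy sketch, not part of this commit and using hypothetical shapes, illustrating the equivalence described in the linked-embedding docstring above: extending the weight matrix with the out-of-bounds vector and the activations with a 0/1 indicator reproduces the per-row conditional, provided out-of-bounds activation rows are zeroed.

import numpy as np

stride, dim, out_dim = 4, 3, 2
weight_matrix = np.random.rand(dim, out_dim)
out_of_bounds_vector = np.random.rand(1, out_dim)
act_block = np.random.rand(stride, dim)
step_idx = np.array([0, -1, 2, -1])

# Zero the out-of-bounds rows and append the 0/1 indicator column.
oob = (step_idx == -1).astype(float)[:, None]
act_block = act_block * (1.0 - oob)
extended_acts = np.hstack([act_block, oob])                          # [stride, dim+1]
extended_matrix = np.vstack([weight_matrix, out_of_bounds_vector])   # [dim+1, out_dim]

# One matmul now computes the same outputs as the per-row conditional.
outputs = extended_acts.dot(extended_matrix)
expected = np.where(oob == 1.0, out_of_bounds_vector, act_block.dot(weight_matrix))
assert np.allclose(outputs, expected)

The runtime hooks above expose the two halves of extended_matrix separately, which is why the split avoids the extra copy and padding costs listed in the docstring.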
research/syntaxnet/dragnn/python/runtime_support_test.py
0 → 100644
View file @
80178fc6
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the runtime support utils."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from dragnn.protos import export_pb2
from dragnn.protos import spec_pb2
from dragnn.python import network_units
from dragnn.python import runtime_support


class MockNetwork(object):
  """Mock for tests."""

  def __init__(self):
    self.params = [
        tf.get_variable('rank2', [64, 127], tf.float32),
        tf.get_variable('rank3', [64, 127, 250], tf.float32)
    ]
    self.derived_params = [
        self._fake_derived_vector, self._fake_derived_parameter
    ]

  def _fake_derived_vector(self):
    value = tf.constant([1, 2, 3], dtype=tf.float32)
    with tf.name_scope(None):
      return tf.identity(value, name='derived/vector')

  def _fake_derived_parameter(self):
    # Use absolute scoping to put the derived parameter in the same namespace.
    base_name = self.params[0].op.name.rsplit('/', 1)[0]
    with tf.name_scope(None):
      return tf.concat(
          [self.params[0], self.params[0]],
          axis=0,
          name='{}/derived'.format(base_name))
class MockComponent(object):
  """Mock for tests."""

  def __init__(self):
    self.name = 'test_component'
    self.spec = spec_pb2.ComponentSpec()
    with tf.variable_scope(self.name):
      self.network = MockNetwork()

  def get_variable(self, var_name=None, var_params=None):
    if var_name:
      return tf.get_variable(var_name)
    else:
      return var_params
class RuntimeSupportTest(tf.test.TestCase):
  """Testing rig."""

  def testAddLinkedHooks(self):
    component = MockComponent()
    link0 = component.spec.linked_feature.add()
    link1 = component.spec.linked_feature.add()
    link0.embedding_dim = -1  # direct link
    link1.embedding_dim = 32  # transformed link
    link0_matrix_name = network_units.linked_embeddings_name(0)
    link1_matrix_name = network_units.linked_embeddings_name(1)

    with self.test_session() as session:
      graph = session.graph

      # Create linked embedding matrices.  Only channel 1 uses one.
      with tf.variable_scope(component.name):
        tf.get_variable(
            link1_matrix_name, shape=[64 + 1, 32], dtype=tf.float32)

      # Add hooks.  This should ignore channel 0 and add hooks for channel 1.
      with tf.variable_scope(component.name, reuse=True):
        runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())

      # Check that no hooks were added for channel 0.
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/weights:0'.format(
            component.name, link0_matrix_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/weights/transposed:0'.format(
            component.name, link0_matrix_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/weights/transposed/shape:0'.format(
            component.name, link0_matrix_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/weights/transposed/blocked32:0'.format(
            component.name, link0_matrix_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/weights/transposed/blocked48:0'.format(
            component.name, link0_matrix_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/out_of_bounds:0'.format(
            component.name, link0_matrix_name))

      # Get the hooks added for channel 1.
      weights = graph.get_tensor_by_name('{}/{}/weights:0'.format(
          component.name, link1_matrix_name))
      transposed = graph.get_tensor_by_name(
          '{}/{}/weights/transposed:0'.format(component.name,
                                              link1_matrix_name))
      transposed_shape = graph.get_tensor_by_name(
          '{}/{}/weights/transposed/shape:0'.format(component.name,
                                                    link1_matrix_name))
      transposed32 = graph.get_tensor_by_name(
          '{}/{}/weights/transposed/blocked32:0'.format(component.name,
                                                        link1_matrix_name))
      transposed48 = graph.get_tensor_by_name(
          '{}/{}/weights/transposed/blocked48:0'.format(component.name,
                                                        link1_matrix_name))
      out_of_bounds = graph.get_tensor_by_name('{}/{}/out_of_bounds:0'.format(
          component.name, link1_matrix_name))

      # Check dimensions of the hooks.
      tf.global_variables_initializer().run()
      self.assertAllEqual(tf.shape(weights).eval(), [64, 32])
      self.assertAllEqual(tf.shape(transposed).eval(), [32, 64])
      self.assertAllEqual(transposed_shape.eval(), [32, 64])
      self.assertAllEqual(tf.shape(transposed32).eval(), [2, 32, 32])
      self.assertAllEqual(tf.shape(transposed48).eval(), [2, 32, 48])
      self.assertAllEqual(tf.shape(out_of_bounds).eval(), [1, 32])

  def testAddFixedHooks(self):
    component = MockComponent()
    fixed0 = component.spec.fixed_feature.add()
    fixed1 = component.spec.fixed_feature.add()
    fixed0.embedding_dim = -1
    fixed1.embedding_dim = 32
    fixed0.vocabulary_size = 100
    fixed1.vocabulary_size = 1000
    fixed0_matrix_name = network_units.fixed_embeddings_name(0)
    fixed1_matrix_name = network_units.fixed_embeddings_name(1)

    with self.test_session() as session:
      graph = session.graph

      # Create fixed embedding matrices.  Only channel 1 uses one.
      with tf.variable_scope(component.name):
        tf.get_variable(
            fixed1_matrix_name, shape=[1000 + 1, 32], dtype=tf.float32)

      # Add hooks.  This should ignore channel 0 and add hooks for channel 1.
      with tf.variable_scope(component.name, reuse=True):
        runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())

      # Check that no hooks were added for channel 0.
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/trimmed:0'.format(
            component.name, fixed0_matrix_name))

      # Get the hooks added for channel 1.
      trimmed = graph.get_tensor_by_name('{}/{}/trimmed:0'.format(
          component.name, fixed1_matrix_name))

      # Check dimensions of the hooks.
      tf.global_variables_initializer().run()
      self.assertAllEqual(tf.shape(trimmed).eval(), [1000, 32])
  def testAddParamsHooks(self):
    component = MockComponent()
    rank2_name = 'rank2'
    rank3_name = 'rank3'

    with self.test_session() as session:
      graph = session.graph

      # Add hooks.  This should add hooks for all rank-2 params.
      with tf.variable_scope(component.name, reuse=True):
        runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())

      # Check that no hooks were added for the rank-3 params.
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/matrix:0'.format(
            component.name, rank3_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/transposed:0'.format(
            component.name, rank3_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/matrix/blocked32:0'.format(
            component.name, rank3_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/matrix/blocked48:0'.format(
            component.name, rank3_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/transposed/blocked32:0'.format(
            component.name, rank3_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/transposed/blocked48:0'.format(
            component.name, rank3_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/matrix/shape:0'.format(
            component.name, rank3_name))
      with self.assertRaises(KeyError):
        graph.get_tensor_by_name('{}/{}/transposed/shape:0'.format(
            component.name, rank3_name))

      # Get the hooks added for each variable.
      matrix = graph.get_tensor_by_name('{}/{}/matrix:0'.format(
          component.name, rank2_name))
      transposed = graph.get_tensor_by_name('{}/{}/transposed:0'.format(
          component.name, rank2_name))
      matrix32 = graph.get_tensor_by_name('{}/{}/matrix/blocked32:0'.format(
          component.name, rank2_name))
      matrix48 = graph.get_tensor_by_name('{}/{}/matrix/blocked48:0'.format(
          component.name, rank2_name))
      transposed32 = graph.get_tensor_by_name(
          '{}/{}/transposed/blocked32:0'.format(component.name, rank2_name))
      transposed48 = graph.get_tensor_by_name(
          '{}/{}/transposed/blocked48:0'.format(component.name, rank2_name))
      matrix_shape = graph.get_tensor_by_name('{}/{}/matrix/shape:0'.format(
          component.name, rank2_name))
      transposed_shape = graph.get_tensor_by_name(
          '{}/{}/transposed/shape:0'.format(component.name, rank2_name))

      # Check dimensions of the hooks.
      tf.global_variables_initializer().run()
      self.assertAllEqual(tf.shape(matrix).eval(), [64, 127])
      self.assertAllEqual(tf.shape(transposed).eval(), [127, 64])
      self.assertAllEqual(matrix_shape.eval(), [64, 127])
      self.assertAllEqual(transposed_shape.eval(), [127, 64])
      self.assertAllEqual(tf.shape(matrix32).eval(), [4, 64, 32])
      self.assertAllEqual(tf.shape(matrix48).eval(), [3, 64, 48])
      self.assertAllEqual(tf.shape(transposed32).eval(), [2, 127, 32])
      self.assertAllEqual(tf.shape(transposed48).eval(), [2, 127, 48])

  def testAddDerivedParamHooks(self):
    component = MockComponent()
    derived_name = 'derived'

    with self.test_session() as session:
      graph = session.graph

      # Add hooks.
      with tf.variable_scope(component.name, reuse=True):
        runtime_support.add_hooks(component, export_pb2.CellSubgraphSpec())
      session.run(tf.global_variables_initializer())

      # Get hooks for the derived vector.
      vector = graph.get_tensor_by_name('derived/vector:0')
      self.assertEqual(vector.shape, (3,))

      # Get the hooks for the derived variable.
      matrix = graph.get_tensor_by_name('{}/{}/matrix/blocked32:0'.format(
          component.name, derived_name))
      self.assertAllEqual(tf.shape(matrix).eval(), [4, 128, 32])

      # Check the bfloat16 version.  It should have the same shape.
      bfloat16_matrix = graph.get_tensor_by_name(
          '{}/{}/matrix/blocked32/bfloat16:0'.format(component.name,
                                                     derived_name))
      self.assertAllEqual(tf.shape(bfloat16_matrix).eval(), [4, 128, 32])
  def testMakePaddedBlockedMatrix(self):
    with self.test_session():
      matrix = [[1, 2, 3, 4, 5],
                [6, 7, 8, 9, 10],
                [11, 12, 13, 14, 15],
                [16, 17, 18, 19, 20]]
      expected_blocked = [[[1, 2], [6, 7], [11, 12], [16, 17]],
                          [[3, 4], [8, 9], [13, 14], [18, 19]],
                          [[5, 0], [10, 0], [15, 0], [20, 0]]]

      matrix = tf.constant(matrix, tf.float32)
      actual_blocked = runtime_support.make_padded_blocked_matrix(matrix, 2)
      self.assertAllEqual(actual_blocked.eval(), expected_blocked)

  def testBfloat16Permutation(self):
    with self.test_session():
      matrix = [list(range(16))]
      expected_permuted = [[0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14,
                            15]]

      matrix = tf.constant(matrix, tf.float32)
      actual_permuted = runtime_support.bfloat16_permutation(matrix)
      self.assertAllEqual(actual_permuted.eval(), expected_permuted)

  def testLargerBfloat16Permutation(self):
    with self.test_session() as session:
      matrix = tf.random_uniform((3, 4, 32))
      permuted = runtime_support.bfloat16_permutation(matrix)
      matrix, actual_permuted = session.run([matrix, permuted])

      # Just check a few items for now, hopefully that's sufficient to ensure
      # the permutation is okay.
      self.assertEqual(matrix[0, 0, 0], actual_permuted[0, 0, 0])
      self.assertEqual(matrix[0, 0, 1], actual_permuted[0, 0, 1])
      self.assertEqual(matrix[1, 1, 16], actual_permuted[1, 1, 16])
      self.assertEqual(matrix[2, 0, 4], actual_permuted[2, 0, 8])
      self.assertEqual(matrix[2, 0, 5], actual_permuted[2, 0, 9])
      self.assertEqual(matrix[2, 1, 8], actual_permuted[2, 1, 4])
      self.assertEqual(matrix[2, 1, 8 + 16], actual_permuted[2, 1, 4 + 16])

  def testAddCellSubgraphSpecHook(self):
    component = MockComponent()
    cell = export_pb2.CellSubgraphSpec()
    cell.input.add(
        name='feature',
        tensor='feature_tensor',
        type=export_pb2.CellSubgraphSpec.Input.TYPE_FEATURE)
    cell.input.add(
        name='recurrent',
        tensor='recurrent_tensor',
        type=export_pb2.CellSubgraphSpec.Input.TYPE_RECURRENT)
    cell.output.add(name='layer_0', tensor='layer_0_tensor')
    cell.output.add(name='logits', tensor='logits_tensor')

    with self.test_session() as session:
      graph = session.graph

      # Add hooks for the cell constructed above.
      with tf.variable_scope(component.name, reuse=True):
        runtime_support.add_hooks(component, cell)

      # Get the hook containing the wire-format proto.
      cell_wire_format = graph.get_tensor_by_name(
          '{}/EXPORT/CellSubgraphSpec:0'.format(component.name))

      # Check that the hook matches the cell.
      tf.global_variables_initializer().run()
      self.assertEqual(cell_wire_format.eval(), cell.SerializeToString())


if __name__ == '__main__':
  tf.test.main()
research/syntaxnet/dragnn/python/sentence_io_test.py
View file @
80178fc6
...
...
@@ -16,30 +16,19 @@
import os
import tensorflow as tf
-from tensorflow.python.framework import test_util
-from tensorflow.python.platform import googletest
from dragnn.python import dragnn_ops
from dragnn.python import sentence_io
from syntaxnet import sentence_pb2
-FLAGS = tf.app.flags.FLAGS
-def setUpModule():
-  if not hasattr(FLAGS, 'test_srcdir'):
-    FLAGS.test_srcdir = ''
-  if not hasattr(FLAGS, 'test_tmpdir'):
-    FLAGS.test_tmpdir = tf.test.get_temp_dir()
+from syntaxnet import test_flags
-class ConllSentenceReaderTest(test_util.TensorFlowTestCase):
+class ConllSentenceReaderTest(tf.test.TestCase):
  def setUp(self):
    # This dataset contains 54 sentences.
-    self.filepath = os.path.join(FLAGS.test_srcdir,
+    self.filepath = os.path.join(test_flags.source_root(),
                                 'syntaxnet/testdata/mini-training-set')
    self.batch_size = 20
...
...
@@ -82,4 +71,4 @@ class ConllSentenceReaderTest(test_util.TensorFlowTestCase):
if __name__ == '__main__':
-  googletest.main()
+  tf.test.main()
research/syntaxnet/dragnn/python/spec_builder.py
View file @
80178fc6
...
...
@@ -15,7 +15,6 @@
"""Utils for building DRAGNN specs."""
from six.moves import xrange
import tensorflow as tf
from dragnn.protos import spec_pb2
...
...
@@ -110,7 +109,9 @@ class ComponentSpecBuilder(object):
    if transition_spec.registered_name == 'arc-standard':
      return 'shift-reduce-step'
-    if transition_spec.registered_name in ('shift-only', 'tagger'):
+    if transition_spec.registered_name in ('shift-only', 'tagger', 'morpher',
+                                           'lm-transitions', 'dependency-label',
+                                           'category'):
      if 'left_to_right' in transition_spec.parameters:
        if transition_spec.parameters['left_to_right'] == 'false':
          return 'reverse-token'
...
research/syntaxnet/dragnn/python/spec_builder_test.py
View file @
80178fc6
...
...
@@ -27,15 +27,6 @@ from dragnn.python import spec_builder
from syntaxnet import parser_trainer
-FLAGS = tf.app.flags.FLAGS
-def setUpModule():
-  if not hasattr(FLAGS, 'test_srcdir'):
-    FLAGS.test_srcdir = ''
-  if not hasattr(FLAGS, 'test_tmpdir'):
-    FLAGS.test_tmpdir = tf.test.get_temp_dir()
class SpecBuilderTest(tf.test.TestCase):
...
...
research/syntaxnet/dragnn/python/trainer_lib.py
View file @
80178fc6
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions to build DRAGNN MasterSpecs and schedule model training.
Provides functions to finish a MasterSpec, building required lexicons for it and
...
...
@@ -23,13 +22,12 @@ import random
import tensorflow as tf
from six.moves import xrange
from tensorflow.core.framework.summary_pb2 import Summary
from tensorflow.python.framework import errors
from tensorflow.python.platform import gfile
flags = tf.app.flags
FLAGS = flags.FLAGS
from syntaxnet.util import check


def calculate_component_accuracies(eval_res_values):
...
...
@@ -59,7 +57,9 @@ def annotate_dataset(sess, annotator, eval_corpus):
    end = min(start + batch_size, len(eval_corpus))
    serialized_annotations = sess.run(
        annotator['annotations'],
-        feed_dict={annotator['input_batch']: eval_corpus[start:end]})
+        feed_dict={annotator['input_batch']: eval_corpus[start:end]
+                  })
    assert len(serialized_annotations) == end - start
    processed.extend(serialized_annotations)
  tf.logging.info('Done. Produced %d annotations', len(processed))
...
@@ -81,16 +81,60 @@ def get_summary_writer(tensorboard_dir):
  return summary_writer


+def generate_target_per_step_schedule(pretrain_steps, train_steps):
+  """Generates a sampled training schedule.
+
+  Arguments:
+    pretrain_steps: List, number of pre-training steps per each target.
+    train_steps: List, number of sampled training steps per each target.
+
+  Returns:
+    Python list of length sum(pretrain_steps + train_steps), containing
+    target numbers per step.
+  """
+  check.Eq(len(pretrain_steps), len(train_steps))
+
+  # Arbitrary seed to make sure the return is deterministic.
+  random.seed(0x31337)
+
+  tf.logging.info('Determining the training schedule...')
+  target_per_step = []
+  for target_idx in xrange(len(pretrain_steps)):
+    target_per_step += [target_idx] * pretrain_steps[target_idx]
+
+  train_steps = list(train_steps)
+  while sum(train_steps) > 0:
+    step = random.randint(0, sum(train_steps) - 1)
+    cumulative_steps = 0
+    for target_idx in xrange(len(train_steps)):
+      cumulative_steps += train_steps[target_idx]
+      if step < cumulative_steps:
+        break
+    assert train_steps[target_idx] > 0
+    train_steps[target_idx] -= 1
+    target_per_step.append(target_idx)
+
+  tf.logging.info('Training schedule defined!')
+  return target_per_step
+
+
def run_training_step(sess, trainer, train_corpus, batch_size):
  """Runs a single iteration of train_op on a randomly sampled batch."""
  batch = random.sample(train_corpus, batch_size)
  sess.run(trainer['run'], feed_dict={trainer['input_batch']: batch})


-def run_training(sess, trainers, annotator, evaluator, pretrain_steps,
-                 train_steps, train_corpus, eval_corpus, eval_gold, batch_size,
-                 summary_writer, report_every, saver, checkpoint_filename,
-                 checkpoint_stats=None):
+def run_training(sess,
+                 trainers,
+                 annotator,
+                 evaluator,
+                 pretrain_steps,
+                 train_steps,
+                 train_corpus,
+                 eval_corpus,
+                 eval_gold,
+                 batch_size,
+                 summary_writer,
+                 report_every,
+                 saver,
+                 checkpoint_filename,
+                 checkpoint_stats=None):
  """Runs multi-task DRAGNN training on a single corpus.

  Arguments:
...
...
@@ -117,30 +161,15 @@ def run_training(sess, trainers, annotator, evaluator, pretrain_steps,
    checkpoint_filename: File to save checkpoints to.
    checkpoint_stats: Stats of checkpoint.
  """
-  random.seed(0x31337)
  if not checkpoint_stats:
    checkpoint_stats = [0] * (len(train_steps) + 1)
-  tf.logging.info('Determining the training schedule...')
-  target_for_step = []
-  for target_idx in xrange(len(pretrain_steps)):
-    target_for_step += [target_idx] * pretrain_steps[target_idx]
-  while sum(train_steps) > 0:
-    step = random.randint(0, sum(train_steps) - 1)
-    cumulative_steps = 0
-    for target_idx in xrange(len(train_steps)):
-      cumulative_steps += train_steps[target_idx]
-      if step < cumulative_steps:
-        break
-    assert train_steps[target_idx] > 0
-    train_steps[target_idx] -= 1
-    target_for_step.append(target_idx)
-  tf.logging.info('Training schedule defined!')
+  target_per_step = generate_target_per_step_schedule(pretrain_steps,
+                                                      train_steps)

  best_eval_metric = -1.0
  tf.logging.info('Starting training...')
  actual_step = sum(checkpoint_stats[1:])
-  for step, target_idx in enumerate(target_for_step):
+  for step, target_idx in enumerate(target_per_step):
    run_training_step(sess, trainers[target_idx], train_corpus, batch_size)
    checkpoint_stats[target_idx + 1] += 1
    if step % 100 == 0:
...
...
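A minimal usage sketch, not part of this commit and with hypothetical step counts, of how the schedule produced by generate_target_per_step_schedule() drives multi-task training: each entry names the target whose train op runs on the next sampled batch.

from dragnn.python import trainer_lib

pretrain_steps = [2, 0]  # target 0 pretrains for two steps
train_steps = [3, 3]     # then six interleaved sampled steps

schedule = trainer_lib.generate_target_per_step_schedule(pretrain_steps,
                                                         train_steps)
assert len(schedule) == sum(pretrain_steps) + sum(train_steps)
assert schedule[:2] == [0, 0]  # pretraining steps come first, in target order

# The sampled tail is deterministic because the generator reseeds internally.
for target_idx in schedule:
  pass  # e.g. run_training_step(sess, trainers[target_idx], train_corpus, batch_size)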
research/syntaxnet/dragnn/python/trainer_lib_test.py
0 → 100644
View file @
80178fc6
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dragnn.python.trainer_lib."""
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest

from dragnn.python import trainer_lib


class TrainerLibTest(test_util.TensorFlowTestCase):

  def testImmutabilityOfArguments(self):
    """Tests that training schedule generation does not change its arguments."""
    pretrain_steps = [1, 2, 3]
    train_steps = [5, 5, 5]
    trainer_lib.generate_target_per_step_schedule(pretrain_steps, train_steps)
    self.assertEqual(pretrain_steps, [1, 2, 3])
    self.assertEqual(train_steps, [5, 5, 5])

  def testTrainingScheduleGenerationAndDeterminism(self):
    """Non-trivial schedule, check generation and determinism."""
    pretrain_steps = [1, 2, 3]
    train_steps = [5, 5, 5]
    generated_schedule = trainer_lib.generate_target_per_step_schedule(
        pretrain_steps, train_steps)
    expected_schedule = [
        0, 1, 1, 2, 2, 2, 1, 0, 2, 1, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2
    ]
    self.assertEqual(generated_schedule, expected_schedule)

  def testNoPretrainSteps(self):
    """Edge case, 1 target, no pretrain."""
    generated_schedule = trainer_lib.generate_target_per_step_schedule([0],
                                                                       [10])
    expected_schedule = [0] * 10
    self.assertEqual(generated_schedule, expected_schedule)

  def testNoTrainSteps(self):
    """Edge case, 1 target, only pretrain."""
    generated_schedule = trainer_lib.generate_target_per_step_schedule([10],
                                                                       [0])
    expected_schedule = [0] * 10
    self.assertEqual(generated_schedule, expected_schedule)


if __name__ == '__main__':
  googletest.main()
research/syntaxnet/dragnn/python/wrapped_units.py
View file @
80178fc6
...
...
@@ -330,7 +330,7 @@ class LayerNormBasicLSTMNetwork(BaseLSTMNetwork):
    def _cell_closure(scope):
      """Applies the LSTM cell to the current inputs and state."""
-      return cell(input_tensor, state, scope)
+      return cell(input_tensor, state, scope=scope)

    unused_h, state = self._apply_with_captured_variables(_cell_closure)
...
...
research/syntaxnet/dragnn/tensorflow_ops.bzl
deleted
100644 → 0
View file @
a84e1ef9
# -*- Python -*-
# Given a source file, generate a test name.
# i.e. "common_runtime/direct_session_test.cc" becomes
# "common_runtime_direct_session_test"
def
src_to_test_name
(
src
):
return
src
.
replace
(
"/"
,
"_"
).
split
(
"."
)[
0
]
# Return the options to use for a C++ library or binary build.
# Uses the ":optmode" config_setting to pick the options.
load
(
"@org_tensorflow//tensorflow/core:platform/default/build_config_root.bzl"
,
"tf_cuda_tests_tags"
,
"tf_sycl_tests_tags"
,
)
load
(
"@local_config_cuda//cuda:build_defs.bzl"
,
"if_cuda"
,
"cuda_default_copts"
)
# List of proto files for android builds
def
tf_android_core_proto_sources
(
core_proto_sources_relative
):
return
[
"@org_tensorflow//tensorflow/core:"
+
p
for
p
in
core_proto_sources_relative
]
# Returns the list of pb.h and proto.h headers that are generated for
# tf_android_core_proto_sources().
def
tf_android_core_proto_headers
(
core_proto_sources_relative
):
return
([
"@org_tensorflow//tensorflow/core/"
+
p
.
replace
(
".proto"
,
".pb.h"
)
for
p
in
core_proto_sources_relative
]
+
[
"@org_tensorflow//tensorflow/core/"
+
p
.
replace
(
".proto"
,
".proto.h"
)
for
p
in
core_proto_sources_relative
])
def
if_android_arm
(
a
):
return
select
({
"@org_tensorflow//tensorflow:android_arm"
:
a
,
"//conditions:default"
:
[],
})
def
if_android_arm64
(
a
):
return
select
({
"@org_tensorflow//tensorflow:android_arm64"
:
a
,
"//conditions:default"
:
[],
})
def
if_not_android
(
a
):
return
select
({
"@org_tensorflow//tensorflow:android"
:
[],
"//conditions:default"
:
a
,
})
def
if_android
(
a
):
return
select
({
"@org_tensorflow//tensorflow:android"
:
a
,
"//conditions:default"
:
[],
})
def
if_ios
(
a
):
return
select
({
"@org_tensorflow//tensorflow:ios"
:
a
,
"//conditions:default"
:
[],
})
def
if_mobile
(
a
):
return
select
({
"@org_tensorflow//tensorflow:android"
:
a
,
"@org_tensorflow//tensorflow:ios"
:
a
,
"//conditions:default"
:
[],
})
def
if_not_mobile
(
a
):
return
select
({
"@org_tensorflow//tensorflow:android"
:
[],
"@org_tensorflow//tensorflow:ios"
:
[],
"//conditions:default"
:
a
,
})
def
if_not_windows
(
a
):
return
select
({
"@org_tensorflow//tensorflow:windows"
:
[],
"//conditions:default"
:
a
,
})
def
if_x86
(
a
):
return
select
({
"@org_tensorflow//tensorflow:linux_x86_64"
:
a
,
"@org_tensorflow//tensorflow:windows"
:
a
,
"//conditions:default"
:
[],
})
def
tf_copts
():
return
([
"-DEIGEN_AVOID_STL_ARRAY"
,
"-Iexternal/gemmlowp"
,
"-Wno-sign-compare"
,
"-fno-exceptions"
,]
+
if_cuda
([
"-DGOOGLE_CUDA=1"
])
+
if_android_arm
([
"-mfpu=neon"
])
+
select
({
"@org_tensorflow//tensorflow:android"
:
[
"-std=c++11"
,
"-DTF_LEAN_BINARY"
,
"-O2"
,
],
"@org_tensorflow//tensorflow:darwin"
:
[],
"@org_tensorflow//tensorflow:windows"
:
[
"/DLANG_CXX11"
,
"/D__VERSION__=
\\\"
MSVC
\\\"
"
,
"/DPLATFORM_WINDOWS"
,
"/DEIGEN_HAS_C99_MATH"
,
"/DTENSORFLOW_USE_EIGEN_THREADPOOL"
,
],
"@org_tensorflow//tensorflow:ios"
:
[
"-std=c++11"
],
"//conditions:default"
:
[
"-pthread"
]}))
def
tf_opts_nortti_if_android
():
return
if_android
([
"-fno-rtti"
,
"-DGOOGLE_PROTOBUF_NO_RTTI"
,
"-DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER"
,
])
# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate a library for that file.
def
tf_gen_op_libs
(
op_lib_names
,
deps
=
None
):
# Make library out of each op so it can also be used to generate wrappers
# for various languages.
if
not
deps
:
deps
=
[]
for
n
in
op_lib_names
:
native
.
cc_library
(
name
=
n
+
"_op_lib"
,
copts
=
tf_copts
(),
srcs
=
[
"ops/"
+
n
+
".cc"
],
deps
=
deps
+
[
"@org_tensorflow//tensorflow/core:framework"
],
visibility
=
[
"//visibility:public"
],
alwayslink
=
1
,
linkstatic
=
1
,)
def
tf_gen_op_wrapper_cc
(
name
,
out_ops_file
,
pkg
=
""
,
op_gen
=
"@org_tensorflow//tensorflow/cc:cc_op_gen_main"
,
deps
=
None
,
override_file
=
None
,
include_internal_ops
=
0
):
# Construct an op generator binary for these ops.
tool
=
out_ops_file
+
"_gen_cc"
if
deps
==
None
:
deps
=
[
pkg
+
":"
+
name
+
"_op_lib"
]
native
.
cc_binary
(
name
=
tool
,
copts
=
tf_copts
(),
linkopts
=
[
"-lm"
],
linkstatic
=
1
,
# Faster to link this one-time-use binary dynamically
deps
=
[
op_gen
]
+
deps
)
if
override_file
==
None
:
srcs
=
[]
override_arg
=
","
else
:
srcs
=
[
override_file
]
override_arg
=
"$(location "
+
override_file
+
")"
native
.
genrule
(
name
=
name
+
"_genrule"
,
outs
=
[
out_ops_file
+
".h"
,
out_ops_file
+
".cc"
,
out_ops_file
+
"_internal.h"
,
out_ops_file
+
"_internal.cc"
],
srcs
=
srcs
,
tools
=
[
":"
+
tool
],
cmd
=
(
"$(location :"
+
tool
+
") $(location :"
+
out_ops_file
+
".h) "
+
"$(location :"
+
out_ops_file
+
".cc) "
+
override_arg
+
" "
+
str
(
include_internal_ops
)))
# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate individual C++ .cc and .h
# files for each of the ops files mentioned, and then generate a
# single cc_library called "name" that combines all the
# generated C++ code.
#
# For example, for:
# tf_gen_op_wrappers_cc("tf_ops_lib", [ "array_ops", "math_ops" ])
#
#
# This will ultimately generate ops/* files and a library like:
#
# cc_library(name = "tf_ops_lib",
# srcs = [ "ops/array_ops.cc",
# "ops/math_ops.cc" ],
# hdrs = [ "ops/array_ops.h",
# "ops/math_ops.h" ],
# deps = [ ... ])
#
# Plus a private library for the "hidden" ops.
# cc_library(name = "tf_ops_lib_internal",
# srcs = [ "ops/array_ops_internal.cc",
# "ops/math_ops_internal.cc" ],
# hdrs = [ "ops/array_ops_internal.h",
# "ops/math_ops_internal.h" ],
# deps = [ ... ])
# TODO(googleuser): Cleaner approach for hidden ops.
def
tf_gen_op_wrappers_cc
(
name
,
op_lib_names
=
[],
other_srcs
=
[],
other_hdrs
=
[],
pkg
=
""
,
deps
=
[
"@org_tensorflow//tensorflow/cc:ops"
,
"@org_tensorflow//tensorflow/cc:scope"
,
"@org_tensorflow//tensorflow/cc:const_op"
,
],
op_gen
=
"@org_tensorflow//tensorflow/cc:cc_op_gen_main"
,
override_file
=
None
,
include_internal_ops
=
0
,
visibility
=
None
):
subsrcs
=
other_srcs
subhdrs
=
other_hdrs
internalsrcs
=
[]
internalhdrs
=
[]
for
n
in
op_lib_names
:
tf_gen_op_wrapper_cc
(
n
,
"ops/"
+
n
,
pkg
=
pkg
,
op_gen
=
op_gen
,
override_file
=
override_file
,
include_internal_ops
=
include_internal_ops
)
subsrcs
+=
[
"ops/"
+
n
+
".cc"
]
subhdrs
+=
[
"ops/"
+
n
+
".h"
]
internalsrcs
+=
[
"ops/"
+
n
+
"_internal.cc"
]
internalhdrs
+=
[
"ops/"
+
n
+
"_internal.h"
]
native
.
cc_library
(
name
=
name
,
srcs
=
subsrcs
,
hdrs
=
subhdrs
,
deps
=
deps
+
if_not_android
([
"@org_tensorflow//tensorflow/core:core_cpu"
,
"@org_tensorflow//tensorflow/core:framework"
,
"@org_tensorflow//tensorflow/core:lib"
,
"@org_tensorflow//tensorflow/core:protos_all_cc"
,
])
+
if_android
([
"@org_tensorflow//tensorflow/core:android_tensorflow_lib"
,
]),
copts
=
tf_copts
(),
alwayslink
=
1
,
visibility
=
visibility
)
native
.
cc_library
(
name
=
name
+
"_internal"
,
srcs
=
internalsrcs
,
hdrs
=
internalhdrs
,
deps
=
deps
+
if_not_android
([
"@org_tensorflow//tensorflow/core:core_cpu"
,
"@org_tensorflow//tensorflow/core:framework"
,
"@org_tensorflow//tensorflow/core:lib"
,
"@org_tensorflow//tensorflow/core:protos_all_cc"
,
])
+
if_android
([
"@org_tensorflow//tensorflow/core:android_tensorflow_lib"
,
]),
copts
=
tf_copts
(),
alwayslink
=
1
,
visibility
=
[
"@org_tensorflow//tensorflow:internal"
])
# Invoke this rule in .../tensorflow/python to build the wrapper library.
def
tf_gen_op_wrapper_py
(
name
,
out
=
None
,
hidden
=
None
,
visibility
=
None
,
deps
=
[],
require_shape_functions
=
False
,
hidden_file
=
None
,
generated_target_name
=
None
):
# Construct a cc_binary containing the specified ops.
tool_name
=
"gen_"
+
name
+
"_py_wrappers_cc"
if
not
deps
:
deps
=
[
"@org_tensorflow//tensorflow/core:"
+
name
+
"_op_lib"
]
native
.
cc_binary
(
name
=
tool_name
,
linkopts
=
[
"-lm"
],
copts
=
tf_copts
(),
linkstatic
=
1
,
# Faster to link this one-time-use binary dynamically
deps
=
([
"@org_tensorflow//tensorflow/core:framework"
,
"@org_tensorflow//tensorflow/python:python_op_gen_main"
]
+
deps
),
visibility
=
[
"@org_tensorflow//tensorflow:internal"
],
)
# Invoke the previous cc_binary to generate a python file.
if
not
out
:
out
=
"ops/gen_"
+
name
+
".py"
if
hidden
:
# `hidden` is a list of op names to be hidden in the generated module.
native
.
genrule
(
name
=
name
+
"_pygenrule"
,
outs
=
[
out
],
tools
=
[
tool_name
],
cmd
=
(
"$(location "
+
tool_name
+
") "
+
","
.
join
(
hidden
)
+
" "
+
(
"1"
if
require_shape_functions
else
"0"
)
+
" > $@"
))
elif
hidden_file
:
# `hidden_file` is file containing a list of op names to be hidden in the
# generated module.
native
.
genrule
(
name
=
name
+
"_pygenrule"
,
outs
=
[
out
],
srcs
=
[
hidden_file
],
tools
=
[
tool_name
],
cmd
=
(
"$(location "
+
tool_name
+
") @$(location "
+
hidden_file
+
") "
+
(
"1"
if
require_shape_functions
else
"0"
)
+
" > $@"
))
else
:
# No ops should be hidden in the generated module.
native
.
genrule
(
name
=
name
+
"_pygenrule"
,
outs
=
[
out
],
tools
=
[
tool_name
],
cmd
=
(
"$(location "
+
tool_name
+
") "
+
(
"1"
if
require_shape_functions
else
"0"
)
+
" > $@"
))
# Make a py_library out of the generated python file.
if
not
generated_target_name
:
generated_target_name
=
name
native
.
py_library
(
name
=
generated_target_name
,
srcs
=
[
out
],
srcs_version
=
"PY2AND3"
,
visibility
=
visibility
,
deps
=
[
"@org_tensorflow//tensorflow/python:framework_for_generated_wrappers"
,
],)
# Define a bazel macro that creates cc_test for tensorflow.
# TODO(googleuser): we need to enable this to work around the hidden symbol
# __cudaRegisterFatBinary error. Need more investigations.
def
tf_cc_test
(
name
,
srcs
,
deps
,
linkstatic
=
0
,
tags
=
[],
data
=
[],
size
=
"medium"
,
suffix
=
""
,
args
=
None
,
linkopts
=
[]):
native
.
cc_test
(
name
=
"%s%s"
%
(
name
,
suffix
),
srcs
=
srcs
,
size
=
size
,
args
=
args
,
copts
=
tf_copts
(),
data
=
data
,
deps
=
deps
,
linkopts
=
[
"-lpthread"
,
"-lm"
]
+
linkopts
,
linkstatic
=
linkstatic
,
tags
=
tags
)
# Part of the testing process requires a distinguishable name for the build
# rules that involve a GPU, even if otherwise identical to the base rule.
def
tf_cc_test_gpu
(
name
,
srcs
,
deps
,
linkstatic
=
0
,
tags
=
[],
data
=
[],
size
=
"medium"
,
suffix
=
""
,
args
=
None
):
tf_cc_test
(
name
,
srcs
,
deps
,
linkstatic
=
linkstatic
,
tags
=
tags
,
data
=
data
,
size
=
size
,
suffix
=
suffix
,
args
=
args
)
def
tf_cuda_cc_test
(
name
,
srcs
=
[],
deps
=
[],
tags
=
[],
data
=
[],
size
=
"medium"
,
linkstatic
=
0
,
args
=
[],
linkopts
=
[]):
tf_cc_test
(
name
=
name
,
srcs
=
srcs
,
deps
=
deps
,
tags
=
tags
+
[
"manual"
],
data
=
data
,
size
=
size
,
linkstatic
=
linkstatic
,
linkopts
=
linkopts
,
args
=
args
)
tf_cc_test
(
name
=
name
,
srcs
=
srcs
,
suffix
=
"_gpu"
,
deps
=
deps
+
if_cuda
([
"@org_tensorflow//tensorflow/core:gpu_runtime"
]),
linkstatic
=
if_cuda
(
1
,
0
),
tags
=
tags
+
tf_cuda_tests_tags
(),
data
=
data
,
size
=
size
,
linkopts
=
linkopts
,
args
=
args
)
# Create a cc_test for each of the tensorflow tests listed in "tests"
def
tf_cc_tests
(
srcs
,
deps
,
name
=
''
,
linkstatic
=
0
,
tags
=
[],
size
=
"medium"
,
args
=
None
,
linkopts
=
[]):
for
src
in
srcs
:
tf_cc_test
(
name
=
src_to_test_name
(
src
),
srcs
=
[
src
],
deps
=
deps
,
linkstatic
=
linkstatic
,
tags
=
tags
,
size
=
size
,
args
=
args
,
linkopts
=
linkopts
)
def
tf_cc_tests_gpu
(
srcs
,
deps
,
name
=
''
,
linkstatic
=
0
,
tags
=
[],
size
=
"medium"
,
args
=
None
):
tf_cc_tests
(
srcs
,
deps
,
linkstatic
,
tags
=
tags
,
size
=
size
,
args
=
args
)
def
tf_cuda_cc_tests
(
srcs
,
deps
,
name
=
''
,
tags
=
[],
size
=
"medium"
,
linkstatic
=
0
,
args
=
None
,
linkopts
=
[]):
for
src
in
srcs
:
tf_cuda_cc_test
(
name
=
src_to_test_name
(
src
),
srcs
=
[
src
],
deps
=
deps
,
tags
=
tags
,
size
=
size
,
linkstatic
=
linkstatic
,
args
=
args
,
linkopts
=
linkopts
)
def
_cuda_copts
():
"""Gets the appropriate set of copts for (maybe) CUDA compilation.
If we're doing CUDA compilation, returns copts for our particular CUDA
compiler. If we're not doing CUDA compilation, returns an empty list.
"""
return
cuda_default_copts
()
+
select
({
"//conditions:default"
:
[],
"@local_config_cuda//cuda:using_nvcc"
:
(
[
"-nvcc_options=relaxed-constexpr"
,
"-nvcc_options=ftz=true"
,
]
),
"@local_config_cuda//cuda:using_clang"
:
(
[
"-fcuda-flush-denormals-to-zero"
,
]
),
})
# Build defs for TensorFlow kernels
# When this target is built using --config=cuda, a cc_library is built
# that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
# libraries needed by GPU kernels.
def
tf_gpu_kernel_library
(
srcs
,
copts
=
[],
cuda_copts
=
[],
deps
=
[],
hdrs
=
[],
**
kwargs
):
copts
=
copts
+
_cuda_copts
()
+
if_cuda
(
cuda_copts
)
+
tf_copts
()
native
.
cc_library
(
srcs
=
srcs
,
hdrs
=
hdrs
,
copts
=
copts
,
deps
=
deps
+
if_cuda
([
"@org_tensorflow//tensorflow/core:cuda"
,
"@org_tensorflow//tensorflow/core:gpu_lib"
,
]),
alwayslink
=
1
,
**
kwargs
)
def
tf_cuda_library
(
deps
=
None
,
cuda_deps
=
None
,
copts
=
None
,
**
kwargs
):
"""Generate a cc_library with a conditional set of CUDA dependencies.
When the library is built with --config=cuda:
- both deps and cuda_deps are used as dependencies
- the cuda runtime is added as a dependency (if necessary)
- The library additionally passes -DGOOGLE_CUDA=1 to the list of copts
Args:
- cuda_deps: BUILD dependencies which will be linked if and only if:
'--config=cuda' is passed to the bazel command line.
- deps: dependencies which will always be linked.
- copts: copts always passed to the cc_library.
- kwargs: Any other argument to cc_library.
"""
if
not
deps
:
deps
=
[]
if
not
cuda_deps
:
cuda_deps
=
[]
if
not
copts
:
copts
=
[]
native
.
cc_library
(
deps
=
deps
+
if_cuda
(
cuda_deps
+
[
"@org_tensorflow//tensorflow/core:cuda"
,
"@local_config_cuda//cuda:cuda_headers"
]),
copts
=
copts
+
if_cuda
([
"-DGOOGLE_CUDA=1"
]),
**
kwargs
)
def
tf_kernel_library
(
name
,
prefix
=
None
,
srcs
=
None
,
gpu_srcs
=
None
,
hdrs
=
None
,
deps
=
None
,
alwayslink
=
1
,
copts
=
tf_copts
(),
**
kwargs
):
"""A rule to build a TensorFlow OpKernel.
May either specify srcs/hdrs or prefix. Similar to tf_cuda_library,
but with alwayslink=1 by default. If prefix is specified:
* prefix*.cc (except *.cu.cc) is added to srcs
* prefix*.h (except *.cu.h) is added to hdrs
* prefix*.cu.cc and prefix*.h (including *.cu.h) are added to gpu_srcs.
With the exception that test files are excluded.
For example, with prefix = "cast_op",
* srcs = ["cast_op.cc"]
* hdrs = ["cast_op.h"]
* gpu_srcs = ["cast_op_gpu.cu.cc", "cast_op.h"]
* "cast_op_test.cc" is excluded
With prefix = "cwise_op"
* srcs = ["cwise_op_abs.cc", ..., "cwise_op_tanh.cc"],
* hdrs = ["cwise_ops.h", "cwise_ops_common.h"],
* gpu_srcs = ["cwise_op_gpu_abs.cu.cc", ..., "cwise_op_gpu_tanh.cu.cc",
"cwise_ops.h", "cwise_ops_common.h",
"cwise_ops_gpu_common.cu.h"]
* "cwise_ops_test.cc" is excluded
"""
if
not
srcs
:
srcs
=
[]
if
not
hdrs
:
hdrs
=
[]
if
not
deps
:
deps
=
[]
if
prefix
:
if
native
.
glob
([
prefix
+
"*.cu.cc"
],
exclude
=
[
"*test*"
]):
if
not
gpu_srcs
:
gpu_srcs
=
[]
gpu_srcs
=
gpu_srcs
+
native
.
glob
([
prefix
+
"*.cu.cc"
,
prefix
+
"*.h"
],
exclude
=
[
"*test*"
])
srcs
=
srcs
+
native
.
glob
([
prefix
+
"*.cc"
],
exclude
=
[
"*test*"
,
"*.cu.cc"
])
hdrs
=
hdrs
+
native
.
glob
([
prefix
+
"*.h"
],
exclude
=
[
"*test*"
,
"*.cu.h"
])
cuda_deps
=
[
"@org_tensorflow//tensorflow/core:gpu_lib"
]
if
gpu_srcs
:
for
gpu_src
in
gpu_srcs
:
if
gpu_src
.
endswith
(
".cc"
)
and
not
gpu_src
.
endswith
(
".cu.cc"
):
fail
(
"{} not allowed in gpu_srcs. .cc sources must end with .cu.cc"
.
format
(
gpu_src
))
tf_gpu_kernel_library
(
name
=
name
+
"_gpu"
,
srcs
=
gpu_srcs
,
deps
=
deps
,
**
kwargs
)
cuda_deps
.
extend
([
":"
+
name
+
"_gpu"
])
tf_cuda_library
(
name
=
name
,
srcs
=
srcs
,
hdrs
=
hdrs
,
copts
=
copts
,
cuda_deps
=
cuda_deps
,
linkstatic
=
1
,
# Needed since alwayslink is broken in bazel b/27630669
alwayslink
=
alwayslink
,
deps
=
deps
,
**
kwargs
)
# Bazel rules for building swig files.
def
_py_wrap_cc_impl
(
ctx
):
srcs
=
ctx
.
files
.
srcs
if
len
(
srcs
)
!=
1
:
fail
(
"Exactly one SWIG source file label must be specified."
,
"srcs"
)
module_name
=
ctx
.
attr
.
module_name
src
=
ctx
.
files
.
srcs
[
0
]
inputs
=
depset
([
src
])
inputs
+=
ctx
.
files
.
swig_includes
for
dep
in
ctx
.
attr
.
deps
:
inputs
+=
dep
.
cc
.
transitive_headers
inputs
+=
ctx
.
files
.
_swiglib
inputs
+=
ctx
.
files
.
toolchain_deps
swig_include_dirs
=
depset
(
_get_repository_roots
(
ctx
,
inputs
))
swig_include_dirs
+=
sorted
([
f
.
dirname
for
f
in
ctx
.
files
.
_swiglib
])
args
=
[
"-c++"
,
"-python"
,
"-module"
,
module_name
,
"-o"
,
ctx
.
outputs
.
cc_out
.
path
,
"-outdir"
,
ctx
.
outputs
.
py_out
.
dirname
]
args
+=
[
"-l"
+
f
.
path
for
f
in
ctx
.
files
.
swig_includes
]
args
+=
[
"-I"
+
i
for
i
in
swig_include_dirs
]
args
+=
[
src
.
path
]
outputs
=
[
ctx
.
outputs
.
cc_out
,
ctx
.
outputs
.
py_out
]
ctx
.
action
(
executable
=
ctx
.
executable
.
_swig
,
arguments
=
args
,
inputs
=
list
(
inputs
),
outputs
=
outputs
,
mnemonic
=
"PythonSwig"
,
progress_message
=
"SWIGing "
+
src
.
path
)
return
struct
(
files
=
depset
(
outputs
))
_py_wrap_cc
=
rule
(
attrs
=
{
"srcs"
:
attr
.
label_list
(
mandatory
=
True
,
allow_files
=
True
,
),
"swig_includes"
:
attr
.
label_list
(
cfg
=
"data"
,
allow_files
=
True
,
),
"deps"
:
attr
.
label_list
(
allow_files
=
True
,
providers
=
[
"cc"
],
),
"toolchain_deps"
:
attr
.
label_list
(
allow_files
=
True
,
),
"module_name"
:
attr
.
string
(
mandatory
=
True
),
"py_module_name"
:
attr
.
string
(
mandatory
=
True
),
"_swig"
:
attr
.
label
(
default
=
Label
(
"@swig//:swig"
),
executable
=
True
,
cfg
=
"host"
,
),
"_swiglib"
:
attr
.
label
(
default
=
Label
(
"@swig//:templates"
),
allow_files
=
True
,
),
},
outputs
=
{
"cc_out"
:
"%{module_name}.cc"
,
"py_out"
:
"%{py_module_name}.py"
,
},
implementation
=
_py_wrap_cc_impl
,
)
def
_get_repository_roots
(
ctx
,
files
):
"""Returns abnormal root directories under which files reside.
When running a ctx.action, source files within the main repository are all
relative to the current directory; however, files that are generated or exist
in remote repositories will have their root directory be a subdirectory,
e.g. bazel-out/local-fastbuild/genfiles/external/jpeg_archive. This function
returns the set of these devious directories, ranked and sorted by popularity
in order to hopefully minimize the number of I/O system calls within the
compiler, because includes have quadratic complexity.
"""
result
=
{}
for
f
in
files
:
root
=
f
.
root
.
path
if
root
:
if
root
not
in
result
:
result
[
root
]
=
0
result
[
root
]
-=
1
work
=
f
.
owner
.
workspace_root
if
work
:
if
root
:
root
+=
"/"
root
+=
work
if
root
:
if
root
not
in
result
:
result
[
root
]
=
0
result
[
root
]
-=
1
return
[
k
for
v
,
k
in
sorted
([(
v
,
k
)
for
k
,
v
in
result
.
items
()])]
# Bazel rule for collecting the header files that a target depends on.
def
_transitive_hdrs_impl
(
ctx
):
outputs
=
depset
()
for
dep
in
ctx
.
attr
.
deps
:
outputs
+=
dep
.
cc
.
transitive_headers
return
struct
(
files
=
outputs
)
_transitive_hdrs
=
rule
(
attrs
=
{
"deps"
:
attr
.
label_list
(
allow_files
=
True
,
providers
=
[
"cc"
],
),
},
implementation
=
_transitive_hdrs_impl
,
)
def
transitive_hdrs
(
name
,
deps
=
[],
**
kwargs
):
_transitive_hdrs
(
name
=
name
+
"_gather"
,
deps
=
deps
)
native
.
filegroup
(
name
=
name
,
srcs
=
[
":"
+
name
+
"_gather"
])
# Create a header only library that includes all the headers exported by
# the libraries in deps.
def
cc_header_only_library
(
name
,
deps
=
[],
**
kwargs
):
_transitive_hdrs
(
name
=
name
+
"_gather"
,
deps
=
deps
)
native
.
cc_library
(
name
=
name
,
hdrs
=
[
":"
+
name
+
"_gather"
],
**
kwargs
)
def
tf_custom_op_library_additional_deps
():
return
[
"@protobuf_archive//:protobuf"
,
"//third_party/eigen3"
,
"@org_tensorflow//tensorflow/core:framework_headers_lib"
,
]
# Traverse the dependency graph along the "deps" attribute of the
# target and return a struct with one field called 'tf_collected_deps'.
# tf_collected_deps will be the union of the deps of the current target
# and the tf_collected_deps of the dependencies of this target.
def
_collect_deps_aspect_impl
(
target
,
ctx
):
alldeps
=
depset
()
if
hasattr
(
ctx
.
rule
.
attr
,
"deps"
):
for
dep
in
ctx
.
rule
.
attr
.
deps
:
alldeps
=
alldeps
|
depset
([
dep
.
label
])
if
hasattr
(
dep
,
"tf_collected_deps"
):
alldeps
=
alldeps
|
dep
.
tf_collected_deps
return
struct
(
tf_collected_deps
=
alldeps
)
collect_deps_aspect
=
aspect
(
implementation
=
_collect_deps_aspect_impl
,
attr_aspects
=
[
"deps"
])
def
_dep_label
(
dep
):
label
=
dep
.
label
return
label
.
package
+
":"
+
label
.
name
# This rule checks that the transitive dependencies of targets listed
# in the 'deps' attribute don't depend on the targets listed in
# the 'disallowed_deps' attribute.
def
_check_deps_impl
(
ctx
):
disallowed_deps
=
ctx
.
attr
.
disallowed_deps
for
input_dep
in
ctx
.
attr
.
deps
:
if
not
hasattr
(
input_dep
,
"tf_collected_deps"
):
continue
for
dep
in
input_dep
.
tf_collected_deps
:
for
disallowed_dep
in
disallowed_deps
:
if
dep
==
disallowed_dep
.
label
:
fail
(
_dep_label
(
input_dep
)
+
" cannot depend on "
+
_dep_label
(
disallowed_dep
))
return
struct
()
check_deps
=
rule
(
_check_deps_impl
,
attrs
=
{
"deps"
:
attr
.
label_list
(
aspects
=
[
collect_deps_aspect
],
mandatory
=
True
,
allow_files
=
True
),
"disallowed_deps"
:
attr
.
label_list
(
mandatory
=
True
,
allow_files
=
True
)},
)
# Helper to build a dynamic library (.so) from the sources containing
# implementations of custom ops and kernels.
def
tf_custom_op_library
(
name
,
srcs
=
[],
gpu_srcs
=
[],
deps
=
[]):
cuda_deps
=
[
"@org_tensorflow//tensorflow/core:stream_executor_headers_lib"
,
"@local_config_cuda//cuda:cudart_static"
,
]
deps
=
deps
+
tf_custom_op_library_additional_deps
()
if
gpu_srcs
:
basename
=
name
.
split
(
"."
)[
0
]
native
.
cc_library
(
name
=
basename
+
"_gpu"
,
srcs
=
gpu_srcs
,
copts
=
_cuda_copts
(),
deps
=
deps
+
if_cuda
(
cuda_deps
))
cuda_deps
.
extend
([
":"
+
basename
+
"_gpu"
])
check_deps
(
name
=
name
+
"_check_deps"
,
deps
=
deps
+
if_cuda
(
cuda_deps
),
disallowed_deps
=
[
"@org_tensorflow//tensorflow/core:framework"
,
"@org_tensorflow//tensorflow/core:lib"
])
native
.
cc_binary
(
name
=
name
,
srcs
=
srcs
,
deps
=
deps
+
if_cuda
(
cuda_deps
),
data
=
[
name
+
"_check_deps"
],
copts
=
tf_copts
(),
linkshared
=
1
,
linkopts
=
select
({
"//conditions:default"
:
[
"-lm"
,
],
"@org_tensorflow//tensorflow:darwin"
:
[],
}),
)
def
tf_extension_linkopts
():
return
[]
# No extension link opts
def
tf_extension_copts
():
return
[]
# No extension c opts
def
tf_py_wrap_cc
(
name
,
srcs
,
swig_includes
=
[],
deps
=
[],
copts
=
[],
**
kwargs
):
module_name
=
name
.
split
(
"/"
)[
-
1
]
# Convert a rule name such as foo/bar/baz to foo/bar/_baz.so
# and use that as the name for the rule producing the .so file.
cc_library_name
=
"/"
.
join
(
name
.
split
(
"/"
)[:
-
1
]
+
[
"_"
+
module_name
+
".so"
])
cc_library_pyd_name
=
"/"
.
join
(
name
.
split
(
"/"
)[:
-
1
]
+
[
"_"
+
module_name
+
".pyd"
])
extra_deps
=
[]
_py_wrap_cc
(
name
=
name
+
"_py_wrap"
,
srcs
=
srcs
,
swig_includes
=
swig_includes
,
deps
=
deps
+
extra_deps
,
toolchain_deps
=
[
"//tools/defaults:crosstool"
],
module_name
=
module_name
,
py_module_name
=
name
)
extra_linkopts
=
select
({
"@local_config_cuda//cuda:darwin"
:
[
"-Wl,-exported_symbols_list"
,
"@org_tensorflow//tensorflow:tf_exported_symbols.lds"
],
"@org_tensorflow//tensorflow:windows"
:
[
],
"//conditions:default"
:
[
"-Wl,--version-script"
,
"@org_tensorflow//tensorflow:tf_version_script.lds"
]})
extra_deps
+=
select
({
"@local_config_cuda//cuda:darwin"
:
[
"@org_tensorflow//tensorflow:tf_exported_symbols.lds"
],
"@org_tensorflow//tensorflow:windows"
:
[
],
"//conditions:default"
:
[
"@org_tensorflow//tensorflow:tf_version_script.lds"
]
})
native
.
cc_binary
(
name
=
cc_library_name
,
srcs
=
[
module_name
+
".cc"
],
copts
=
(
copts
+
[
"-Wno-self-assign"
,
"-Wno-sign-compare"
,
"-Wno-write-strings"
]
+
tf_extension_copts
()),
linkopts
=
tf_extension_linkopts
()
+
extra_linkopts
,
linkstatic
=
1
,
linkshared
=
1
,
deps
=
deps
+
extra_deps
)
native
.
genrule
(
name
=
"gen_"
+
cc_library_pyd_name
,
srcs
=
[
":"
+
cc_library_name
],
outs
=
[
cc_library_pyd_name
],
cmd
=
"cp $< $@"
,
)
native
.
py_library
(
name
=
name
,
srcs
=
[
":"
+
name
+
".py"
],
srcs_version
=
"PY2AND3"
,
data
=
select
({
"@org_tensorflow//tensorflow:windows"
:
[
":"
+
cc_library_pyd_name
],
"//conditions:default"
:
[
":"
+
cc_library_name
],
}))
def
py_test
(
deps
=
[],
**
kwargs
):
native
.
py_test
(
deps
=
select
({
"//conditions:default"
:
deps
,
"@org_tensorflow//tensorflow:no_tensorflow_py_deps"
:
[]
}),
**
kwargs
)
def
tf_py_test
(
name
,
srcs
,
size
=
"medium"
,
data
=
[],
main
=
None
,
args
=
[],
tags
=
[],
shard_count
=
1
,
additional_deps
=
[],
flaky
=
0
):
native
.
py_test
(
name
=
name
,
size
=
size
,
srcs
=
srcs
,
main
=
main
,
args
=
args
,
tags
=
tags
,
visibility
=
[
"@org_tensorflow//tensorflow:internal"
],
shard_count
=
shard_count
,
data
=
data
,
deps
=
select
({
"//conditions:default"
:
[
"@org_tensorflow//tensorflow/python:extra_py_tests_deps"
,
"@org_tensorflow//tensorflow/python:gradient_checker"
,
]
+
additional_deps
,
"@org_tensorflow//tensorflow:no_tensorflow_py_deps"
:
[]
}),
flaky
=
flaky
,
srcs_version
=
"PY2AND3"
)
def
cuda_py_test
(
name
,
srcs
,
size
=
"medium"
,
data
=
[],
main
=
None
,
args
=
[],
shard_count
=
1
,
additional_deps
=
[],
tags
=
[],
flaky
=
0
):
test_tags
=
tags
+
tf_cuda_tests_tags
()
tf_py_test
(
name
=
name
,
size
=
size
,
srcs
=
srcs
,
data
=
data
,
main
=
main
,
args
=
args
,
tags
=
test_tags
,
shard_count
=
shard_count
,
additional_deps
=
additional_deps
,
flaky
=
flaky
)
def sycl_py_test(name,
                 srcs,
                 size = "medium",
                 data = [],
                 main = None,
                 args = [],
                 shard_count = 1,
                 additional_deps = [],
                 tags = [],
                 flaky = 0):
  test_tags = tags + tf_sycl_tests_tags()
  tf_py_test(
      name = name,
      size = size,
      srcs = srcs,
      data = data,
      main = main,
      args = args,
      tags = test_tags,
      shard_count = shard_count,
      additional_deps = additional_deps,
      flaky = flaky,
  )
def py_tests(name,
             srcs,
             size = "medium",
             additional_deps = [],
             data = [],
             tags = [],
             shard_count = 1,
             prefix = ""):
  for src in srcs:
    test_name = src.split("/")[-1].split(".")[0]
    if prefix:
      test_name = "%s_%s" % (prefix, test_name)
    tf_py_test(
        name = test_name,
        size = size,
        srcs = [src],
        main = src,
        tags = tags,
        shard_count = shard_count,
        data = data,
        additional_deps = additional_deps,
    )
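A brief illustration of the name derivation above, using hypothetical paths: each source file becomes its own tf_py_test, named from the file's basename plus the optional prefix.

# Hypothetical call (illustrative names only):
py_tests(
    name = "all_tests",
    srcs = ["python/foo_test.py"],
    prefix = "gpu",  # produces a tf_py_test named "gpu_foo_test"
)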
def cuda_py_tests(name,
                  srcs,
                  size = "medium",
                  additional_deps = [],
                  data = [],
                  shard_count = 1,
                  tags = [],
                  prefix = ""):
  test_tags = tags + tf_cuda_tests_tags()
  py_tests(
      name = name,
      size = size,
      srcs = srcs,
      additional_deps = additional_deps,
      data = data,
      tags = test_tags,
      shard_count = shard_count,
      prefix = prefix,
  )
# Creates a genrule named <name> for running tools/proto_text's generator to
# make the proto_text functions, for the protos passed in <srcs>.
#
# Return a struct with fields (hdrs, srcs) containing the names of the
# generated files.
def tf_generate_proto_text_sources(name, srcs_relative_dir, srcs):
  out_hdrs = (
      [p.replace(".proto", ".pb_text.h") for p in srcs] +
      [p.replace(".proto", ".pb_text-impl.h") for p in srcs])
  out_srcs = [p.replace(".proto", ".pb_text.cc") for p in srcs]
  native.genrule(
      name = name,
      srcs = srcs + ["@org_tensorflow//tensorflow/tools/proto_text:placeholder.txt"],
      outs = out_hdrs + out_srcs,
      cmd = "$(location //tensorflow/tools/proto_text:gen_proto_text_functions) " +
            "$(@D) " + srcs_relative_dir + " $(SRCS)",
      tools = ["@org_tensorflow//tensorflow/tools/proto_text:gen_proto_text_functions"],
  )
  return struct(hdrs = out_hdrs, srcs = out_srcs)
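A hedged usage sketch of the returned struct; the proto path and the consuming library below are hypothetical. The hdrs/srcs fields feed directly into a library that compiles the generated proto_text functions, typically from within another .bzl macro.

# Hypothetical caller (illustrative names only):
proto_text = tf_generate_proto_text_sources(
    name = "example_proto_text",
    srcs_relative_dir = "example/",
    srcs = ["example/config.proto"],
)
native.cc_library(
    name = "example_proto_text_lib",
    srcs = proto_text.srcs,
    hdrs = proto_text.hdrs,
)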
def tf_genrule_cmd_append_to_srcs(to_append):
  return ("cat $(SRCS) > $(@) && " +
          "echo >> $(@) && " +
          "echo " + to_append + " >> $(@)")
def tf_version_info_genrule():
  native.genrule(
      name = "version_info_gen",
      srcs = [
          "@org_tensorflow//tensorflow/tools/git:gen/spec.json",
          "@org_tensorflow//tensorflow/tools/git:gen/head",
          "@org_tensorflow//tensorflow/tools/git:gen/branch_ref",
      ],
      outs = ["util/version_info.cc"],
      cmd = "$(location //tensorflow/tools/git:gen_git_source.py) --generate $(SRCS) \"$@\"",
      local = 1,
      tools = ["@org_tensorflow//tensorflow/tools/git:gen_git_source.py"],
  )
def cc_library_with_android_deps(deps, android_deps = [], common_deps = [], **kwargs):
  deps = if_not_android(deps) + if_android(android_deps) + common_deps
  native.cc_library(deps = deps, **kwargs)
research/syntaxnet/dragnn/tools/BUILD
View file @
80178fc6
@@ -9,9 +9,10 @@ py_binary(
     name = "conll_checkpoint_converter",
     srcs = ["conll_checkpoint_converter.py"],
     deps = [
-        "//dragnn/protos:spec_py_pb2",
+        "//dragnn/protos:spec_pb2_py",
         "//dragnn/python:dragnn_model_saver_lib",
         "//dragnn/python:spec_builder",
+        "@absl_py//absl/flags",
         "@org_tensorflow//tensorflow:tensorflow_py",
         "@org_tensorflow//tensorflow/core:protos_all_py",
     ],
@@ -28,6 +29,7 @@ py_binary(
         ":components",
         "//dragnn/python:evaluation",
         "//dragnn/python:spec_builder",
+        "@absl_py//absl/flags",
     ],
 )
@@ -43,6 +45,7 @@ py_binary(
         "//dragnn/python:dragnn_ops",
         "//dragnn/python:evaluation",
         "//dragnn/python:spec_builder",
+        "@absl_py//absl/flags",
     ],
 )
@@ -58,6 +61,7 @@ py_binary(
         "//dragnn/python:dragnn_ops",
         "//dragnn/python:evaluation",
         "//dragnn/python:spec_builder",
+        "@absl_py//absl/flags",
     ],
 )
@@ -73,6 +77,7 @@ py_binary(
         "//dragnn/python:dragnn_ops",
         "//dragnn/python:evaluation",
         "//dragnn/python:spec_builder",
+        "@absl_py//absl/flags",
     ],
 )
@@ -86,7 +91,8 @@ py_binary(
         "//dragnn/python:lexicon",
         "//dragnn/python:spec_builder",
         "//dragnn/python:trainer_lib",
-        "//syntaxnet:task_spec_py_pb2",
+        "//syntaxnet:task_spec_pb2_py",
+        "@absl_py//absl/flags",
     ],
 )
@@ -100,7 +106,9 @@ py_binary(
         "//dragnn/python:lexicon",
         "//dragnn/python:spec_builder",
         "//dragnn/python:trainer_lib",
-        "//syntaxnet:task_spec_py_pb2",
+        "//syntaxnet:task_spec_pb2_py",
+        "@absl_py//absl:app",
+        "@absl_py//absl/flags",
     ],
 )
@@ -110,13 +118,14 @@ py_binary(
     deps = [
         "//dragnn/core:dragnn_bulk_ops",
         "//dragnn/core:dragnn_ops",
-        "//dragnn/protos:spec_py_pb2",
+        "//dragnn/protos:spec_pb2_py",
         "//dragnn/python:evaluation",
         "//dragnn/python:graph_builder",
         "//dragnn/python:sentence_io",
         "//dragnn/python:spec_builder",
         "//dragnn/python:trainer_lib",
         "//syntaxnet:parser_ops",
+        "@absl_py//absl/flags",
         "@org_tensorflow//tensorflow:tensorflow_py",
         "@org_tensorflow//tensorflow/core:protos_all_py",
     ],
@@ -128,7 +137,7 @@ py_binary(
     deps = [
         "//dragnn/core:dragnn_bulk_ops",
         "//dragnn/core:dragnn_ops",
-        "//dragnn/protos:spec_py_pb2",
+        "//dragnn/protos:spec_pb2_py",
         "//dragnn/python:dragnn_ops",
         "//dragnn/python:evaluation",
         "//dragnn/python:graph_builder",
@@ -136,9 +145,11 @@ py_binary(
         "//dragnn/python:spec_builder",
         "//dragnn/python:trainer_lib",
         "//syntaxnet:parser_ops",
-        "//syntaxnet:sentence_py_pb2",
-        "//syntaxnet:task_spec_py_pb2",
+        "//syntaxnet:sentence_pb2_py",
+        "//syntaxnet:task_spec_pb2_py",
         "//syntaxnet/util:check",
+        "@absl_py//absl:app",
+        "@absl_py//absl/flags",
         "@org_tensorflow//tensorflow:tensorflow_py",
         "@org_tensorflow//tensorflow/core:protos_all_py",
     ],
@@ -190,11 +201,11 @@ py_library(
     deps = [
         "//dragnn/core:dragnn_bulk_ops",
         "//dragnn/core:dragnn_ops",
-        "//dragnn/protos:spec_py_pb2",
+        "//dragnn/protos:spec_pb2_py",
         "//dragnn/python:graph_builder",
         "//dragnn/python:sentence_io",
         "//syntaxnet:parser_ops",
-        "//syntaxnet:sentence_py_pb2",
+        "//syntaxnet:sentence_pb2_py",
         "@org_tensorflow//tensorflow:tensorflow_py",
         "@org_tensorflow//tensorflow/core:protos_all_py",
     ],
@@ -215,6 +226,6 @@ py_library(
         "//dragnn/python:spec_builder",
         "//dragnn/python:trainer_lib",
         "//dragnn/python:visualization",
-        "//syntaxnet:task_spec_py_pb2",
+        "//syntaxnet:task_spec_pb2_py",
     ],
 )
research/syntaxnet/dragnn/tools/conll_checkpoint_converter.py
View file @
80178fc6
@@ -25,6 +25,7 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+from absl import flags
 import tensorflow as tf
 
 from google.protobuf import text_format
@@ -32,7 +33,6 @@ from dragnn.protos import spec_pb2
 from dragnn.python import dragnn_model_saver_lib as saver_lib
 from dragnn.python import spec_builder
 
-flags = tf.app.flags
 FLAGS = flags.FLAGS
 
 flags.DEFINE_string('master_spec', None, 'Path to task context with '