ModelZoo / ResNet50_tensorflow / Commits / 4364390a

Commit 4364390a, authored Nov 13, 2017 by Ivan Bogatyy, committed by calberti, Nov 13, 2017

Release DRAGNN bulk networks (#2785)

* Release DRAGNN bulk networks

parent 638fd759
Showing 20 changed files with 1798 additions and 226 deletions (+1798, -226)
research/syntaxnet/dragnn/python/bulk_component_test.py          (+11, -3)
research/syntaxnet/dragnn/python/component.py                    (+19, -10)
research/syntaxnet/dragnn/python/composite_optimizer.py          (+16, -19)
research/syntaxnet/dragnn/python/composite_optimizer_test.py     (+12, -10)
research/syntaxnet/dragnn/python/dragnn_model_saver.py           (+85, -0)
research/syntaxnet/dragnn/python/dragnn_model_saver_lib.py       (+244, -0)
research/syntaxnet/dragnn/python/dragnn_model_saver_lib_test.py  (+131, -0)
research/syntaxnet/dragnn/python/dragnn_ops.py                   (+5, -5)
research/syntaxnet/dragnn/python/graph_builder.py                (+80, -13)
research/syntaxnet/dragnn/python/graph_builder_test.py           (+66, -16)
research/syntaxnet/dragnn/python/lexicon.py                      (+2, -1)
research/syntaxnet/dragnn/python/lexicon_test.py                 (+8, -6)
research/syntaxnet/dragnn/python/network_units.py                (+642, -126)
research/syntaxnet/dragnn/python/network_units_test.py           (+266, -3)
research/syntaxnet/dragnn/python/perf_test_data/master-spec      (+171, -0)
research/syntaxnet/dragnn/python/perf_test_data/params           (+0, -0)
research/syntaxnet/dragnn/python/perf_test_data/sample_docs.pickle  (+0, -0)
research/syntaxnet/dragnn/python/render_spec_with_graphviz_test.py  (+1, -1)
research/syntaxnet/dragnn/python/sentence_io.py                  (+30, -7)
research/syntaxnet/dragnn/python/sentence_io_test.py             (+9, -6)
research/syntaxnet/dragnn/python/bulk_component_test.py

@@ -41,9 +41,6 @@ from dragnn.python import dragnn_ops
from dragnn.python import network_units
from syntaxnet import sentence_pb2
import dragnn.python.load_dragnn_cc_impl
import syntaxnet.load_parser_ops

FLAGS = tf.app.flags.FLAGS

@@ -473,6 +470,17 @@ class BulkComponentTest(test_util.TensorFlowTestCase):
        [2], [-1], [-1], [-1], [2], [3], [-1], [-1]])

  def testBuildLossFailsOnNoExamples(self):
    with tf.Graph().as_default():
      logits = tf.constant([[0.5], [-0.5], [0.5], [-0.5]])
      gold = tf.constant([-1, -1, -1, -1])
      result = bulk_component.build_cross_entropy_loss(logits, gold)

      # Expect loss computation to generate a runtime error due to the gold
      # tensor containing no valid examples.
      with self.test_session() as sess:
        with self.assertRaises(tf.errors.InvalidArgumentError):
          sess.run(result)


if __name__ == '__main__':
  googletest.main()
research/syntaxnet/dragnn/python/component.py

@@ -46,9 +46,8 @@ class MasterState(object):
  """Simple utility to encapsulate tensors associated with the master state.

  Attributes:
    handle: string tensor handle to the underlying nlp_saft::dragnn::MasterState
    current_batch_size: int tensor containing the batch size following the most
      recent MasterState::Reset().
    handle: string tensor handle to the underlying ComputeSession.
    current_batch_size: int tensor containing the current batch size.
  """

  def __init__(self, handle, current_batch_size):

@@ -390,7 +389,11 @@ class DynamicComponentBuilder(ComponentBuilderBase):
      correctly predicted actions, and the total number of actions.
    """
    logging.info('Building component: %s', self.spec.name)
    with tf.control_dependencies(
        [tf.assert_equal(self.training_beam_size, 1)]):
    # Add 0 to training_beam_size to disable eager static evaluation.
    # This is possible because tensorflow's constant_value does not
    # propagate arithmetic operations.
    with tf.control_dependencies(
        [tf.assert_equal(self.training_beam_size + 0, 1)]):
      stride = state.current_batch_size * self.training_beam_size

      cost = tf.constant(0.)

@@ -462,10 +465,10 @@ class DynamicComponentBuilder(ComponentBuilderBase):
      # Saves completed arrays and return final state and cost.
      state.handle = output[0]
      cost = output[1]
      correct = output[2]
      total = output[3]
      arrays = output[4:]
      cost = output[1]

      # Store handles to the final output for use in subsequent tasks.
      network_state = network_states[self.name]

@@ -475,6 +478,9 @@ class DynamicComponentBuilder(ComponentBuilderBase):
            array=arrays[index])

      # Normalize the objective by the total # of steps taken.
      # Note: Total could be zero by a number of reasons, including:
      # * Oracle labels not being emitted.
      # * No steps being taken if component is terminal at the start of a batch.
      with tf.control_dependencies([tf.assert_greater(total, 0)]):
        cost /= tf.to_float(total)

@@ -524,11 +530,14 @@ class DynamicComponentBuilder(ComponentBuilderBase):
          during_training=during_training)
      next_arrays = update_tensor_arrays(network_tensors, arrays)
      with tf.control_dependencies([x.flow for x in next_arrays]):
        logits = self.network.get_logits(network_tensors)
        logits = tf.cond(self.locally_normalize,
                         lambda: tf.nn.log_softmax(logits), lambda: logits)
        handle = dragnn_ops.advance_from_prediction(
            handle, logits, component=self.name)
        if self.num_actions == 1:
          # deterministic; take oracle transition
          handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
        else:
          # predict next transition using network logits
          logits = self.network.get_logits(network_tensors)
          logits = tf.cond(self.locally_normalize,
                           lambda: tf.nn.log_softmax(logits), lambda: logits)
          handle = dragnn_ops.advance_from_prediction(
              handle, logits, component=self.name)
      return [handle] + next_arrays

    # Create the TensorArray's to store activations for downstream/recurrent
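Aside: the `+ 0` in the new assert exists only to keep the check in the graph; the added comment states that TensorFlow's constant_value does not propagate arithmetic operations. A minimal sketch of the behavior this relies on, written against the TF 1.x API used in this repo (illustrative only, not part of the commit):

  import tensorflow as tf
  from tensorflow.python.framework import tensor_util

  beam_size = tf.constant(1)
  print(tensor_util.constant_value(beam_size))      # 1: value is statically known
  print(tensor_util.constant_value(beam_size + 0))  # None: arithmetic is not folded,
                                                    # so the assert is evaluated at run time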
research/syntaxnet/dragnn/python/composite_optimizer.py

@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""An optimizer that switches between several methods."""

import functools
import tensorflow as tf
from tensorflow.python.training import optimizer

@@ -28,7 +29,7 @@ class CompositeOptimizer(optimizer.Optimizer):
               optimizer2,
               switch,
               use_locking=False,
               name='Composite'):
               name="Composite"):
    """Construct a new Composite optimizer.

    Args:

@@ -47,24 +48,20 @@ class CompositeOptimizer(optimizer.Optimizer):
    self._switch = switch

  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    return tf.cond(self._switch,
                   lambda: self._optimizer1.apply_gradients(
                       grads_and_vars, global_step, name),
                   lambda: self._optimizer2.apply_gradients(
                       grads_and_vars, global_step, name))
    return tf.cond(self._switch,
                   functools.partial(self._optimizer1.apply_gradients,
                                     grads_and_vars, global_step, name),
                   functools.partial(self._optimizer2.apply_gradients,
                                     grads_and_vars, global_step, name))

  def get_slot(self, var, name):
    slot1 = self._optimizer1.get_slot(var, name)
    slot2 = self._optimizer2.get_slot(var, name)
    if slot1 and slot2:
      raise LookupError('Slot named %s for variable %s populated for both '
                        'optimizers' % (name, var.name))
    return slot1 or slot2
    if name.startswith("c1-"):
      return self._optimizer1.get_slot(var, name[3:])
    else:
      return self._optimizer2.get_slot(var, name[3:])

  def get_slot_names(self):
    return sorted(self._optimizer1.get_slot_names() +
                  self._optimizer2.get_slot_names())
    opt1_names = self._optimizer1.get_slot_names()
    opt2_names = self._optimizer2.get_slot_names()
    return sorted(["c1-{}".format(name) for name in opt1_names] +
                  ["c2-{}".format(name) for name in opt2_names])
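After this change, slot names reported by CompositeOptimizer carry a "c1-"/"c2-" prefix identifying the owning sub-optimizer, and get_slot strips the prefix before delegating. A minimal usage sketch, assuming standard TF 1.x Adam and Momentum optimizers (variable names and values here are illustrative, not from the commit):

  import tensorflow as tf
  from dragnn.python import composite_optimizer

  # Illustrative setup: use Adam for the first 100 steps, then Momentum.
  step = tf.get_variable('step', [], dtype=tf.int32,
                         initializer=tf.zeros_initializer(), trainable=False)
  weights = tf.get_variable('weights', [10], dtype=tf.float32)
  loss = tf.reduce_sum(weights * weights)

  optimizer = composite_optimizer.CompositeOptimizer(
      tf.train.AdamOptimizer(0.05),
      tf.train.MomentumOptimizer(0.05, 0.5),
      switch=tf.less(step, 100))
  train_op = optimizer.minimize(loss, global_step=step)

  # Slot names are now namespaced by sub-optimizer; get_slot strips the prefix.
  print(optimizer.get_slot_names())             # e.g. ['c1-m', 'c1-v', 'c2-momentum']
  adam_m = optimizer.get_slot(weights, 'c1-m')  # delegates to optimizer1 as slot 'm'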
research/syntaxnet/dragnn/python/composite_optimizer_test.py

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for CompositeOptimizer."""

@@ -99,8 +98,8 @@ class CompositeOptimizerTest(test_util.TensorFlowTestCase):
        optimizer1 = MockAdamOptimizer(0.05)
        optimizer2 = MockMomentumOptimizer(0.05, 0.5)
        switch = tf.less(step, 100)
        optimizer = composite_optimizer.CompositeOptimizer(
            optimizer1, optimizer2, switch)
        optimizer = composite_optimizer.CompositeOptimizer(
            optimizer1, optimizer2, switch)
        train_op = optimizer.minimize(loss)
        sess.run(tf.global_variables_initializer())

@@ -111,16 +110,19 @@ class CompositeOptimizerTest(test_util.TensorFlowTestCase):
          sess.run(train_op)
          sess.run(tf.assign_add(step, 1))

          slot_names = optimizer.get_slot_names()
          self.assertItemsEqual(
              slot_names,
              ["m", "v", "momentum", "adam_counter", "momentum_counter"])
          adam_counter = sess.run(optimizer.get_slot(w, "adam_counter"))
          momentum_counter = sess.run(optimizer.get_slot(w, "momentum_counter"))
          adam_slots = ["c1-m", "c1-v", "c1-adam_counter"]
          momentum_slots = ["c2-momentum", "c2-momentum_counter"]
          self.assertItemsEqual(slot_names, adam_slots + momentum_slots)
          adam_counter = sess.run(optimizer.get_slot(w, "c1-adam_counter"))
          momentum_counter = sess.run(
              optimizer.get_slot(w, "c2-momentum_counter"))

          self.assertEqual(adam_counter, min(iteration + 1, 100))
          self.assertEqual(momentum_counter, max(iteration - 99, 0))

          if iteration % 20 == 0:
            logging.info("%d %s %d %d", iteration,
                         sess.run([switch, step, w, b]),
                         adam_counter, momentum_counter)
            logging.info("%d %s %d %d", iteration,
                         sess.run([switch, step, w, b]),
                         adam_counter, momentum_counter)


if __name__ == "__main__":
  googletest.main()
research/syntaxnet/dragnn/python/dragnn_model_saver.py  (new file, mode 100644)

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Converter for DRAGNN checkpoint+master-spec files to TF SavedModels.

This script loads a DRAGNN model from a checkpoint and master-spec and saves it
to a TF SavedModel checkpoint. The checkpoint and master-spec together must
form a complete model - see the conll_checkpoint_converter.py for an example
of how to convert CONLL checkpoints, since they are not complete.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from google.protobuf import text_format
from dragnn.protos import spec_pb2
from dragnn.python import dragnn_model_saver_lib as saver_lib

flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string('master_spec', None, 'Path to task context with '
                    'inputs and parameters for feature extractors.')
flags.DEFINE_string('params_path', None, 'Path to trained model parameters.')
flags.DEFINE_string('export_path', '', 'Output path for exported servo model.')
flags.DEFINE_bool('export_moving_averages', False,
                  'Whether to export the moving average parameters.')


def export(master_spec_path, params_path, export_path, export_moving_averages):
  """Restores a model and exports it in SavedModel form.

  This method loads a graph specified by the spec at master_spec_path and the
  params in params_path. It then saves the model in SavedModel format to the
  location specified in export_path.

  Args:
    master_spec_path: Path to a proto-text master spec.
    params_path: Path to the parameters file to export.
    export_path: Path to export the SavedModel to.
    export_moving_averages: Whether to export the moving average parameters.
  """
  graph = tf.Graph()
  master_spec = spec_pb2.MasterSpec()
  with tf.gfile.FastGFile(master_spec_path) as fin:
    text_format.Parse(fin.read(), master_spec)

  # Remove '/' if it exists at the end of the export path, ensuring that
  # path utils work correctly.
  stripped_path = export_path.rstrip('/')
  saver_lib.clean_output_paths(stripped_path)

  short_to_original = saver_lib.shorten_resource_paths(master_spec)
  saver_lib.export_master_spec(master_spec, graph)
  saver_lib.export_to_graph(master_spec, params_path, stripped_path, graph,
                            export_moving_averages)
  saver_lib.export_assets(master_spec, short_to_original, stripped_path)


def main(unused_argv):
  # Run the exporter.
  export(FLAGS.master_spec, FLAGS.params_path, FLAGS.export_path,
         FLAGS.export_moving_averages)
  tf.logging.info('Export complete.')


if __name__ == '__main__':
  tf.app.run()
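A sketch of driving this exporter directly from Python, mirroring the flags defined above; all paths are placeholders, not from the commit:

  from dragnn.python import dragnn_model_saver

  dragnn_model_saver.export(
      master_spec_path='/tmp/dragnn_model/master-spec',   # proto-text MasterSpec
      params_path='/tmp/dragnn_model/params',             # trained checkpoint
      export_path='/tmp/dragnn_savedmodel',               # SavedModel output dir
      export_moving_averages=False)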
research/syntaxnet/dragnn/python/dragnn_model_saver_lib.py  (new file, mode 100644)

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""A program to export a DRAGNN model via SavedModel."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tempfile
import tensorflow as tf

from google.protobuf import text_format
from dragnn.protos import spec_pb2
from dragnn.python import graph_builder

# The saved model tags to export. The same set of tags must be specified when
# loading the saved model.
_SAVED_MODEL_TAGS = [tf.saved_model.tag_constants.SERVING]


def clean_output_paths(stripped_path):
  """Ensures that the output path is cleaned and ready to receive a model."""
  # If the export path's directory doesn't exist, create it.
  export_directory = os.path.dirname(stripped_path)
  if not tf.gfile.Exists(export_directory):
    tf.logging.info('%s does not exist; creating it.' % export_directory)
    tf.gfile.MakeDirs(export_directory)

  # Remove any existing model on this export path, since exporting will fail
  # if the model directory already exists.
  if tf.gfile.Exists(stripped_path):
    tf.logging.info('%s already exists; deleting it.' % stripped_path)
    tf.gfile.DeleteRecursively(stripped_path)


def shorten_resource_paths(master_spec):
  """Shortens the resource file paths in a MasterSpec.

  Replaces resource paths in the MasterSpec with shortened paths and builds a
  mapping from the shortened path to the original path. Note that shortened
  paths are relative to the 'assets.extra' directory of the SavedModel. Also
  removes resources from FixedFeatureChannel, since they are not exported.

  NB: The format of the shortened resource paths should be considered an
  implementation detail and may change.

  Args:
    master_spec: MasterSpec proto to sanitize.

  Returns:
    Dict mapping from shortened resource path to original resource path.
  """
  for component_spec in master_spec.component:
    for feature_spec in component_spec.fixed_feature:
      feature_spec.ClearField('pretrained_embedding_matrix')
      feature_spec.ClearField('vocab')

  shortened_to_original = {}
  original_to_shortened = {}
  for component_index, component_spec in enumerate(master_spec.component):
    component_name = 'component_{}_{}'.format(component_index,
                                              component_spec.name)
    for resource_index, resource_spec in enumerate(component_spec.resource):
      resource_name = 'resource_{}_{}'.format(resource_index,
                                              resource_spec.name)
      for part_index, part in enumerate(resource_spec.part):
        part_name = 'part_{}'.format(part_index)
        shortened_path = os.path.join('resources', component_name,
                                      resource_name, part_name)
        if part.file_pattern not in original_to_shortened:
          shortened_to_original[shortened_path] = part.file_pattern
          original_to_shortened[part.file_pattern] = shortened_path
        part.file_pattern = original_to_shortened[part.file_pattern]
  return shortened_to_original


def export_master_spec(master_spec, external_graph):
  """Exports a MasterSpec.

  Args:
    master_spec: MasterSpec proto.
    external_graph: tf.Graph that will be used to export the SavedModel.
  """
  # Implementation note: We can't export the original MasterSpec file directly
  # because it uses short paths. We also can't replace the original MasterSpec
  # file with the new version, because the file may have other users.
  # Write the new spec to a temp file and export it. The basename will be
  # exported in the SavedModel, so use mkdtemp() with a fixed basename.
  master_spec_path = os.path.join(tempfile.mkdtemp(), 'master_spec')
  with tf.gfile.FastGFile(master_spec_path, 'w') as fout:
    fout.write(text_format.MessageToString(master_spec))

  with external_graph.as_default():
    asset_file_tensor = tf.constant(
        master_spec_path, name='master_spec_filepath')
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file_tensor)


def export_assets(master_spec, shortened_to_original, saved_model_path):
  """Exports the assets in a master_spec into a SavedModel directory.

  This method exports a master_spec and associated files into the SavedModel's
  'assets.extra' directory (which is unmanaged). All resources are added to the
  'assets.extra' directory using sanitized paths. The master spec itself is
  located at the base of the assets.extra directory.

  NB: Only exports resource files in MasterSpec.component.resource, not the
  embedding init resources in FixedFeatureChannel.

  Args:
    master_spec: Proto master spec.
    shortened_to_original: Mapping returned by shorten_resource_paths().
    saved_model_path: Path to an already-created SavedModel directory.
  """
  if not tf.gfile.Exists(saved_model_path):
    tf.logging.fatal('Unable to export assets - directory %s does not exist!' %
                     saved_model_path)

  asset_dir = os.path.join(saved_model_path, 'assets.extra')
  tf.logging.info('Exporting assets to model at %s' % asset_dir)

  # First, write the MasterSpec that will be used to export the data.
  tf.gfile.MakeDirs(asset_dir)
  with tf.gfile.FastGFile(os.path.join(asset_dir, 'master_spec'),
                          'w') as out_file:
    out_file.write(text_format.MessageToString(master_spec))

  # Then, copy all the asset files.
  for component_spec in master_spec.component:
    for resource_spec in component_spec.resource:
      tf.logging.info('Copying assets for resource %s/%s.' %
                      (component_spec.name, resource_spec.name))
      for part in resource_spec.part:
        original_file = shortened_to_original[part.file_pattern]
        new_file = os.path.join(asset_dir, part.file_pattern)
        tf.logging.info('Asset %s was renamed to %s.' % (original_file,
                                                         new_file))
        if tf.gfile.Exists(new_file):
          tf.logging.info('%s already exists, skipping copy.' % (new_file))
        else:
          new_dir = os.path.dirname(new_file)
          tf.gfile.MakeDirs(new_dir)
          tf.logging.info('Copying %s to %s' % (original_file, new_dir))
          tf.gfile.Copy(original_file, new_file, overwrite=True)
  tf.logging.info('Asset export complete.')


def export_to_graph(master_spec,
                    params_path,
                    export_path,
                    external_graph,
                    export_moving_averages,
                    signature_name='model'):
  """Restores a model and exports it in SavedModel form.

  This method loads a graph specified by the master_spec and the params in
  params_path into the graph given in external_graph. It then saves the model
  in SavedModel format to the location specified in export_path.

  Args:
    master_spec: Proto master spec.
    params_path: Path to the parameters file to export.
    export_path: Path to export the SavedModel to.
    external_graph: A tf.Graph() object to build the graph inside.
    export_moving_averages: Whether to export the moving average parameters.
    signature_name: Name of the signature to insert.
  """
  tf.logging.info(
      'Exporting graph with signature_name "%s" and use_moving_averages = %s' %
      (signature_name, export_moving_averages))

  tf.logging.info('Building the graph')
  with external_graph.as_default(), tf.device('/device:CPU:0'):
    hyperparam_config = spec_pb2.GridPoint()
    hyperparam_config.use_moving_average = export_moving_averages
    builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
    post_restore_hook = builder.build_post_restore_hook()
    annotation = builder.add_annotation()
    builder.add_saver()

  # Resets session.
  session_config = tf.ConfigProto(
      log_device_placement=False,
      intra_op_parallelism_threads=10,
      inter_op_parallelism_threads=10)

  with tf.Session(graph=external_graph, config=session_config) as session:
    tf.logging.info('Initializing variables...')
    session.run(tf.global_variables_initializer())

    tf.logging.info('Loading params...')
    session.run('save/restore_all', {'save/Const:0': params_path})

    tf.logging.info('Saving.')
    with tf.device('/device:CPU:0'):
      saved_model_builder = tf.saved_model.builder.SavedModelBuilder(
          export_path)
      signature_map = {
          signature_name:
              tf.saved_model.signature_def_utils.build_signature_def(
                  inputs={
                      'inputs':
                          tf.saved_model.utils.build_tensor_info(
                              annotation['input_batch'])
                  },
                  outputs={
                      'annotations':
                          tf.saved_model.utils.build_tensor_info(
                              annotation['annotations'])
                  },
                  method_name=tf.saved_model.signature_constants.
                  PREDICT_METHOD_NAME),
      }

      tf.logging.info('Input is: %s', annotation['input_batch'].name)
      tf.logging.info('Output is: %s', annotation['annotations'].name)

      saved_model_builder.add_meta_graph_and_variables(
          session,
          tags=_SAVED_MODEL_TAGS,
          legacy_init_op=tf.group(
              post_restore_hook,
              builder.build_warmup_graph(
                  tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)[0])),
          signature_def_map=signature_map,
          assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS))

    saved_model_builder.save()
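Once exported, the model can be loaded back through the standard SavedModel loader using the same SERVING tag and the 'model' signature built above. A minimal sketch, assuming an export path from the previous step (the path and the sentence batch are placeholders):

  import tensorflow as tf

  export_path = '/tmp/dragnn_savedmodel'  # assumed output of export_to_graph()

  with tf.Session(graph=tf.Graph()) as sess:
    # The same tags used at export time (_SAVED_MODEL_TAGS) must be supplied here.
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_path)

    # Look up the tensors registered under the 'model' signature.
    signature = meta_graph.signature_def['model']
    input_name = signature.inputs['inputs'].name
    output_name = signature.outputs['annotations'].name

    # serialized_sentences would be a list of serialized syntaxnet Sentence protos.
    serialized_sentences = []
    annotations = sess.run(output_name, {input_name: serialized_sentences})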
research/syntaxnet/dragnn/python/dragnn_model_saver_lib_test.py  (new file, mode 100644)

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Test for dragnn.python.dragnn_model_saver_lib."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

from google.protobuf import text_format
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest

from dragnn.protos import spec_pb2
from dragnn.python import dragnn_model_saver_lib

FLAGS = tf.app.flags.FLAGS


def setUpModule():
  if not hasattr(FLAGS, 'test_srcdir'):
    FLAGS.test_srcdir = ''
  if not hasattr(FLAGS, 'test_tmpdir'):
    FLAGS.test_tmpdir = tf.test.get_temp_dir()


class DragnnModelSaverLibTest(test_util.TensorFlowTestCase):

  def LoadSpec(self, spec_path):
    master_spec = spec_pb2.MasterSpec()
    root_dir = os.path.join(FLAGS.test_srcdir, 'dragnn/python')
    with file(os.path.join(root_dir, 'testdata', spec_path), 'r') as fin:
      text_format.Parse(fin.read().replace('TOPDIR', root_dir), master_spec)
    return master_spec

  def CreateLocalSpec(self, spec_path):
    master_spec = self.LoadSpec(spec_path)
    master_spec_name = os.path.basename(spec_path)
    outfile = os.path.join(FLAGS.test_tmpdir, master_spec_name)
    fout = open(outfile, 'w')
    fout.write(text_format.MessageToString(master_spec))
    return outfile

  def ValidateAssetExistence(self, master_spec, export_path):
    asset_path = os.path.join(export_path, 'assets.extra')

    # The master spec should exist.
    expected_path = os.path.join(asset_path, 'master_spec')
    tf.logging.info('Validating existence of %s' % expected_path)
    self.assertTrue(os.path.isfile(expected_path))

    # For every part in every resource in every component, the resource should
    # exist at [export_path]/assets.extra/[component file path]
    path_list = []
    for component_spec in master_spec.component:
      for resource_spec in component_spec.resource:
        for part in resource_spec.part:
          expected_path = os.path.join(asset_path,
                                       part.file_pattern.strip(os.path.sep))
          tf.logging.info('Validating existence of %s' % expected_path)
          self.assertTrue(os.path.isfile(expected_path))
          path_list.append(expected_path)

    # Return a set of all unique paths.
    return set(path_list)

  def testModelExport(self):
    # Get the master spec and params for this graph.
    master_spec = self.LoadSpec('ud-hungarian.master-spec')
    params_path = os.path.join(
        FLAGS.test_srcdir, 'dragnn/python/testdata'
        '/ud-hungarian.params')

    # Export the graph via SavedModel. (Here, we maintain a handle to the graph
    # for comparison, but that's usually not necessary.)
    export_path = os.path.join(FLAGS.test_tmpdir, 'export')
    saver_graph = tf.Graph()

    shortened_to_original = dragnn_model_saver_lib.shorten_resource_paths(
        master_spec)

    dragnn_model_saver_lib.export_master_spec(master_spec, saver_graph)

    dragnn_model_saver_lib.export_to_graph(
        master_spec,
        params_path,
        export_path,
        saver_graph,
        export_moving_averages=False)

    # Export the assets as well.
    dragnn_model_saver_lib.export_assets(master_spec, shortened_to_original,
                                         export_path)

    # Validate that the assets are all in the exported directory.
    path_set = self.ValidateAssetExistence(master_spec, export_path)

    # This master-spec has 4 unique assets. If there are more, we have not
    # uniquified the assets properly.
    self.assertEqual(len(path_set), 4)

    # Restore the graph from the checkpoint into a new Graph object.
    restored_graph = tf.Graph()
    restoration_config = tf.ConfigProto(
        log_device_placement=False,
        intra_op_parallelism_threads=10,
        inter_op_parallelism_threads=10)

    with tf.Session(graph=restored_graph, config=restoration_config) as sess:
      tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING],
                                 export_path)


if __name__ == '__main__':
  googletest.main()
research/syntaxnet/dragnn/python/dragnn_ops.py

@@ -16,9 +16,9 @@
"""Groups the DRAGNN TensorFlow ops in one module."""

try:
  from dragnn.core.ops.gen_dragnn_bulk_ops import *
  from dragnn.core.ops.gen_dragnn_ops import *
except ImportError as e:
  raise e
from dragnn.core.ops.gen_dragnn_bulk_ops import *
from dragnn.core.ops.gen_dragnn_ops import *

import dragnn.python.load_dragnn_cc_impl
import syntaxnet.load_parser_ops
research/syntaxnet/dragnn/python/graph_builder.py

@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Builds a DRAGNN graph for local training."""

import collections
import tensorflow as tf

from tensorflow.core.protobuf import saver_pb2
from tensorflow.python.platform import tf_logging as logging

@@ -32,6 +32,37 @@ except KeyError, e:
  logging.info(str(e))


def _validate_grid_point(hyperparams, is_sub_optimizer=False):
  """Validates that a grid point's configuration is reasonable.

  Args:
    hyperparams (spec_pb2.GridPoint): Grid point to validate.
    is_sub_optimizer (bool): Whether this optimizer is a sub-optimizer of
      a composite optimizer.

  Raises:
    ValueError: If the grid point is not valid.
  """
  valid_methods = ('gradient_descent', 'adam', 'lazyadam', 'momentum',
                   'composite')
  if hyperparams.learning_method not in valid_methods:
    raise ValueError('Unknown learning method (optimizer)')

  if is_sub_optimizer:
    for base_only_field in ('decay_steps', 'decay_base', 'decay_staircase'):
      if hyperparams.HasField(base_only_field):
        raise ValueError('Field {} is not valid for sub-optimizers of a '
                         'composite optimizer.'.format(base_only_field))

  if hyperparams.learning_method == 'composite':
    spec = hyperparams.composite_optimizer_spec
    if spec.switch_after_steps < 1:
      raise ValueError('switch_after_steps {} not valid for composite '
                       'optimizer!'.format(spec.switch_after_steps))
    for sub_optimizer in (spec.method1, spec.method2):
      _validate_grid_point(sub_optimizer, is_sub_optimizer=True)


def _create_learning_rate(hyperparams, step_var):
  """Creates learning rate var, with decay and switching for CompositeOptimizer.

@@ -40,21 +71,31 @@ def _create_learning_rate(hyperparams, step_var):
      learning_method to determine optimizer class to use.
    step_var: tf.Variable, global training step.

  Raises:
    ValueError: If the composite optimizer is set, but not correctly configured.

  Returns:
    a scalar `Tensor`, the learning rate based on current step and hyperparams.
  """
  if hyperparams.learning_method != 'composite':
    base_rate = hyperparams.learning_rate
    adjusted_steps = step_var
  else:
    spec = hyperparams.composite_optimizer_spec
    switch = tf.less(step_var, spec.switch_after_steps)
    base_rate = tf.cond(switch, lambda: tf.constant(spec.method1.learning_rate),
                        lambda: tf.constant(spec.method2.learning_rate))
    if spec.reset_learning_rate:
      adjusted_steps = tf.cond(switch, lambda: step_var,
                               lambda: step_var - spec.switch_after_steps)
    else:
      adjusted_steps = step_var

  return tf.train.exponential_decay(base_rate, step_var,
                                    hyperparams.decay_steps,
                                    hyperparams.decay_base,
  return tf.train.exponential_decay(
      learning_rate=base_rate,
      global_step=adjusted_steps,
      decay_steps=hyperparams.decay_steps,
      decay_rate=hyperparams.decay_base,
      staircase=hyperparams.decay_staircase)

@@ -158,6 +199,7 @@ class MasterBuilder(object):
    self.spec = master_spec
    self.hyperparams = (
        spec_pb2.GridPoint() if hyperparam_config is None else hyperparam_config)
    _validate_grid_point(self.hyperparams)
    self.pool_scope = pool_scope

    # Set the graph-level random seed before creating the Components so the ops

@@ -260,6 +302,25 @@ class MasterBuilder(object):
    all_nodes['run'] = run_op
    return all_nodes

  def build_warmup_graph(self, asset_dir):
    """Builds a warmup graph.

    This graph performs a MasterSpec asset location rewrite via
    SetAssetDirectory, then grabs a ComputeSession and immediately returns it.
    By grabbing a session, we cause the underlying transition systems to cache
    their static data reads.

    Args:
      asset_dir: The base directory to append to all resources.

    Returns:
      A single op suitable for passing to the legacy_init_op of the ModelSaver.
    """
    with tf.control_dependencies([dragnn_ops.set_asset_directory(asset_dir)]):
      session = self._get_compute_session()
      release_op = dragnn_ops.release_session(session)
      return tf.group(release_op, name='run')

  def build_training(self,
                     handle,
                     compute_gradients=True,

@@ -408,6 +469,8 @@ class MasterBuilder(object):
      # Restore that subsequent builds don't use average by default.
      self.read_from_avg = False

    cost = tf.check_numerics(cost, message='Cost is not finite.')

    # Returns named access to common outputs.
    outputs = {
        'cost': cost,

@@ -447,8 +510,14 @@ class MasterBuilder(object):
    Returns:
      setup_op - An op that, when run, guarantees all setup ops will run.
    """
    with tf.control_dependencies(
        [comp.build_post_restore_hook() for comp in self.components]):
    control_ops = []
    for comp in self.components:
      hook = comp.build_post_restore_hook()
      if isinstance(hook, collections.Iterable):
        control_ops.extend(hook)
      else:
        control_ops.append(hook)
    with tf.control_dependencies(control_ops):
      return tf.no_op(name='post_restore_hook_master')

  def build_inference(self, handle, use_moving_average=False):

@@ -597,10 +666,8 @@ class MasterBuilder(object):
  def add_saver(self):
    """Adds a Saver for all variables in the graph."""
    logging.info('Saving non-quantized variables:\n\t%s', '\n\t'.join(
        [x.name for x in tf.global_variables() if 'quantized' not in x.name]))
    logging.info('Saving variables:\n\t%s',
                 '\n\t'.join([x.name for x in tf.global_variables()]))
    self.saver = tf.train.Saver(
        var_list=[
            x for x in tf.global_variables() if 'quantized' not in x.name
        ],
        var_list=[x for x in tf.global_variables()],
        write_version=saver_pb2.SaverDef.V1)
research/syntaxnet/dragnn/python/graph_builder_test.py

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for graph_builder."""

@@ -35,14 +34,8 @@ from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest
from tensorflow.python.platform import tf_logging as logging

import dragnn.python.load_dragnn_cc_impl
import syntaxnet.load_parser_ops

FLAGS = tf.app.flags.FLAGS

if not hasattr(FLAGS, 'test_srcdir'):
  FLAGS.test_srcdir = ''
if not hasattr(FLAGS, 'test_tmpdir'):
  FLAGS.test_tmpdir = tf.test.get_temp_dir()

_DUMMY_GOLD_SENTENCE = """
token {

@@ -157,6 +150,13 @@ token {
]


def setUpModule():
  if not hasattr(FLAGS, 'test_srcdir'):
    FLAGS.test_srcdir = ''
  if not hasattr(FLAGS, 'test_tmpdir'):
    FLAGS.test_tmpdir = tf.test.get_temp_dir()


def _as_op(x):
  """Always returns the tf.Operation associated with a node."""
  return x.op if isinstance(x, tf.Tensor) else x

@@ -264,7 +264,8 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
    gold_doc_2 = sentence_pb2.Sentence()
    text_format.Parse(_DUMMY_GOLD_SENTENCE_2, gold_doc_2)
    reader_strings = [
        gold_doc.SerializeToString(), gold_doc_2.SerializeToString()
        gold_doc.SerializeToString(),
        gold_doc_2.SerializeToString()
    ]
    tf.logging.info('Generating graph with config: %s', hyperparam_config)
    with tf.Graph().as_default():

@@ -294,18 +295,35 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
    self.RunTraining(
        self.MakeHyperparams(learning_method='adam', use_moving_average=True))

  def testTrainingWithLazyAdamAndNoAveraging(self):
    """Adds code coverage for lazy ADAM without the use of moving averaging."""
    self.RunTraining(
        self.MakeHyperparams(
            learning_method='lazyadam', use_moving_average=False))

  def testTrainingWithCompositeOptimizer(self):
    """Adds code coverage for CompositeOptimizer."""
    self.RunCompositeOptimizerTraining(False)

  def testTrainingWithCompositeOptimizerResetLearningRate(self):
    """Adds code coverage for CompositeOptimizer."""
    self.RunCompositeOptimizerTraining(True)

  def RunCompositeOptimizerTraining(self, reset_learning_rate):
    grid_point = self.MakeHyperparams(learning_method='composite')
    grid_point.composite_optimizer_spec.method1.learning_method = 'adam'
    grid_point.composite_optimizer_spec.method2.learning_method = 'momentum'
    grid_point.composite_optimizer_spec.method2.momentum = 0.9
    spec = grid_point.composite_optimizer_spec
    spec.reset_learning_rate = reset_learning_rate
    spec.switch_after_steps = 1
    spec.method1.learning_method = 'adam'
    spec.method2.learning_method = 'momentum'
    spec.method2.momentum = 0.9
    self.RunTraining(grid_point)

  def RunFullTrainingAndInference(self,
                                  test_name,
                                  master_spec_path=None,
                                  master_spec=None,
                                  hyperparam_config=None,
                                  component_weights=None,
                                  unroll_using_oracle=None,
                                  num_evaluated_components=1,

@@ -320,7 +338,8 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
    gold_doc_2 = sentence_pb2.Sentence()
    text_format.Parse(_DUMMY_GOLD_SENTENCE_2, gold_doc_2)
    gold_reader_strings = [
        gold_doc.SerializeToString(), gold_doc_2.SerializeToString()
        gold_doc.SerializeToString(),
        gold_doc_2.SerializeToString()
    ]

    test_doc = sentence_pb2.Sentence()

@@ -328,8 +347,10 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
    test_doc_2 = sentence_pb2.Sentence()
    text_format.Parse(_DUMMY_TEST_SENTENCE_2, test_doc_2)
    test_reader_strings = [
        test_doc.SerializeToString(), test_doc.SerializeToString(),
        test_doc_2.SerializeToString(), test_doc.SerializeToString()
        test_doc.SerializeToString(),
        test_doc.SerializeToString(),
        test_doc_2.SerializeToString(),
        test_doc.SerializeToString()
    ]

    if batch_size_limit is not None:

@@ -338,7 +359,8 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
    with tf.Graph().as_default():
      tf.set_random_seed(1)
      hyperparam_config = spec_pb2.GridPoint()
      if not hyperparam_config:
        hyperparam_config = spec_pb2.GridPoint()
      builder = graph_builder.MasterBuilder(
          master_spec, hyperparam_config, pool_scope=test_name)
      target = spec_pb2.TrainTarget()

@@ -493,6 +515,22 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
        expected_num_actions=12,
        expected=_TAGGER_PARSER_EXPECTED_SENTENCES)

  def testTaggerParserNanDeath(self):
    hyperparam_config = spec_pb2.GridPoint()
    hyperparam_config.learning_rate = 1.0

    # The large learning rate should trigger check_numerics.
    with self.assertRaisesRegexp(tf.errors.InvalidArgumentError,
                                 'Cost is not finite'):
      self.RunFullTrainingAndInference(
          'tagger-parser',
          'tagger_parser_master_spec.textproto',
          hyperparam_config=hyperparam_config,
          component_weights=[0., 1., 1.],
          unroll_using_oracle=[False, True, True],
          expected_num_actions=12,
          expected=_TAGGER_PARSER_EXPECTED_SENTENCES)

  def testTaggerParserWithAttention(self):
    spec = self.LoadSpec('tagger_parser_master_spec.textproto')

@@ -621,6 +659,18 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
    self.checkOpOrder('annotations', anno['annotations'],
                      ['GetSession', 'ReleaseSession'])

  def testWarmupGetsAndReleasesSession(self):
    """Checks that create_warmup_graph creates Get and ReleaseSession."""
    test_name = 'warmup-graph-structure'
    with tf.Graph().as_default():
      # Build the actual graphs. The choice of spec is arbitrary, as long as
      # training and annotation nodes can be constructed.
      builder, _ = self.getBuilderAndTarget(test_name)
      warmup = builder.build_warmup_graph('foo')
      self.checkOpOrder('annotations', warmup,
                        ['SetAssetDirectory', 'GetSession', 'ReleaseSession'])

  def testAttachDataReader(self):
    """Checks that train['run'] and 'annotations' call AttachDataReader."""
    test_name = 'attach-data-reader'
research/syntaxnet/dragnn/python/lexicon.py

@@ -28,7 +28,8 @@ def create_lexicon_context(path):
  context = task_spec_pb2.TaskSpec()
  for name in [
      'word-map', 'tag-map', 'tag-to-category', 'lcword-map', 'category-map',
      'char-map', 'char-ngram-map', 'label-map', 'prefix-table', 'suffix-table'
      'char-map', 'char-ngram-map', 'label-map', 'prefix-table', 'suffix-table',
      'known-word-map'
  ]:
    context.input.add(name=name).part.add(file_pattern=os.path.join(path, name))
  return context
research/syntaxnet/dragnn/python/lexicon_test.py

@@ -28,13 +28,7 @@ from dragnn.python import lexicon
from syntaxnet import parser_trainer
from syntaxnet import task_spec_pb2

import syntaxnet.load_parser_ops

FLAGS = tf.app.flags.FLAGS

if not hasattr(FLAGS, 'test_srcdir'):
  FLAGS.test_srcdir = ''
if not hasattr(FLAGS, 'test_tmpdir'):
  FLAGS.test_tmpdir = tf.test.get_temp_dir()

_EXPECTED_CONTEXT = r"""

@@ -48,9 +42,17 @@ input { name: "char-ngram-map" Part { file_pattern: "/tmp/char-ngram-map" } }
input { name: "label-map" Part { file_pattern: "/tmp/label-map" } }
input { name: "prefix-table" Part { file_pattern: "/tmp/prefix-table" } }
input { name: "suffix-table" Part { file_pattern: "/tmp/suffix-table" } }
input { name: "known-word-map" Part { file_pattern: "/tmp/known-word-map" } }
"""


def setUpModule():
  if not hasattr(FLAGS, 'test_srcdir'):
    FLAGS.test_srcdir = ''
  if not hasattr(FLAGS, 'test_tmpdir'):
    FLAGS.test_tmpdir = tf.test.get_temp_dir()


class LexiconTest(tf.test.TestCase):

  def testCreateLexiconContext(self):
research/syntaxnet/dragnn/python/network_units.py

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Basic network units used in assembling DRAGNN graphs."""

from __future__ import absolute_import

@@ -21,6 +20,8 @@ from __future__ import print_function
import abc

import numpy as np
import tensorflow as tf

from tensorflow.python.ops import nn
from tensorflow.python.ops import tensor_array_ops as ta

@@ -141,17 +142,22 @@ def add_embeddings(channel_id, feature_spec, seed=None):
    embeddings = syntaxnet_ops.word_embedding_initializer(
        vectors=feature_spec.pretrained_embedding_matrix.part[0].file_pattern,
        vocabulary=feature_spec.vocab.part[0].file_pattern,
        num_special_embeddings=1,
        embedding_init=1.0,
        seed=seed1,
        seed2=seed2)
    return tf.get_variable(name, initializer=tf.reshape(embeddings, shape))
    return tf.get_variable(
        name,
        initializer=tf.reshape(embeddings, shape),
        trainable=not feature_spec.is_constant)
  else:
    return tf.get_variable(
        name,
        shape,
        initializer=tf.random_normal_initializer(
            stddev=1.0 / feature_spec.embedding_dim**.5, seed=seed))
            stddev=1.0 / feature_spec.embedding_dim**.5, seed=seed),
        trainable=not feature_spec.is_constant)


def embedding_lookup(embedding_matrix, indices, ids, weights, size):

@@ -183,7 +189,7 @@ def fixed_feature_lookup(component, state, channel_id, stride):
  Args:
    component: Component object in which to look up the fixed features.
    state: MasterState object for the live nlp_saft::dragnn::MasterState.
    state: MasterState object for the live ComputeSession.
    channel_id: int id of the fixed feature to look up.
    stride: int Tensor of current batch * beam size.

@@ -228,6 +234,100 @@ def get_input_tensor(fixed_embeddings, linked_embeddings):
  return tf.concat([e.tensor for e in embeddings], 1)


def add_var_initialized(name, shape, init_type, divisor=1.0, stddev=1e-4):
  """Creates a tf.Variable with the given shape and initialization.

  Args:
    name: variable name
    shape: variable shape
    init_type: type of initialization (random, xavier, identity, varscale)
    divisor: numerator for identity initialization where in_dim != out_dim,
      should divide both in_dim and out_dim
    stddev: standard deviation for random normal initialization

  Returns:
    tf.Variable object with the given shape and initialization

  Raises:
    ValueError: if identity initialization is specified for a tensor of rank < 4
    NotImplementedError: if an unimplemented type of initialization is specified
  """
  if init_type == 'random':
    # Random normal initialization
    return tf.get_variable(
        name,
        shape=shape,
        initializer=tf.random_normal_initializer(stddev=stddev),
        dtype=tf.float32)
  if init_type == 'xavier':
    # Xavier normal initialization (Glorot and Bengio, 2010):
    # http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf
    return tf.get_variable(
        name,
        shape=shape,
        initializer=tf.contrib.layers.xavier_initializer(),
        dtype=tf.float32)
  if init_type == 'varscale':
    # Variance scaling initialization (He at al. 2015):
    # https://arxiv.org/abs/1502.01852
    return tf.get_variable(
        name,
        shape=shape,
        initializer=tf.contrib.layers.variance_scaling_initializer(),
        dtype=tf.float32)
  if init_type == 'identity':
    # "Identity initialization" described in Yu and Koltun (2015):
    # https://arxiv.org/abs/1511.07122v3 eqns. (4) and (5)
    rank = len(shape)
    square = shape[-1] == shape[-2]
    if rank < 2:
      raise ValueError(
          'Identity initialization requires a tensor with rank >= 2. The given '
          'shape has rank ' + str(rank))
    if shape[-1] % divisor != 0 or shape[-2] % divisor != 0:
      raise ValueError('Divisor must divide both shape[-1]=' + str(shape[-1]) +
                       ' and shape[-2]=' + str(shape[-2]) + '. Divisor is: ' +
                       str(divisor))

    # If the desired shape is > 2 dimensions, we only want to set the values
    # in the middle along the last two dims.
    middle_indices = [int(s / 2) for s in shape]
    middle_indices = middle_indices[:-2]
    base_array = NotImplemented
    if square:
      if rank == 2:
        base_array = np.eye(shape[-1])
      else:
        base_array = np.zeros(shape, dtype=np.float32)
        base_array[[[i] for i in middle_indices]] = np.eye(shape[-1])
    else:
      # NOTE(strubell): We use NumPy's RNG here and not TensorFlow's because
      # constructing this matrix with tf ops is tedious and harder to read.
      base_array = np.random.normal(
          size=shape, loc=0, scale=stddev).astype(np.float32)
      m = divisor / shape[-1]
      identity = np.eye(int(divisor))
      x_stretch = int(shape[-1] / divisor)
      y_stretch = int(shape[-2] / divisor)
      x_stretched_ident = np.repeat(identity, x_stretch, 1)
      xy_stretched_ident = np.repeat(x_stretched_ident, y_stretch, 0)
      indices = np.where(xy_stretched_ident == 1.0)
      if rank == 2:
        base_array[indices[0], indices[1]] = m
      else:
        arr = base_array[[[i] for i in middle_indices]][0]
        arr[indices[0], indices[1]] = m
        base_array[[[i] for i in middle_indices]] = arr
    return tf.get_variable(name, initializer=base_array)

  raise NotImplementedError('Initialization type ' + init_type +
                            ' is not implemented.')
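As a concrete illustration of the non-square identity branch above, a small NumPy-only sketch of the stretched-identity pattern the code writes into the weight matrix; the shape and divisor are chosen purely for illustration, and the random-noise background is replaced by zeros for clarity:

  import numpy as np

  # Illustrative values: a [4, 2] weight matrix with divisor=2.
  shape = [4, 2]
  divisor = 2.0
  m = divisor / shape[-1]                    # value written on the "identity" cells
  identity = np.eye(int(divisor))            # 2x2 identity
  x_stretch = int(shape[-1] / divisor)       # 1
  y_stretch = int(shape[-2] / divisor)       # 2
  xy_stretched_ident = np.repeat(np.repeat(identity, x_stretch, 1), y_stretch, 0)

  base = np.zeros(shape, dtype=np.float32)   # noise background omitted for clarity
  rows, cols = np.where(xy_stretched_ident == 1.0)
  base[rows, cols] = m
  print(base)
  # [[1. 0.]
  #  [1. 0.]
  #  [0. 1.]
  #  [0. 1.]]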
def
get_input_tensor_with_stride
(
fixed_embeddings
,
linked_embeddings
,
stride
):
"""Constructs an input tensor with a separate dimension for steps.
...
...
@@ -304,8 +404,8 @@ def lookup_named_tensor(name, named_tensors):
for
named_tensor
in
named_tensors
:
if
named_tensor
.
name
==
name
:
return
named_tensor
raise
KeyError
(
'Name "%s" not found in named tensors: %s'
%
(
name
,
named_tensors
))
raise
KeyError
(
'Name "%s" not found in named tensors: %s'
%
(
name
,
named_tensors
))
def
activation_lookup_recurrent
(
component
,
state
,
channel_id
,
source_array
,
...
...
@@ -317,7 +417,7 @@ def activation_lookup_recurrent(component, state, channel_id, source_array,
Args:
component: Component object in which to look up the fixed features.
state: MasterState object for the live
nlp_saft::dragnn::MasterState
.
state: MasterState object for the live
ComputeSession
.
channel_id: int id of the fixed feature to look up.
source_array: TensorArray from which to fetch feature vectors, expected to
have size [steps + 1] elements of shape [stride, D] each.
...
...
@@ -381,7 +481,7 @@ def activation_lookup_other(component, state, channel_id, source_tensor,
Args:
component: Component object in which to look up the fixed features.
state: MasterState object for the live
nlp_saft::dragnn::MasterState
.
state: MasterState object for the live
ComputeSession
.
channel_id: int id of the fixed feature to look up.
source_tensor: Tensor from which to fetch feature vectors. Expected to have
have shape [steps + 1, stride, D].
...
...
@@ -494,8 +594,8 @@ class LayerNorm(object):
# Compute layer normalization using the batch_normalization function.
variance_epsilon
=
1E-12
outputs
=
nn
.
batch_normalization
(
inputs
,
mean
,
variance
,
beta
,
gamma
,
variance_epsilon
)
outputs
=
nn
.
batch_normalization
(
inputs
,
mean
,
variance
,
beta
,
gamma
,
variance_epsilon
)
outputs
.
set_shape
(
inputs_shape
)
return
outputs
...
...
@@ -529,12 +629,13 @@ class Layer(object):
TensorArray object
"""
check
.
Gt
(
self
.
dim
,
0
,
'Cannot create array when dimension is dynamic'
)
tensor_array
=
ta
.
TensorArray
(
dtype
=
tf
.
float32
,
size
=
0
,
dynamic_size
=
True
,
clear_after_read
=
False
,
infer_shape
=
False
,
name
=
'%s_array'
%
self
.
name
)
tensor_array
=
ta
.
TensorArray
(
dtype
=
tf
.
float32
,
size
=
0
,
dynamic_size
=
True
,
clear_after_read
=
False
,
infer_shape
=
False
,
name
=
'%s_array'
%
self
.
name
)
# Start each array with all zeros. Special values will still be learned via
# the extra embedding dimension stored for each linked feature channel.
...
...
@@ -588,9 +689,6 @@ def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
shape of |inputs|, containing the masked or original inputs, depending on
whether dropout was actually performed.
"""
check
.
Ge
(
inputs
.
get_shape
().
ndims
,
2
,
'inputs must be rank 2 or 3'
)
check
.
Le
(
inputs
.
get_shape
().
ndims
,
3
,
'inputs must be rank 2 or 3'
)
flat
=
(
inputs
.
get_shape
().
ndims
==
2
)
if
keep_prob
>=
1.0
:
return
inputs
...
...
@@ -598,6 +696,11 @@ def maybe_apply_dropout(inputs, keep_prob, per_sequence, stride=None):
if
not
per_sequence
:
return
tf
.
nn
.
dropout
(
inputs
,
keep_prob
)
# We only check the dims if we are applying per-sequence dropout
check
.
Ge
(
inputs
.
get_shape
().
ndims
,
2
,
'inputs must be rank 2 or 3'
)
check
.
Le
(
inputs
.
get_shape
().
ndims
,
3
,
'inputs must be rank 2 or 3'
)
flat
=
(
inputs
.
get_shape
().
ndims
==
2
)
check
.
NotNone
(
stride
,
'per-sequence dropout requires stride'
)
dim
=
inputs
.
get_shape
().
as_list
()[
-
1
]
check
.
NotNone
(
dim
,
'inputs must have static activation dimension, but have '
...
...
@@ -629,7 +732,7 @@ class NetworkUnitInterface(object):
layers (list): List of Layer objects to track network layers that should
be written to Tensors during training and inference.
"""
__metaclass__
=
abc
.
ABCMeta
# required for @abstractmethod
__metaclass__
=
abc
.
ABCMeta
# required for @
abc.
abstractmethod
def
__init__
(
self
,
component
,
init_layers
=
None
,
init_context_layers
=
None
):
"""Initializes parameters for embedding matrices.
...
...
@@ -692,8 +795,8 @@ class NetworkUnitInterface(object):
# Compute the cumulative dimension of all inputs. If any input has dynamic
# dimension, then the result is -1.
input_dims
=
(
self
.
_fixed_feature_dims
.
values
()
+
self
.
_linked_feature_dims
.
values
())
input_dims
=
(
self
.
_fixed_feature_dims
.
values
()
+
self
.
_linked_feature_dims
.
values
())
if
any
(
x
<
0
for
x
in
input_dims
):
self
.
_concatenated_input_dim
=
-
1
else
:
...
...
@@ -844,8 +947,7 @@ class NetworkUnitInterface(object):
tf
.
reduce_sum
(
tf
.
multiply
(
h_tensor
,
tf
.
reshape
(
p_vec
,
[
-
1
,
1
]),
name
=
'time_together2'
),
0
),
0
)
0
),
0
)
return
tf
.
matmul
(
r_vec
,
self
.
_component
.
get_variable
(
'attention_weights_pu'
),
...
...
@@ -908,6 +1010,7 @@ class FeedForwardNetwork(NetworkUnitInterface):
Parameters used to construct the network:
hidden_layer_sizes: comma-separated list of ints, indicating the
number of hidden units in each hidden layer.
omit_logits (False): Whether to elide the logits layer.
layer_norm_input (False): Whether or not to apply layer normalization
on the concatenated input to the network.
layer_norm_hidden (False): Whether or not to apply layer normalization
...
...
@@ -928,21 +1031,24 @@ class FeedForwardNetwork(NetworkUnitInterface):
when the |dropout_keep_prob| parameter is negative.
"""
self
.
_attrs
=
get_attrs_with_defaults
(
component
.
spec
.
network_unit
.
parameters
,
defaults
=
{
component
.
spec
.
network_unit
.
parameters
,
defaults
=
{
'hidden_layer_sizes'
:
''
,
'omit_logits'
:
False
,
'layer_norm_input'
:
False
,
'layer_norm_hidden'
:
False
,
'nonlinearity'
:
'relu'
,
'dropout_keep_prob'
:
-
1.0
,
'dropout_per_sequence'
:
False
,
'dropout_all_layers'
:
False
})
'dropout_all_layers'
:
False
})
# Initialize the hidden layer sizes before running the base initializer, as
# the base initializer may need to know the size of
of
the hidden layer for
# the base initializer may need to know the size of the hidden layer for
# recurrent connections.
self
.
_hidden_layer_sizes
=
(
map
(
int
,
self
.
_attrs
[
'hidden_layer_sizes'
].
split
(
','
))
if
self
.
_attrs
[
'hidden_layer_sizes'
]
else
[])
self
.
_hidden_layer_sizes
=
(
map
(
int
,
self
.
_attrs
[
'hidden_layer_sizes'
].
split
(
','
))
if
self
.
_attrs
[
'hidden_layer_sizes'
]
else
[])
super
(
FeedForwardNetwork
,
self
).
__init__
(
component
)
# Infer dropout rate from network parameters and grid hyperparameters.
...
...
@@ -960,9 +1066,8 @@ class FeedForwardNetwork(NetworkUnitInterface):
self
.
_params
.
extend
(
self
.
_layer_norm_input
.
params
)
if
self
.
_attrs
[
'layer_norm_hidden'
]:
self
.
_layer_norm_hidden
=
LayerNorm
(
self
.
_component
,
'layer_0'
,
self
.
_hidden_layer_sizes
[
0
],
tf
.
float32
)
self
.
_layer_norm_hidden
=
LayerNorm
(
self
.
_component
,
'layer_0'
,
self
.
_hidden_layer_sizes
[
0
],
tf
.
float32
)
self
.
_params
.
extend
(
self
.
_layer_norm_hidden
.
params
)
# Extract nonlinearity from |tf.nn|.
...
...
@@ -984,13 +1089,11 @@ class FeedForwardNetwork(NetworkUnitInterface):
self
.
_params
.
append
(
tf
.
get_variable
(
'bias_%d'
%
index
,
[
hidden_layer_size
],
initializer
=
tf
.
constant_initializer
(
0.2
,
dtype
=
tf
.
float32
)))
initializer
=
tf
.
constant_initializer
(
0.2
,
dtype
=
tf
.
float32
)))
self
.
_weights
.
append
(
weights
)
self
.
_layers
.
append
(
Layer
(
component
,
name
=
'layer_%d'
%
index
,
dim
=
hidden_layer_size
))
Layer
(
component
,
name
=
'layer_%d'
%
index
,
dim
=
hidden_layer_size
))
last_layer_dim
=
hidden_layer_size
# Add a convenience alias for the last hidden layer, if any.
...
...
@@ -1000,7 +1103,7 @@ class FeedForwardNetwork(NetworkUnitInterface):
# By default, regularize only the weights.
self
.
_regularized_weights
.
extend
(
self
.
_weights
)
if
component
.
num_actions
:
if
component
.
num_actions
and
not
self
.
_attrs
[
'omit_logits'
]
:
self
.
_params
.
append
(
tf
.
get_variable
(
'weights_softmax'
,
[
last_layer_dim
,
component
.
num_actions
],
...
...
@@ -1010,8 +1113,7 @@ class FeedForwardNetwork(NetworkUnitInterface):
'bias_softmax'
,
[
component
.
num_actions
],
initializer
=
tf
.
zeros_initializer
()))
self
.
_layers
.
append
(
Layer
(
component
,
name
=
'logits'
,
dim
=
component
.
num_actions
))
Layer
(
component
,
name
=
'logits'
,
dim
=
component
.
num_actions
))
def
create
(
self
,
fixed_embeddings
,
...
...
@@ -1078,10 +1180,8 @@ class FeedForwardNetwork(NetworkUnitInterface):
      return self._hidden_layer_sizes[-1]

    if not layer_name.startswith('layer_'):
      logging.fatal('Invalid layer name: "%s" Can only retrieve from "logits", '
                    '"last_layer", and "layer_*".', layer_name)
# NOTE(danielandor): Since get_layer_size is called before the
# model has been built, we compute the layer size directly from
...
...
@@ -1157,7 +1257,8 @@ class LSTMNetwork(NetworkUnitInterface):
    self._params.extend([
        self._x2i, self._h2i, self._c2i, self._bi, self._x2o, self._h2o,
        self._c2o, self._bo, self._x2c, self._h2c, self._bc
    ])

    lstm_h_layer = Layer(component, name='lstm_h', dim=self._hidden_layer_sizes)
    lstm_c_layer = Layer(component, name='lstm_c', dim=self._hidden_layer_sizes)
...
...
@@ -1168,20 +1269,20 @@ class LSTMNetwork(NetworkUnitInterface):
    self._layers.extend(self._context_layers)
    self._layers.append(
        Layer(component, name='layer_0', dim=self._hidden_layer_sizes))

    self.params.append(
        tf.get_variable(
            'weights_softmax',
            [self._hidden_layer_sizes, component.num_actions],
            initializer=tf.random_normal_initializer(stddev=1e-4)))
    self.params.append(
        tf.get_variable(
            'bias_softmax', [component.num_actions],
            initializer=tf.zeros_initializer()))
    self._layers.append(
        Layer(component, name='logits', dim=component.num_actions))

  def create(self,
             fixed_embeddings,
...
...
@@ -1215,6 +1316,13 @@ class LSTMNetwork(NetworkUnitInterface):
    i_h_tm1 = context_tensor_arrays[0].read(length - 1)
    i_c_tm1 = context_tensor_arrays[1].read(length - 1)

    # label c and h inputs
    i_c_tm1 = tf.identity(i_c_tm1, name='lstm_c_in')
    i_h_tm1 = tf.identity(i_h_tm1, name='lstm_h_in')

    # label the feature input (for debugging purposes)
    input_tensor = tf.identity(input_tensor, name='input_tensor')

    # apply dropout according to http://arxiv.org/pdf/1409.2329v5.pdf
    if during_training and self._input_dropout_rate < 1:
      input_tensor = tf.nn.dropout(input_tensor, self._input_dropout_rate)
...
...
@@ -1251,7 +1359,8 @@ class LSTMNetwork(NetworkUnitInterface):
    h = tf.identity(ht, name='layer_0')
    logits = tf.nn.xw_plus_b(ht,
                             tf.get_variable('weights_softmax'),
                             tf.get_variable('bias_softmax'))
    if self._component.spec.attention_component:
...
...
@@ -1284,7 +1393,7 @@ class ConvNetwork(NetworkUnitInterface):
widths: comma separated list of ints, number of steps input to the
convolutional kernel at every layer.
depths: comma separated list of ints, number of channels input to the
  convolutional kernel at every layer except the first.
output_embedding_dim: int, number of output channels for the convolutional
kernel of the last layer, which receives no ReLU activation and
therefore can be used in a softmax output. If zero, this final
...
...
@@ -1298,6 +1407,13 @@ class ConvNetwork(NetworkUnitInterface):
sequence, instead of once per step. See Gal and Ghahramani
(https://arxiv.org/abs/1512.05287).
Raises:
RuntimeError: if the number of widths is not equal to the number of
depths - 1.
The input depth of the first layer is inferred from the total concatenated
size of the input features.
Hyperparameters used:
dropout_rate: The probability that an input is not dropped. Only used
when the |dropout_keep_prob| parameter is negative.
...
...
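Aside: a sketch of the widths/depths bookkeeping described in the docstring above, with made-up dimensions (not DRAGNN code). The depths list starts from the inferred input dimension, optionally ends with output_embedding_dim, and must come out one longer than widths; each kernel maps depth[i] to depth[i+1] channels over a 1xW window:

def conv_kernel_shapes(input_dim, widths, depths, output_embedding_dim=0):
  all_depths = [input_dim] + list(depths)
  if output_embedding_dim:
    all_depths.append(output_embedding_dim)
  if len(widths) != len(all_depths) - 1:
    raise RuntimeError('depths (plus output) should equal widths + 1')
  # Each kernel is a 1xW convolution from depth[i] to depth[i+1] channels.
  return [[1, w, all_depths[i], all_depths[i + 1]]
          for i, w in enumerate(widths)]

# Two hidden conv layers plus a linear output layer of 45 channels.
print(conv_kernel_shapes(input_dim=48, widths=[7, 3, 1],
                         depths=[128, 64], output_embedding_dim=45))
# [[1, 7, 48, 128], [1, 3, 128, 64], [1, 1, 64, 45]]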
@@ -1305,21 +1421,34 @@ class ConvNetwork(NetworkUnitInterface):
    super(ConvNetwork, self).__init__(component)
    self._attrs = get_attrs_with_defaults(
        component.spec.network_unit.parameters,
        defaults={
            'widths': '',
            'depths': '',
            'output_embedding_dim': 0,
            'nonlinearity': 'relu',
            'dropout_keep_prob': -1.0,
            'dropout_per_sequence': False
        })

    self._weights = []
    self._biases = []
    self._widths = map(int, self._attrs['widths'].split(','))
    self._depths = [self._concatenated_input_dim]
    # Since we infer the input dimension, depths could be empty.
    if self._attrs['depths']:
      self._depths.extend(map(int, self._attrs['depths'].split(',')))
    self._output_dim = self._attrs['output_embedding_dim']
    if self._output_dim:
      self._depths.append(self._output_dim)
    if len(self._widths) != len(self._depths) - 1:
      raise RuntimeError(
          'Unmatched widths/depths: %d/%d (depths should equal widths + 1)' %
          (len(self._widths), len(self._depths)))
    self.kernel_shapes = []
    for i in range(len(self._depths) - 1):
      self.kernel_shapes.append(
...
...
@@ -1350,10 +1479,9 @@ class ConvNetwork(NetworkUnitInterface):
    self._params.extend(self._weights + self._biases)
    self._layers.append(
        Layer(component, name='conv_output', dim=self._depths[-1]))
    self._regularized_weights.extend(
        self._weights[:-1] if self._output_dim else self._weights)

  def create(self,
             fixed_embeddings,
...
...
@@ -1365,7 +1493,7 @@ class ConvNetwork(NetworkUnitInterface):
"""Requires |stride|; otherwise see base class."""
    if stride is None:
      raise RuntimeError("ConvNetwork needs 'stride' and must be called in the "
                         'bulk feature extractor component.')

    input_tensor = get_input_tensor_with_stride(fixed_embeddings,
                                                linked_embeddings, stride)
...
...
@@ -1388,8 +1516,253 @@ class ConvNetwork(NetworkUnitInterface):
        if i < (len(self._weights) - 1) or not self._output_dim:
          conv = self._nonlinearity(conv, name=scope.name)
    return [
        tf.reshape(conv, [-1, self._depths[-1]], name='reshape_activations')
    ]

  def _maybe_apply_dropout(self, inputs, stride):
    # The |inputs| are rank 4 (one 1xN "image" per sequence). Squeeze out and
    # restore the singleton image height, so dropout is applied to the normal
    # rank 3 batched input tensor.
    inputs = tf.squeeze(inputs, [1])
    inputs = maybe_apply_dropout(inputs, self._dropout_rate,
                                 self._attrs['dropout_per_sequence'], stride)
    inputs = tf.expand_dims(inputs, 1)
    return inputs


class ConvMultiNetwork(NetworkUnitInterface):
  """Implementation of a convolutional feed forward net with a side tower."""

  def __init__(self, component):
    """Initializes kernels and biases for this convolutional net.

    Args:
      component: parent ComponentBuilderBase object.

    Parameters used to construct the network:
      widths: comma separated list of ints, number of steps input to the
        convolutional kernel at every layer.
      depths: comma separated list of ints, number of channels input to the
        convolutional kernel at every layer except the first.
      output_embedding_dim: int, number of output channels for the
        convolutional kernel of the last layer, which receives no ReLU
        activation and therefore can be used in a softmax output. If zero,
        this final layer is disabled entirely.
      side_tower_index: An int representing the layer of the tower that the
        side tower will start from. 0 is the input data and 'num_layers'
        is the output.
      side_tower_widths: comma separated list of ints, number of steps input to
        the convolutional kernel at every layer of the side tower.
      side_tower_depths: comma separated list of ints, number of channels input
        to the convolutional kernel at every layer of the side tower save
        the first.
      side_tower_output_embedding_dim: int, number of output channels for the
        kernel of the last layer, which receives no ReLU activation and
        therefore can be used in a softmax output. If zero, this final
        layer is disabled entirely.
      nonlinearity ('relu'): Name of function from module "tf.nn" to apply to
        each hidden layer; e.g., "relu" or "elu".
      dropout_keep_prob (-1.0): The probability that an input is not dropped.
        If >= 1.0, disables dropout. If < 0.0, uses the global |dropout_rate|
        hyperparameter.
      dropout_per_sequence (False): If true, sample the dropout mask once per
        sequence, instead of once per step. See Gal and Ghahramani
        (https://arxiv.org/abs/1512.05287).

    Raises:
      RuntimeError: if the number of widths is not equal to the number of
        depths - 1.

    The input depth of the first layer is inferred from the total concatenated
    size of the input features.

    Hyperparameters used:
      dropout_rate: The probability that an input is not dropped. Only used
        when the |dropout_keep_prob| parameter is negative.
    """
    super(ConvMultiNetwork, self).__init__(component)
    self._attrs = get_attrs_with_defaults(
        component.spec.network_unit.parameters,
        defaults={
            'widths': '',
            'depths': '',
            'output_embedding_dim': 0,
            'side_tower_index': 0,
            'side_tower_widths': '',
            'side_tower_depths': '',
            'side_tower_output_embedding_dim': 0,
            'nonlinearity': 'relu',
            'dropout_keep_prob': -1.0,
            'dropout_per_sequence': False
        })

    # Examine the widths and depths for the primary tower.
    self._weights = []
    self._biases = []
    self._widths = map(int, self._attrs['widths'].split(','))
    self._depths = [self._concatenated_input_dim]
    # Since we infer the input dimension, depths could be empty.
    if self._attrs['depths']:
      self._depths.extend(map(int, self._attrs['depths'].split(',')))
    self._output_dim = self._attrs['output_embedding_dim']
    if self._output_dim:
      self._depths.append(self._output_dim)
    if len(self._widths) != len(self._depths) - 1:
      raise RuntimeError(
          'Unmatched widths/depths: %d/%d (depths should equal widths + 1)' %
          (len(self._widths), len(self._depths)))

    # Create the kernels for the primary tower.
    self.kernel_shapes = []
    for i in range(len(self._depths) - 1):
      self.kernel_shapes.append(
          [1, self._widths[i], self._depths[i], self._depths[i + 1]])
    for i in range(len(self._depths) - 1):
      with tf.variable_scope('conv%d' % i):
        self._weights.append(
            tf.get_variable(
                'weights',
                self.kernel_shapes[i],
                initializer=tf.random_normal_initializer(stddev=1e-4),
                dtype=tf.float32))
        bias_init = 0.0 if (i == len(self._widths) - 1) else 0.2
        self._biases.append(
            tf.get_variable(
                'biases',
                self.kernel_shapes[i][-1],
                initializer=tf.constant_initializer(bias_init),
                dtype=tf.float32))

    # Examine the widths and depths for the side tower.
    self._side_index = self._attrs['side_tower_index']
    self._side_weights = []
    self._side_biases = []
    self._side_widths = map(int, self._attrs['side_tower_widths'].split(','))
    self._side_depths = [self._depths[self._side_index]]
    # Since we infer the input dimension, depths could be empty.
    if self._attrs['side_tower_depths']:
      self._side_depths.extend(
          map(int, self._attrs['side_tower_depths'].split(',')))
    self._side_output_dim = self._attrs['side_tower_output_embedding_dim']
    if self._side_output_dim:
      self._depths.append(self._side_output_dim)
    if len(self._side_widths) != len(self._side_depths) - 1:
      raise RuntimeError(
          'Unmatched widths/depths: %d/%d (depths should equal widths + 1)' %
          (len(self._side_widths), len(self._side_depths)))

    # Create the kernels for the side tower, if there is more than one layer.
    self.side_kernel_shapes = []
    for i in range(len(self._side_depths) - 1):
      self.side_kernel_shapes.append([
          1, self._side_widths[i], self._side_depths[i],
          self._side_depths[i + 1]
      ])
    for i in range(len(self._side_depths) - 1):
      with tf.variable_scope('side_conv%d' % i):
        self._side_weights.append(
            tf.get_variable(
                'weights',
                self.side_kernel_shapes[i],
                initializer=tf.random_normal_initializer(stddev=1e-4),
                dtype=tf.float32))
        bias_init = 0.0 if (i == len(self._side_widths) - 1) else 0.2
        self._side_biases.append(
            tf.get_variable(
                'biases',
                self.side_kernel_shapes[i][-1],
                initializer=tf.constant_initializer(bias_init),
                dtype=tf.float32))

    # Extract nonlinearity from |tf.nn|.
    self._nonlinearity = getattr(tf.nn, self._attrs['nonlinearity'])

    # Infer dropout rate from network parameters and grid hyperparameters.
    self._dropout_rate = self._attrs['dropout_keep_prob']
    if self._dropout_rate < 0.0:
      self._dropout_rate = component.master.hyperparams.dropout_rate

    self._params.extend(
        self._weights + self._biases + self._side_weights + self._side_biases)

    # Append primary tower layers to the data structure.
    self._layers.append(
        Layer(component, name='conv_output', dim=self._depths[-1]))
    if self._output_dim:
      self._regularized_weights.extend(self._weights[:-1])
    else:
      self._regularized_weights.extend(self._weights)

    # Append side tower layers to the data structure.
    self._layers.append(
        Layer(component, name='conv_side_output', dim=self._side_depths[-1]))
    if self._side_output_dim:
      self._regularized_weights.extend(self._side_weights[:-1])
    else:
      self._regularized_weights.extend(self._side_weights)

  def create(self,
             fixed_embeddings,
             linked_embeddings,
             context_tensor_arrays,
             attention_tensor,
             during_training,
             stride=None):
    """Requires |stride|; otherwise see base class."""
    if stride is None:
      raise RuntimeError("ConvNetwork needs 'stride' and must be called in the "
                         'bulk feature extractor component.')

    input_tensor = get_input_tensor_with_stride(fixed_embeddings,
                                                linked_embeddings, stride)

    # TODO(googleuser): Add context and attention.
    del context_tensor_arrays, attention_tensor

    # On CPU, add a dimension so that the 'image' has shape
    # [stride, 1, num_steps, D].
    conv = tf.expand_dims(input_tensor, 1)
    for i in range(len(self._depths) - 1):
      if i == self._side_index:
        logging.info('Creating side tower at index %d', i)
        side_conv = conv
        for j in range(len(self._side_depths) - 1):
          with tf.variable_scope('side_conv%d' % j, reuse=True) as scope:
            if during_training:
              side_conv.set_shape([None, 1, None, self._side_depths[j]])
              side_conv = self._maybe_apply_dropout(side_conv, stride)
            side_conv = tf.nn.conv2d(
                side_conv,
                self._component.get_variable('weights'), [1, 1, 1, 1],
                padding='SAME')
            side_conv = tf.nn.bias_add(side_conv,
                                       self._component.get_variable('biases'))
            if j < (len(self._side_weights) - 1) or not self._side_output_dim:
              side_conv = self._nonlinearity(side_conv, name=scope.name)

      with tf.variable_scope('conv%d' % i, reuse=True) as scope:
        if during_training:
          conv.set_shape([None, 1, None, self._depths[i]])
          conv = self._maybe_apply_dropout(conv, stride)
        conv = tf.nn.conv2d(
            conv,
            self._component.get_variable('weights'), [1, 1, 1, 1],
            padding='SAME')
        conv = tf.nn.bias_add(conv, self._component.get_variable('biases'))
        if i < (len(self._weights) - 1) or not self._output_dim:
          conv = self._nonlinearity(conv, name=scope.name)
    return [
        tf.reshape(conv, [-1, self._depths[-1]], name='reshape_activations'),
        tf.reshape(
            side_conv, [-1, self._side_depths[-1]],
            name='reshape_side_activations'),
    ]

  def _maybe_apply_dropout(self, inputs, stride):
...
...
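Aside: the side tower in ConvMultiNetwork above branches off the primary tower's activations at side_tower_index and runs its own stack of convolutions. A toy sketch of that control flow with placeholder layer functions (not DRAGNN code):

def run_towers(x, main_layers, side_layers, side_index):
  """Applies main_layers to x; side_layers branch off at side_index."""
  side_out = None
  for i, layer in enumerate(main_layers):
    if i == side_index:
      # The side tower consumes the activations *before* main layer i.
      side_out = x
      for side_layer in side_layers:
        side_out = side_layer(side_out)
    x = layer(x)
  return x, side_out

main, side = run_towers(
    x=1.0,
    main_layers=[lambda v: v + 10, lambda v: v * 2],
    side_layers=[lambda v: v - 1],
    side_index=1)
assert (main, side) == (22.0, 10.0)  # side tower saw the output of layer 0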
@@ -1406,20 +1779,17 @@ class ConvNetwork(NetworkUnitInterface):
class PairwiseConvNetwork(NetworkUnitInterface):
  """Implementation of a pairwise 2D convolutional feed forward network.

  For two sequences of representations of N tokens, all N^2 pairs of
  concatenated input features are constructed. If each input vector is of
  length D, then the sequence is represented by an image of dimensions [N, N]
  with 2*D channels per pixel. I.e. pixel [i, j] has a representation that is
  the concatenation of the representations of the tokens at i and at j.

  To use this network for graph edge scoring, for instance by using the
  "heads_labels" transition system, the output layer needs to have dimensions
  [N, N*num_labels]. The network takes care of outputting an [N, N*last_dim]
  sized layer, but the user needs to ensure that the output depth equals the
  desired number of output labels.
  """

  def __init__(self, component):
...
...
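Aside: the pairwise "image" described in the docstring above can be pictured in NumPy; this sketch (not DRAGNN code) mirrors the tf.tile/tf.concat pattern used in create(): pixel [i, j] concatenates the source representation of token i with the target representation of token j.

import numpy as np

def pairwise_image(sources, targets):
  """sources: [N, S], targets: [N, T] -> [N, N, S+T] pairwise features."""
  n = sources.shape[0]
  src = np.tile(sources[:, np.newaxis, :], (1, n, 1))  # pixel [i, j] gets source i
  tgt = np.tile(targets[np.newaxis, :, :], (n, 1, 1))  # pixel [i, j] gets target j
  return np.concatenate([src, tgt], axis=-1)

sources = np.array([[1.0], [2.0]])
targets = np.array([[10.0], [20.0]])
image = pairwise_image(sources, targets)
assert image.shape == (2, 2, 2)
assert image[0, 1].tolist() == [1.0, 20.0]  # source 0 paired with target 1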
@@ -1430,62 +1800,98 @@ class PairwiseConvNetwork(NetworkUnitInterface):
      convolutional kernel at every layer.
    widths: comma separated list of ints, number of steps input to the
      convolutional kernel at every layer.
    dropout: comma separated list of floats, dropout keep probability for each
      layer.
    bias_init: comma separated list of floats, constant bias initializer for
      each layer.
    initialization: comma separated list of strings, initialization for each
      layer. See add_var_initialized() for available initialization schemes.
    activation_layers: comma separated list of ints, the id of layers after
      which to apply an activation. *By default, all but the final layer
      will have an activation applied.*
    activation: anything defined in tf.nn.

    To generate a network with M layers, 'depths', 'widths', 'dropout',
    'bias_init' and 'initialization' must be of length M. The input depth of
    the first layer is inferred from the total concatenated size of the input
    features.

    Args:
      component: parent ComponentBuilderBase object.

    Raises:
      RuntimeError: if the lists of dropout, bias_init, initialization, and
        widths do not have equal length, or the number of widths is not
        equal to the number of depths - 1.
    """
    parameters = component.spec.network_unit.parameters

    super(PairwiseConvNetwork, self).__init__(component)
    self._source_dim = self._linked_feature_dims['sources']
    self._target_dim = self._linked_feature_dims['targets']

    # Each input pixel will comprise the concatenation of two tokens, so the
    # input depth is double that for a single token.
    self._depths = [self._source_dim + self._target_dim]
    self._widths = map(int, parameters['widths'].split(','))
    self._num_layers = len(self._widths)

    self._dropout = map(float, parameters['dropout'].split(
        ',')) if parameters['dropout'] else [1.0] * self._num_layers
    self._bias_init = map(float, parameters['bias_init'].split(
        ',')) if parameters['bias_init'] else [0.01] * self._num_layers
    self._initialization = parameters['initialization'].split(
        ',') if parameters['initialization'] else ['xavier'] * self._num_layers

    param_lengths = map(
        len,
        [self._widths, self._dropout, self._bias_init, self._initialization])
    if not all(param_lengths[0] == param_len for param_len in param_lengths):
      raise RuntimeError('Unmatched widths/dropout/bias_init/initialization: ' +
                         '%d/%d/%d/%d' % (param_lengths[0], param_lengths[1],
                                          param_lengths[2], param_lengths[3]))

    self._depths.extend(map(int, parameters['depths'].split(',')))
    if len(self._depths) != len(self._widths) + 1:
      raise RuntimeError(
          'Unmatched widths/depths: %d/%d (depths should equal widths + 1)' %
          (len(self._widths), len(self._depths)))

    if parameters['activation']:
      self._activation = parameters['activation']
    else:
      self._activation = 'relu'
    self._activation_fn = getattr(tf.nn, self._activation)

    self._num_labels = self._depths[-1]

    if parameters['activation_layers']:
      self._activation_layers = set(
          map(int, parameters['activation_layers'].split(',')))
    else:
      self._activation_layers = set(range(self._num_layers - 1))

    self._kernel_shapes = []
    for i, width in enumerate(self._widths):
      if self._activation == 'glu' and i in self._activation_layers:
        self._kernel_shapes.append(
            [width, width, self._depths[i], 2 * self._depths[i + 1]])
      else:
        self._kernel_shapes.append(
            [width, width, self._depths[i], self._depths[i + 1]])

    self._weights = []
    self._biases = []
    for i, kernel_shape in enumerate(self._kernel_shapes):
      with tf.variable_scope('conv%d' % i):
        self._weights.append(
            add_var_initialized('weights', kernel_shape,
                                self._initialization[i]))
        self._biases.append(
            tf.get_variable(
                'biases',
                kernel_shape[-1],
                initializer=tf.constant_initializer(self._bias_init[i]),
                dtype=tf.float32))
    self._params.extend(self._weights + self._biases)
...
...
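Aside: the reason the 'glu' branch above doubles the kernel's output channels is that a gated linear unit splits its input channels into a value half and a gate half, so the post-activation depth is half the convolution's output depth. A minimal NumPy sketch of that halving (not DRAGNN code; the real activation is looked up from tf.nn as in the code above):

import numpy as np

def glu(x):
  """Gated linear unit over the last axis: first half * sigmoid(second half)."""
  d = x.shape[-1] // 2
  value, gate = x[..., :d], x[..., d:]
  return value * (1.0 / (1.0 + np.exp(-gate)))

x = np.zeros((4, 8))           # a conv layer emitting 2 * desired_depth channels
assert glu(x).shape == (4, 4)  # gating halves it back to desired_depth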
@@ -1500,34 +1906,46 @@ class PairwiseConvNetwork(NetworkUnitInterface):
             during_training,
             stride=None):
    """Requires |stride|; otherwise see base class."""
    del context_tensor_arrays, attention_tensor  # Unused.
    # TODO(googleuser): Normalize the arguments to create(). 'stride'
    # is unused by the recurrent network units, while 'context_tensor_arrays'
    # and 'attenion_tensor_array' is unused by bulk network units. b/33587044
    if stride is None:
      raise ValueError("PairwiseConvNetwork needs 'stride'")

    sources = lookup_named_tensor('sources', linked_embeddings).tensor
    targets = lookup_named_tensor('targets', linked_embeddings).tensor
    source_tokens = tf.reshape(sources, [stride, -1, 1, self._source_dim])
    target_tokens = tf.reshape(targets, [stride, 1, -1, self._target_dim])

    # sources and targets should have shapes [b, n, 1, s] and [b, 1, n, t],
    # respectively. Since we just reshaped them, we can check that all dims are
    # as expected by checking the one unknown dim, i.e. their num_steps (n) dim.
    sources_shape = tf.shape(source_tokens)
    targets_shape = tf.shape(target_tokens)
    num_steps = sources_shape[1]
    with tf.control_dependencies([
        tf.assert_equal(num_steps, targets_shape[2], name='num_steps_mismatch')
    ]):
      arg1 = tf.tile(source_tokens, tf.stack([1, 1, num_steps, 1]))
      arg2 = tf.tile(target_tokens, tf.stack([1, num_steps, 1, 1]))

    conv = tf.concat([arg1, arg2], 3)
    for i in xrange(self._num_layers):
      with tf.variable_scope('conv%d' % i, reuse=True) as scope:
        if during_training:
          conv = maybe_apply_dropout(conv, self._dropout[i], False)
        conv = tf.nn.conv2d(
            conv,
            self._component.get_variable('weights'), [1, 1, 1, 1],
            padding='SAME')
        conv = tf.nn.bias_add(conv, self._component.get_variable('biases'))
        if i in self._activation_layers:
          conv = self._activation_fn(conv, name=scope.name)
    return [
        tf.reshape(
            conv, [-1, num_steps * self._num_labels],
            name='reshape_activations')
    ]


class ExportFixedFeaturesNetwork(NetworkUnitInterface):
...
...
@@ -1593,7 +2011,7 @@ class SplitNetwork(NetworkUnitInterface):
    for slice_index in xrange(self._num_slices):
      self._layers.append(
          Layer(component, 'slice_%s' % slice_index, self._slice_dim))

  def create(self,
             fixed_embeddings,
...
@@ -1602,5 +2020,103 @@ class SplitNetwork(NetworkUnitInterface):
             attention_tensor,
             during_training,
             stride=None):
    """See base class."""
    input_bnxd = get_input_tensor(fixed_embeddings, linked_embeddings)
    return tf.split(input_bnxd, self._num_slices, axis=1)


class GatherNetwork(NetworkUnitInterface):
"""Network unit that gathers input according to specified step indices.
This can be used to implement a non-trivial linked feature (i.e., where the
link mapping is more complex than 'input.focus'). Extract the step indices
using a BulkFeatureIdExtractorComponentBuilder, and then gather activations
using this network.
Note that the step index -1 is special: gathering it will retrieve a padding
vector, which can be constant (zeros) or trainable.
Parameters:
trainable_padding (False): Whether the padding vector is trainable.
Features:
indices: [B * N, 1] The step indices to gather, local to each batch item.
These are local in the sense that, for each batch item, the step indices
are in the range [-1,N).
All other features are concatenated into a [B * N, D] matrix.
Layers:
outputs: [B * N, D] The first slice of the input.
"""
  def __init__(self, component):
    """Initializes weights and layers.

    Args:
      component: Parent ComponentBuilderBase object.
    """
    super(GatherNetwork, self).__init__(component)
    self._attrs = get_attrs_with_defaults(
        component.spec.network_unit.parameters, {'trainable_padding': False})

    check.In('indices', self._linked_feature_dims,
             'Missing required linked feature')
    check.Eq(self._linked_feature_dims['indices'], 1,
             'Wrong dimension for "indices" feature')

    self._dim = self._concatenated_input_dim - 1  # exclude 'indices'
    self._layers.append(Layer(component, 'outputs', self._dim))

    if self._attrs['trainable_padding']:
      self._params.append(
          tf.get_variable(
              'pre_padding', [1, 1, self._dim],
              initializer=tf.random_normal_initializer(stddev=1e-4),
              dtype=tf.float32))

  def create(self,
             fixed_embeddings,
             linked_embeddings,
             context_tensor_arrays,
             attention_tensor,
             during_training,
             stride=None):
    """Requires |stride|; otherwise see base class."""
    check.NotNone(stride,
                  'BulkBiLSTMNetwork requires "stride" and must be called '
                  'in the bulk feature extractor component.')

    # Extract the batched local step indices.
    local_indices = lookup_named_tensor('indices', linked_embeddings)
    local_indices_bxn = tf.reshape(local_indices.tensor, [stride, -1])
    local_indices_bxn = tf.to_int32(local_indices_bxn)
    num_steps = tf.shape(local_indices_bxn)[1]

    # Collect all other inputs as a batched tensor.
    linked_embeddings = [
        named_tensor for named_tensor in linked_embeddings
        if named_tensor.name != 'indices'
    ]
    inputs_bnxd = get_input_tensor(fixed_embeddings, linked_embeddings)

    # Prepend the padding vector, which may be trainable or constant.
    inputs_bxnxd = tf.reshape(inputs_bnxd, [stride, -1, self._dim])
    if self._attrs['trainable_padding']:
      padding_1x1xd = self._component.get_variable('pre_padding')
      padding_bx1xd = tf.tile(padding_1x1xd, [stride, 1, 1])
    else:
      padding_bx1xd = tf.zeros([stride, 1, self._dim], tf.float32)
    inputs_bxnxd = tf.concat([padding_bx1xd, inputs_bxnxd], 1)
    inputs_bnxd = tf.reshape(inputs_bxnxd, [-1, self._dim])

    # As mentioned above, for each batch item the local step indices are in the
    # range [-1,N). To compensate for batching and padding, the local indices
    # must be progressively offset into "global" indices such that batch item b
    # is in the range [b*(N+1),(b+1)*(N+1)).
    batch_indices_b = tf.range(stride)
    batch_indices_bx1 = tf.expand_dims(batch_indices_b, 1)
    local_to_global_offsets_bx1 = batch_indices_bx1 * (num_steps + 1) + 1
    global_indices_bxn = local_indices_bxn + local_to_global_offsets_bx1
    global_indices_bn = tf.reshape(global_indices_bxn, [-1])

    outputs_bnxd = tf.gather(inputs_bnxd, global_indices_bn)
    return [outputs_bnxd]
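Aside: the padding and index-offset arithmetic in GatherNetwork.create() above, replayed in NumPy for a batch of two items with two steps each (not DRAGNN code):

import numpy as np

features = np.array([[1., 1.], [2., 2.],    # batch item 0, steps 0-1
                     [3., 3.], [4., 4.]])   # batch item 1, steps 0-1
local_indices = np.array([[1, -1],          # item 0 gathers step 1, then padding
                          [0, 0]])          # item 1 gathers step 0 twice
stride, num_steps, dim = 2, 2, 2

# Prepend one padding row per batch item, so each item occupies N+1 rows.
padded = np.concatenate(
    [np.zeros((stride, 1, dim)), features.reshape(stride, num_steps, dim)],
    axis=1).reshape(-1, dim)

# Shift local indices in [-1, N) to global rows in [b*(N+1), (b+1)*(N+1)).
offsets = np.arange(stride)[:, None] * (num_steps + 1) + 1
gathered = padded[(local_indices + offsets).reshape(-1)]
print(gathered)  # [[2. 2.] [0. 0.] [3. 3.] [3. 3.]]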
research/syntaxnet/dragnn/python/network_units_test.py
View file @
4364390a
...
...
@@ -16,16 +16,16 @@
"""Tests for network_units."""
import numpy as np
import tensorflow as tf

from google.protobuf import text_format
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest

from dragnn.protos import spec_pb2
from dragnn.python import network_units

import dragnn.python.load_dragnn_cc_impl
import syntaxnet.load_parser_ops

FLAGS = tf.app.flags.FLAGS
...
...
@@ -66,6 +66,9 @@ class MockComponent(object):
  def attr(self, name):
    return self._attrs[name]

  def get_variable(self, name):
    return tf.get_variable(name)


class MockMaster(object):
...
...
@@ -77,6 +80,15 @@ class MockMaster(object):
    }


class MockNetwork(object):

  def __init__(self, **dims):
    self._dims = dims

  def get_layer_size(self, name):
    return self._dims[name]


class NetworkUnitsLookupTest(test_util.TensorFlowTestCase):

  def setUp(self):
...
@@ -155,5 +167,256 @@ class GetAttrsWithDefaultsTest(test_util.TensorFlowTestCase):
    _assert_attr_is_true('TRUE')


class GatherNetworkTest(test_util.TensorFlowTestCase):

  def setUp(self):
    # Clear the graph and all existing variables.  Otherwise, variables created
    # in different tests may collide with each other.
    tf.reset_default_graph()

    self._master = MockMaster()
    self._master.spec = spec_pb2.MasterSpec()
    text_format.Parse("""
        component {
          name: 'test'
          backend { registered_name: 'TestComponent' }
          linked_feature {
            name: 'indices'
            fml: 'input.focus'
            size: 1
            embedding_dim: -1
            source_component: 'previous'
            source_translator: 'identity'
            source_layer: 'index_layer'
          }
          linked_feature {
            name: 'features'
            fml: 'input.focus'
            size: 1
            embedding_dim: -1
            source_component: 'previous'
            source_translator: 'identity'
            source_layer: 'feature_layer'
          }
          network_unit {
            registered_name: 'GatherNetwork'
          }
        }
        """, self._master.spec)

    self._component = MockComponent(self._master,
                                    self._master.spec.component[0])
    self._master.lookup_component['previous'].network = MockNetwork(
        index_layer=1, feature_layer=2)
  def testConstantPadding(self):
    with tf.Graph().as_default(), self.test_session():
      with tf.variable_scope('test_scope'):
        network = network_units.GatherNetwork(self._component)

      # Construct a batch of two items with 3 and 2 steps, respectively.
      indices = tf.constant(
          [[1], [2], [0],     # item 1
           [-1], [0], [-1]],  # item 2
          dtype=tf.int64)
      features = tf.constant(
          [[1.0, 1.5], [2.0, 2.5], [3.0, 3.5],   # item 1
           [4.0, 4.5], [5.0, 5.5], [6.0, 6.5]],  # item 2
          dtype=tf.float32)

      fixed_embeddings = []
      linked_embeddings = [
          network_units.NamedTensor(indices, 'indices', 1),
          network_units.NamedTensor(features, 'features', 2)
      ]

      with tf.variable_scope('test_scope', reuse=True):
        outputs = network.create(fixed_embeddings, linked_embeddings, None,
                                 None, True, 2)
      gathered = outputs[0]

      # Zeros will be substituted for index -1.
      self.assertAllEqual(
          gathered.eval(),
          [[2.0, 2.5],   # gathered from 1
           [3.0, 3.5],   # gathered from 2
           [1.0, 1.5],   # gathered from 0
           [0.0, 0.0],   # gathered from -1
           [4.0, 4.5],   # gathered from 0
           [0.0, 0.0]])  # gathered from -1
  def testTrainablePadding(self):
    self._component.spec.network_unit.parameters['trainable_padding'] = 'true'
    with tf.Graph().as_default(), self.test_session():
      with tf.variable_scope('test_scope'):
        network = network_units.GatherNetwork(self._component)

      # Construct a batch of two items with 3 and 2 steps, respectively.
      indices = tf.constant(
          [[1], [2], [0],     # item 1
           [-1], [0], [-1]],  # item 2
          dtype=tf.int64)
      features = tf.constant(
          [[1.0, 1.5], [2.0, 2.5], [3.0, 3.5],   # item 1
           [4.0, 4.5], [5.0, 5.5], [6.0, 6.5]],  # item 2
          dtype=tf.float32)

      fixed_embeddings = []
      linked_embeddings = [
          network_units.NamedTensor(indices, 'indices', 1),
          network_units.NamedTensor(features, 'features', 2)
      ]

      with tf.variable_scope('test_scope', reuse=True):
        outputs = network.create(fixed_embeddings, linked_embeddings, None,
                                 None, True, 2)
      gathered = outputs[0]

      # Ensure that the padding variable is initialized.
      tf.global_variables_initializer().run()

      # Randomly-initialized padding will be substituted for index -1.
      self.assertAllEqual(gathered[0].eval(), [2.0, 2.5])  # gathered from 1
      self.assertAllEqual(gathered[1].eval(), [3.0, 3.5])  # gathered from 2
      self.assertAllEqual(gathered[2].eval(), [1.0, 1.5])  # gathered from 0
      tf.logging.info('padding = %s', gathered[3].eval())  # gathered from -1
      self.assertAllEqual(gathered[4].eval(), [4.0, 4.5])  # gathered from 0
      tf.logging.info('padding = %s', gathered[5].eval())  # gathered from -1

      # Though random, the padding must be identical.
      self.assertAllEqual(gathered[3].eval(), gathered[5].eval())
class IdentityInitializerTest(test_util.TensorFlowTestCase):

  def IdentityInitializerHelper(self, shape, expected, divisor=1.0, std=1e-4):
"""Tests identity initialization by comparing expected to actual array.
Tests the given expected array against the result of calling
network_units.add_var_initialized() with the given params and
init_type='identity'.
Args:
shape: shape of the array
expected: expected contents of the array to initialize
divisor: numerator for identity initialization where the last two dims
of the array are not equal; should divide both of the last two dims
std: standard deviation for random normal samples
"""
    with tf.Graph().as_default(), self.test_session() as session:
      np.random.seed(4)
      tensor = network_units.add_var_initialized(
          'tensor', shape, 'identity', divisor=divisor, stddev=std)
      session.run(tf.global_variables_initializer())
      actual = session.run(tensor)
    self.assertAllClose(actual, expected, 1e-8, 1e-8)
  def IdentityInitializerSquareHelper(self, shape, middles):
"""Tests identity initialization when last two dims are equal.
When the last two dims of the array are equal, identity initialization
should simply set the center matrix in the last two dimensions to the
identity, with all other entries set to zero.
Args:
shape: shape of the array to initialize
middles: indices into the middle of all axes except the last two. It
must be the case that len(middles) == len(shape) - 2.
"""
    expected = np.zeros(shape, dtype='float32')
    expected[[[m] for m in middles]] = np.eye(shape[-1])
    self.IdentityInitializerHelper(shape, expected)
  def testIdentityInitializerSquareRank2(self):
    shape = (3, 3)
    expected = np.eye(shape[-1]).astype('float32')
    self.IdentityInitializerHelper(shape, expected)

  def testIdentityInitializerSquareRank3(self):
    shape = (2, 4, 4)
    middles = [1]
    self.IdentityInitializerSquareHelper(shape, middles)

  def testIdentityInitializerSquareRank4(self):
    shape = (2, 3, 4, 4)
    middles = [1, 1]
    self.IdentityInitializerSquareHelper(shape, middles)

  def testIdentityInitializerSquareRank5(self):
    shape = (2, 3, 4, 5, 5)
    middles = [1, 1, 2]
    self.IdentityInitializerSquareHelper(shape, middles)
  def testIdentityInitializerNonSquareRank2FirstDimLarger(self):
    divisor = 3.
    std = 1e-3
    shape = (6, 3)
    m = divisor / shape[-1]
    expected = [[m, 4.99951362e-04, -9.95908980e-04],
                [m, -4.18301526e-04, -1.58457726e-03],
                [-6.47706795e-04, m, 3.32250027e-04],
                [-1.14747661e-03, m, -8.79869258e-05],
                [4.25072387e-04, 3.32253141e-04, m],
                [3.50997143e-04, -6.06887275e-04, m]]
    self.IdentityInitializerHelper(shape, expected, divisor, std)

  def testIdentityInitializerNonSquareRank2FirstDimSmaller(self):
    divisor = 2.
    std = 1e-3
    shape = (2, 4)
    m = divisor / shape[-1]
    expected = [[m, m, -9.95908980e-04, 6.93598529e-04],
                [-4.18301526e-04, -1.58457726e-03, m, m]]
    self.IdentityInitializerHelper(shape, expected, divisor, std)

  def testIdentityInitializerNonSquareRank3(self):
    divisor = 2.
    std = 1e-3
    shape = (2, 2, 6)
    m = divisor / shape[-1]
    expected = [[[5.05617063e-05, 4.99951362e-04, -9.95908980e-04,
                  6.93598529e-04, -4.18301526e-04, -1.58457726e-03],
                 [-6.47706795e-04, 5.98575163e-04, 3.32250027e-04,
                  -1.14747661e-03, 6.18669670e-04, -8.79869258e-05]],
                [[m, m, m, 3.50997143e-04, -6.06887275e-04, 1.54697930e-03],
                 [7.23341596e-04, 4.61355667e-05, -9.82991653e-04, m, m, m]]]
    self.IdentityInitializerHelper(shape, expected, divisor, std)

  def testIdentityInitializerNonSquareRank4(self):
    divisor = 2.
    std = 1e-3
    shape = (2, 3, 2, 8)
    m = divisor / float(shape[-1])
    expected = [
        [[[5.05617063e-05, 4.99951362e-04, -9.95908980e-04, 6.93598529e-04,
           -4.18301526e-04, -1.58457726e-03, -6.47706795e-04, 5.98575163e-04],
          [3.32250027e-04, -1.14747661e-03, 6.18669670e-04, -8.79869258e-05,
           4.25072387e-04, 3.32253141e-04, -1.15681626e-03, 3.50997143e-04]],
         [[-6.06887275e-04, 1.54697930e-03, 7.23341596e-04, 4.61355667e-05,
           -9.82991653e-04, 5.44327377e-05, 1.59892938e-04, -1.20894820e-03],
          [2.22336012e-03, 3.94295203e-04, 1.69235771e-03, -1.11281220e-03,
           1.63574750e-03, -1.36096554e-03, -6.51225855e-04, 5.42451337e-04]],
         [[4.80062481e-05, -2.35807360e-03, -1.10558409e-03, 8.37836356e-04,
           2.08787085e-03, 9.14840959e-04, -2.76203355e-04, 7.96511886e-04],
          [-1.14379858e-03, 5.09919773e-04, -1.34746032e-03, -9.36010019e-06,
           -1.30704633e-04, 8.02086608e-04, -3.02963977e-04, 1.20200263e-03]]],
        [[[-1.96745284e-04, 8.36528721e-04, 7.86602264e-04, -1.84087583e-03,
           3.75474883e-05, 3.59280530e-05, -7.78739923e-04, 1.79410708e-04],
          [-1.45553437e-03, 5.56185201e-04, 5.09778853e-04, 3.00445536e-04,
           2.47658417e-03, 3.52343399e-04, 6.74710027e-05, -7.32264714e-04]],
         [[m, m, m, m, 1.58469542e-04, 1.99008291e-03, 1.16418756e-03,
           2.42660157e-04],
          [1.37992005e-03, -5.45587063e-05, 7.95233937e-04, 1.90899627e-05,
           m, m, m, m]],
         [[-1.09712186e-03, -5.28196048e-04, -2.37977528e-03, -6.07683673e-04,
           -1.07529014e-03, 2.02240516e-03, -5.64875314e-04, -1.54292909e-03],
          [8.70841788e-04, -1.75210531e-04, 4.86030076e-05, 1.88646198e-04,
           2.09313483e-04, -3.74444906e-04, 9.54698597e-04, 5.23247640e-04]]]
    ]
    self.IdentityInitializerHelper(shape, expected, divisor, std)


if __name__ == '__main__':
  googletest.main()
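Aside: the square-shape tests above pin down what identity initialization should produce when the last two dims are equal: the identity matrix in the "center" slice of the leading axes and zeros elsewhere. A NumPy sketch inferred from those expectations (not the add_var_initialized implementation itself):

import numpy as np

def identity_like(shape):
  out = np.zeros(shape, dtype='float32')
  middle = tuple(d // 2 for d in shape[:-2])  # center index of each leading axis
  out[middle] = np.eye(shape[-1])
  return out

w = identity_like((3, 4, 4))
assert np.array_equal(w[1], np.eye(4))    # center slice is the identity
assert not w[0].any() and not w[2].any()  # other slices stay zero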
research/syntaxnet/dragnn/python/perf_test_data/master-spec
0 → 100644
View file @
4364390a
component {
name: "convnet"
transition_system {
registered_name: "shift-only"
parameters {
key: "parser_skip_deterministic"
value: "false"
}
}
resource {
name: "lexifuse-repository"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexifuse-repository/repository"
file_format: "repository"
record_format: "entity"
}
}
resource {
name: "brain-parser-model"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/brain-parser-model"
file_format: "model"
record_format: ""
}
}
resource {
name: "transition-system-data"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/transition-system-data"
file_format: "model"
record_format: ""
}
}
resource {
name: "words-embedding-input"
part {
file_pattern: "/readahead/512M/cns/lg-d/home/saft/corpora/word-embeddings/en/word2vec/1billion/word2vec-embedding-bi-true-32.sst"
file_format: "sstable"
record_format: "dist_belief.TokenEmbedding"
}
}
resource {
name: "words-vocab-input"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.model-init/vocab"
file_format: "text"
record_format: ""
}
}
resource {
name: "component-builder-module"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.convnet.component-builder-module/module-spec"
file_format: "pbtxt"
record_format: ""
}
}
fixed_feature {
name: "char_ngram"
fml: "input.token.lexifuse-char-ngram"
embedding_dim: 16
vocabulary_size: 16500
size: 1
predicate_map: "hashed"
}
fixed_feature {
name: "words"
fml: "input.word"
embedding_dim: 32
vocabulary_size: 39395
size: 1
predicate_map: "hashed"
}
network_unit {
registered_name: "IdentityNetwork"
}
backend {
registered_name: "ParserComponent"
}
num_actions: 1
attention_component: ""
component_builder {
registered_name: "components.common.dragnn.python.conv_component.ConvComponentBuilder"
parameters {
key: "depths"
value: "48,128"
}
parameters {
key: "output_dims"
value: "45"
}
parameters {
key: "widths"
value: "7"
}
}
training_beam_size: 1
inference_beam_size: 1
}
component {
name: "tagger"
transition_system {
registered_name: "tagger"
parameters {
key: "parser_skip_deterministic"
value: "false"
}
}
resource {
name: "tag-map"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexicon/tag-map"
file_format: "text"
record_format: ""
}
}
resource {
name: "lexifuse-repository"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/lexifuse.lexifuse-repository/repository"
file_format: "repository"
record_format: "entity"
}
}
resource {
name: "brain-parser-model"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.model-init/brain-parser-model"
file_format: "model"
record_format: ""
}
}
resource {
name: "transition-system-data"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.model-init/transition-system-data"
file_format: "model"
record_format: ""
}
}
resource {
name: "component-builder-module"
part {
file_pattern: "/cns/lg-d/home/chrisalberti/e/conv/dragnn-parser.tagger.component-builder-module/module-spec"
file_format: "pbtxt"
record_format: ""
}
}
linked_feature {
name: "convnet"
fml: "input.focus"
embedding_dim: -1
size: 1
source_component: "convnet"
source_translator: "identity"
source_layer: "conv0_logits"
}
network_unit {
registered_name: "IdentityNetwork"
}
backend {
registered_name: "ParserComponent"
}
num_actions: 45
attention_component: ""
component_builder {
registered_name: "bulk_component.BulkAnnotatorComponentBuilder"
}
training_beam_size: 1
inference_beam_size: 1
}
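Aside: a master-spec file like the one above is ordinary protobuf text format; a hedged sketch of loading it into a MasterSpec proto (the relative path is a placeholder):

import tensorflow as tf
from google.protobuf import text_format
from dragnn.protos import spec_pb2

def load_master_spec(path):
  spec = spec_pb2.MasterSpec()
  with tf.gfile.GFile(path) as f:
    text_format.Parse(f.read(), spec)
  return spec

spec = load_master_spec('perf_test_data/master-spec')
print([component.name for component in spec.component])  # ['convnet', 'tagger']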
research/syntaxnet/dragnn/python/perf_test_data/params
0 → 100644
View file @
4364390a
File added
research/syntaxnet/dragnn/python/perf_test_data/sample_docs.pickle
0 → 100644
View file @
4364390a
File added
research/syntaxnet/dragnn/python/render_spec_with_graphviz_test.py
View file @
4364390a
...
...
@@ -28,7 +28,7 @@ from dragnn.python import spec_builder
def _make_basic_master_spec():
  """Constructs a simple spec.

  Modified version of dragnn/tools/parser_trainer.py
Returns:
spec_pb2.MasterSpec instance.
...
...
research/syntaxnet/dragnn/python/sentence_io.py
View file @
4364390a
...
...
@@ -18,21 +18,26 @@ import tensorflow as tf
from syntaxnet.ops import gen_parser_ops


class FormatSentenceReader(object):
  """A reader for formatted files, with optional projectivizing."""

  def __init__(self,
               filepath,
               record_format,
               batch_size=32,
               check_well_formed=False,
               projectivize=False,
               morph_to_pos=False):
    self._graph = tf.Graph()
    self._session = tf.Session(graph=self._graph)
    task_context_str = """
          input {
            name: 'documents'
            record_format: '%s'
            Part {
              file_pattern: '%s'
            }
          }""" % (record_format, filepath)
    if morph_to_pos:
      task_context_str += """
          Parameter {
...
...
@@ -51,7 +56,8 @@ class ConllSentenceReader(object):
    with self._graph.as_default():
      self._source, self._is_last = gen_parser_ops.document_source(
          task_context_str=task_context_str, batch_size=batch_size)
      if check_well_formed:
        self._source = gen_parser_ops.well_formed_filter(self._source)
      if projectivize:
        self._source = gen_parser_ops.projectivize_filter(self._source)
...
...
@@ -77,3 +83,20 @@ class ConllSentenceReader(object):
        break
    tf.logging.info('Read %d sentences.' % len(corpus))
    return corpus


class ConllSentenceReader(FormatSentenceReader):
  """A sentence reader that uses an underlying 'conll-sentence' reader."""

  def __init__(self, filepath, batch_size=32, projectivize=False,
               morph_to_pos=False):
    super(ConllSentenceReader, self).__init__(
        filepath,
        'conll-sentence',
        check_well_formed=True,
        batch_size=batch_size,
        projectivize=projectivize,
        morph_to_pos=morph_to_pos)
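Aside: a hedged usage sketch for the reader classes above (not part of this commit). The corpus path is a placeholder, and the full-corpus read method is assumed to be corpus(), whose tail is shown in the hunk above:

from dragnn.python import sentence_io

# Read a projectivized CoNLL corpus in batches of 32 sentences.
reader = sentence_io.ConllSentenceReader(
    '/tmp/dev.conll', batch_size=32, projectivize=True)
corpus = reader.corpus()  # assumed method name; returns serialized Sentence protos
print('Read %d sentences' % len(corpus))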
research/syntaxnet/dragnn/python/sentence_io_test.py
View file @
4364390a
...
...
@@ -19,16 +19,19 @@ import tensorflow as tf
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest

from dragnn.python import dragnn_ops
from dragnn.python import sentence_io
from syntaxnet import sentence_pb2

import syntaxnet.load_parser_ops

FLAGS = tf.app.flags.FLAGS


def setUpModule():
  if not hasattr(FLAGS, 'test_srcdir'):
    FLAGS.test_srcdir = ''
  if not hasattr(FLAGS, 'test_tmpdir'):
    FLAGS.test_tmpdir = tf.test.get_temp_dir()


class ConllSentenceReaderTest(test_util.TensorFlowTestCase):
...
...