ModelZoo / ResNet50_tensorflow / Commits / 09d9656f

Unverified commit 09d9656f, authored Jan 13, 2022 by Srihari Humbarwadi, committed by GitHub on Jan 13, 2022.
Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

Parents: ac671306, 49a5706c
Changes: 427
Showing 20 changed files with 940 additions and 448 deletions (+940, -448).
official/nlp/modeling/layers/block_diag_feedforward.py       +166    -0
official/nlp/modeling/layers/block_diag_feedforward_test.py  +119    -0
official/nlp/modeling/layers/dense_einsum.py                   +0  -180
official/nlp/modeling/layers/dense_einsum_test.py              +0  -119
official/nlp/modeling/layers/reuse_transformer_test.py        +13   -13
official/nlp/modeling/layers/rezero_transformer.py            +50    -8
official/nlp/modeling/layers/rezero_transformer_test.py       +20    -2
official/nlp/modeling/layers/text_layers.py                  +141    -1
official/nlp/modeling/layers/text_layers_test.py             +104    -0
official/nlp/modeling/layers/transformer_encoder_block.py      +4    -1
official/nlp/modeling/layers/util.py                          +28    -2
official/nlp/modeling/models/seq2seq_transformer.py            +2    -9
official/nlp/modeling/networks/bert_dense_encoder_test.py     +20   -16
official/nlp/modeling/networks/bert_encoder.py                +32    -5
official/nlp/modeling/networks/funnel_transformer.py          +88   -39
official/nlp/modeling/networks/funnel_transformer_test.py     +18    -9
official/nlp/serving/export_savedmodel.py                     +13    -3
official/nlp/serving/export_savedmodel_util.py                +18   -40
official/nlp/serving/serving_modules.py                       +48    -1
official/nlp/serving/serving_modules_test.py                  +56    -0
official/nlp/modeling/layers/block_diag_feedforward.py (new file, mode 100644; view file @ 09d9656f)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Keras-based gated feedforward layer."""
# pylint: disable=g-classes-have-attributes
from typing import Optional

import tensorflow as tf


class BlockDiagFeedforward(tf.keras.layers.Layer):
  """Block diagonal feedforward layer.

  This layer replaces the weight matrix of the output_dense layer with a block
  diagonal matrix to save layer parameters and FLOPs. A linear mixing layer can
  be added optionally to improve layer expressibility.

  Args:
    intermediate_size: Size of the intermediate layer.
    intermediate_activation: Activation for the intermediate layer.
    dropout: Dropout probability for the output dropout.
    num_blocks: The number of blocks for the block diagonal matrix of the
      output_dense layer.
    apply_mixing: Apply linear mixing if True.
    kernel_initializer: Initializer for dense layer kernels.
    bias_initializer: Initializer for dense layer biases.
    kernel_regularizer: Regularizer for dense layer kernels.
    bias_regularizer: Regularizer for dense layer biases.
    activity_regularizer: Regularizer for dense layer activity.
    kernel_constraint: Constraint for dense layer kernels.
    bias_constraint: Constraint for dense layer biases.
  """

  def __init__(self,
               intermediate_size: int,
               intermediate_activation: str,
               dropout: float,
               num_blocks: int = 1,
               apply_mixing: bool = True,
               kernel_initializer: str = "glorot_uniform",
               bias_initializer: str = "zeros",
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               activity_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               kernel_constraint: Optional[
                   tf.keras.constraints.Constraint] = None,
               bias_constraint: Optional[
                   tf.keras.constraints.Constraint] = None,
               **kwargs):  # pylint: disable=g-doc-args
    super(BlockDiagFeedforward, self).__init__(**kwargs)
    self._intermediate_size = intermediate_size
    self._intermediate_activation = intermediate_activation
    self._dropout = dropout
    self._num_blocks = num_blocks
    self._apply_mixing = apply_mixing

    if intermediate_size % num_blocks != 0:
      raise ValueError("Intermediate_size (%d) isn't a multiple of num_blocks "
                       "(%d)." % (intermediate_size, num_blocks))

    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._activity_regularizer = tf.keras.regularizers.get(
        activity_regularizer)
    self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf.keras.constraints.get(bias_constraint)

  def build(self, input_shape):
    hidden_size = input_shape.as_list()[-1]

    common_kwargs = dict(
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint)

    self._intermediate_dense = tf.keras.layers.experimental.EinsumDense(
        "abc,cde->abde",
        output_shape=(None, self._num_blocks,
                      self._intermediate_size // self._num_blocks),
        bias_axes="de",
        name="intermediate",
        **common_kwargs)

    policy = tf.keras.mixed_precision.global_policy()
    if policy.name == "mixed_bfloat16":
      # bfloat16 causes BERT with the LAMB optimizer to not converge
      # as well, so we use float32.
      policy = tf.float32
    self._intermediate_activation_layer = tf.keras.layers.Activation(
        self._intermediate_activation, dtype=policy)

    self._output_dense = tf.keras.layers.experimental.EinsumDense(
        "abde,deo->abdo",
        output_shape=(None, self._num_blocks,
                      hidden_size // self._num_blocks),
        bias_axes="do",
        name="output",
        **common_kwargs)

    if self._apply_mixing:
      self._output_mixing = tf.keras.layers.experimental.EinsumDense(
          "abdo,de->abeo",
          output_shape=(None, self._num_blocks,
                        hidden_size // self._num_blocks),
          name="output_mixing",
          **common_kwargs)
    self._output_reshape = tf.keras.layers.Reshape((-1, hidden_size))
    self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout)

  def get_config(self):
    config = {
        "intermediate_size": self._intermediate_size,
        "intermediate_activation": self._intermediate_activation,
        "dropout": self._dropout,
        "num_blocks": self._num_blocks,
        "apply_mixing": self._apply_mixing,
        "kernel_initializer":
            tf.keras.initializers.serialize(self._kernel_initializer),
        "bias_initializer":
            tf.keras.initializers.serialize(self._bias_initializer),
        "kernel_regularizer":
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        "bias_regularizer":
            tf.keras.regularizers.serialize(self._bias_regularizer),
        "activity_regularizer":
            tf.keras.regularizers.serialize(self._activity_regularizer),
        "kernel_constraint":
            tf.keras.constraints.serialize(self._kernel_constraint),
        "bias_constraint":
            tf.keras.constraints.serialize(self._bias_constraint)
    }
    base_config = super(BlockDiagFeedforward, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    intermediate_output = self._intermediate_dense(inputs)
    intermediate_output = self._intermediate_activation_layer(
        intermediate_output)
    layer_output = self._output_dense(intermediate_output)
    if self._apply_mixing:
      layer_output = self._output_mixing(layer_output)
    layer_output = self._output_reshape(layer_output)
    layer_output = self._output_dropout(layer_output)
    return layer_output
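For reference, a minimal usage sketch of the new layer; shapes, activation, and block count below are illustrative and not taken from the commit. The layer preserves the input width, so it can stand in for a dense feedforward sublayer as long as intermediate_size is divisible by num_blocks.

import numpy as np
import tensorflow as tf

from official.nlp.modeling.layers import block_diag_feedforward

layer = block_diag_feedforward.BlockDiagFeedforward(
    intermediate_size=512,
    intermediate_activation="gelu",
    dropout=0.1,
    num_blocks=4,       # the output_dense kernel is split into 4 diagonal blocks
    apply_mixing=True)  # optional linear mixing restores cross-block interaction

inputs = tf.keras.Input(shape=(16, 64))   # [batch, sequence, width]
outputs = layer(inputs)                   # width is preserved
model = tf.keras.Model(inputs, outputs)
print(model(np.random.rand(2, 16, 64).astype("float32")).shape)  # (2, 16, 64)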
official/nlp/modeling/layers/block_diag_feedforward_test.py (new file, mode 100644; view file @ 09d9656f)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Keras-based gated feedforward layer."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.keras import keras_parameterized  # pylint: disable=g-direct-tensorflow-import
from official.nlp.modeling.layers import block_diag_feedforward


# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
# guarantees forward compatibility of this code for the V2 switchover.
@keras_parameterized.run_all_keras_modes
class BlockDiagFeedforwardTest(keras_parameterized.TestCase):

  def tearDown(self):
    super(BlockDiagFeedforwardTest, self).tearDown()
    tf.keras.mixed_precision.set_global_policy("float32")

  @parameterized.parameters(
      (1, True, "float32"),
      (1, True, "mixed_float16"),
      (1, False, "float32"),
      (1, False, "mixed_float16"),
      (2, True, "float32"),
      (2, True, "mixed_float16"),
      (2, False, "float32"),
      (2, False, "mixed_float16"),
  )
  def test_layer_creation(self, num_blocks, apply_mixing, dtype):
    tf.keras.mixed_precision.set_global_policy(dtype)
    kwargs = dict(
        intermediate_size=128,
        intermediate_activation="relu",
        dropout=0.1,
        num_blocks=num_blocks,
        apply_mixing=apply_mixing,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros")
    test_layer = block_diag_feedforward.BlockDiagFeedforward(**kwargs)

    sequence_length = 64
    width = 128
    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf.keras.Input(shape=(sequence_length, width))
    output_tensor = test_layer(data_tensor)
    # The default output of a transformer layer should be the same as the input.
    self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list())

  @parameterized.parameters(
      (1, True, "float32"),
      (1, True, "mixed_float16"),
      (1, False, "float32"),
      (1, False, "mixed_float16"),
      (2, True, "float32"),
      (2, True, "mixed_float16"),
      (2, False, "float32"),
      (2, False, "mixed_float16"),
  )
  def test_layer_invocation(self, num_blocks, apply_mixing, dtype):
    tf.keras.mixed_precision.set_global_policy(dtype)
    kwargs = dict(
        intermediate_size=16,
        intermediate_activation="relu",
        dropout=0.1,
        num_blocks=num_blocks,
        apply_mixing=apply_mixing,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros")
    test_layer = block_diag_feedforward.BlockDiagFeedforward(**kwargs)

    sequence_length = 16
    width = 32
    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf.keras.Input(shape=(sequence_length, width))
    output_tensor = test_layer(data_tensor)

    # Create a model from the test layer.
    model = tf.keras.Model(data_tensor, output_tensor)

    # Invoke the model on test data.
    batch_size = 6
    input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, width))
    output_data = model.predict(input_data)
    self.assertEqual(output_data.shape, (batch_size, sequence_length, width))

  def test_get_config(self):
    kwargs = dict(
        intermediate_size=16,
        intermediate_activation="relu",
        dropout=0.1,
        num_blocks=2,
        apply_mixing=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros")
    test_layer = block_diag_feedforward.BlockDiagFeedforward(**kwargs)
    new_layer = block_diag_feedforward.BlockDiagFeedforward.from_config(
        test_layer.get_config())
    self.assertAllEqual(test_layer.get_config(), new_layer.get_config())


if __name__ == "__main__":
  tf.test.main()
official/nlp/modeling/layers/dense_einsum.py (deleted, was mode 100644; view file @ ac671306)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Keras-based einsum layer."""
# pylint: disable=g-classes-have-attributes
import tensorflow as tf

from tensorflow.python.util import deprecation

_CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"]


@tf.keras.utils.register_keras_serializable(package="Text")
class DenseEinsum(tf.keras.layers.Layer):
  """A densely connected layer that uses `tf.einsum` as the backing computation.

  This layer can perform einsum calculations of arbitrary dimensionality.

  Args:
    output_shape: Positive integer or tuple, dimensionality of the output space.
    num_summed_dimensions: The number of dimensions to sum over. Standard 2D
      matmul should use 1, 3D matmul should use 2, and so forth.
    activation: Activation function to use. If you don't specify anything, no
      activation is applied (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation").
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix.
    bias_constraint: Constraint function applied to the bias vector.

  Input shape:
    N-D tensor with shape: `(batch_size, ..., input_dim)`. The most common
    situation would be a 2D input with shape `(batch_size, input_dim)`.

  Output shape:
    N-D tensor with shape: `(batch_size, ..., units)`. For instance, for a 2D
    input with shape `(batch_size, input_dim)`, the output would have shape
    `(batch_size, units)`.
  """

  @deprecation.deprecated(
      None, "DenseEinsum is deprecated. Please use "
      "tf.keras.experimental.EinsumDense layer instead.")
  def __init__(self,
               output_shape,
               num_summed_dimensions=1,
               activation=None,
               use_bias=True,
               kernel_initializer="glorot_uniform",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
    super(DenseEinsum, self).__init__(**kwargs)
    self._output_shape = output_shape if isinstance(
        output_shape, (list, tuple)) else (output_shape,)
    self._activation = tf.keras.activations.get(activation)
    self._use_bias = use_bias
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf.keras.constraints.get(bias_constraint)
    self._num_summed_dimensions = num_summed_dimensions
    self._einsum_string = None

  def _build_einsum_string(self, free_input_dims, bound_dims, output_dims):
    input_str = ""
    kernel_str = ""
    output_str = ""
    letter_offset = 0
    for i in range(free_input_dims):
      char = _CHR_IDX[i + letter_offset]
      input_str += char
      output_str += char

    letter_offset += free_input_dims
    for i in range(bound_dims):
      char = _CHR_IDX[i + letter_offset]
      input_str += char
      kernel_str += char

    letter_offset += bound_dims
    for i in range(output_dims):
      char = _CHR_IDX[i + letter_offset]
      kernel_str += char
      output_str += char

    return input_str + "," + kernel_str + "->" + output_str

  def build(self, input_shape):
    input_shape = tf.TensorShape(input_shape)
    input_rank = input_shape.rank
    free_input_dims = input_rank - self._num_summed_dimensions
    output_dims = len(self._output_shape)

    self._einsum_string = self._build_einsum_string(free_input_dims,
                                                    self._num_summed_dimensions,
                                                    output_dims)

    # This is only saved for testing purposes.
    self._kernel_shape = (
        input_shape[free_input_dims:].concatenate(self._output_shape))

    self._kernel = self.add_weight(
        "kernel",
        shape=self._kernel_shape,
        initializer=self._kernel_initializer,
        regularizer=self._kernel_regularizer,
        constraint=self._kernel_constraint,
        dtype=self.dtype,
        trainable=True)
    if self._use_bias:
      self._bias = self.add_weight(
          "bias",
          shape=self._output_shape,
          initializer=self._bias_initializer,
          regularizer=self._bias_regularizer,
          constraint=self._bias_constraint,
          dtype=self.dtype,
          trainable=True)
    else:
      self._bias = None
    super(DenseEinsum, self).build(input_shape)

  def get_config(self):
    config = {
        "output_shape": self._output_shape,
        "num_summed_dimensions": self._num_summed_dimensions,
        "activation": tf.keras.activations.serialize(self._activation),
        "use_bias": self._use_bias,
        "kernel_initializer":
            tf.keras.initializers.serialize(self._kernel_initializer),
        "bias_initializer":
            tf.keras.initializers.serialize(self._bias_initializer),
        "kernel_regularizer":
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        "bias_regularizer":
            tf.keras.regularizers.serialize(self._bias_regularizer),
        "activity_regularizer":
            tf.keras.regularizers.serialize(self._activity_regularizer),
        "kernel_constraint":
            tf.keras.constraints.serialize(self._kernel_constraint),
        "bias_constraint":
            tf.keras.constraints.serialize(self._bias_constraint)
    }
    base_config = super(DenseEinsum, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    ret = tf.einsum(self._einsum_string, inputs, self._kernel)
    if self._use_bias:
      ret += self._bias
    if self._activation is not None:
      ret = self._activation(ret)
    return ret
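The deleted layer had already been marked deprecated in favor of the stock Keras einsum layer named in its deprecation message. A minimal migration sketch for the common 3D case, with illustrative shapes: DenseEinsum(output_shape=(64,), num_summed_dimensions=1) on a [batch, seq, 80] input built the equation "abc,cd->abd" with an (80, 64) kernel, which EinsumDense expresses directly.

import tensorflow as tf

# Stock Keras replacement for the deprecated DenseEinsum in the 3D case.
replacement = tf.keras.layers.experimental.EinsumDense(
    "abc,cd->abd",
    output_shape=(None, 64),  # None leaves the sequence axis unspecified
    bias_axes="d")            # one bias per output feature, as use_bias=True did

x = tf.keras.Input(shape=(None, 80))
y = replacement(x)            # shape (None, None, 64), matching DenseEinsum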
official/nlp/modeling/layers/dense_einsum_test.py (deleted, was mode 100644; view file @ ac671306)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Keras-based einsum layer."""
import numpy as np
import tensorflow as tf

from tensorflow.python.keras import keras_parameterized  # pylint: disable=g-direct-tensorflow-import
from official.nlp.modeling.layers import dense_einsum


# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
# guarantees forward compatibility of this code for the V2 switchover.
@keras_parameterized.run_all_keras_modes
class DenseEinsumLayer(keras_parameterized.TestCase):

  def test_3D_einsum_with_two_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,), num_summed_dimensions=2)
    # Create a 4-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 40, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abcd,cde->abe")
    self.assertEqual(test_layer._kernel_shape, (40, 80, 64))

  def test_3D_einsum_with_one_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64, 32), num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cde->abde")
    self.assertEqual(test_layer._kernel_shape, (80, 64, 32))

  def test_2D_einsum_with_one_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,), num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cd->abd")
    self.assertEqual(test_layer._kernel_shape, (80, 64))

  def test_bias_term_can_be_disabled(self):
    # A layer created using the bias should have two weights.
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, use_bias=True)
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(2, len(test_layer.get_weights()))

    # A layer created without the bias should have only one weight.
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, use_bias=False)
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(1, len(test_layer.get_weights()))

  def test_activation(self):
    # Create a model that does not use an activation.
    no_activation_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, activation=None)
    input_tensor = tf.keras.Input(shape=(None, 80))
    output_tensor = no_activation_layer(input_tensor)
    no_activation_model = tf.keras.Model(input_tensor, output_tensor)

    # Create a model that uses a softmax activation.
    activation_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, activation="softmax")
    input_tensor = tf.keras.Input(shape=(None, 80))
    output_tensor = activation_layer(input_tensor)
    activation_model = tf.keras.Model(input_tensor, output_tensor)

    # Make sure the models' weights are identical.
    activation_model.set_weights(no_activation_model.get_weights())

    # Predict using each model on the same input data. The output should be
    # different, since one is using a softmax - even though the models' weights
    # are the same.
    input_values = 10 * np.random.random_sample((10, 4, 80))
    non_activated_data = no_activation_model.predict(input_values)
    activated_data = activation_model.predict(input_values)
    self.assertNotAllClose(activated_data, non_activated_data)

  def test_non_iterable_output_shape(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cd->abd")
    self.assertEqual(test_layer._kernel_shape, (80, 64))

  def test_with_explicit_initializer(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,),
        num_summed_dimensions=2,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
    # Create a 4-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 40, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abcd,cde->abe")
    self.assertEqual(test_layer._kernel_shape, (40, 80, 64))


if __name__ == "__main__":
  tf.test.main()
official/nlp/modeling/layers/reuse_transformer_test.py (view file @ 09d9656f)
@@ -68,7 +68,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     _ = model.predict(input_data)
@@ -89,7 +89,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
@@ -104,7 +104,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -121,7 +121,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_output_range_with_relative_pe(self, transformer_cls):
     test_layer = transformer_cls(
@@ -131,7 +131,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -149,7 +149,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_output_range_without_mask(self, transformer_cls):
     test_layer = transformer_cls(
@@ -159,7 +159,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     output_tensor, _ = test_layer(input_data)
@@ -175,7 +175,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer(input_data)
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_output_range_with_pre_norm(self, transformer_cls):
     test_layer = transformer_cls(
@@ -185,7 +185,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))
@@ -203,7 +203,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_invocation_with_float16_dtype(self, transformer_cls):
     tf.keras.mixed_precision.set_global_policy('mixed_float16')
@@ -223,7 +223,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
@@ -368,7 +368,7 @@ class ReuseTransformerArgumentTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
@@ -404,7 +404,7 @@ class ReuseTransformerArgumentTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)
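These hunks drop the 10x scaling of the random inputs and loosen the comparison from atol=5e-5, rtol=0.003 to atol=0.002, rtol=0.01. Under the usual numpy/assertAllClose rule, a value passes when |actual - expected| <= atol + rtol * |expected|; a small worked check of what the new tolerances admit (the numbers below are illustrative, not from the tests):

import numpy as np

expected = 0.5
# Old error budget: 5e-5 + 0.003 * 0.5 ~= 0.00155; new budget: 0.002 + 0.01 * 0.5 = 0.007.
print(np.isclose(0.503, expected, rtol=0.003, atol=5e-5))  # False under the old tolerances
print(np.isclose(0.503, expected, rtol=0.01, atol=0.002))  # True under the new tolerances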
official/nlp/modeling/layers/rezero_transformer.py (view file @ 09d9656f)
@@ -18,6 +18,8 @@
 import gin
 import tensorflow as tf

+from official.nlp.modeling.layers import util
+

 @tf.keras.utils.register_keras_serializable(package="Text")
 @gin.configurable
@@ -45,6 +47,7 @@ class ReZeroTransformer(tf.keras.layers.Layer):
     kernel_constraint: Constraint for dense layer kernels.
     bias_constraint: Constraint for dense layer kernels.
     use_layer_norm: If add layer_norm on top of the ReZero.
+    share_rezero: If attention layer and FFN layer share the same alpha.
   """

   def __init__(self,
@@ -62,7 +65,14 @@ class ReZeroTransformer(tf.keras.layers.Layer):
                kernel_constraint=None,
                bias_constraint=None,
                use_layer_norm=False,
+               share_rezero=True,
                **kwargs):
+    # attention_dropout will override attention_dropout_rate.
+    # This is to unify the input params with TransformerEncoderBlock.
+    attention_dropout_rate = kwargs.pop("attention_dropout",
+                                        attention_dropout_rate)
+    dropout_rate = kwargs.pop("output_dropout", dropout_rate)
+    util.filter_kwargs(kwargs)
     super(ReZeroTransformer, self).__init__(**kwargs)

     self._num_heads = num_attention_heads
@@ -78,10 +88,18 @@ class ReZeroTransformer(tf.keras.layers.Layer):
     self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
     self._bias_constraint = tf.keras.constraints.get(bias_constraint)
     self._use_layer_norm = use_layer_norm
+    self._share_rezero = share_rezero

   def build(self, input_shape):
-    input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape
-    input_tensor_shape = tf.TensorShape(input_tensor)
+    if isinstance(input_shape, tf.TensorShape):
+      input_tensor_shape = input_shape
+    elif isinstance(input_shape, (list, tuple)):
+      input_tensor_shape = tf.TensorShape(input_shape[0])
+    else:
+      raise ValueError(
+          "The type of input shape argument is not supported, got: %s" %
+          type(input_shape))
     if len(input_tensor_shape.as_list()) != 3:
       raise ValueError("TransformerLayer expects a three-dimensional input of "
                        "shape [batch, sequence, width].")
@@ -158,6 +176,15 @@ class ReZeroTransformer(tf.keras.layers.Layer):
         trainable=True,
         dtype=tf.float32)
+    if self._share_rezero:
+      self._rezero_a_ffn = self._rezero_a
+    else:
+      self._rezero_a_ffn = self.add_weight(
+          name="rezero_alpha_ffn",
+          initializer=tf.keras.initializers.Zeros(),
+          trainable=True,
+          dtype=tf.float32)

     super(ReZeroTransformer, self).build(input_shape)

   def get_config(self):
@@ -176,6 +203,8 @@ class ReZeroTransformer(tf.keras.layers.Layer):
             self._output_range,
         "use_layer_norm":
             self._use_layer_norm,
+        "share_rezero":
+            self._share_rezero,
         "kernel_initializer":
             tf.keras.initializers.serialize(self._kernel_initializer),
         "bias_initializer":
@@ -196,21 +225,34 @@ class ReZeroTransformer(tf.keras.layers.Layer):
   def reset_rezero(self):
     self._rezero_a.assign(0.)
+    if not self._share_rezero:
+      self._rezero_a_ffn.assign(0.)

   def call(self, inputs):
-    if isinstance(inputs, (list, tuple)) and len(inputs) == 2:
-      input_tensor, attention_mask = inputs
+    if isinstance(inputs, (list, tuple)):
+      if len(inputs) == 2:
+        input_tensor, attention_mask = inputs
+        key_value = None
+      elif len(inputs) == 3:
+        input_tensor, key_value, attention_mask = inputs
+      else:
+        raise ValueError("Unexpected inputs to %s with length at %d" %
+                         (self.__class__, len(inputs)))
     else:
-      input_tensor, attention_mask = (inputs, None)
+      input_tensor, key_value, attention_mask = (inputs, None, None)

     if self._output_range:
       target_tensor = input_tensor[:, 0:self._output_range, :]
-      attention_mask = attention_mask[:, 0:self._output_range, :]
+      if attention_mask is not None:
+        attention_mask = attention_mask[:, 0:self._output_range, :]
     else:
       target_tensor = input_tensor
+    if key_value is None:
+      key_value = input_tensor

     attention_output = self._attention_layer(
-        query=target_tensor, value=input_tensor, attention_mask=attention_mask)
+        query=target_tensor, value=key_value, attention_mask=attention_mask)
     attention_output = self._attention_dropout(attention_output)
     attention_output = target_tensor + self._rezero_a * attention_output
     if self._use_layer_norm:
@@ -225,7 +267,7 @@ class ReZeroTransformer(tf.keras.layers.Layer):
     layer_output = self._output_dropout(layer_output)
     # During mixed precision training, attention_output is from layer norm and
     # is always fp32 for now. Cast layer_output to fp32 for the subsequent add.
-    layer_output = attention_output + tf.cast(self._rezero_a * layer_output,
-                                              tf.float32)
+    layer_output = attention_output + tf.cast(self._rezero_a_ffn * layer_output,
+                                              tf.float32)
     if self._use_layer_norm:
       layer_output = self._output_layer_norm(layer_output)
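With these changes ReZeroTransformer accepts either [query, mask] or [query, key_value, mask] inputs and can keep separate ReZero alphas for the attention and feedforward residuals. A minimal sketch using the parameter names visible in the diff; the tensor shapes are illustrative:

import tensorflow as tf

from official.nlp.modeling.layers import rezero_transformer

layer = rezero_transformer.ReZeroTransformer(
    num_attention_heads=2,
    intermediate_size=128,
    intermediate_activation="relu",
    share_rezero=False)           # separate alpha for the FFN residual branch

query = tf.zeros([2, 4, 16])      # [batch, target_seq, width]
key_value = tf.zeros([2, 8, 16])  # [batch, source_seq, width]
mask = tf.zeros([2, 4, 8])        # [batch, target_seq, source_seq]
output = layer([query, key_value, mask])
print(output.shape)               # (2, 4, 16): output follows the query shape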
official/nlp/modeling/layers/rezero_transformer_test.py (view file @ 09d9656f)
@@ -14,6 +14,7 @@
 """Tests for Keras-based rezero-transformer block layer."""

+from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
@@ -30,12 +31,15 @@ class TransformerWithReZeroLayerTest(keras_parameterized.TestCase):
     super(TransformerWithReZeroLayerTest, self).tearDown()
     tf.keras.mixed_precision.set_global_policy('float32')

-  def test_layer_invocation_with_float16_dtype(self):
+  @parameterized.named_parameters(('no_share_attn_ffn', False),
+                                  ('share_attn_ffn', True))
+  def test_layer_invocation_with_float16_dtype(self, share_rezero):
     tf.keras.mixed_precision.set_global_policy('mixed_float16')
     test_layer = rezero_transformer.ReZeroTransformer(
         num_attention_heads=10,
         intermediate_size=2048,
-        intermediate_activation='relu')
+        intermediate_activation='relu',
+        share_rezero=share_rezero)
     sequence_length = 21
     width = 80

     # Create a 3-dimensional input (the first dimension is implicit).
@@ -124,6 +128,20 @@ class TransformerWithReZeroLayerTest(keras_parameterized.TestCase):
     new_output_tensor = new_layer([input_data, mask_data])
     self.assertAllClose(new_output_tensor, output_tensor[:, 0:1, :])

+  def test_separate_qkv(self):
+    test_layer = rezero_transformer.ReZeroTransformer(
+        num_attention_heads=2,
+        intermediate_size=128,
+        intermediate_activation='relu',
+        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
+    # Forward path.
+    q_tensor = tf.zeros([2, 4, 16], dtype=tf.float32)
+    kv_tensor = tf.zeros([2, 8, 16], dtype=tf.float32)
+    dummy_mask = tf.zeros([2, 4, 8], dtype=tf.float32)
+    inputs = [q_tensor, kv_tensor, dummy_mask]
+    output = test_layer(inputs)
+    self.assertEqual(output.shape, q_tensor.shape)
+

 if __name__ == '__main__':
   tf.test.main()
official/nlp/modeling/layers/text_layers.py (view file @ 09d9656f)
@@ -13,18 +13,22 @@
 # limitations under the License.
 """Keras Layers for BERT-specific preprocessing."""
+# pylint: disable=g-import-not-at-top
 from typing import Any, Dict, List, Optional, Union

 from absl import logging
 import tensorflow as tf

 try:
-  import tensorflow_text as text  # pylint: disable=g-import-not-at-top
+  import tensorflow_text as text
+  from tensorflow_text.python.ops import bert_tokenizer
 except ImportError:
   text = None
+  bert_tokenizer = None
 except tf.errors.NotFoundError as e:
   logging.warn("Encountered error when importing tensorflow_text: %s", e)
   text = None
+  bert_tokenizer = None


 def _check_if_tf_text_installed():
@@ -587,3 +591,139 @@ class BertPackInputs(tf.keras.layers.Layer):
     return dict(input_word_ids=_reshape(input_word_ids),
                 input_mask=_reshape(input_mask),
                 input_type_ids=_reshape(input_type_ids))
+
+
+class FastWordpieceBertTokenizer(tf.keras.layers.Layer):
+  """A bert tokenizer keras layer using text.FastWordpieceTokenizer.
+
+  See details: "Fast WordPiece Tokenization" (https://arxiv.org/abs/2012.15524)
+  """
+
+  def __init__(self,
+               *,
+               vocab_file: str,
+               lower_case: bool,
+               tokenize_with_offsets: bool = False,
+               **kwargs):
+    """Initializes a FastWordpieceBertTokenizer layer.
+
+    Args:
+      vocab_file: A Python string with the path of the vocabulary file. This is
+        a text file with newline-separated wordpiece tokens. This layer loads
+        a list of tokens from it to create text.FastWordpieceTokenizer.
+      lower_case: A Python boolean forwarded to text.BasicTokenizer. If true,
+        input text is converted to lower case (where applicable) before
+        tokenization. This must be set to match the way in which the vocab_file
+        was created.
+      tokenize_with_offsets: A Python boolean. If true, this layer calls
+        FastWordpieceTokenizer.tokenize_with_offsets() instead of plain
+        .tokenize() and outputs a triple of (tokens, start_offsets,
+        limit_offsets) instead of just tokens.
+      **kwargs: standard arguments to Layer().
+    """
+    super().__init__(**kwargs)
+    logging.info("Initialize a FastWordpieceBertTokenizer.")
+    self.tokenize_with_offsets = tokenize_with_offsets
+    self._basic_tokenizer = bert_tokenizer.BasicTokenizer(lower_case=lower_case)
+    # Read the vocab file into a list of tokens to create `fast_wp_tokenizer`.
+    self._vocab = [line.rstrip() for line in tf.io.gfile.GFile(vocab_file)]
+    self._fast_wp_tokenizer = text.FastWordpieceTokenizer(
+        vocab=self._vocab, token_out_type=tf.int32, no_pretokenization=True)
+    self._special_tokens_dict = self._create_special_tokens_dict()
+
+  @property
+  def vocab_size(self):
+    return len(self._vocab)
+
+  def get_config(self):
+    # Skip in tf.saved_model.save(); fail if called directly.
+    # We cannot just put the original, user-supplied vocab file name into
+    # the config, because the path has to change as the SavedModel is copied
+    # around.
+    raise NotImplementedError("Not implemented yet.")
+
+  def get_special_tokens_dict(self):
+    """Returns dict of token ids, keyed by standard names for their purpose.
+
+    Returns:
+      A dict from Python strings to Python integers. Each key is a standard
+      name for a special token describing its use. (For example, "padding_id"
+      is what BERT traditionally calls "[PAD]" but others may call "<pad>".)
+      The corresponding value is the integer token id. If a special token
+      is not found, its entry is omitted from the dict.
+
+      The supported keys and tokens are:
+        * start_of_sequence_id: looked up from "[CLS]"
+        * end_of_segment_id: looked up from "[SEP]"
+        * padding_id: looked up from "[PAD]"
+        * mask_id: looked up from "[MASK]"
+        * vocab_size: one past the largest token id used
+    """
+    return self._special_tokens_dict
+
+  def _create_special_tokens_dict(self):
+    """Creates dict of token ids, keyed by standard names for their purpose."""
+    special_tokens = {"vocab_size": self.vocab_size}
+
+    def add_special_token(key, token):
+      try:
+        token_id = self._vocab.index(token)
+        special_tokens[key] = token_id
+      except ValueError:
+        # Similar as nlp.modeling.layers.BertTokenizer, if a special token
+        # is not found, its entry is omitted from the dict.
+        logging.warning("Could not find %s as token \"%s\" in vocab file", key,
+                        token)
+
+    add_special_token("start_of_sequence_id", "[CLS]")
+    add_special_token("end_of_segment_id", "[SEP]")
+    add_special_token("padding_id", "[PAD]")
+    add_special_token("mask_id", "[MASK]")
+    return special_tokens
+
+  def _tokenize_with_offsets(self, text_input: tf.Tensor):
+    tokens, begin, _ = self._basic_tokenizer.tokenize_with_offsets(text_input)
+    wordpieces, wp_begin, wp_end = (
+        self._fast_wp_tokenizer.tokenize_with_offsets(tokens))
+    begin_expanded = tf.expand_dims(begin, axis=2)
+    final_begin = begin_expanded + wp_begin
+    final_end = begin_expanded + wp_end
+    return wordpieces, final_begin, final_end
+
+  def _tokenize(self, text_input: tf.Tensor):
+    tokens = self._basic_tokenizer.tokenize(text_input)
+    return self._fast_wp_tokenizer.tokenize(tokens)
+
+  def call(self, inputs: tf.Tensor):
+    """Calls text.BertTokenizer on inputs.
+
+    Args:
+      inputs: A string Tensor of shape [batch_size].
+
+    Returns:
+      One or three of RaggedTensors if tokenize_with_offsets is False or True,
+      respectively. These are
+      tokens: A RaggedTensor of shape [batch_size, (words), (pieces_per_word)]
+        and type int32. tokens[i,j,k] contains the k-th wordpiece of the
+        j-th word in the i-th input.
+      start_offsets, limit_offsets: If tokenize_with_offsets is True,
+        RaggedTensors of type int64 with the same indices as tokens.
+        Element [i,j,k] contains the byte offset at the start, or past the
+        end, resp., for the k-th wordpiece of the j-th word in the i-th input.
+    """
+    # Prepare to reshape the result to work around broken shape inference.
+    batch_size = tf.shape(inputs)[0]
+
+    def _reshape(rt):
+      values = rt.values
+      row_splits = rt.row_splits
+      row_splits = tf.reshape(row_splits, [batch_size + 1])
+      return tf.RaggedTensor.from_row_splits(values, row_splits)
+
+    if self.tokenize_with_offsets:
+      tokens, start_offsets, limit_offsets = self._tokenize_with_offsets(inputs)
+      return _reshape(tokens), _reshape(start_offsets), _reshape(limit_offsets)
+    else:
+      tokens = self._tokenize(inputs)
+      return _reshape(tokens)
View file @
09d9656f
...
@@ -442,5 +442,109 @@ class BertPackInputsTest(tf.test.TestCase):
...
@@ -442,5 +442,109 @@ class BertPackInputsTest(tf.test.TestCase):
[
1001
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
1002
]]))
[
1001
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
1002
]]))
# This test covers the in-process behavior of FastWordpieceBertTokenizer layer.
class
FastWordPieceBertTokenizerTest
(
tf
.
test
.
TestCase
):
def
_make_vocab_file
(
self
,
vocab
,
filename
=
"vocab.txt"
):
path
=
os
.
path
.
join
(
tempfile
.
mkdtemp
(
dir
=
self
.
get_temp_dir
()),
# New subdir each time.
filename
)
with
tf
.
io
.
gfile
.
GFile
(
path
,
"w"
)
as
f
:
f
.
write
(
"
\n
"
.
join
(
vocab
+
[
""
]))
return
path
def
test_uncased
(
self
):
vocab_file
=
self
.
_make_vocab_file
(
[
"[PAD]"
,
"[UNK]"
,
"[CLS]"
,
"[SEP]"
,
"d"
,
"##ef"
,
"abc"
,
"xy"
])
bert_tokenize
=
text_layers
.
FastWordpieceBertTokenizer
(
vocab_file
=
vocab_file
,
lower_case
=
True
)
inputs
=
tf
.
constant
([
"abc def"
,
"ABC DEF d"
])
token_ids
=
bert_tokenize
(
inputs
)
self
.
assertAllEqual
(
token_ids
,
tf
.
ragged
.
constant
([[[
6
],
[
4
,
5
]],
[[
6
],
[
4
,
5
],
[
4
]]]))
bert_tokenize
.
tokenize_with_offsets
=
True
token_ids_2
,
start_offsets
,
limit_offsets
=
bert_tokenize
(
inputs
)
self
.
assertAllEqual
(
token_ids
,
token_ids_2
)
self
.
assertAllEqual
(
start_offsets
,
tf
.
ragged
.
constant
([[[
0
],
[
4
,
5
]],
[[
0
],
[
4
,
5
],
[
8
]]]))
self
.
assertAllEqual
(
limit_offsets
,
tf
.
ragged
.
constant
([[[
3
],
[
5
,
7
]],
[[
3
],
[
5
,
7
],
[
9
]]]))
self
.
assertEqual
(
bert_tokenize
.
vocab_size
,
8
)
# Repeat the above and test that case matters with lower_case=False.
def
test_cased
(
self
):
vocab_file
=
self
.
_make_vocab_file
(
[
"[PAD]"
,
"[UNK]"
,
"[CLS]"
,
"[SEP]"
,
"d"
,
"##ef"
,
"abc"
,
"ABC"
])
bert_tokenize
=
text_layers
.
FastWordpieceBertTokenizer
(
vocab_file
=
vocab_file
,
lower_case
=
False
,
tokenize_with_offsets
=
True
)
inputs
=
tf
.
constant
([
"abc def"
,
"ABC DEF"
])
token_ids
,
start_offsets
,
limit_offsets
=
bert_tokenize
(
inputs
)
self
.
assertAllEqual
(
token_ids
,
tf
.
ragged
.
constant
([[[
6
],
[
4
,
5
]],
[[
7
],
[
1
]]]))
self
.
assertAllEqual
(
start_offsets
,
tf
.
ragged
.
constant
([[[
0
],
[
4
,
5
]],
[[
0
],
[
4
]]]))
self
.
assertAllEqual
(
limit_offsets
,
tf
.
ragged
.
constant
([[[
3
],
[
5
,
7
]],
[[
3
],
[
7
]]]))
def
test_special_tokens_complete
(
self
):
vocab_file
=
self
.
_make_vocab_file
(
[
"foo"
,
"[PAD]"
,
"[UNK]"
,
"[CLS]"
,
"[SEP]"
,
"[MASK]"
,
"xy"
])
bert_tokenize
=
text_layers
.
FastWordpieceBertTokenizer
(
vocab_file
=
vocab_file
,
lower_case
=
True
)
self
.
assertDictEqual
(
bert_tokenize
.
get_special_tokens_dict
(),
dict
(
padding_id
=
1
,
start_of_sequence_id
=
3
,
end_of_segment_id
=
4
,
mask_id
=
5
,
vocab_size
=
7
))
def
test_special_tokens_partial
(
self
):
# [UNK] token is required by fast wordpiece tokenizer.
vocab_file
=
self
.
_make_vocab_file
(
[
"[PAD]"
,
"[CLS]"
,
"[SEP]"
,
"[UNK]"
])
bert_tokenize
=
text_layers
.
FastWordpieceBertTokenizer
(
vocab_file
=
vocab_file
,
lower_case
=
True
)
self
.
assertDictEqual
(
bert_tokenize
.
get_special_tokens_dict
(),
dict
(
padding_id
=
0
,
start_of_sequence_id
=
1
,
end_of_segment_id
=
2
,
vocab_size
=
4
))
# No mask_id,
def
test_special_tokens_in_estimator
(
self
):
"""Tests getting special tokens without an Eager init context."""
vocab_file
=
self
.
_make_vocab_file
(
[
"[PAD]"
,
"[UNK]"
,
"[CLS]"
,
"[SEP]"
,
"d"
,
"##ef"
,
"abc"
,
"xy"
])
def
input_fn
():
with
tf
.
init_scope
():
self
.
assertFalse
(
tf
.
executing_eagerly
())
# Build a preprocessing Model.
sentences
=
tf
.
keras
.
layers
.
Input
(
shape
=
[],
dtype
=
tf
.
string
)
bert_tokenizer
=
text_layers
.
FastWordpieceBertTokenizer
(
vocab_file
=
vocab_file
,
lower_case
=
True
)
special_tokens_dict
=
bert_tokenizer
.
get_special_tokens_dict
()
for
k
,
v
in
special_tokens_dict
.
items
():
self
.
assertIsInstance
(
v
,
int
,
"Unexpected type for {}"
.
format
(
k
))
tokens
=
bert_tokenizer
(
sentences
)
packed_inputs
=
text_layers
.
BertPackInputs
(
4
,
special_tokens_dict
=
special_tokens_dict
)(
tokens
)
preprocessing
=
tf
.
keras
.
Model
(
sentences
,
packed_inputs
)
# Map the dataset.
ds
=
tf
.
data
.
Dataset
.
from_tensors
(
(
tf
.
constant
([
"abc"
,
"DEF"
]),
tf
.
constant
([
0
,
1
])))
ds
=
ds
.
map
(
lambda
features
,
labels
:
(
preprocessing
(
features
),
labels
))
return
ds
def
model_fn
(
features
,
labels
,
mode
):
del
labels
# Unused.
return
tf
.
estimator
.
EstimatorSpec
(
mode
=
mode
,
predictions
=
features
[
"input_word_ids"
])
estimator
=
tf
.
estimator
.
Estimator
(
model_fn
=
model_fn
)
outputs
=
list
(
estimator
.
predict
(
input_fn
))
self
.
assertAllEqual
(
outputs
,
np
.
array
([[
2
,
6
,
3
,
0
],
[
2
,
4
,
5
,
3
]]))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
tf
.
test
.
main
()
tf
.
test
.
main
()
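For reference, a minimal preprocessing sketch (not part of this commit) that wires the new FastWordpieceBertTokenizer into a Keras preprocessing model together with BertPackInputs, mirroring the estimator test above; the vocabulary file path is a hypothetical stand-in.

import tensorflow as tf
from official.nlp.modeling.layers import text_layers

# Hypothetical WordPiece vocabulary file, one token per line.
vocab_file = "/tmp/vocab.txt"

tokenizer = text_layers.FastWordpieceBertTokenizer(
    vocab_file=vocab_file, lower_case=True)
packer = text_layers.BertPackInputs(
    8, special_tokens_dict=tokenizer.get_special_tokens_dict())

# Build a reusable preprocessing model: raw strings in, packed BERT inputs out.
sentences = tf.keras.layers.Input(shape=[], dtype=tf.string)
preprocessing = tf.keras.Model(sentences, packer(tokenizer(sentences)))

packed = preprocessing(tf.constant(["abc def", "xy"]))
# packed["input_word_ids"], packed["input_mask"], packed["input_type_ids"]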
official/nlp/modeling/layers/transformer_encoder_block.py  View file @ 09d9656f

@@ -16,6 +16,8 @@
 import tensorflow as tf

+from official.nlp.modeling.layers import util
+

 @tf.keras.utils.register_keras_serializable(package="Text")
 class TransformerEncoderBlock(tf.keras.layers.Layer):
...
@@ -86,8 +88,9 @@ class TransformerEncoderBlock(tf.keras.layers.Layer):
         kernel.
       attention_axes: axes over which the attention is applied. `None` means
         attention over all axes, but batch, heads, and features.
-      **kwargs: keyword arguments/
+      **kwargs: keyword arguments.
     """
+    util.filter_kwargs(kwargs)
     super().__init__(**kwargs)
     self._num_heads = num_attention_heads
...
official/nlp/modeling/layers/util.py  View file @ 09d9656f

@@ -30,13 +30,13 @@ class TfFunctionIfEagerDecorator(object):
     @functools.wraps(func)
     def wrapped_func(*args):
       # TODO(b/150147476, b/150024785): Fix tf.function in TF1 crash.
-      if not hasattr(tf.compat.v1, "executing_eagerly_outside_functions"
+      if not hasattr(tf.compat.v1, 'executing_eagerly_outside_functions'
                     ) or tf.compat.v1.executing_eagerly_outside_functions():
         return tf.function(func=func, **self.func_kwargs)(*args)
       return func(*args)

     # Cache the created function in self._call_impl.
-    if not hasattr(self, "_call_impl"):
+    if not hasattr(self, '_call_impl'):
       self._call_impl = wrapped_func
     return self._call_impl
...
@@ -44,3 +44,29 @@ class TfFunctionIfEagerDecorator(object):
 def tf_function_if_eager(**kwargs):
   """Applies the @tf.function decorator only if running in eager mode."""
   return TfFunctionIfEagerDecorator(**kwargs)
+
+
+def filter_kwargs(kwargs):
+  """In place removes unused options in kwargs.
+
+  This function removes the construction signatures: e.g.
+  number_attention_heads... in TransformerEncoderBlock. This is needed,
+  otherwise base_layer.py in Keras will complain.
+
+  Args:
+    kwargs: keyword arguments to be filtered.
+  """
+  # This is the union of signatures of TransformerEncoderBlock and
+  # ReZeroTransformer. Every Transformer
+  # block that uses compatible signature with TransformerEncoderBlock should
+  # call this function before base constructor super().__init__(**kwargs).
+  denylist = [
+      'num_attention_heads', 'intermediate_size', 'intermediate_activation',
+      'inner_dim', 'inner_activation', 'output_range', 'kernel_initializer',
+      'bias_initializer', 'kernel_regularizer', 'bias_regularizer',
+      'activity_regularizer', 'kernel_constraint', 'bias_constraint',
+      'use_bias', 'norm_first', 'norm_epsilon', 'output_dropout',
+      'attention_dropout', 'inner_dropout', 'attention_initializer',
+      'attention_axes', 'share_rezero'
+  ]
+  for unused_key in denylist:
+    kwargs.pop(unused_key, None)
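A small sketch (not from the commit) of the calling pattern that util.filter_kwargs supports: a hypothetical custom block with a TransformerEncoderBlock-compatible constructor drops the shared construction options before forwarding the remaining kwargs to the Keras base class.

import tensorflow as tf
from official.nlp.modeling.layers import util


class MyEncoderBlock(tf.keras.layers.Layer):
  """Hypothetical block with a TransformerEncoderBlock-style signature."""

  def __init__(self, num_attention_heads, inner_dim, inner_activation,
               **kwargs):
    # Remove construction-only options (norm_first, output_range, ...) that
    # tf.keras.layers.Layer.__init__ would otherwise reject.
    util.filter_kwargs(kwargs)
    super().__init__(**kwargs)
    self._num_heads = num_attention_heads
    self._inner_dim = inner_dim
    self._inner_activation = inner_activation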
official/nlp/modeling/models/seq2seq_transformer.py  View file @ 09d9656f

@@ -260,11 +260,9 @@ class Seq2SeqTransformer(tf.keras.Model):
       return {"outputs": top_decoded_ids, "scores": top_scores}

-    decoder_inputs = self.embedding_lookup(targets)
-    embedding_mask = tf.cast(tf.not_equal(targets, 0), decoder_inputs.dtype)
-    decoder_inputs *= tf.expand_dims(embedding_mask, -1)
     # Shift targets to the right, and remove the last element
-    decoder_inputs = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
+    targets = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
+    decoder_inputs = self.embedding_lookup(targets)
     length = tf.shape(decoder_inputs)[1]
     pos_encoding = self.position_embedding(decoder_inputs)
     pos_encoding = tf.cast(pos_encoding, embedded_inputs.dtype)
...
@@ -325,12 +323,7 @@ class Seq2SeqTransformer(tf.keras.Model):
       decoder_input = ids[:, -1:]

       # Preprocess decoder input by getting embeddings and adding timing signal.
-      # decoder_input = self.embedding_softmax_layer(decoder_input)
-      source_decoder_input = decoder_input
       decoder_input = self.embedding_lookup(decoder_input)
-      embedding_mask = tf.cast(
-          tf.not_equal(source_decoder_input, 0), decoder_input.dtype)
-      decoder_input *= tf.expand_dims(embedding_mask, -1)
       decoder_input += timing_signal[i]

       if self._padded_decode:
         # indexing does not work on TPU.
...
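A toy check (not part of the commit) of the reordered decoder-input preparation: the target ids are now shifted right and truncated before the embedding lookup, instead of padding the already-embedded tensor.

import tensorflow as tf

targets = tf.constant([[7, 8, 9, 5]])
# New order: pad/truncate the ids first, then embed the shifted ids.
shifted_targets = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
print(shifted_targets.numpy())  # [[0 7 8 9]]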
official/nlp/modeling/networks/bert_dense_encoder_test.py  View file @ 09d9656f

@@ -20,29 +20,30 @@ import numpy as np
 import tensorflow as tf
 from tensorflow.python.keras import keras_parameterized  # pylint: disable=g-direct-tensorflow-import
-from official.nlp.modeling.networks import bert_dense_encoder
+from official.nlp.modeling.networks import bert_encoder


 # This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
 # guarantees forward compatibility of this code for the V2 switchover.
 @keras_parameterized.run_all_keras_modes
-class BertDenseEncoderTest(keras_parameterized.TestCase):
+class BertEncoderV2Test(keras_parameterized.TestCase):

   def tearDown(self):
-    super(BertDenseEncoderTest, self).tearDown()
+    super(BertEncoderV2Test, self).tearDown()
     tf.keras.mixed_precision.set_global_policy("float32")

   def test_dict_outputs_network_creation(self):
     hidden_size = 32
     sequence_length = 21
     dense_sequence_length = 20
-    # Create a small dense BertDenseEncoder for testing.
+    # Create a small dense BertEncoderV2 for testing.
     kwargs = {}
-    test_network = bert_dense_encoder.BertDenseEncoder(
+    test_network = bert_encoder.BertEncoderV2(
         vocab_size=100,
         hidden_size=hidden_size,
         num_attention_heads=2,
         num_layers=3,
+        with_dense_inputs=True,
         **kwargs)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
...
@@ -86,12 +87,13 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     sequence_length = 21
     dense_sequence_length = 20
     # Create a small BertEncoder for testing.
-    test_network = bert_dense_encoder.BertDenseEncoder(
+    test_network = bert_encoder.BertEncoderV2(
         vocab_size=100,
         hidden_size=hidden_size,
         num_attention_heads=2,
         num_layers=3,
-        dict_outputs=True)
+        dict_outputs=True,
+        with_dense_inputs=True)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
...
@@ -134,12 +136,13 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     dense_sequence_length = 20
     tf.keras.mixed_precision.set_global_policy("mixed_float16")
     # Create a small BertEncoder for testing.
-    test_network = bert_dense_encoder.BertDenseEncoder(
+    test_network = bert_encoder.BertEncoderV2(
         vocab_size=100,
         hidden_size=hidden_size,
         num_attention_heads=2,
         num_layers=3,
-        dict_outputs=True)
+        dict_outputs=True,
+        with_dense_inputs=True)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
...
@@ -176,9 +179,8 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     self.assertAllEqual(tf.float16, pooled.dtype)

   @parameterized.named_parameters(
-      ("all_sequence_encoder_v2", bert_dense_encoder.BertDenseEncoder, None, 41),
-      ("output_range_encoder_v2", bert_dense_encoder.BertDenseEncoder, 1, 1),
+      ("all_sequence_encoder_v2", bert_encoder.BertEncoderV2, None, 41),
+      ("output_range_encoder_v2", bert_encoder.BertEncoderV2, 1, 1),
   )
   def test_dict_outputs_network_invocation(
       self, encoder_cls, output_range, out_seq_len):
...
@@ -195,7 +197,8 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
         num_layers=3,
         type_vocab_size=num_types,
         output_range=output_range,
-        dict_outputs=True)
+        dict_outputs=True,
+        with_dense_inputs=True)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
...
@@ -276,7 +279,7 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     # Creates a BertEncoder with embedding_width != hidden_size
     embedding_width = 16
-    test_network = bert_dense_encoder.BertDenseEncoder(
+    test_network = bert_encoder.BertEncoderV2(
         vocab_size=vocab_size,
         hidden_size=hidden_size,
         max_sequence_length=max_sequence_length,
...
@@ -316,11 +319,12 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     sequence_length = 21
     dense_sequence_length = 20
     # Create a small BertEncoder for testing.
-    test_network = bert_dense_encoder.BertDenseEncoder(
+    test_network = bert_encoder.BertEncoderV2(
         vocab_size=100,
         hidden_size=hidden_size,
         num_attention_heads=2,
-        num_layers=3)
+        num_layers=3,
+        with_dense_inputs=True)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
...
official/nlp/modeling/networks/bert_encoder.py  View file @ 09d9656f

@@ -23,6 +23,8 @@ from official.nlp.modeling import layers
 _Initializer = Union[str, tf.keras.initializers.Initializer]
+_Activation = Union[str, Callable[..., Any]]
+
 _approx_gelu = lambda x: tf.keras.activations.gelu(x, approximate=True)
...
@@ -72,6 +74,7 @@ class BertEncoderV2(tf.keras.layers.Layer):
     norm_first: Whether to normalize inputs to attention and intermediate dense
       layers. If set False, output of attention and intermediate dense layers is
       normalized.
+    with_dense_inputs: Whether to accept dense embeddings as the input.
   """

   def __init__(
...
@@ -83,7 +86,7 @@ class BertEncoderV2(tf.keras.layers.Layer):
       max_sequence_length: int = 512,
       type_vocab_size: int = 16,
       inner_dim: int = 3072,
-      inner_activation: Callable[..., Any] = _approx_gelu,
+      inner_activation: _Activation = _approx_gelu,
       output_dropout: float = 0.1,
       attention_dropout: float = 0.1,
       initializer: _Initializer = tf.keras.initializers.TruncatedNormal(
...
@@ -92,6 +95,7 @@ class BertEncoderV2(tf.keras.layers.Layer):
       embedding_width: Optional[int] = None,
       embedding_layer: Optional[tf.keras.layers.Layer] = None,
       norm_first: bool = False,
+      with_dense_inputs: bool = False,
       **kwargs):
     # Pops kwargs that are used in V1 implementation.
     if 'dict_outputs' in kwargs:
...
@@ -190,11 +194,23 @@ class BertEncoderV2(tf.keras.layers.Layer):
         'embedding_width': embedding_width,
         'embedding_layer': embedding_layer,
         'norm_first': norm_first,
+        'with_dense_inputs': with_dense_inputs,
     }
-    self.inputs = dict(
-        input_word_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
-        input_mask=tf.keras.Input(shape=(None,), dtype=tf.int32),
-        input_type_ids=tf.keras.Input(shape=(None,), dtype=tf.int32))
+    if with_dense_inputs:
+      self.inputs = dict(
+          input_word_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          input_mask=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          input_type_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          dense_inputs=tf.keras.Input(
+              shape=(None, embedding_width), dtype=tf.float32),
+          dense_mask=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          dense_type_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
+      )
+    else:
+      self.inputs = dict(
+          input_word_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          input_mask=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          input_type_ids=tf.keras.Input(shape=(None,), dtype=tf.int32))

   def call(self, inputs):
     word_embeddings = None
...
@@ -203,11 +219,22 @@ class BertEncoderV2(tf.keras.layers.Layer):
       mask = inputs.get('input_mask')
       type_ids = inputs.get('input_type_ids')
       word_embeddings = inputs.get('input_word_embeddings', None)
+      dense_inputs = inputs.get('dense_inputs', None)
+      dense_mask = inputs.get('dense_mask', None)
+      dense_type_ids = inputs.get('dense_type_ids', None)
     else:
       raise ValueError('Unexpected inputs type to %s.' % self.__class__)

     if word_embeddings is None:
       word_embeddings = self._embedding_layer(word_ids)
+
+    if dense_inputs is not None:
+      # Concat the dense embeddings at sequence end.
+      word_embeddings = tf.concat([word_embeddings, dense_inputs], axis=1)
+      type_ids = tf.concat([type_ids, dense_type_ids], axis=1)
+      mask = tf.concat([mask, dense_mask], axis=1)
+
     # absolute position embeddings.
     position_embeddings = self._position_embedding_layer(word_embeddings)
     type_embeddings = self._type_embedding_layer(type_ids)
...
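A usage sketch (not from the commit) of the new with_dense_inputs path in BertEncoderV2, using the input keys added above; the sizes are illustrative and the expected output shape is an assumption based on the concatenation in call().

import tensorflow as tf
from official.nlp.modeling.networks import bert_encoder

encoder = bert_encoder.BertEncoderV2(
    vocab_size=100,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=3,
    embedding_width=32,
    with_dense_inputs=True)

batch, word_len, dense_len = 2, 8, 4
outputs = encoder(dict(
    input_word_ids=tf.ones((batch, word_len), tf.int32),
    input_mask=tf.ones((batch, word_len), tf.int32),
    input_type_ids=tf.zeros((batch, word_len), tf.int32),
    # Dense embeddings are appended after the word embeddings.
    dense_inputs=tf.zeros((batch, dense_len, 32), tf.float32),
    dense_mask=tf.ones((batch, dense_len), tf.int32),
    dense_type_ids=tf.zeros((batch, dense_len), tf.int32)))

# sequence_output should cover word_len + dense_len positions: (2, 12, 32).
print(outputs["sequence_output"].shape)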
official/nlp/modeling/networks/funnel_transformer.py  View file @ 09d9656f

@@ -15,17 +15,32 @@
 """Funnel Transformer network."""
 # pylint: disable=g-classes-have-attributes
-from typing import Union, Sequence
+from typing import Any, Callable, Optional, Union, Sequence

 from absl import logging
 import numpy as np
 import tensorflow as tf

 from official.nlp.modeling import layers

+_Initializer = Union[str, tf.keras.initializers.Initializer]
+_Activation = Union[str, Callable[..., Any]]
+
 _MAX = 'max'
 _AVG = 'avg'
 _TRUNCATED_AVG = 'truncated_avg'
+
+_transformer_cls2str = {
+    layers.TransformerEncoderBlock: 'TransformerEncoderBlock',
+    layers.ReZeroTransformer: 'ReZeroTransformer'
+}
+
+_str2transformer_cls = {
+    'TransformerEncoderBlock': layers.TransformerEncoderBlock,
+    'ReZeroTransformer': layers.ReZeroTransformer
+}
+
+_approx_gelu = lambda x: tf.keras.activations.gelu(x, approximate=True)


 def _get_policy_dtype():
   try:
...
@@ -206,29 +221,37 @@ class FunnelTransformerEncoder(tf.keras.layers.Layer):
       embeddings for the input word IDs.
     norm_first: Whether to normalize inputs to attention and intermediate dense
       layers. If set False, output of attention and intermediate dense layers is
-      normalized.
+      normalized. This does not apply to ReZero.
+    transformer_cls: str or a keras Layer. This is the base TransformerBlock the
+      funnel encoder relies on.
+    share_rezero: bool. Whether to share ReZero alpha between the attention
+      layer and the ffn layer. This option is specific to ReZero.
   """

   def __init__(
       self,
-      vocab_size,
-      hidden_size=768,
-      num_layers=12,
-      num_attention_heads=12,
-      max_sequence_length=512,
-      type_vocab_size=16,
-      inner_dim=3072,
-      inner_activation=lambda x: tf.keras.activations.gelu(x, approximate=True),
-      output_dropout=0.1,
-      attention_dropout=0.1,
-      pool_type=_MAX,
-      pool_stride=2,
-      unpool_length=0,
-      initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
-      output_range=None,
-      embedding_width=None,
-      embedding_layer=None,
-      norm_first=False,
+      vocab_size: int,
+      hidden_size: int = 768,
+      num_layers: int = 12,
+      num_attention_heads: int = 12,
+      max_sequence_length: int = 512,
+      type_vocab_size: int = 16,
+      inner_dim: int = 3072,
+      inner_activation: _Activation = _approx_gelu,
+      output_dropout: float = 0.1,
+      attention_dropout: float = 0.1,
+      pool_type: str = _MAX,
+      pool_stride: int = 2,
+      unpool_length: int = 0,
+      initializer: _Initializer = tf.keras.initializers.TruncatedNormal(
+          stddev=0.02),
+      output_range: Optional[int] = None,
+      embedding_width: Optional[int] = None,
+      embedding_layer: Optional[tf.keras.layers.Layer] = None,
+      norm_first: bool = False,
+      transformer_cls: Union[
+          str, tf.keras.layers.Layer] = layers.TransformerEncoderBlock,
+      share_rezero: bool = True,
       **kwargs):
     super().__init__(**kwargs)
     activation = tf.keras.activations.get(inner_activation)
...
@@ -278,16 +301,22 @@ class FunnelTransformerEncoder(tf.keras.layers.Layer):
     self._transformer_layers = []
     self._attention_mask_layer = layers.SelfAttentionMask(
         name='self_attention_mask')
+    # Will raise an error if the string is not supported.
+    if isinstance(transformer_cls, str):
+      transformer_cls = _str2transformer_cls[transformer_cls]
     for i in range(num_layers):
-      layer = layers.TransformerEncoderBlock(
+      layer = transformer_cls(
           num_attention_heads=num_attention_heads,
-          intermediate_size=inner_dim,
-          intermediate_activation=inner_activation,
+          inner_dim=inner_dim,
+          inner_activation=inner_activation,
           output_dropout=output_dropout,
           attention_dropout=attention_dropout,
           norm_first=norm_first,
           output_range=output_range if i == num_layers - 1 else None,
           kernel_initializer=initializer,
+          share_rezero=share_rezero,
           name='transformer/layer_%d' % i)
       self._transformer_layers.append(layer)
...
@@ -333,24 +362,44 @@ class FunnelTransformerEncoder(tf.keras.layers.Layer):
     self._pool_type = pool_type
     self._config = {
         'vocab_size': vocab_size,
         'hidden_size': hidden_size,
         'num_layers': num_layers,
         'num_attention_heads': num_attention_heads,
         'max_sequence_length': max_sequence_length,
         'type_vocab_size': type_vocab_size,
         'inner_dim': inner_dim,
         'inner_activation': tf.keras.activations.serialize(activation),
         'output_dropout': output_dropout,
         'attention_dropout': attention_dropout,
         'initializer': tf.keras.initializers.serialize(initializer),
         'output_range': output_range,
         'embedding_width': embedding_width,
         'embedding_layer': embedding_layer,
         'norm_first': norm_first,
         'pool_type': pool_type,
         'pool_stride': pool_stride,
         'unpool_length': unpool_length,
+        'transformer_cls':
+            _transformer_cls2str.get(transformer_cls, str(transformer_cls))
     }

   def call(self, inputs):
...
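A construction sketch (not from the commit) for the new transformer_cls option of FunnelTransformerEncoder; per _str2transformer_cls above, either the registered string name or the layer class itself should be accepted, and the output shape comment is an assumption.

import tensorflow as tf
from official.nlp.modeling.networks import funnel_transformer

encoder = funnel_transformer.FunnelTransformerEncoder(
    vocab_size=100,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=3,
    pool_type="avg",
    pool_stride=2,
    transformer_cls="ReZeroTransformer",  # or layers.ReZeroTransformer
    share_rezero=True)

outputs = encoder(dict(
    input_word_ids=tf.ones((2, 16), tf.int32),
    input_mask=tf.ones((2, 16), tf.int32),
    input_type_ids=tf.zeros((2, 16), tf.int32)))
print(outputs["pooled_output"].shape)  # assumed (2, 32)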
official/nlp/modeling/networks/funnel_transformer_test.py  View file @ 09d9656f

@@ -38,13 +38,20 @@ class FunnelTransformerEncoderTest(parameterized.TestCase, tf.test.TestCase):
     tf.keras.mixed_precision.set_global_policy("float32")

   @parameterized.named_parameters(
-      ("mix_truncated_avg", "mixed_float16", tf.float16, "truncated_avg"),
-      ("float32_truncated_avg", "float32", tf.float32, "truncated_avg"),
-      ("mix_max", "mixed_float16", tf.float16, "max"),
-      ("float32_max", "float32", tf.float32, "max"),
-      ("mix_avg", "mixed_float16", tf.float16, "avg"),
-      ("float32_avg", "float32", tf.float32, "avg"))
-  def test_network_creation(self, policy, pooled_dtype, pool_type):
+      ("mix_truncated_avg_rezero", "mixed_float16", tf.float16, "truncated_avg",
+       "ReZeroTransformer"),
+      ("float32_truncated_avg_rezero", "float32", tf.float32, "truncated_avg",
+       "ReZeroTransformer"),
+      ("mix_truncated_avg", "mixed_float16", tf.float16, "truncated_avg",
+       "TransformerEncoderBlock"),
+      ("float32_truncated_avg", "float32", tf.float32, "truncated_avg",
+       "TransformerEncoderBlock"),
+      ("mix_max", "mixed_float16", tf.float16, "max", "TransformerEncoderBlock"),
+      ("float32_max", "float32", tf.float32, "max", "TransformerEncoderBlock"),
+      ("mix_avg", "mixed_float16", tf.float16, "avg", "TransformerEncoderBlock"),
+      ("float32_avg", "float32", tf.float32, "avg", "TransformerEncoderBlock"))
+  def test_network_creation(self, policy, pooled_dtype, pool_type,
+                            transformer_cls):
     tf.keras.mixed_precision.set_global_policy(policy)
     hidden_size = 32
...
@@ -60,7 +67,8 @@ class FunnelTransformerEncoderTest(parameterized.TestCase, tf.test.TestCase):
         pool_stride=pool_stride,
         pool_type=pool_type,
         max_sequence_length=sequence_length,
-        unpool_length=0)
+        unpool_length=0,
+        transformer_cls=transformer_cls)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
...
@@ -253,7 +261,8 @@ class FunnelTransformerEncoderTest(parameterized.TestCase, tf.test.TestCase):
         norm_first=False,
         pool_type="max",
         pool_stride=2,
-        unpool_length=0)
+        unpool_length=0,
+        transformer_cls="TransformerEncoderBlock")
     network = funnel_transformer.FunnelTransformerEncoder(**kwargs)
     expected_config = dict(kwargs)
     expected_config["inner_activation"] = tf.keras.activations.serialize(
...
official/nlp/serving/export_savedmodel.py  View file @ 09d9656f

@@ -13,12 +13,14 @@
 # limitations under the License.
 """A binary/library to export TF-NLP serving `SavedModel`."""
+import dataclasses
 import os
 from typing import Any, Dict, Text

 from absl import app
 from absl import flags
-import dataclasses
 import yaml

 from official.core import base_task
 from official.core import task_factory
 from official.modeling import hyperparams
...
@@ -29,6 +31,7 @@ from official.nlp.tasks import masked_lm
 from official.nlp.tasks import question_answering
 from official.nlp.tasks import sentence_prediction
 from official.nlp.tasks import tagging
+from official.nlp.tasks import translation

 FLAGS = flags.FLAGS
...
@@ -40,7 +43,9 @@ SERVING_MODULES = {
     question_answering.QuestionAnsweringTask:
         serving_modules.QuestionAnswering,
     tagging.TaggingTask:
-        serving_modules.Tagging
+        serving_modules.Tagging,
+    translation.TranslationTask:
+        serving_modules.Translation
 }
...
@@ -60,6 +65,10 @@ def define_flags():
   flags.DEFINE_string(
       "function_keys", None,
       "A string key to retrieve pre-defined serving signatures.")
+  flags.DEFINE_string(
+      "module_key", None,
+      "For multi-task case, load the export module weights from a specific "
+      "checkpoint item.")
   flags.DEFINE_bool("convert_tpu", False, "")
   flags.DEFINE_multi_integer("allowed_batch_size", None,
                              "Allowed batch sizes for batching ops.")
...
@@ -116,7 +125,8 @@ def main(_):
       export_module,
       function_keys=[FLAGS.function_keys],
       checkpoint_path=FLAGS.checkpoint_path,
-      export_savedmodel_dir=FLAGS.export_savedmodel_dir)
+      export_savedmodel_dir=FLAGS.export_savedmodel_dir,
+      module_key=FLAGS.module_key)
   if FLAGS.convert_tpu:
     # pylint: disable=g-import-not-at-top
...
official/nlp/serving/export_savedmodel_util.py  View file @ 09d9656f

@@ -13,24 +13,21 @@
 # limitations under the License.
 """Common library to export a SavedModel from the export module."""
-import os
-import time
 from typing import Dict, List, Optional, Text, Union

-from absl import logging
 import tensorflow as tf

 from official.core import export_base

+get_timestamped_export_dir = export_base.get_timestamped_export_dir
-MAX_DIRECTORY_CREATION_ATTEMPTS = 10


 def export(export_module: export_base.ExportModule,
            function_keys: Union[List[Text], Dict[Text, Text]],
            export_savedmodel_dir: Text,
            checkpoint_path: Optional[Text] = None,
-           timestamped: bool = True) -> Text:
+           timestamped: bool = True,
+           module_key: Optional[Text] = None) -> Text:
   """Exports to SavedModel format.

   Args:
...
@@ -41,6 +38,8 @@ def export(export_module: export_base.ExportModule,
     export_savedmodel_dir: Output saved model directory.
     checkpoint_path: Object-based checkpoint path or directory.
     timestamped: Whether to export the savedmodel to a timestamped directory.
+    module_key: Optional string to identify a checkpoint object to load for the
+      model in the export module.

   Returns:
     The savedmodel directory path.
...
@@ -48,37 +47,16 @@ def export(export_module: export_base.ExportModule,
   save_options = tf.saved_model.SaveOptions(function_aliases={
       'tpu_candidate': export_module.serve,
   })
-  return export_base.export(export_module, function_keys,
-                            export_savedmodel_dir, checkpoint_path,
-                            timestamped, save_options)
-
-
-def get_timestamped_export_dir(export_dir_base):
-  """Builds a path to a new subdirectory within the base directory.
-
-  Args:
-    export_dir_base: A string containing a directory to write the exported graph
-      and checkpoints.
-  Returns:
-    The full path of the new subdirectory (which is not actually created yet).
-  Raises:
-    RuntimeError: if repeated attempts fail to obtain a unique timestamped
-      directory name.
-  """
-  attempts = 0
-  while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS:
-    timestamp = int(time.time())
-    result_dir = os.path.join(export_dir_base, str(timestamp))
-    if not tf.io.gfile.exists(result_dir):
-      # Collisions are still possible (though extremely unlikely): this
-      # directory is not actually created yet, but it will be almost
-      # instantly on return from this function.
-      return result_dir
-    time.sleep(1)
-    attempts += 1
-    logging.warning('Directory %s already exists; retrying (attempt %s/%s)',
-                    str(result_dir), attempts, MAX_DIRECTORY_CREATION_ATTEMPTS)
-  raise RuntimeError('Failed to obtain a unique export directory name after '
-                     f'{MAX_DIRECTORY_CREATION_ATTEMPTS} attempts.')
+  if module_key:
+    kwargs = {module_key: export_module.model}
+    checkpoint = tf.train.Checkpoint(**kwargs)
+  else:
+    checkpoint = None
+  return export_base.export(export_module,
+                            function_keys,
+                            export_savedmodel_dir,
+                            checkpoint_path,
+                            timestamped,
+                            save_options,
+                            checkpoint=checkpoint)
official/nlp/serving/serving_modules.py  View file @ 09d9656f

@@ -14,10 +14,12 @@
 """Serving export modules for TF Model Garden NLP models."""
 # pylint:disable=missing-class-docstring
+import dataclasses
 from typing import Dict, List, Optional, Text

-import dataclasses
 import tensorflow as tf
+import tensorflow_text as tf_text

 from official.core import export_base
 from official.modeling.hyperparams import base_config
 from official.nlp.data import sentence_prediction_dataloader
...
@@ -407,3 +409,48 @@ class Tagging(export_base.ExportModule):
       signatures[signature_key] = self.serve_examples.get_concrete_function(
           tf.TensorSpec(shape=[None], dtype=tf.string, name="examples"))
     return signatures
+
+
+class Translation(export_base.ExportModule):
+  """The export module for the translation task."""
+
+  @dataclasses.dataclass
+  class Params(base_config.Config):
+    sentencepiece_model_path: str = ""
+
+  def __init__(self, params, model: tf.keras.Model, inference_step=None):
+    super().__init__(params, model, inference_step)
+    self._sp_tokenizer = tf_text.SentencepieceTokenizer(
+        model=tf.io.gfile.GFile(params.sentencepiece_model_path, "rb").read(),
+        add_eos=True)
+    try:
+      empty_str_tokenized = self._sp_tokenizer.tokenize("").numpy()
+    except tf.errors.InternalError:
+      raise ValueError(
+          "EOS token not in tokenizer vocab."
+          "Please make sure the tokenizer generates a single token for an "
+          "empty string.")
+    self._eos_id = empty_str_tokenized.item()
+
+  @tf.function
+  def serve(self, inputs) -> Dict[str, tf.Tensor]:
+    return self.inference_step(inputs)
+
+  @tf.function
+  def serve_text(self, text: tf.Tensor) -> Dict[str, tf.Tensor]:
+    tokenized = self._sp_tokenizer.tokenize(text).to_tensor(0)
+    return self._sp_tokenizer.detokenize(
+        self.serve({"inputs": tokenized})["outputs"])
+
+  def get_inference_signatures(self, function_keys: Dict[Text, Text]):
+    signatures = {}
+    valid_keys = ("serve_text")
+    for func_key, signature_key in function_keys.items():
+      if func_key not in valid_keys:
+        raise ValueError(
+            "Invalid function key for the module: %s with key %s. "
+            "Valid keys are: %s" % (self.__class__, func_key, valid_keys))
+      if func_key == "serve_text":
+        signatures[signature_key] = self.serve_text.get_concrete_function(
+            tf.TensorSpec(shape=[None], dtype=tf.string, name="text"))
+    return signatures
View file @
09d9656f
...
@@ -15,8 +15,11 @@
...
@@ -15,8 +15,11 @@
"""Tests for nlp.serving.serving_modules."""
"""Tests for nlp.serving.serving_modules."""
import
os
import
os
from
absl.testing
import
parameterized
from
absl.testing
import
parameterized
import
tensorflow
as
tf
import
tensorflow
as
tf
from
sentencepiece
import
SentencePieceTrainer
from
official.nlp.configs
import
bert
from
official.nlp.configs
import
bert
from
official.nlp.configs
import
encoders
from
official.nlp.configs
import
encoders
from
official.nlp.serving
import
serving_modules
from
official.nlp.serving
import
serving_modules
...
@@ -24,6 +27,7 @@ from official.nlp.tasks import masked_lm
...
@@ -24,6 +27,7 @@ from official.nlp.tasks import masked_lm
from
official.nlp.tasks
import
question_answering
from
official.nlp.tasks
import
question_answering
from
official.nlp.tasks
import
sentence_prediction
from
official.nlp.tasks
import
sentence_prediction
from
official.nlp.tasks
import
tagging
from
official.nlp.tasks
import
tagging
from
official.nlp.tasks
import
translation
def
_create_fake_serialized_examples
(
features_dict
):
def
_create_fake_serialized_examples
(
features_dict
):
...
@@ -59,6 +63,33 @@ def _create_fake_vocab_file(vocab_file_path):
...
@@ -59,6 +63,33 @@ def _create_fake_vocab_file(vocab_file_path):
outfile
.
write
(
"
\n
"
.
join
(
tokens
))
outfile
.
write
(
"
\n
"
.
join
(
tokens
))
def
_train_sentencepiece
(
input_path
,
vocab_size
,
model_path
,
eos_id
=
1
):
argstr
=
" "
.
join
([
f
"--input=
{
input_path
}
"
,
f
"--vocab_size=
{
vocab_size
}
"
,
"--character_coverage=0.995"
,
f
"--model_prefix=
{
model_path
}
"
,
"--model_type=bpe"
,
"--bos_id=-1"
,
"--pad_id=0"
,
f
"--eos_id=
{
eos_id
}
"
,
"--unk_id=2"
])
SentencePieceTrainer
.
Train
(
argstr
)
def
_generate_line_file
(
filepath
,
lines
):
with
tf
.
io
.
gfile
.
GFile
(
filepath
,
"w"
)
as
f
:
for
l
in
lines
:
f
.
write
(
"{}
\n
"
.
format
(
l
))
def
_make_sentencepeice
(
output_dir
):
src_lines
=
[
"abc ede fg"
,
"bbcd ef a g"
,
"de f a a g"
]
tgt_lines
=
[
"dd cc a ef g"
,
"bcd ef a g"
,
"gef cd ba"
]
sentencepeice_input_path
=
os
.
path
.
join
(
output_dir
,
"inputs.txt"
)
_generate_line_file
(
sentencepeice_input_path
,
src_lines
+
tgt_lines
)
sentencepeice_model_prefix
=
os
.
path
.
join
(
output_dir
,
"sp"
)
_train_sentencepiece
(
sentencepeice_input_path
,
11
,
sentencepeice_model_prefix
)
sentencepeice_model_path
=
"{}.model"
.
format
(
sentencepeice_model_prefix
)
return
sentencepeice_model_path
class
ServingModulesTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
class
ServingModulesTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
@
parameterized
.
parameters
(
@
parameterized
.
parameters
(
...
@@ -312,6 +343,31 @@ class ServingModulesTest(tf.test.TestCase, parameterized.TestCase):
...
@@ -312,6 +343,31 @@ class ServingModulesTest(tf.test.TestCase, parameterized.TestCase):
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
_
=
export_module
.
get_inference_signatures
({
"foo"
:
None
})
_
=
export_module
.
get_inference_signatures
({
"foo"
:
None
})
def
test_translation
(
self
):
sp_path
=
_make_sentencepeice
(
self
.
get_temp_dir
())
encdecoder
=
translation
.
EncDecoder
(
num_attention_heads
=
4
,
intermediate_size
=
256
)
config
=
translation
.
TranslationConfig
(
model
=
translation
.
ModelConfig
(
encoder
=
encdecoder
,
decoder
=
encdecoder
,
embedding_width
=
256
,
padded_decode
=
False
,
decode_max_length
=
100
),
sentencepiece_model_path
=
sp_path
,
)
task
=
translation
.
TranslationTask
(
config
)
model
=
task
.
build_model
()
params
=
serving_modules
.
Translation
.
Params
(
sentencepiece_model_path
=
sp_path
)
export_module
=
serving_modules
.
Translation
(
params
=
params
,
model
=
model
)
functions
=
export_module
.
get_inference_signatures
({
"serve_text"
:
"serving_default"
})
outputs
=
functions
[
"serving_default"
](
tf
.
constant
([
"abcd"
,
"ef gh"
]))
self
.
assertEqual
(
outputs
.
shape
,
(
2
,))
self
.
assertEqual
(
outputs
.
dtype
,
tf
.
string
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
tf
.
test
.
main
()
tf
.
test
.
main
()