ModelZoo / ResNet50_tensorflow · Commits

Unverified commit 09d9656f, authored Jan 13, 2022 by Srihari Humbarwadi, committed by GitHub on Jan 13, 2022

    Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

Parents: ac671306, 49a5706c
Changes: 427. Showing 20 changed files with 940 additions and 448 deletions (+940 / -448).
Changed files shown:
  official/nlp/modeling/layers/block_diag_feedforward.py (+166, -0)
  official/nlp/modeling/layers/block_diag_feedforward_test.py (+119, -0)
  official/nlp/modeling/layers/dense_einsum.py (+0, -180)
  official/nlp/modeling/layers/dense_einsum_test.py (+0, -119)
  official/nlp/modeling/layers/reuse_transformer_test.py (+13, -13)
  official/nlp/modeling/layers/rezero_transformer.py (+50, -8)
  official/nlp/modeling/layers/rezero_transformer_test.py (+20, -2)
  official/nlp/modeling/layers/text_layers.py (+141, -1)
  official/nlp/modeling/layers/text_layers_test.py (+104, -0)
  official/nlp/modeling/layers/transformer_encoder_block.py (+4, -1)
  official/nlp/modeling/layers/util.py (+28, -2)
  official/nlp/modeling/models/seq2seq_transformer.py (+2, -9)
  official/nlp/modeling/networks/bert_dense_encoder_test.py (+20, -16)
  official/nlp/modeling/networks/bert_encoder.py (+32, -5)
  official/nlp/modeling/networks/funnel_transformer.py (+88, -39)
  official/nlp/modeling/networks/funnel_transformer_test.py (+18, -9)
  official/nlp/serving/export_savedmodel.py (+13, -3)
  official/nlp/serving/export_savedmodel_util.py (+18, -40)
  official/nlp/serving/serving_modules.py (+48, -1)
  official/nlp/serving/serving_modules_test.py (+56, -0)
official/nlp/modeling/layers/block_diag_feedforward.py (new file, mode 100644)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Keras-based gated feedforward layer."""
# pylint: disable=g-classes-have-attributes
from typing import Optional

import tensorflow as tf


class BlockDiagFeedforward(tf.keras.layers.Layer):
  """Block diagonal feedforward layer.

  This layer replaces the weight matrix of the output_dense layer with a block
  diagonal matrix to save layer parameters and FLOPs. A linear mixing layer can
  be added optionally to improve layer expressibility.

  Args:
    intermediate_size: Size of the intermediate layer.
    intermediate_activation: Activation for the intermediate layer.
    dropout: Dropout probability for the output dropout.
    num_blocks: The number of blocks for the block diagonal matrix of the
      output_dense layer.
    apply_mixing: Apply linear mixing if True.
    kernel_initializer: Initializer for dense layer kernels.
    bias_initializer: Initializer for dense layer biases.
    kernel_regularizer: Regularizer for dense layer kernels.
    bias_regularizer: Regularizer for dense layer biases.
    activity_regularizer: Regularizer for dense layer activity.
    kernel_constraint: Constraint for dense layer kernels.
    bias_constraint: Constraint for dense layer kernels.
  """

  def __init__(
      self,
      intermediate_size: int,
      intermediate_activation: str,
      dropout: float,
      num_blocks: int = 1,
      apply_mixing: bool = True,
      kernel_initializer: str = "glorot_uniform",
      bias_initializer: str = "zeros",
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activity_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      kernel_constraint: Optional[tf.keras.constraints.Constraint] = None,
      bias_constraint: Optional[tf.keras.constraints.Constraint] = None,
      **kwargs):
    # pylint: disable=g-doc-args
    super(BlockDiagFeedforward, self).__init__(**kwargs)
    self._intermediate_size = intermediate_size
    self._intermediate_activation = intermediate_activation
    self._dropout = dropout
    self._num_blocks = num_blocks
    self._apply_mixing = apply_mixing

    if intermediate_size % num_blocks != 0:
      raise ValueError("Intermediate_size (%d) isn't a multiple of num_blocks "
                       "(%d)." % (intermediate_size, num_blocks))

    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._activity_regularizer = tf.keras.regularizers.get(activity_regularizer)
    self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf.keras.constraints.get(bias_constraint)

  def build(self, input_shape):
    hidden_size = input_shape.as_list()[-1]

    common_kwargs = dict(
        kernel_initializer=self._kernel_initializer,
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint)

    self._intermediate_dense = tf.keras.layers.experimental.EinsumDense(
        "abc,cde->abde",
        output_shape=(None, self._num_blocks,
                      self._intermediate_size // self._num_blocks),
        bias_axes="de",
        name="intermediate",
        **common_kwargs)

    policy = tf.keras.mixed_precision.global_policy()
    if policy.name == "mixed_bfloat16":
      # bfloat16 causes BERT with the LAMB optimizer to not converge
      # as well, so we use float32.
      policy = tf.float32
    self._intermediate_activation_layer = tf.keras.layers.Activation(
        self._intermediate_activation, dtype=policy)

    self._output_dense = tf.keras.layers.experimental.EinsumDense(
        "abde,deo->abdo",
        output_shape=(None, self._num_blocks, hidden_size // self._num_blocks),
        bias_axes="do",
        name="output",
        **common_kwargs)

    if self._apply_mixing:
      self._output_mixing = tf.keras.layers.experimental.EinsumDense(
          "abdo,de->abeo",
          output_shape=(None, self._num_blocks,
                        hidden_size // self._num_blocks),
          name="output_mixing",
          **common_kwargs)
    self._output_reshape = tf.keras.layers.Reshape((-1, hidden_size))

    self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout)

  def get_config(self):
    config = {
        "intermediate_size": self._intermediate_size,
        "intermediate_activation": self._intermediate_activation,
        "dropout": self._dropout,
        "num_blocks": self._num_blocks,
        "apply_mixing": self._apply_mixing,
        "kernel_initializer":
            tf.keras.initializers.serialize(self._kernel_initializer),
        "bias_initializer":
            tf.keras.initializers.serialize(self._bias_initializer),
        "kernel_regularizer":
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        "bias_regularizer":
            tf.keras.regularizers.serialize(self._bias_regularizer),
        "activity_regularizer":
            tf.keras.regularizers.serialize(self._activity_regularizer),
        "kernel_constraint":
            tf.keras.constraints.serialize(self._kernel_constraint),
        "bias_constraint":
            tf.keras.constraints.serialize(self._bias_constraint)
    }
    base_config = super(BlockDiagFeedforward, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    intermediate_output = self._intermediate_dense(inputs)
    intermediate_output = self._intermediate_activation_layer(
        intermediate_output)
    layer_output = self._output_dense(intermediate_output)
    if self._apply_mixing:
      layer_output = self._output_mixing(layer_output)
    layer_output = self._output_reshape(layer_output)
    layer_output = self._output_dropout(layer_output)
    return layer_output
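
For illustration only, not part of the commit: a minimal usage sketch of the new layer, assuming the `official.nlp` package from the TensorFlow Model Garden is importable. The argument values are arbitrary; the only constraints are that `intermediate_size` and the input's hidden size are divisible by `num_blocks`.

import numpy as np
import tensorflow as tf

from official.nlp.modeling.layers import block_diag_feedforward

# Build the layer; the output projection uses a block-diagonal kernel with
# num_blocks blocks, plus an optional linear mixing step across blocks.
layer = block_diag_feedforward.BlockDiagFeedforward(
    intermediate_size=128,
    intermediate_activation="relu",
    dropout=0.1,
    num_blocks=4,       # 128 % 4 == 0, as required by the constructor check.
    apply_mixing=True)

# [batch, seq_len, hidden] in, same shape out.
inputs = tf.constant(np.random.random_sample((2, 16, 64)), dtype=tf.float32)
outputs = layer(inputs)
print(outputs.shape)  # (2, 16, 64)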

official/nlp/modeling/layers/block_diag_feedforward_test.py (new file, mode 100644)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Keras-based gated feedforward layer."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.keras import keras_parameterized  # pylint: disable=g-direct-tensorflow-import
from official.nlp.modeling.layers import block_diag_feedforward


# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
# guarantees forward compatibility of this code for the V2 switchover.
@keras_parameterized.run_all_keras_modes
class BlockDiagFeedforwardTest(keras_parameterized.TestCase):

  def tearDown(self):
    super(BlockDiagFeedforwardTest, self).tearDown()
    tf.keras.mixed_precision.set_global_policy("float32")

  @parameterized.parameters(
      (1, True, "float32"),
      (1, True, "mixed_float16"),
      (1, False, "float32"),
      (1, False, "mixed_float16"),
      (2, True, "float32"),
      (2, True, "mixed_float16"),
      (2, False, "float32"),
      (2, False, "mixed_float16"),
  )
  def test_layer_creation(self, num_blocks, apply_mixing, dtype):
    tf.keras.mixed_precision.set_global_policy(dtype)
    kwargs = dict(
        intermediate_size=128,
        intermediate_activation="relu",
        dropout=0.1,
        num_blocks=num_blocks,
        apply_mixing=apply_mixing,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros")
    test_layer = block_diag_feedforward.BlockDiagFeedforward(**kwargs)

    sequence_length = 64
    width = 128
    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf.keras.Input(shape=(sequence_length, width))
    output_tensor = test_layer(data_tensor)
    # The default output of a transformer layer should be the same as the input.
    self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list())

  @parameterized.parameters(
      (1, True, "float32"),
      (1, True, "mixed_float16"),
      (1, False, "float32"),
      (1, False, "mixed_float16"),
      (2, True, "float32"),
      (2, True, "mixed_float16"),
      (2, False, "float32"),
      (2, False, "mixed_float16"),
  )
  def test_layer_invocation(self, num_blocks, apply_mixing, dtype):
    tf.keras.mixed_precision.set_global_policy(dtype)
    kwargs = dict(
        intermediate_size=16,
        intermediate_activation="relu",
        dropout=0.1,
        num_blocks=num_blocks,
        apply_mixing=apply_mixing,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros")
    test_layer = block_diag_feedforward.BlockDiagFeedforward(**kwargs)

    sequence_length = 16
    width = 32
    # Create a 3-dimensional input (the first dimension is implicit).
    data_tensor = tf.keras.Input(shape=(sequence_length, width))
    output_tensor = test_layer(data_tensor)

    # Create a model from the test layer.
    model = tf.keras.Model(data_tensor, output_tensor)

    # Invoke the model on test data.
    batch_size = 6
    input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, width))
    output_data = model.predict(input_data)
    self.assertEqual(output_data.shape, (batch_size, sequence_length, width))

  def test_get_config(self):
    kwargs = dict(
        intermediate_size=16,
        intermediate_activation="relu",
        dropout=0.1,
        num_blocks=2,
        apply_mixing=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros")
    test_layer = block_diag_feedforward.BlockDiagFeedforward(**kwargs)
    new_layer = block_diag_feedforward.BlockDiagFeedforward.from_config(
        test_layer.get_config())
    self.assertAllEqual(test_layer.get_config(), new_layer.get_config())


if __name__ == "__main__":
  tf.test.main()

official/nlp/modeling/layers/dense_einsum.py (deleted; previous mode 100644, contents below as of parent ac671306)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Keras-based einsum layer."""
# pylint: disable=g-classes-have-attributes
import tensorflow as tf

from tensorflow.python.util import deprecation

_CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"]


@tf.keras.utils.register_keras_serializable(package="Text")
class DenseEinsum(tf.keras.layers.Layer):
  """A densely connected layer that uses `tf.einsum` as the backing computation.

  This layer can perform einsum calculations of arbitrary dimensionality.

  Args:
    output_shape: Positive integer or tuple, dimensionality of the output space.
    num_summed_dimensions: The number of dimensions to sum over. Standard 2D
      matmul should use 1, 3D matmul should use 2, and so forth.
    activation: Activation function to use. If you don't specify anything, no
      activation is applied (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation").
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix.
    bias_constraint: Constraint function applied to the bias vector.

  Input shape:
    N-D tensor with shape: `(batch_size, ..., input_dim)`. The most common
    situation would be a 2D input with shape `(batch_size, input_dim)`.

  Output shape:
    N-D tensor with shape: `(batch_size, ..., units)`. For instance, for a 2D
    input with shape `(batch_size, input_dim)`, the output would have shape
    `(batch_size, units)`.
  """

  @deprecation.deprecated(
      None, "DenseEinsum is deprecated. Please use "
      "tf.keras.experimental.EinsumDense layer instead.")
  def __init__(self,
               output_shape,
               num_summed_dimensions=1,
               activation=None,
               use_bias=True,
               kernel_initializer="glorot_uniform",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
    super(DenseEinsum, self).__init__(**kwargs)
    self._output_shape = output_shape if isinstance(
        output_shape, (list, tuple)) else (output_shape,)
    self._activation = tf.keras.activations.get(activation)
    self._use_bias = use_bias
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf.keras.constraints.get(bias_constraint)
    self._num_summed_dimensions = num_summed_dimensions
    self._einsum_string = None

  def _build_einsum_string(self, free_input_dims, bound_dims, output_dims):
    input_str = ""
    kernel_str = ""
    output_str = ""
    letter_offset = 0
    for i in range(free_input_dims):
      char = _CHR_IDX[i + letter_offset]
      input_str += char
      output_str += char

    letter_offset += free_input_dims
    for i in range(bound_dims):
      char = _CHR_IDX[i + letter_offset]
      input_str += char
      kernel_str += char

    letter_offset += bound_dims
    for i in range(output_dims):
      char = _CHR_IDX[i + letter_offset]
      kernel_str += char
      output_str += char

    return input_str + "," + kernel_str + "->" + output_str

  def build(self, input_shape):
    input_shape = tf.TensorShape(input_shape)
    input_rank = input_shape.rank
    free_input_dims = input_rank - self._num_summed_dimensions
    output_dims = len(self._output_shape)

    self._einsum_string = self._build_einsum_string(free_input_dims,
                                                    self._num_summed_dimensions,
                                                    output_dims)

    # This is only saved for testing purposes.
    self._kernel_shape = (
        input_shape[free_input_dims:].concatenate(self._output_shape))

    self._kernel = self.add_weight(
        "kernel",
        shape=self._kernel_shape,
        initializer=self._kernel_initializer,
        regularizer=self._kernel_regularizer,
        constraint=self._kernel_constraint,
        dtype=self.dtype,
        trainable=True)
    if self._use_bias:
      self._bias = self.add_weight(
          "bias",
          shape=self._output_shape,
          initializer=self._bias_initializer,
          regularizer=self._bias_regularizer,
          constraint=self._bias_constraint,
          dtype=self.dtype,
          trainable=True)
    else:
      self._bias = None
    super(DenseEinsum, self).build(input_shape)

  def get_config(self):
    config = {
        "output_shape": self._output_shape,
        "num_summed_dimensions": self._num_summed_dimensions,
        "activation": tf.keras.activations.serialize(self._activation),
        "use_bias": self._use_bias,
        "kernel_initializer":
            tf.keras.initializers.serialize(self._kernel_initializer),
        "bias_initializer":
            tf.keras.initializers.serialize(self._bias_initializer),
        "kernel_regularizer":
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        "bias_regularizer":
            tf.keras.regularizers.serialize(self._bias_regularizer),
        "activity_regularizer":
            tf.keras.regularizers.serialize(self._activity_regularizer),
        "kernel_constraint":
            tf.keras.constraints.serialize(self._kernel_constraint),
        "bias_constraint":
            tf.keras.constraints.serialize(self._bias_constraint)
    }
    base_config = super(DenseEinsum, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    ret = tf.einsum(self._einsum_string, inputs, self._kernel)
    if self._use_bias:
      ret += self._bias
    if self._activation is not None:
      ret = self._activation(ret)
    return ret
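
For orientation, a standalone sketch (not from the commit) of the einsum-string rule implemented by `_build_einsum_string()` above: free input dimensions are copied to the output, bound dimensions are contracted against the kernel, and output dimensions come from the kernel. The `build_einsum_string` helper below is a hypothetical re-implementation for illustration; the expected strings match the tests that follow.

# The real layer derives the dimension counts from the input shape and
# num_summed_dimensions; here they are passed directly.
_CHR_IDX = "abcdefghijklm"

def build_einsum_string(free_input_dims, bound_dims, output_dims):
  input_str = _CHR_IDX[:free_input_dims]
  bound_str = _CHR_IDX[free_input_dims:free_input_dims + bound_dims]
  out_str = _CHR_IDX[free_input_dims + bound_dims:
                     free_input_dims + bound_dims + output_dims]
  return (input_str + bound_str + "," + bound_str + out_str + "->" +
          input_str + out_str)

print(build_einsum_string(2, 1, 1))  # abc,cd->abd   (rank-3 input, 1 summed dim)
print(build_einsum_string(2, 2, 1))  # abcd,cde->abe (rank-4 input, 2 summed dims)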

official/nlp/modeling/layers/dense_einsum_test.py (deleted; previous mode 100644, contents below as of parent ac671306)

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Keras-based einsum layer."""
import numpy as np
import tensorflow as tf

from tensorflow.python.keras import keras_parameterized  # pylint: disable=g-direct-tensorflow-import
from official.nlp.modeling.layers import dense_einsum


# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
# guarantees forward compatibility of this code for the V2 switchover.
@keras_parameterized.run_all_keras_modes
class DenseEinsumLayer(keras_parameterized.TestCase):

  def test_3D_einsum_with_two_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,), num_summed_dimensions=2)
    # Create a 4-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 40, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abcd,cde->abe")
    self.assertEqual(test_layer._kernel_shape, (40, 80, 64))

  def test_3D_einsum_with_one_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64, 32), num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cde->abde")
    self.assertEqual(test_layer._kernel_shape, (80, 64, 32))

  def test_2D_einsum_with_one_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,), num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cd->abd")
    self.assertEqual(test_layer._kernel_shape, (80, 64))

  def test_bias_term_can_be_disabled(self):
    # A layer created using the bias should have two weights.
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, use_bias=True)
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(2, len(test_layer.get_weights()))

    # A layer created without the bias should have only one weight.
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, use_bias=False)
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(1, len(test_layer.get_weights()))

  def test_activation(self):
    # Create a model that does not use an activation.
    no_activation_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, activation=None)
    input_tensor = tf.keras.Input(shape=(None, 80))
    output_tensor = no_activation_layer(input_tensor)
    no_activation_model = tf.keras.Model(input_tensor, output_tensor)

    # Create a model that uses a softmax activation.
    activation_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, activation="softmax")
    input_tensor = tf.keras.Input(shape=(None, 80))
    output_tensor = activation_layer(input_tensor)
    activation_model = tf.keras.Model(input_tensor, output_tensor)

    # Make sure the models' weights are identical.
    activation_model.set_weights(no_activation_model.get_weights())

    # Predict using each model on the same input data. The output should be
    # different, since one is using a softmax - even though the models' weights
    # are the same.
    input_values = 10 * np.random.random_sample((10, 4, 80))
    non_activated_data = no_activation_model.predict(input_values)
    activated_data = activation_model.predict(input_values)
    self.assertNotAllClose(activated_data, non_activated_data)

  def test_non_iterable_output_shape(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cd->abd")
    self.assertEqual(test_layer._kernel_shape, (80, 64))

  def test_with_explicit_initializer(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,),
        num_summed_dimensions=2,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
    # Create a 4-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 40, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abcd,cde->abe")
    self.assertEqual(test_layer._kernel_shape, (40, 80, 64))


if __name__ == "__main__":
  tf.test.main()

official/nlp/modeling/layers/reuse_transformer_test.py

@@ -68,7 +68,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     _ = model.predict(input_data)

@@ -89,7 +89,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)

@@ -104,7 +104,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))

@@ -121,7 +121,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_output_range_with_relative_pe(self, transformer_cls):
     test_layer = transformer_cls(

@@ -131,7 +131,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))

@@ -149,7 +149,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_output_range_without_mask(self, transformer_cls):
     test_layer = transformer_cls(

@@ -159,7 +159,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     output_tensor, _ = test_layer(input_data)

@@ -175,7 +175,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer(input_data)
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_output_range_with_pre_norm(self, transformer_cls):
     test_layer = transformer_cls(

@@ -185,7 +185,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     width = 80
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     mask_data = np.random.randint(
         2, size=(batch_size, sequence_length, sequence_length))

@@ -203,7 +203,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     new_layer.set_weights(test_layer.get_weights())
     new_output_tensor, _ = new_layer([input_data, mask_data])
     self.assertAllClose(
-        new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003)
+        new_output_tensor, output_tensor[:, 0:1, :], atol=0.002, rtol=0.01)

   def test_layer_invocation_with_float16_dtype(self, transformer_cls):
     tf.keras.mixed_precision.set_global_policy('mixed_float16')

@@ -223,7 +223,7 @@ class ReuseTransformerLayerTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)

@@ -368,7 +368,7 @@ class ReuseTransformerArgumentTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = 10 * np.random.random_sample(
+    input_data = np.random.random_sample(
         (batch_size, sequence_length, width))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)

@@ -404,7 +404,7 @@ class ReuseTransformerArgumentTest(tf.test.TestCase, parameterized.TestCase):
     # Invoke the model on test data. We can't validate the output data itself
     # (the NN is too complex) but this will rule out structural runtime errors.
     batch_size = 6
-    input_data = (10 * np.random.random_sample(
+    input_data = (np.random.random_sample(
         (batch_size, sequence_length, width)))
     # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
     # which here is (batch, sequence_length, sequence_length)

official/nlp/modeling/layers/rezero_transformer.py

@@ -18,6 +18,8 @@
 import gin
 import tensorflow as tf

+from official.nlp.modeling.layers import util
+

 @tf.keras.utils.register_keras_serializable(package="Text")
 @gin.configurable

@@ -45,6 +47,7 @@ class ReZeroTransformer(tf.keras.layers.Layer):
     kernel_constraint: Constraint for dense layer kernels.
     bias_constraint: Constraint for dense layer kernels.
     use_layer_norm: If add layer_norm on top of the ReZero.
+    share_rezero: If attention layer and FFN layer share the same alpha.
   """

   def __init__(self,

@@ -62,7 +65,14 @@ class ReZeroTransformer(tf.keras.layers.Layer):
                kernel_constraint=None,
                bias_constraint=None,
                use_layer_norm=False,
+               share_rezero=True,
                **kwargs):
+    # attention_dropout will override attention_dropout_rate.
+    # This is to unify the input params with TransformerEncoderBlock.
+    attention_dropout_rate = kwargs.pop("attention_dropout",
+                                        attention_dropout_rate)
+    dropout_rate = kwargs.pop("output_dropout", dropout_rate)
+    util.filter_kwargs(kwargs)
     super(ReZeroTransformer, self).__init__(**kwargs)

     self._num_heads = num_attention_heads

@@ -78,10 +88,18 @@ class ReZeroTransformer(tf.keras.layers.Layer):
     self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
     self._bias_constraint = tf.keras.constraints.get(bias_constraint)
     self._use_layer_norm = use_layer_norm
+    self._share_rezero = share_rezero

   def build(self, input_shape):
-    input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape
-    input_tensor_shape = tf.TensorShape(input_tensor)
+    if isinstance(input_shape, tf.TensorShape):
+      input_tensor_shape = input_shape
+    elif isinstance(input_shape, (list, tuple)):
+      input_tensor_shape = tf.TensorShape(input_shape[0])
+    else:
+      raise ValueError(
+          "The type of input shape argument is not supported, got: %s" %
+          type(input_shape))
     if len(input_tensor_shape.as_list()) != 3:
       raise ValueError("TransformerLayer expects a three-dimensional input of "
                        "shape [batch, sequence, width].")

@@ -158,6 +176,15 @@ class ReZeroTransformer(tf.keras.layers.Layer):
         trainable=True,
         dtype=tf.float32)
+    if self._share_rezero:
+      self._rezero_a_ffn = self._rezero_a
+    else:
+      self._rezero_a_ffn = self.add_weight(
+          name="rezero_alpha_ffn",
+          initializer=tf.keras.initializers.Zeros(),
+          trainable=True,
+          dtype=tf.float32)

     super(ReZeroTransformer, self).build(input_shape)

   def get_config(self):

@@ -176,6 +203,8 @@ class ReZeroTransformer(tf.keras.layers.Layer):
             self._output_range,
         "use_layer_norm":
             self._use_layer_norm,
+        "share_rezero":
+            self._share_rezero,
         "kernel_initializer":
             tf.keras.initializers.serialize(self._kernel_initializer),
         "bias_initializer":

@@ -196,21 +225,34 @@ class ReZeroTransformer(tf.keras.layers.Layer):
   def reset_rezero(self):
     self._rezero_a.assign(0.)
+    if not self._share_rezero:
+      self._rezero_a_ffn.assign(0.)

   def call(self, inputs):
-    if isinstance(inputs, (list, tuple)) and len(inputs) == 2:
-      input_tensor, attention_mask = inputs
+    if isinstance(inputs, (list, tuple)):
+      if len(inputs) == 2:
+        input_tensor, attention_mask = inputs
+        key_value = None
+      elif len(inputs) == 3:
+        input_tensor, key_value, attention_mask = inputs
+      else:
+        raise ValueError("Unexpected inputs to %s with length at %d" %
+                         (self.__class__, len(inputs)))
     else:
-      input_tensor, attention_mask = (inputs, None)
+      input_tensor, key_value, attention_mask = (inputs, None, None)

     if self._output_range:
       target_tensor = input_tensor[:, 0:self._output_range, :]
       if attention_mask is not None:
         attention_mask = attention_mask[:, 0:self._output_range, :]
     else:
       target_tensor = input_tensor
+    if key_value is None:
+      key_value = input_tensor

     attention_output = self._attention_layer(
-        query=target_tensor, value=input_tensor, attention_mask=attention_mask)
+        query=target_tensor, value=key_value, attention_mask=attention_mask)
     attention_output = self._attention_dropout(attention_output)
     attention_output = target_tensor + self._rezero_a * attention_output
     if self._use_layer_norm:

@@ -225,7 +267,7 @@ class ReZeroTransformer(tf.keras.layers.Layer):
     layer_output = self._output_dropout(layer_output)
     # During mixed precision training, attention_output is from layer norm and
     # is always fp32 for now. Cast layer_output to fp32 for the subsequent add.
-    layer_output = attention_output + tf.cast(self._rezero_a * layer_output,
+    layer_output = attention_output + tf.cast(self._rezero_a_ffn * layer_output,
                                               tf.float32)
     if self._use_layer_norm:
       layer_output = self._output_layer_norm(layer_output)
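
A hedged usage sketch of the two behaviors this diff adds, mirroring the shapes used in `test_separate_qkv` in the test file below; it assumes only the constructor arguments already exercised by those tests.

import tensorflow as tf

from official.nlp.modeling.layers import rezero_transformer

# share_rezero=False gives the FFN sub-layer its own ReZero alpha.
layer = rezero_transformer.ReZeroTransformer(
    num_attention_heads=2,
    intermediate_size=128,
    intermediate_activation="relu",
    share_rezero=False)

# The rewritten call() also accepts a (query, key/value, mask) triple.
q = tf.zeros([2, 4, 16])    # [batch, query_len, width]
kv = tf.zeros([2, 8, 16])   # [batch, kv_len, width]
mask = tf.zeros([2, 4, 8])  # [batch, query_len, kv_len]
output = layer([q, kv, mask])
print(output.shape)  # (2, 4, 16)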

official/nlp/modeling/layers/rezero_transformer_test.py

@@ -14,6 +14,7 @@
 """Tests for Keras-based rezero-transformer block layer."""

+from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf

@@ -30,12 +31,15 @@ class TransformerWithReZeroLayerTest(keras_parameterized.TestCase):
     super(TransformerWithReZeroLayerTest, self).tearDown()
     tf.keras.mixed_precision.set_global_policy('float32')

-  def test_layer_invocation_with_float16_dtype(self):
+  @parameterized.named_parameters(('no_share_attn_ffn', False),
+                                  ('share_attn_ffn', True))
+  def test_layer_invocation_with_float16_dtype(self, share_rezero):
     tf.keras.mixed_precision.set_global_policy('mixed_float16')
     test_layer = rezero_transformer.ReZeroTransformer(
         num_attention_heads=10,
         intermediate_size=2048,
-        intermediate_activation='relu')
+        intermediate_activation='relu',
+        share_rezero=share_rezero)
     sequence_length = 21
     width = 80

     # Create a 3-dimensional input (the first dimension is implicit).

@@ -124,6 +128,20 @@ class TransformerWithReZeroLayerTest(keras_parameterized.TestCase):
     new_output_tensor = new_layer([input_data, mask_data])
     self.assertAllClose(new_output_tensor, output_tensor[:, 0:1, :])

+  def test_separate_qkv(self):
+    test_layer = rezero_transformer.ReZeroTransformer(
+        num_attention_heads=2,
+        intermediate_size=128,
+        intermediate_activation='relu',
+        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
+    # Forward path.
+    q_tensor = tf.zeros([2, 4, 16], dtype=tf.float32)
+    kv_tensor = tf.zeros([2, 8, 16], dtype=tf.float32)
+    dummy_mask = tf.zeros([2, 4, 8], dtype=tf.float32)
+    inputs = [q_tensor, kv_tensor, dummy_mask]
+    output = test_layer(inputs)
+    self.assertEqual(output.shape, q_tensor.shape)
+

 if __name__ == '__main__':
   tf.test.main()

official/nlp/modeling/layers/text_layers.py

@@ -13,18 +13,22 @@
 # limitations under the License.
 """Keras Layers for BERT-specific preprocessing."""
-# pylint: disable=g-import-not-at-top
 from typing import Any, Dict, List, Optional, Union

 from absl import logging
 import tensorflow as tf

 try:
-  import tensorflow_text as text  # pylint: disable=g-import-not-at-top
+  # pylint: disable=g-import-not-at-top
+  import tensorflow_text as text
+  from tensorflow_text.python.ops import bert_tokenizer
 except ImportError:
   text = None
+  bert_tokenizer = None
 except tf.errors.NotFoundError as e:
   logging.warn("Encountered error when importing tensorflow_text: %s", e)
   text = None
+  bert_tokenizer = None


 def _check_if_tf_text_installed():

@@ -587,3 +591,139 @@ class BertPackInputs(tf.keras.layers.Layer):
     return dict(input_word_ids=_reshape(input_word_ids),
                 input_mask=_reshape(input_mask),
                 input_type_ids=_reshape(input_type_ids))
+
+
+class FastWordpieceBertTokenizer(tf.keras.layers.Layer):
+  """A bert tokenizer keras layer using text.FastWordpieceTokenizer.
+
+  See details: "Fast WordPiece Tokenization" (https://arxiv.org/abs/2012.15524)
+  """
+
+  def __init__(self,
+               *,
+               vocab_file: str,
+               lower_case: bool,
+               tokenize_with_offsets: bool = False,
+               **kwargs):
+    """Initializes a FastWordpieceBertTokenizer layer.
+
+    Args:
+      vocab_file: A Python string with the path of the vocabulary file. This is
+        a text file with newline-separated wordpiece tokens. This layer loads
+        a list of tokens from it to create text.FastWordpieceTokenizer.
+      lower_case: A Python boolean forwarded to text.BasicTokenizer. If true,
+        input text is converted to lower case (where applicable) before
+        tokenization. This must be set to match the way in which the vocab_file
+        was created.
+      tokenize_with_offsets: A Python boolean. If true, this layer calls
+        FastWordpieceTokenizer.tokenize_with_offsets() instead of plain
+        .tokenize() and outputs a triple of (tokens, start_offsets,
+        limit_offsets) instead of just tokens.
+      **kwargs: standard arguments to Layer().
+    """
+    super().__init__(**kwargs)
+    logging.info("Initialize a FastWordpieceBertTokenizer.")
+    self.tokenize_with_offsets = tokenize_with_offsets
+    self._basic_tokenizer = bert_tokenizer.BasicTokenizer(lower_case=lower_case)
+    # Read the vocab file into a list of tokens to create `fast_wp_tokenizer`.
+    self._vocab = [line.rstrip() for line in tf.io.gfile.GFile(vocab_file)]
+    self._fast_wp_tokenizer = text.FastWordpieceTokenizer(
+        vocab=self._vocab, token_out_type=tf.int32, no_pretokenization=True)
+    self._special_tokens_dict = self._create_special_tokens_dict()
+
+  @property
+  def vocab_size(self):
+    return len(self._vocab)
+
+  def get_config(self):
+    # Skip in tf.saved_model.save(); fail if called directly.
+    # We cannot just put the original, user-supplied vocab file name into
+    # the config, because the path has to change as the SavedModel is copied
+    # around.
+    raise NotImplementedError("Not implemented yet.")
+
+  def get_special_tokens_dict(self):
+    """Returns dict of token ids, keyed by standard names for their purpose.
+
+    Returns:
+      A dict from Python strings to Python integers. Each key is a standard
+      name for a special token describing its use. (For example, "padding_id"
+      is what BERT traditionally calls "[PAD]" but others may call "<pad>".)
+      The corresponding value is the integer token id. If a special token
+      is not found, its entry is omitted from the dict.
+
+      The supported keys and tokens are:
+        * start_of_sequence_id: looked up from "[CLS]"
+        * end_of_segment_id: looked up from "[SEP]"
+        * padding_id: looked up from "[PAD]"
+        * mask_id: looked up from "[MASK]"
+        * vocab_size: one past the largest token id used
+    """
+    return self._special_tokens_dict
+
+  def _create_special_tokens_dict(self):
+    """Creates dict of token ids, keyed by standard names for their purpose."""
+    special_tokens = {"vocab_size": self.vocab_size}
+
+    def add_special_token(key, token):
+      try:
+        token_id = self._vocab.index(token)
+        special_tokens[key] = token_id
+      except ValueError:
+        # Similar as nlp.modeling.layers.BertTokenizer, if a special token
+        # is not found, its entry is omitted from the dict.
+        logging.warning("Could not find %s as token \"%s\" in vocab file", key,
+                        token)
+
+    add_special_token("start_of_sequence_id", "[CLS]")
+    add_special_token("end_of_segment_id", "[SEP]")
+    add_special_token("padding_id", "[PAD]")
+    add_special_token("mask_id", "[MASK]")
+    return special_tokens
+
+  def _tokenize_with_offsets(self, text_input: tf.Tensor):
+    tokens, begin, _ = self._basic_tokenizer.tokenize_with_offsets(text_input)
+    wordpieces, wp_begin, wp_end = (
+        self._fast_wp_tokenizer.tokenize_with_offsets(tokens))
+    begin_expanded = tf.expand_dims(begin, axis=2)
+    final_begin = begin_expanded + wp_begin
+    final_end = begin_expanded + wp_end
+    return wordpieces, final_begin, final_end
+
+  def _tokenize(self, text_input: tf.Tensor):
+    tokens = self._basic_tokenizer.tokenize(text_input)
+    return self._fast_wp_tokenizer.tokenize(tokens)
+
+  def call(self, inputs: tf.Tensor):
+    """Calls text.BertTokenizer on inputs.
+
+    Args:
+      inputs: A string Tensor of shape [batch_size].
+
+    Returns:
+      One or three of RaggedTensors if tokenize_with_offsets is False or True,
+      respectively. These are
+      tokens: A RaggedTensor of shape [batch_size, (words), (pieces_per_word)]
+        and type int32. tokens[i,j,k] contains the k-th wordpiece of the
+        j-th word in the i-th input.
+      start_offsets, limit_offsets: If tokenize_with_offsets is True,
+        RaggedTensors of type int64 with the same indices as tokens.
+        Element [i,j,k] contains the byte offset at the start, or past the
+        end, resp., for the k-th wordpiece of the j-th word in the i-th input.
+    """
+    # Prepare to reshape the result to work around broken shape inference.
+    batch_size = tf.shape(inputs)[0]
+
+    def _reshape(rt):
+      values = rt.values
+      row_splits = rt.row_splits
+      row_splits = tf.reshape(row_splits, [batch_size + 1])
+      return tf.RaggedTensor.from_row_splits(values, row_splits)
+
+    if self.tokenize_with_offsets:
+      tokens, start_offsets, limit_offsets = self._tokenize_with_offsets(inputs)
+      return _reshape(tokens), _reshape(start_offsets), _reshape(limit_offsets)
+    else:
+      tokens = self._tokenize(inputs)
+      return _reshape(tokens)
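
The offset arithmetic in `_tokenize_with_offsets()` adds each word's start offset (from the basic tokenizer) to wordpiece offsets that are relative to that word. A small worked example, not from the commit, using the "abc def" case exercised by the tests in the next file:

# Word "def" starts at byte 4; its wordpieces "d" and "##ef" start at 0 and 1
# within the word, so the final offsets become 4 and 5.
word_begin = [0, 4]           # from BasicTokenizer.tokenize_with_offsets
wp_begin = [[0], [0, 1]]      # per-word wordpiece begins
wp_end = [[3], [1, 3]]        # per-word wordpiece ends

final_begin = [[b + o for o in offs] for b, offs in zip(word_begin, wp_begin)]
final_end = [[b + o for o in offs] for b, offs in zip(word_begin, wp_end)]
print(final_begin)  # [[0], [4, 5]]  -> the expected start_offsets in the test
print(final_end)    # [[3], [5, 7]]  -> the expected limit_offsets in the test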

official/nlp/modeling/layers/text_layers_test.py

@@ -442,5 +442,109 @@ class BertPackInputsTest(tf.test.TestCase):
                          [1001, 21, 22, 23, 24, 25, 26, 27, 28, 1002]]))


+# This test covers the in-process behavior of FastWordpieceBertTokenizer layer.
+class FastWordPieceBertTokenizerTest(tf.test.TestCase):
+
+  def _make_vocab_file(self, vocab, filename="vocab.txt"):
+    path = os.path.join(
+        tempfile.mkdtemp(dir=self.get_temp_dir()),  # New subdir each time.
+        filename)
+    with tf.io.gfile.GFile(path, "w") as f:
+      f.write("\n".join(vocab + [""]))
+    return path
+
+  def test_uncased(self):
+    vocab_file = self._make_vocab_file(
+        ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "d", "##ef", "abc", "xy"])
+    bert_tokenize = text_layers.FastWordpieceBertTokenizer(
+        vocab_file=vocab_file, lower_case=True)
+    inputs = tf.constant(["abc def", "ABC DEF d"])
+    token_ids = bert_tokenize(inputs)
+    self.assertAllEqual(token_ids,
+                        tf.ragged.constant([[[6], [4, 5]],
+                                            [[6], [4, 5], [4]]]))
+    bert_tokenize.tokenize_with_offsets = True
+    token_ids_2, start_offsets, limit_offsets = bert_tokenize(inputs)
+    self.assertAllEqual(token_ids, token_ids_2)
+    self.assertAllEqual(start_offsets,
+                        tf.ragged.constant([[[0], [4, 5]],
+                                            [[0], [4, 5], [8]]]))
+    self.assertAllEqual(limit_offsets,
+                        tf.ragged.constant([[[3], [5, 7]],
+                                            [[3], [5, 7], [9]]]))
+    self.assertEqual(bert_tokenize.vocab_size, 8)
+
+  # Repeat the above and test that case matters with lower_case=False.
+  def test_cased(self):
+    vocab_file = self._make_vocab_file(
+        ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "d", "##ef", "abc", "ABC"])
+    bert_tokenize = text_layers.FastWordpieceBertTokenizer(
+        vocab_file=vocab_file, lower_case=False, tokenize_with_offsets=True)
+    inputs = tf.constant(["abc def", "ABC DEF"])
+    token_ids, start_offsets, limit_offsets = bert_tokenize(inputs)
+    self.assertAllEqual(token_ids,
+                        tf.ragged.constant([[[6], [4, 5]],
+                                            [[7], [1]]]))
+    self.assertAllEqual(start_offsets,
+                        tf.ragged.constant([[[0], [4, 5]],
+                                            [[0], [4]]]))
+    self.assertAllEqual(limit_offsets,
+                        tf.ragged.constant([[[3], [5, 7]],
+                                            [[3], [7]]]))
+
+  def test_special_tokens_complete(self):
+    vocab_file = self._make_vocab_file(
+        ["foo", "[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "xy"])
+    bert_tokenize = text_layers.FastWordpieceBertTokenizer(
+        vocab_file=vocab_file, lower_case=True)
+    self.assertDictEqual(
+        bert_tokenize.get_special_tokens_dict(),
+        dict(padding_id=1,
+             start_of_sequence_id=3,
+             end_of_segment_id=4,
+             mask_id=5,
+             vocab_size=7))
+
+  def test_special_tokens_partial(self):
+    # [UNK] token is required by fast wordpiece tokenizer.
+    vocab_file = self._make_vocab_file(
+        ["[PAD]", "[CLS]", "[SEP]", "[UNK]"])
+    bert_tokenize = text_layers.FastWordpieceBertTokenizer(
+        vocab_file=vocab_file, lower_case=True)
+    self.assertDictEqual(
+        bert_tokenize.get_special_tokens_dict(),
+        dict(padding_id=0,
+             start_of_sequence_id=1,
+             end_of_segment_id=2,
+             vocab_size=4))  # No mask_id,
+
+  def test_special_tokens_in_estimator(self):
+    """Tests getting special tokens without an Eager init context."""
+    vocab_file = self._make_vocab_file(
+        ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "d", "##ef", "abc", "xy"])
+
+    def input_fn():
+      with tf.init_scope():
+        self.assertFalse(tf.executing_eagerly())
+      # Build a preprocessing Model.
+      sentences = tf.keras.layers.Input(shape=[], dtype=tf.string)
+      bert_tokenizer = text_layers.FastWordpieceBertTokenizer(
+          vocab_file=vocab_file, lower_case=True)
+      special_tokens_dict = bert_tokenizer.get_special_tokens_dict()
+      for k, v in special_tokens_dict.items():
+        self.assertIsInstance(v, int, "Unexpected type for {}".format(k))
+      tokens = bert_tokenizer(sentences)
+      packed_inputs = text_layers.BertPackInputs(
+          4, special_tokens_dict=special_tokens_dict)(tokens)
+      preprocessing = tf.keras.Model(sentences, packed_inputs)
+      # Map the dataset.
+      ds = tf.data.Dataset.from_tensors(
+          (tf.constant(["abc", "DEF"]), tf.constant([0, 1])))
+      ds = ds.map(lambda features, labels: (preprocessing(features), labels))
+      return ds
+
+    def model_fn(features, labels, mode):
+      del labels  # Unused.
+      return tf.estimator.EstimatorSpec(mode=mode,
+                                        predictions=features["input_word_ids"])
+
+    estimator = tf.estimator.Estimator(model_fn=model_fn)
+    outputs = list(estimator.predict(input_fn))
+    self.assertAllEqual(outputs, np.array([[2, 6, 3, 0], [2, 4, 5, 3]]))
+
+
 if __name__ == "__main__":
   tf.test.main()

official/nlp/modeling/layers/transformer_encoder_block.py

@@ -16,6 +16,8 @@
 import tensorflow as tf

+from official.nlp.modeling.layers import util
+

 @tf.keras.utils.register_keras_serializable(package="Text")
 class TransformerEncoderBlock(tf.keras.layers.Layer):

@@ -86,8 +88,9 @@ class TransformerEncoderBlock(tf.keras.layers.Layer):
         kernel.
       attention_axes: axes over which the attention is applied. `None` means
         attention over all axes, but batch, heads, and features.
-      **kwargs: keyword arguments/
+      **kwargs: keyword arguments.
     """
+    util.filter_kwargs(kwargs)
     super().__init__(**kwargs)
     self._num_heads = num_attention_heads

official/nlp/modeling/layers/util.py

@@ -30,13 +30,13 @@ class TfFunctionIfEagerDecorator(object):
     @functools.wraps(func)
     def wrapped_func(*args):
       # TODO(b/150147476, b/150024785): Fix tf.function in TF1 crash.
-      if not hasattr(tf.compat.v1, "executing_eagerly_outside_functions"
+      if not hasattr(tf.compat.v1, 'executing_eagerly_outside_functions'
                     ) or tf.compat.v1.executing_eagerly_outside_functions():
         return tf.function(func=func, **self.func_kwargs)(*args)
       return func(*args)

     # Cache the created function in self._call_impl.
-    if not hasattr(self, "_call_impl"):
+    if not hasattr(self, '_call_impl'):
       self._call_impl = wrapped_func
     return self._call_impl

@@ -44,3 +44,29 @@
 def tf_function_if_eager(**kwargs):
   """Applies the @tf.function decorator only if running in eager mode."""
   return TfFunctionIfEagerDecorator(**kwargs)
+
+
+def filter_kwargs(kwargs):
+  """In place removes unused options in kwargs.
+
+  This function removes the construction signatures: e.g.
+  number_attention_heads... in TransformerEncoderBlock. This is needed,
+  otherwise base_layer.py in Keras will complain.
+
+  Args:
+    kwargs: keyword arguments to be filtered.
+  """
+  # This is the union of signatures of TransformerEncoderBlock and
+  # ReZeroTransformer. Every Transformer
+  # block that uses compatible signature with TransformerEncoderBlock should
+  # call this function before base constructor super().__init__(**kwargs).
+  denylist = [
+      'num_attention_heads', 'intermediate_size', 'intermediate_activation',
+      'inner_dim', 'inner_activation', 'output_range', 'kernel_initializer',
+      'bias_initializer', 'kernel_regularizer', 'bias_regularizer',
+      'activity_regularizer', 'kernel_constraint', 'bias_constraint',
+      'use_bias', 'norm_first', 'norm_epsilon', 'output_dropout',
+      'attention_dropout', 'inner_dropout', 'attention_initializer',
+      'attention_axes', 'share_rezero'
+  ]
+  for unused_key in denylist:
+    kwargs.pop(unused_key, None)
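
A brief sketch of the intended call pattern for the new helper, assuming any layer whose constructor signature is compatible with TransformerEncoderBlock: filter the kwargs in place before handing the remainder to the Keras base constructor, as ReZeroTransformer and TransformerEncoderBlock now do.

from official.nlp.modeling.layers import util

kwargs = {"name": "encoder_block", "share_rezero": True, "norm_first": True}
util.filter_kwargs(kwargs)  # removes construction-only options, in place
print(kwargs)  # {'name': 'encoder_block'}, now safe for tf.keras.layers.Layer.__init__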

official/nlp/modeling/models/seq2seq_transformer.py

@@ -260,11 +260,9 @@ class Seq2SeqTransformer(tf.keras.Model):
       return {"outputs": top_decoded_ids, "scores": top_scores}

-    decoder_inputs = self.embedding_lookup(targets)
-    embedding_mask = tf.cast(tf.not_equal(targets, 0), decoder_inputs.dtype)
-    decoder_inputs *= tf.expand_dims(embedding_mask, -1)
     # Shift targets to the right, and remove the last element
-    decoder_inputs = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
+    targets = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
+    decoder_inputs = self.embedding_lookup(targets)
     length = tf.shape(decoder_inputs)[1]
     pos_encoding = self.position_embedding(decoder_inputs)
     pos_encoding = tf.cast(pos_encoding, embedded_inputs.dtype)

@@ -325,12 +323,7 @@ class Seq2SeqTransformer(tf.keras.Model):
       decoder_input = ids[:, -1:]

       # Preprocess decoder input by getting embeddings and adding timing signal.
-      # decoder_input = self.embedding_softmax_layer(decoder_input)
-      source_decoder_input = decoder_input
       decoder_input = self.embedding_lookup(decoder_input)
-      embedding_mask = tf.cast(
-          tf.not_equal(source_decoder_input, 0), decoder_input.dtype)
-      decoder_input *= tf.expand_dims(embedding_mask, -1)
       decoder_input += timing_signal[i]
       if self._padded_decode:
         # indexing does not work on TPU.
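
The rewritten training path now shifts the raw target ids right before the embedding lookup, instead of shifting the already-embedded inputs and masking them. A small worked sketch of the `tf.pad(...)[:, :-1]` shift (not from the commit):

import tensorflow as tf

targets = tf.constant([[11, 12, 13, 0]])              # [batch, length]; 0 is padding
shifted = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]   # prepend a 0, drop the last id
print(shifted.numpy())  # [[ 0 11 12 13]]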
official/nlp/modeling/networks/bert_dense_encoder_test.py
View file @
09d9656f
...
...
@@ -20,29 +20,30 @@ import numpy as np
import
tensorflow
as
tf
from
tensorflow.python.keras
import
keras_parameterized
# pylint: disable=g-direct-tensorflow-import
from
official.nlp.modeling.networks
import
bert_
dense_
encoder
from
official.nlp.modeling.networks
import
bert_encoder
# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
# guarantees forward compatibility of this code for the V2 switchover.
@
keras_parameterized
.
run_all_keras_modes
class
Bert
Dense
EncoderTest
(
keras_parameterized
.
TestCase
):
class
BertEncoder
V2
Test
(
keras_parameterized
.
TestCase
):
def
tearDown
(
self
):
super
(
Bert
Dense
EncoderTest
,
self
).
tearDown
()
super
(
BertEncoder
V2
Test
,
self
).
tearDown
()
tf
.
keras
.
mixed_precision
.
set_global_policy
(
"float32"
)
def
test_dict_outputs_network_creation
(
self
):
hidden_size
=
32
sequence_length
=
21
dense_sequence_length
=
20
# Create a small dense Bert
Dense
Encoder for testing.
# Create a small dense BertEncoder
V2
for testing.
kwargs
=
{}
test_network
=
bert_
dense_
encoder
.
Bert
Dense
Encoder
(
test_network
=
bert_encoder
.
BertEncoder
V2
(
vocab_size
=
100
,
hidden_size
=
hidden_size
,
num_attention_heads
=
2
,
num_layers
=
3
,
with_dense_inputs
=
True
,
**
kwargs
)
# Create the inputs (note that the first dimension is implicit).
word_ids
=
tf
.
keras
.
Input
(
shape
=
(
sequence_length
,),
dtype
=
tf
.
int32
)
...
...
@@ -86,12 +87,13 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
sequence_length
=
21
dense_sequence_length
=
20
# Create a small BertEncoder for testing.
test_network
=
bert_
dense_
encoder
.
Bert
Dense
Encoder
(
test_network
=
bert_encoder
.
BertEncoder
V2
(
vocab_size
=
100
,
hidden_size
=
hidden_size
,
num_attention_heads
=
2
,
num_layers
=
3
,
dict_outputs
=
True
)
dict_outputs
=
True
,
with_dense_inputs
=
True
)
# Create the inputs (note that the first dimension is implicit).
word_ids
=
tf
.
keras
.
Input
(
shape
=
(
sequence_length
,),
dtype
=
tf
.
int32
)
mask
=
tf
.
keras
.
Input
(
shape
=
(
sequence_length
,),
dtype
=
tf
.
int32
)
...
...
@@ -134,12 +136,13 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
dense_sequence_length
=
20
tf
.
keras
.
mixed_precision
.
set_global_policy
(
"mixed_float16"
)
# Create a small BertEncoder for testing.
test_network
=
bert_
dense_
encoder
.
Bert
Dense
Encoder
(
test_network
=
bert_encoder
.
BertEncoder
V2
(
vocab_size
=
100
,
hidden_size
=
hidden_size
,
num_attention_heads
=
2
,
num_layers
=
3
,
dict_outputs
=
True
)
dict_outputs
=
True
,
with_dense_inputs
=
True
)
# Create the inputs (note that the first dimension is implicit).
word_ids
=
tf
.
keras
.
Input
(
shape
=
(
sequence_length
,),
dtype
=
tf
.
int32
)
mask
=
tf
.
keras
.
Input
(
shape
=
(
sequence_length
,),
dtype
=
tf
.
int32
)
...
...
@@ -176,9 +179,8 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     self.assertAllEqual(tf.float16, pooled.dtype)

   @parameterized.named_parameters(
-      ("all_sequence_encoder_v2", bert_dense_encoder.BertDenseEncoder, None, 41),
-      ("output_range_encoder_v2", bert_dense_encoder.BertDenseEncoder, 1, 1),
+      ("all_sequence_encoder_v2", bert_encoder.BertEncoderV2, None, 41),
+      ("output_range_encoder_v2", bert_encoder.BertEncoderV2, 1, 1),
   )
   def test_dict_outputs_network_invocation(
       self, encoder_cls, output_range, out_seq_len):
@@ -195,7 +197,8 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
         num_layers=3,
         type_vocab_size=num_types,
         output_range=output_range,
-        dict_outputs=True)
+        dict_outputs=True,
+        with_dense_inputs=True)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
@@ -276,7 +279,7 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     # Creates a BertEncoder with embedding_width != hidden_size
     embedding_width = 16
-    test_network = bert_dense_encoder.BertDenseEncoder(
+    test_network = bert_encoder.BertEncoderV2(
         vocab_size=vocab_size,
         hidden_size=hidden_size,
         max_sequence_length=max_sequence_length,
@@ -316,11 +319,12 @@ class BertDenseEncoderTest(keras_parameterized.TestCase):
     sequence_length = 21
     dense_sequence_length = 20
     # Create a small BertEncoder for testing.
-    test_network = bert_dense_encoder.BertDenseEncoder(
+    test_network = bert_encoder.BertEncoderV2(
         vocab_size=100,
         hidden_size=hidden_size,
         num_attention_heads=2,
-        num_layers=3)
+        num_layers=3,
+        with_dense_inputs=True)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
official/nlp/modeling/networks/bert_encoder.py
@@ -23,6 +23,8 @@ from official.nlp.modeling import layers
 _Initializer = Union[str, tf.keras.initializers.Initializer]
+_Activation = Union[str, Callable[..., Any]]

 _approx_gelu = lambda x: tf.keras.activations.gelu(x, approximate=True)
@@ -72,6 +74,7 @@ class BertEncoderV2(tf.keras.layers.Layer):
     norm_first: Whether to normalize inputs to attention and intermediate dense
       layers. If set False, output of attention and intermediate dense layers is
       normalized.
+    with_dense_inputs: Whether to accept dense embeddings as the input.
   """

   def __init__(
@@ -83,7 +86,7 @@ class BertEncoderV2(tf.keras.layers.Layer):
       max_sequence_length: int = 512,
       type_vocab_size: int = 16,
       inner_dim: int = 3072,
-      inner_activation: Callable[..., Any] = _approx_gelu,
+      inner_activation: _Activation = _approx_gelu,
       output_dropout: float = 0.1,
       attention_dropout: float = 0.1,
       initializer: _Initializer = tf.keras.initializers.TruncatedNormal(
@@ -92,6 +95,7 @@ class BertEncoderV2(tf.keras.layers.Layer):
       embedding_width: Optional[int] = None,
       embedding_layer: Optional[tf.keras.layers.Layer] = None,
       norm_first: bool = False,
+      with_dense_inputs: bool = False,
       **kwargs):
     # Pops kwargs that are used in V1 implementation.
     if 'dict_outputs' in kwargs:
@@ -190,7 +194,19 @@ class BertEncoderV2(tf.keras.layers.Layer):
         'embedding_width': embedding_width,
         'embedding_layer': embedding_layer,
         'norm_first': norm_first,
+        'with_dense_inputs': with_dense_inputs,
     }
+    if with_dense_inputs:
+      self.inputs = dict(
+          input_word_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          input_mask=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          input_type_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          dense_inputs=tf.keras.Input(
+              shape=(None, embedding_width), dtype=tf.float32),
+          dense_mask=tf.keras.Input(shape=(None,), dtype=tf.int32),
+          dense_type_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
+      )
+    else:
+      self.inputs = dict(
           input_word_ids=tf.keras.Input(shape=(None,), dtype=tf.int32),
           input_mask=tf.keras.Input(shape=(None,), dtype=tf.int32),
@@ -203,11 +219,22 @@ class BertEncoderV2(tf.keras.layers.Layer):
       mask = inputs.get('input_mask')
       type_ids = inputs.get('input_type_ids')
       word_embeddings = inputs.get('input_word_embeddings', None)
+      dense_inputs = inputs.get('dense_inputs', None)
+      dense_mask = inputs.get('dense_mask', None)
+      dense_type_ids = inputs.get('dense_type_ids', None)
     else:
       raise ValueError('Unexpected inputs type to %s.' % self.__class__)

     if word_embeddings is None:
       word_embeddings = self._embedding_layer(word_ids)

+    if dense_inputs is not None:
+      # Concat the dense embeddings at sequence end.
+      word_embeddings = tf.concat([word_embeddings, dense_inputs], axis=1)
+      type_ids = tf.concat([type_ids, dense_type_ids], axis=1)
+      mask = tf.concat([mask, dense_mask], axis=1)
+
     # absolute position embeddings.
     position_embeddings = self._position_embedding_layer(word_embeddings)
     type_embeddings = self._type_embedding_layer(type_ids)
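With with_dense_inputs=True the encoder concatenates externally computed dense embeddings (plus their mask and type ids) after the word embeddings, so the transformer sees one sequence of length seq_len + dense_len. A minimal usage sketch; the shapes and values are illustrative, and it assumes the default embedding_width equals hidden_size:

import numpy as np
import tensorflow as tf
from official.nlp.modeling.networks import bert_encoder

encoder = bert_encoder.BertEncoderV2(
    vocab_size=100, hidden_size=32, num_attention_heads=2, num_layers=3,
    with_dense_inputs=True)
batch, seq_len, dense_len = 2, 21, 20
outputs = encoder(dict(
    input_word_ids=np.random.randint(100, size=(batch, seq_len)).astype(np.int32),
    input_mask=np.ones((batch, seq_len), np.int32),
    input_type_ids=np.zeros((batch, seq_len), np.int32),
    dense_inputs=np.random.rand(batch, dense_len, 32).astype(np.float32),
    dense_mask=np.ones((batch, dense_len), np.int32),
    dense_type_ids=np.zeros((batch, dense_len), np.int32)))
# sequence_output covers both the word and the dense positions: (2, 41, 32).
print(outputs['sequence_output'].shape)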
official/nlp/modeling/networks/funnel_transformer.py
@@ -15,17 +15,32 @@
"""Funnel Transformer network."""
# pylint: disable=g-classes-have-attributes
from
typing
import
Union
,
Sequence
from
typing
import
Any
,
Callable
,
Optional
,
Union
,
Sequence
from
absl
import
logging
import
numpy
as
np
import
tensorflow
as
tf
from
official.nlp.modeling
import
layers
_Initializer
=
Union
[
str
,
tf
.
keras
.
initializers
.
Initializer
]
_Activation
=
Union
[
str
,
Callable
[...,
Any
]]
_MAX
=
'max'
_AVG
=
'avg'
_TRUNCATED_AVG
=
'truncated_avg'
_transformer_cls2str
=
{
layers
.
TransformerEncoderBlock
:
'TransformerEncoderBlock'
,
layers
.
ReZeroTransformer
:
'ReZeroTransformer'
}
_str2transformer_cls
=
{
'TransformerEncoderBlock'
:
layers
.
TransformerEncoderBlock
,
'ReZeroTransformer'
:
layers
.
ReZeroTransformer
}
_approx_gelu
=
lambda
x
:
tf
.
keras
.
activations
.
gelu
(
x
,
approximate
=
True
)
def
_get_policy_dtype
():
try
:
...
...
@@ -206,29 +221,37 @@ class FunnelTransformerEncoder(tf.keras.layers.Layer):
       embeddings for the input word IDs.
     norm_first: Whether to normalize inputs to attention and intermediate dense
       layers. If set False, output of attention and intermediate dense layers is
-      normalized.
+      normalized. This does not apply to ReZero.
+    transformer_cls: str or a keras Layer. This is the base TransformerBlock the
+      funnel encoder relies on.
+    share_rezero: bool. Whether to share ReZero alpha between the attention
+      layer and the ffn layer. This option is specific to ReZero.
   """

   def __init__(
       self,
-      vocab_size,
-      hidden_size=768,
-      num_layers=12,
-      num_attention_heads=12,
-      max_sequence_length=512,
-      type_vocab_size=16,
-      inner_dim=3072,
-      inner_activation=lambda x: tf.keras.activations.gelu(x, approximate=True),
-      output_dropout=0.1,
-      attention_dropout=0.1,
-      pool_type=_MAX,
-      pool_stride=2,
-      unpool_length=0,
-      initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
-      output_range=None,
-      embedding_width=None,
-      embedding_layer=None,
-      norm_first=False,
+      vocab_size: int,
+      hidden_size: int = 768,
+      num_layers: int = 12,
+      num_attention_heads: int = 12,
+      max_sequence_length: int = 512,
+      type_vocab_size: int = 16,
+      inner_dim: int = 3072,
+      inner_activation: _Activation = _approx_gelu,
+      output_dropout: float = 0.1,
+      attention_dropout: float = 0.1,
+      pool_type: str = _MAX,
+      pool_stride: int = 2,
+      unpool_length: int = 0,
+      initializer: _Initializer = tf.keras.initializers.TruncatedNormal(
+          stddev=0.02),
+      output_range: Optional[int] = None,
+      embedding_width: Optional[int] = None,
+      embedding_layer: Optional[tf.keras.layers.Layer] = None,
+      norm_first: bool = False,
+      transformer_cls: Union[
+          str, tf.keras.layers.Layer] = layers.TransformerEncoderBlock,
+      share_rezero: bool = True,
       **kwargs):
     super().__init__(**kwargs)
     activation = tf.keras.activations.get(inner_activation)
@@ -278,16 +301,22 @@ class FunnelTransformerEncoder(tf.keras.layers.Layer):
     self._transformer_layers = []
     self._attention_mask_layer = layers.SelfAttentionMask(
         name='self_attention_mask')
+    # Will raise an error if the string is not supported.
+    if isinstance(transformer_cls, str):
+      transformer_cls = _str2transformer_cls[transformer_cls]
     for i in range(num_layers):
-      layer = layers.TransformerEncoderBlock(
+      layer = transformer_cls(
           num_attention_heads=num_attention_heads,
-          intermediate_size=inner_dim,
-          intermediate_activation=inner_activation,
+          inner_dim=inner_dim,
+          inner_activation=inner_activation,
           output_dropout=output_dropout,
           attention_dropout=attention_dropout,
           norm_first=norm_first,
           output_range=output_range if i == num_layers - 1 else None,
           kernel_initializer=initializer,
+          share_rezero=share_rezero,
           name='transformer/layer_%d' % i)
       self._transformer_layers.append(layer)
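transformer_cls can now be given either as a class or as one of the registered strings ('TransformerEncoderBlock', 'ReZeroTransformer'), which is resolved through _str2transformer_cls above. A hedged construction sketch with illustrative sizes (not values taken from the repository):

from official.nlp.modeling.networks import funnel_transformer

encoder = funnel_transformer.FunnelTransformerEncoder(
    vocab_size=100,
    hidden_size=32,
    num_layers=3,
    num_attention_heads=2,
    pool_type='truncated_avg',
    pool_stride=2,
    transformer_cls='ReZeroTransformer',  # or layers.ReZeroTransformer
    share_rezero=True)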
@@ -333,24 +362,44 @@ class FunnelTransformerEncoder(tf.keras.layers.Layer):
     self._pool_type = pool_type
     self._config = {
         'vocab_size': vocab_size,
         'hidden_size': hidden_size,
         'num_layers': num_layers,
         'num_attention_heads': num_attention_heads,
         'max_sequence_length': max_sequence_length,
         'type_vocab_size': type_vocab_size,
         'inner_dim': inner_dim,
         'inner_activation': tf.keras.activations.serialize(activation),
         'output_dropout': output_dropout,
         'attention_dropout': attention_dropout,
         'initializer': tf.keras.initializers.serialize(initializer),
         'output_range': output_range,
         'embedding_width': embedding_width,
         'embedding_layer': embedding_layer,
         'norm_first': norm_first,
         'pool_type': pool_type,
         'pool_stride': pool_stride,
         'unpool_length': unpool_length,
+        'transformer_cls':
+            _transformer_cls2str.get(transformer_cls, str(transformer_cls))
     }

   def call(self, inputs):
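Because the block type is recorded in the config by name (via _transformer_cls2str) rather than as a class object, the serialized config stays human-readable. A small sketch, assuming the standard Keras get_config plumbing on an encoder built as in the sketch above:

config = encoder.get_config()
# The transformer block is stored by its registered string name.
assert config['transformer_cls'] == 'ReZeroTransformer'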
official/nlp/modeling/networks/funnel_transformer_test.py
@@ -38,13 +38,20 @@ class FunnelTransformerEncoderTest(parameterized.TestCase, tf.test.TestCase):
     tf.keras.mixed_precision.set_global_policy("float32")

   @parameterized.named_parameters(
-      ("mix_truncated_avg", "mixed_float16", tf.float16, "truncated_avg"),
-      ("float32_truncated_avg", "float32", tf.float32, "truncated_avg"),
-      ("mix_max", "mixed_float16", tf.float16, "max"),
-      ("float32_max", "float32", tf.float32, "max"),
-      ("mix_avg", "mixed_float16", tf.float16, "avg"),
-      ("float32_avg", "float32", tf.float32, "avg"))
-  def test_network_creation(self, policy, pooled_dtype, pool_type):
+      ("mix_truncated_avg_rezero", "mixed_float16", tf.float16,
+       "truncated_avg", "ReZeroTransformer"),
+      ("float32_truncated_avg_rezero", "float32", tf.float32,
+       "truncated_avg", "ReZeroTransformer"),
+      ("mix_truncated_avg", "mixed_float16", tf.float16,
+       "truncated_avg", "TransformerEncoderBlock"),
+      ("float32_truncated_avg", "float32", tf.float32,
+       "truncated_avg", "TransformerEncoderBlock"),
+      ("mix_max", "mixed_float16", tf.float16, "max", "TransformerEncoderBlock"),
+      ("float32_max", "float32", tf.float32, "max", "TransformerEncoderBlock"),
+      ("mix_avg", "mixed_float16", tf.float16, "avg", "TransformerEncoderBlock"),
+      ("float32_avg", "float32", tf.float32, "avg", "TransformerEncoderBlock"))
+  def test_network_creation(self, policy, pooled_dtype, pool_type,
+                            transformer_cls):
     tf.keras.mixed_precision.set_global_policy(policy)
     hidden_size = 32
@@ -60,7 +67,8 @@ class FunnelTransformerEncoderTest(parameterized.TestCase, tf.test.TestCase):
         pool_stride=pool_stride,
         pool_type=pool_type,
         max_sequence_length=sequence_length,
-        unpool_length=0)
+        unpool_length=0,
+        transformer_cls=transformer_cls)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
@@ -253,7 +261,8 @@ class FunnelTransformerEncoderTest(parameterized.TestCase, tf.test.TestCase):
         norm_first=False,
         pool_type="max",
         pool_stride=2,
-        unpool_length=0)
+        unpool_length=0,
+        transformer_cls="TransformerEncoderBlock")
     network = funnel_transformer.FunnelTransformerEncoder(**kwargs)
     expected_config = dict(kwargs)
     expected_config["inner_activation"] = tf.keras.activations.serialize(
official/nlp/serving/export_savedmodel.py
@@ -13,12 +13,14 @@
 # limitations under the License.
 """A binary/library to export TF-NLP serving `SavedModel`."""
+import dataclasses
 import os
 from typing import Any, Dict, Text

 from absl import app
 from absl import flags
-import dataclasses
 import yaml

 from official.core import base_task
 from official.core import task_factory
 from official.modeling import hyperparams
@@ -29,6 +31,7 @@ from official.nlp.tasks import masked_lm
 from official.nlp.tasks import question_answering
 from official.nlp.tasks import sentence_prediction
 from official.nlp.tasks import tagging
+from official.nlp.tasks import translation

 FLAGS = flags.FLAGS
@@ -40,7 +43,9 @@ SERVING_MODULES = {
     question_answering.QuestionAnsweringTask: serving_modules.QuestionAnswering,
-    tagging.TaggingTask: serving_modules.Tagging
+    tagging.TaggingTask: serving_modules.Tagging,
+    translation.TranslationTask: serving_modules.Translation
 }
@@ -60,6 +65,10 @@ def define_flags():
   flags.DEFINE_string(
       "function_keys", None,
       "A string key to retrieve pre-defined serving signatures.")
+  flags.DEFINE_string(
+      "module_key", None,
+      "For multi-task case, load the export module weights from a specific "
+      "checkpoint item.")
   flags.DEFINE_bool("convert_tpu", False, "")
   flags.DEFINE_multi_integer("allowed_batch_size", None,
                              "Allowed batch sizes for batching ops.")
@@ -116,7 +125,8 @@ def main(_):
       export_module,
       function_keys=[FLAGS.function_keys],
       checkpoint_path=FLAGS.checkpoint_path,
-      export_savedmodel_dir=FLAGS.export_savedmodel_dir)
+      export_savedmodel_dir=FLAGS.export_savedmodel_dir,
+      module_key=FLAGS.module_key)
   if FLAGS.convert_tpu:
     # pylint: disable=g-import-not-at-top
official/nlp/serving/export_savedmodel_util.py
@@ -13,24 +13,21 @@
 # limitations under the License.
 """Common library to export a SavedModel from the export module."""
-import os
-import time
 from typing import Dict, List, Optional, Text, Union

-from absl import logging
 import tensorflow as tf

 from official.core import export_base

-MAX_DIRECTORY_CREATION_ATTEMPTS = 10
+get_timestamped_export_dir = export_base.get_timestamped_export_dir


 def export(export_module: export_base.ExportModule,
            function_keys: Union[List[Text], Dict[Text, Text]],
            export_savedmodel_dir: Text,
            checkpoint_path: Optional[Text] = None,
-           timestamped: bool = True) -> Text:
+           timestamped: bool = True,
+           module_key: Optional[Text] = None) -> Text:
   """Exports to SavedModel format.

   Args:
@@ -41,6 +38,8 @@ def export(export_module: export_base.ExportModule,
     export_savedmodel_dir: Output saved model directory.
     checkpoint_path: Object-based checkpoint path or directory.
     timestamped: Whether to export the savedmodel to a timestamped directory.
+    module_key: Optional string to identify a checkpoint object to load for the
+      model in the export module.

   Returns:
     The savedmodel directory path.
@@ -48,37 +47,16 @@ def export(export_module: export_base.ExportModule,
   save_options = tf.saved_model.SaveOptions(function_aliases={
       'tpu_candidate': export_module.serve,
   })
-  return export_base.export(export_module, function_keys,
-                            export_savedmodel_dir, checkpoint_path,
-                            timestamped, save_options)
-
-
-def get_timestamped_export_dir(export_dir_base):
-  """Builds a path to a new subdirectory within the base directory.
-
-  Args:
-    export_dir_base: A string containing a directory to write the exported
-      graph and checkpoints.
-  Returns:
-    The full path of the new subdirectory (which is not actually created yet).
-  Raises:
-    RuntimeError: if repeated attempts fail to obtain a unique timestamped
-      directory name.
-  """
-  attempts = 0
-  while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS:
-    timestamp = int(time.time())
-    result_dir = os.path.join(export_dir_base, str(timestamp))
-    if not tf.io.gfile.exists(result_dir):
-      # Collisions are still possible (though extremely unlikely): this
-      # directory is not actually created yet, but it will be almost
-      # instantly on return from this function.
-      return result_dir
-    time.sleep(1)
-    attempts += 1
-    logging.warning('Directory %s already exists; retrying (attempt %s/%s)',
-                    str(result_dir), attempts, MAX_DIRECTORY_CREATION_ATTEMPTS)
-  raise RuntimeError('Failed to obtain a unique export directory name after '
-                     f'{MAX_DIRECTORY_CREATION_ATTEMPTS} attempts.')
+  if module_key:
+    kwargs = {module_key: export_module.model}
+    checkpoint = tf.train.Checkpoint(**kwargs)
+  else:
+    checkpoint = None
+  return export_base.export(
+      export_module,
+      function_keys,
+      export_savedmodel_dir,
+      checkpoint_path,
+      timestamped,
+      save_options,
+      checkpoint=checkpoint)
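When module_key is set, the export module's model is wrapped in a tf.train.Checkpoint under that key before delegating to export_base.export, so a single item of a multi-task checkpoint can be restored selectively. A hypothetical call (the paths, the 'masked_lm' key, and the 'serve' function key are placeholders, not values from the repository):

from official.nlp.serving import export_savedmodel_util

export_dir = export_savedmodel_util.export(
    export_module,
    function_keys=["serve"],
    export_savedmodel_dir="/tmp/exported_model",
    checkpoint_path="/tmp/multitask_ckpt/ckpt-10",
    module_key="masked_lm")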
official/nlp/serving/serving_modules.py
@@ -14,10 +14,12 @@
"""Serving export modules for TF Model Garden NLP models."""
# pylint:disable=missing-class-docstring
import
dataclasses
from
typing
import
Dict
,
List
,
Optional
,
Text
import
dataclasses
import
tensorflow
as
tf
import
tensorflow_text
as
tf_text
from
official.core
import
export_base
from
official.modeling.hyperparams
import
base_config
from
official.nlp.data
import
sentence_prediction_dataloader
...
...
@@ -407,3 +409,48 @@ class Tagging(export_base.ExportModule):
         signatures[signature_key] = self.serve_examples.get_concrete_function(
             tf.TensorSpec(shape=[None], dtype=tf.string, name="examples"))
     return signatures
+
+
+class Translation(export_base.ExportModule):
+  """The export module for the translation task."""
+
+  @dataclasses.dataclass
+  class Params(base_config.Config):
+    sentencepiece_model_path: str = ""
+
+  def __init__(self, params, model: tf.keras.Model, inference_step=None):
+    super().__init__(params, model, inference_step)
+    self._sp_tokenizer = tf_text.SentencepieceTokenizer(
+        model=tf.io.gfile.GFile(params.sentencepiece_model_path, "rb").read(),
+        add_eos=True)
+    try:
+      empty_str_tokenized = self._sp_tokenizer.tokenize("").numpy()
+    except tf.errors.InternalError:
+      raise ValueError(
+          "EOS token not in tokenizer vocab."
+          "Please make sure the tokenizer generates a single token for an "
+          "empty string.")
+    self._eos_id = empty_str_tokenized.item()
+
+  @tf.function
+  def serve(self, inputs) -> Dict[str, tf.Tensor]:
+    return self.inference_step(inputs)
+
+  @tf.function
+  def serve_text(self, text: tf.Tensor) -> Dict[str, tf.Tensor]:
+    tokenized = self._sp_tokenizer.tokenize(text).to_tensor(0)
+    return self._sp_tokenizer.detokenize(
+        self.serve({"inputs": tokenized})["outputs"])
+
+  def get_inference_signatures(self, function_keys: Dict[Text, Text]):
+    signatures = {}
+    valid_keys = ("serve_text")
+    for func_key, signature_key in function_keys.items():
+      if func_key not in valid_keys:
+        raise ValueError(
+            "Invalid function key for the module: %s with key %s. "
+            "Valid keys are: %s" % (self.__class__, func_key, valid_keys))
+      if func_key == "serve_text":
+        signatures[signature_key] = self.serve_text.get_concrete_function(
+            tf.TensorSpec(shape=[None], dtype=tf.string, name="text"))
+    return signatures
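A sketch of wiring the new module into the export flow; the model object and paths here are placeholders, and the test added further below exercises the same serve_text signature in-process:

params = serving_modules.Translation.Params(
    sentencepiece_model_path="/path/to/sp.model")  # placeholder path
export_module = serving_modules.Translation(params=params, model=translation_model)
export_savedmodel_util.export(
    export_module,
    function_keys={"serve_text": "serving_default"},
    export_savedmodel_dir="/tmp/translation_export")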
official/nlp/serving/serving_modules_test.py
@@ -15,8 +15,11 @@
"""Tests for nlp.serving.serving_modules."""
import
os
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
sentencepiece
import
SentencePieceTrainer
from
official.nlp.configs
import
bert
from
official.nlp.configs
import
encoders
from
official.nlp.serving
import
serving_modules
...
...
@@ -24,6 +27,7 @@ from official.nlp.tasks import masked_lm
 from official.nlp.tasks import question_answering
 from official.nlp.tasks import sentence_prediction
 from official.nlp.tasks import tagging
+from official.nlp.tasks import translation


 def _create_fake_serialized_examples(features_dict):
@@ -59,6 +63,33 @@ def _create_fake_vocab_file(vocab_file_path):
     outfile.write("\n".join(tokens))


+def _train_sentencepiece(input_path, vocab_size, model_path, eos_id=1):
+  argstr = " ".join([
+      f"--input={input_path}", f"--vocab_size={vocab_size}",
+      "--character_coverage=0.995", f"--model_prefix={model_path}",
+      "--model_type=bpe", "--bos_id=-1", "--pad_id=0", f"--eos_id={eos_id}",
+      "--unk_id=2"
+  ])
+  SentencePieceTrainer.Train(argstr)
+
+
+def _generate_line_file(filepath, lines):
+  with tf.io.gfile.GFile(filepath, "w") as f:
+    for l in lines:
+      f.write("{}\n".format(l))
+
+
+def _make_sentencepeice(output_dir):
+  src_lines = ["abc ede fg", "bbcd ef a g", "de f a a g"]
+  tgt_lines = ["dd cc a ef g", "bcd ef a g", "gef cd ba"]
+  sentencepeice_input_path = os.path.join(output_dir, "inputs.txt")
+  _generate_line_file(sentencepeice_input_path, src_lines + tgt_lines)
+  sentencepeice_model_prefix = os.path.join(output_dir, "sp")
+  _train_sentencepiece(sentencepeice_input_path, 11, sentencepeice_model_prefix)
+  sentencepeice_model_path = "{}.model".format(sentencepeice_model_prefix)
+  return sentencepeice_model_path
+
+
 class ServingModulesTest(tf.test.TestCase, parameterized.TestCase):

   @parameterized.parameters(
@@ -312,6 +343,31 @@ class ServingModulesTest(tf.test.TestCase, parameterized.TestCase):
     with self.assertRaises(ValueError):
       _ = export_module.get_inference_signatures({"foo": None})

+  def test_translation(self):
+    sp_path = _make_sentencepeice(self.get_temp_dir())
+    encdecoder = translation.EncDecoder(
+        num_attention_heads=4, intermediate_size=256)
+    config = translation.TranslationConfig(
+        model=translation.ModelConfig(
+            encoder=encdecoder,
+            decoder=encdecoder,
+            embedding_width=256,
+            padded_decode=False,
+            decode_max_length=100),
+        sentencepiece_model_path=sp_path,
+    )
+    task = translation.TranslationTask(config)
+    model = task.build_model()
+
+    params = serving_modules.Translation.Params(
+        sentencepiece_model_path=sp_path)
+    export_module = serving_modules.Translation(params=params, model=model)
+    functions = export_module.get_inference_signatures({
+        "serve_text": "serving_default"
+    })
+    outputs = functions["serving_default"](tf.constant(["abcd", "ef gh"]))
+    self.assertEqual(outputs.shape, (2,))
+    self.assertEqual(outputs.dtype, tf.string)
+

 if __name__ == "__main__":
   tf.test.main()