ModelZoo / ResNet50_tensorflow · Commit 88253ce5

Authored Aug 12, 2020 by Hongkun Yu; committed by A. Unique TensorFlower, Aug 12, 2020.

Internal change

PiperOrigin-RevId: 326286926
Parent: 52371ffe
Showing 20 changed files with 93 additions and 102 deletions (+93 −102).
official/nlp/data/create_pretraining_data_test.py (+10 −22)
official/nlp/data/data_loader.py (+2 −2)
official/nlp/data/data_loader_factory_test.py (+1 −0)
official/nlp/data/question_answering_dataloader.py (+1 −0)
official/nlp/data/question_answering_dataloader_test.py (+1 −0)
official/nlp/data/sentence_prediction_dataloader.py (+1 −1)
official/nlp/data/sentence_retrieval_lib.py (+2 −4)
official/nlp/data/squad_lib.py (+15 −13)
official/nlp/data/squad_lib_sp.py (+13 −9)
official/nlp/data/tagging_data_lib.py (+10 −11)
official/nlp/data/tagging_data_loader.py (+1 −0)
official/nlp/modeling/layers/dense_einsum.py (+2 −3)
official/nlp/modeling/layers/gated_feedforward.py (+14 −14)
official/nlp/modeling/layers/gated_feedforward_test.py (+1 −0)
official/nlp/modeling/layers/masked_lm_test.py (+4 −8)
official/nlp/modeling/layers/on_device_embedding.py (+1 −1)
official/nlp/modeling/layers/on_device_embedding_test.py (+2 −2)
official/nlp/modeling/layers/position_embedding.py (+9 −11)
official/nlp/modeling/layers/position_embedding_test.py (+1 −0)
official/nlp/modeling/layers/rezero_transformer.py (+2 −1)
official/nlp/data/create_pretraining_data_test.py

@@ -25,10 +25,7 @@ _VOCAB_WORDS = ["vocab_1", "vocab_2"]
class CreatePretrainingDataTest(tf.test.TestCase):

  def assertTokens(self, input_tokens, output_tokens, masked_positions,
                   masked_labels):
    # Ensure the masked positions are unique.
    self.assertCountEqual(masked_positions, set(masked_positions))

@@ -42,24 +39,18 @@ class CreatePretrainingDataTest(tf.test.TestCase):
    # Ensure each label is valid.
    for pos, label in zip(masked_positions, masked_labels):
      output_token = output_tokens[pos]
      if (output_token == "[MASK]" or output_token in _VOCAB_WORDS or
          output_token == input_tokens[pos]):
        continue
      self.fail("invalid mask value: {}".format(output_token))

  def test_wordpieces_to_grams(self):
    tests = [
        (["That", "cone"], [(0, 1), (1, 2)]),
        (["That", "cone", "##s"], [(0, 1), (1, 3)]),
        (["Swit", "##zer", "##land"], [(0, 3)]),
        (["[CLS]", "Up", "##dog"], [(1, 3)]),
        (["[CLS]", "Up", "##dog", "[SEP]", "Down"], [(1, 3), (4, 5)]),
    ]
    for inp, expected in tests:
      output = cpd._wordpieces_to_grams(inp)

@@ -93,8 +84,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
            max_ngram_size=None))
    self.assertEqual(len(masked_positions), 3)
    self.assertEqual(len(masked_labels), 3)
    self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)

  def test_create_masked_lm_predictions_whole_word(self):
    tokens = ["[CLS]", "a", "##a", "b", "##b", "c", "##c", "[SEP]"]

@@ -113,8 +103,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
    # only take two.
    self.assertEqual(len(masked_positions), 2)
    self.assertEqual(len(masked_labels), 2)
    self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)
    # ensure that we took an entire word.
    self.assertIn(masked_labels, [["a", "##a"], ["b", "##b"], ["c", "##c"]])

@@ -133,8 +122,7 @@ class CreatePretrainingDataTest(tf.test.TestCase):
            max_ngram_size=3))
    self.assertEqual(len(masked_positions), 76)
    self.assertEqual(len(masked_labels), 76)
    self.assertTokens(tokens, output_tokens, masked_positions, masked_labels)

if __name__ == "__main__":
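For context: the `test_wordpieces_to_grams` cases above pin down the contract of `cpd._wordpieces_to_grams` — WordPiece tokens are grouped into word-level half-open spans `(start, end)`, `##`-continuations fold into the preceding word, and special tokens like `[CLS]`/`[SEP]` are excluded. A minimal sketch of a function with that behavior (an illustration that satisfies these tests, not the shipped implementation):

    def wordpieces_to_grams(tokens):
      """Groups WordPiece tokens into word spans; mirrors the test cases above."""
      grams = []
      start = None
      for i, token in enumerate(tokens):
        if token in ("[CLS]", "[SEP]"):   # special tokens never join a word
          if start is not None:
            grams.append((start, i))
          start = None
        elif token.startswith("##"):      # continuation of the current word
          continue
        else:                             # a new word begins here
          if start is not None:
            grams.append((start, i))
          start = i
      if start is not None:
        grams.append((start, len(tokens)))
      return grams

    assert wordpieces_to_grams(["That", "cone", "##s"]) == [(0, 1), (1, 3)]
    assert wordpieces_to_grams(["[CLS]", "Up", "##dog", "[SEP]", "Down"]) == [(1, 3), (4, 5)]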
official/nlp/data/data_loader.py

@@ -37,8 +37,8 @@ class DataLoader(metaclass=abc.ABCMeta):
    Args:
      input_context: This is a context class that is passed to the user's input
        function and contains information about the compute replicas and input
        pipelines. This object is used for multi-host inputs and passed by the
        distribution strategy.

    Returns:
      A per-host tf.data dataset. Note that we usually create the distributed
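For context on the docstring above: `input_context` is a `tf.distribute.InputContext`, handed to the per-host dataset function by the distribution strategy. A hedged sketch of the pattern (TF 2.3-era spelling; newer releases rename the method to `distribute_datasets_from_function`):

    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()

    def dataset_fn(input_context):
      # Derive the per-replica batch size from the global one, and shard the
      # data so each input pipeline reads a distinct slice.
      batch_size = input_context.get_per_replica_batch_size(global_batch_size=64)
      ds = tf.data.Dataset.range(1024)
      ds = ds.shard(input_context.num_input_pipelines,
                    input_context.input_pipeline_id)
      return ds.batch(batch_size)

    dist_ds = strategy.experimental_distribute_datasets_from_function(dataset_fn)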
official/nlp/data/data_loader_factory_test.py

@@ -14,6 +14,7 @@
# limitations under the License.
# ==============================================================================
"""Tests for official.nlp.data.data_loader_factory."""
import dataclasses

import tensorflow as tf
official/nlp/data/question_answering_dataloader.py

@@ -15,6 +15,7 @@
# ==============================================================================
"""Loads dataset for the question answering (e.g., SQuAD) task."""
from typing import Mapping, Optional

import dataclasses
import tensorflow as tf
official/nlp/data/question_answering_dataloader_test.py

@@ -15,6 +15,7 @@
# ==============================================================================
"""Tests for official.nlp.data.question_answering_dataloader."""
import os

import numpy as np
import tensorflow as tf
official/nlp/data/sentence_prediction_dataloader.py

@@ -15,6 +15,7 @@
# ==============================================================================
"""Loads dataset for the sentence prediction (classification) task."""
from typing import Mapping, Optional

import dataclasses
import tensorflow as tf

@@ -23,7 +24,6 @@ from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.data import data_loader
from official.nlp.data import data_loader_factory

LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32}
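The `import dataclasses` lines being added across these data loaders support the Model Garden pattern of describing each loader with a dataclass config and dispatching on it through `data_loader_factory`. A hedged sketch of the idea (the `SentencePredictionConfig` fields here are invented for illustration; the real config classes live in these modules):

    import dataclasses
    import tensorflow as tf

    LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32}

    @dataclasses.dataclass
    class SentencePredictionConfig:   # hypothetical config, for illustration
      input_path: str = ''
      seq_length: int = 128
      label_type: str = 'int'         # key into LABEL_TYPES_MAP

    config = SentencePredictionConfig(label_type='float')
    label_dtype = LABEL_TYPES_MAP[config.label_type]   # tf.float32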
official/nlp/data/sentence_retrieval_lib.py

@@ -25,8 +25,7 @@ class BuccProcessor(classifier_data_lib.DataProcessor):
  """Processor for Xtreme BUCC data set."""

  supported_languages = ["de", "fr", "ru", "zh"]

  def __init__(self, process_text_fn=tokenization.convert_to_unicode):
    super(BuccProcessor, self).__init__(process_text_fn)
    self.languages = BuccProcessor.supported_languages

@@ -66,8 +65,7 @@ class TatoebaProcessor(classifier_data_lib.DataProcessor):
      "nl", "pt", "ru", "sw", "ta", "te", "th", "tl", "tr", "ur", "vi", "zh"
  ]

  def __init__(self, process_text_fn=tokenization.convert_to_unicode):
    super(TatoebaProcessor, self).__init__(process_text_fn)
    self.languages = TatoebaProcessor.supported_languages
official/nlp/data/squad_lib.py

@@ -24,6 +24,7 @@ import copy
import json
import math
import os

import six
from absl import logging

@@ -40,8 +41,8 @@ class SquadExample(object):
  Attributes:
    qas_id: ID of the question-answer pair.
    question_text: Original text for the question.
    doc_tokens: The list of tokens in the context obtained by splitting on
      whitespace only.
    orig_answer_text: Original text for the answer.
    start_position: Starting index of the answer in `doc_tokens`.
    end_position: Ending index of the answer in `doc_tokens`.

@@ -209,8 +210,8 @@ def read_squad_examples(input_file, is_training, version_2_with_negative):
        #
        # Note that this means for training mode, every example is NOT
        # guaranteed to be preserved.
        actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])
        cleaned_answer_text = " ".join(
            tokenization.whitespace_tokenize(orig_answer_text))
        if actual_text.find(cleaned_answer_text) == -1:

@@ -520,15 +521,16 @@ def write_predictions(all_examples,
  logging.info("Writing nbest to: %s", (output_nbest_file))

  all_predictions, all_nbest_json, scores_diff_json = (
      postprocess_output(
          all_examples=all_examples,
          all_features=all_features,
          all_results=all_results,
          n_best_size=n_best_size,
          max_answer_length=max_answer_length,
          do_lower_case=do_lower_case,
          version_2_with_negative=version_2_with_negative,
          null_score_diff_threshold=null_score_diff_threshold,
          verbose=verbose))

  write_to_json_files(all_predictions, output_prediction_file)
  write_to_json_files(all_nbest_json, output_nbest_file)
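The `actual_text` / `cleaned_answer_text` check above guards against annotation drift: the answer recovered from the whitespace-split context must contain the whitespace-normalized original answer, or the training example is skipped. A small standalone illustration of the check (plain `str.split` standing in for `tokenization.whitespace_tokenize`):

    doc_tokens = "The quick  brown fox".split()   # ['The', 'quick', 'brown', 'fox']
    start_position, end_position = 1, 2

    actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])  # 'quick brown'
    cleaned_answer_text = " ".join("quick   brown".split())                # 'quick brown'

    # Passes: the normalized answer is a substring of the recovered span.
    assert actual_text.find(cleaned_answer_text) != -1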
official/nlp/data/squad_lib_sp.py

@@ -27,6 +27,7 @@ import copy
import json
import math
import os

from absl import logging
import numpy as np
import tensorflow as tf

@@ -246,6 +247,7 @@ def convert_examples_to_features(examples,
    f = np.zeros((max_n, max_m), dtype=np.float32)
    g = {}

    # pylint: disable=cell-var-from-loop
    def _lcs_match(max_dist, n=n, m=m):
      """Longest-common-substring algorithm."""

@@ -277,6 +279,7 @@ def convert_examples_to_features(examples,
                remove_space=False) == tok_cat_text[j] and
            f_prev + 1 > f[i, j]):
          g[(i, j)] = 2
          f[i, j] = f_prev + 1
    # pylint: enable=cell-var-from-loop

    max_dist = abs(n - m) + 5
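For context: `f` and `g` above are the score and backtrace tables of a longest-common-subsequence dynamic program, restricted to a diagonal band of width `max_dist` so aligning the raw paragraph text with the SentencePiece-detokenized text stays near-linear. A hedged sketch of the banded DP on plain strings (the real `_lcs_match` also normalizes each character through `preprocess_text`; that is omitted here):

    import numpy as np

    def banded_lcs_match(a, b, max_dist):
      """Banded LCS with a backtrace table; a sketch of the idea."""
      n, m = len(a), len(b)
      f = np.zeros((n, m), dtype=np.float32)
      g = {}  # backtrace: 0 = skip a[i], 1 = skip b[j], 2 = match a[i] with b[j]
      for i in range(n):
        for j in range(i - max_dist, i + max_dist):
          if j < 0 or j >= m:
            continue
          if i > 0:
            g[(i, j)] = 0
            f[i, j] = f[i - 1, j]
          if j > 0 and f[i, j - 1] > f[i, j]:
            g[(i, j)] = 1
            f[i, j] = f[i, j - 1]
          f_prev = f[i - 1, j - 1] if (i > 0 and j > 0) else 0
          if a[i] == b[j] and f_prev + 1 > f[i, j]:
            g[(i, j)] = 2
            f[i, j] = f_prev + 1
      return f, g

    f, g = banded_lcs_match("abcde", "abde", max_dist=abs(5 - 4) + 5)
    print(f[-1, -1])  # 4.0 — "abde" is the longest common subsequence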
official/nlp/data/tagging_data_lib.py

@@ -267,12 +267,12 @@ def write_example_to_file(examples,
      logging.info("Writing example %d of %d to %s", ex_index, len(examples),
                   output_file)

    tokenized_examples = _tokenize_example(example, max_seq_length, tokenizer,
                                           text_preprocessing)
    num_tokenized_examples += len(tokenized_examples)
    for per_tokenized_example in tokenized_examples:
      tf_example = _convert_single_example(per_tokenized_example,
                                           max_seq_length, tokenizer)
      writer.write(tf_example.SerializeToString())

  writer.close()

@@ -307,17 +307,16 @@ def token_classification_meta_data(train_data_size,
  return meta_data


def generate_tf_record_from_data_file(processor, data_dir, tokenizer,
                                      max_seq_length, train_data_output_path,
                                      eval_data_output_path,
                                      test_data_output_path,
                                      text_preprocessing):
  """Generates tfrecord files from the raw data."""
  common_kwargs = dict(
      tokenizer=tokenizer,
      max_seq_length=max_seq_length,
      text_preprocessing=text_preprocessing)
  train_examples = processor.get_train_examples(data_dir)
  train_data_size = write_example_to_file(
      train_examples, output_file=train_data_output_path, **common_kwargs)
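A hedged usage sketch for `generate_tf_record_from_data_file`, wiring in the parameters visible above (the tokenizer construction and all paths are placeholders, not taken from this commit; the processor is left elided because the concrete class depends on the tagging task):

    from official.nlp.bert import tokenization
    from official.nlp.data import tagging_data_lib

    tokenizer = tokenization.FullTokenizer("vocab.txt", do_lower_case=True)
    processor = ...  # one of the DataProcessor subclasses defined for tagging tasks

    tagging_data_lib.generate_tf_record_from_data_file(
        processor=processor,
        data_dir="/path/to/raw_data",
        tokenizer=tokenizer,
        max_seq_length=128,
        train_data_output_path="/tmp/train.tf_record",
        eval_data_output_path="/tmp/eval.tf_record",
        test_data_output_path="/tmp/test.tf_record",
        text_preprocessing=None)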
official/nlp/data/tagging_data_loader.py

@@ -15,6 +15,7 @@
# ==============================================================================
"""Loads dataset for the tagging (e.g., NER/POS) task."""
from typing import Mapping, Optional

import dataclasses
import tensorflow as tf
official/nlp/modeling/layers/dense_einsum.py

@@ -59,9 +59,8 @@ class DenseEinsum(tf.keras.layers.Layer):
      `(batch_size, units)`.
  """

  @deprecation.deprecated(None, "DenseEinsum is deprecated. Please use "
                          "tf.keras.experimental.EinsumDense layer instead.")
  def __init__(self,
               output_shape,
               num_summed_dimensions=1,
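The deprecation above points at Keras's einsum layer. A hedged sketch of the replacement (the equation and shapes are illustrative; at this commit's vintage the layer lives at `tf.keras.layers.experimental.EinsumDense`, later promoted out of `experimental`):

    import tensorflow as tf

    # A dense projection over the trailing axis, written as an einsum:
    # "abc,cd->abd" sums over the input's last dimension c.
    layer = tf.keras.layers.experimental.EinsumDense(
        "abc,cd->abd",
        output_shape=(None, 64),   # None leaves the sequence axis unspecified
        bias_axes="d")             # one bias per output unit, as in Dense

    x = tf.random.uniform((2, 10, 32))   # (batch, seq, hidden)
    y = layer(x)                         # shape (2, 10, 64)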
official/nlp/modeling/layers/gated_feedforward.py

@@ -36,19 +36,19 @@ class GatedFeedforward(tf.keras.layers.Layer):
    intermediate_size: Size of the intermediate layer.
    intermediate_activation: Activation for the intermediate layer.
    dropout: Dropout probability for the output dropout.
    use_gate: Whether to use gated linear units. If True, assuming `GELU` as
      the activation and omitting bias, will apply
      `GEGLU(x, W, V, W_2) = (GELU(xW) * xV)W_2`; if False, will follow the
      "Attention Is All You Need" (https://arxiv.org/abs/1706.03762) paper and
      apply `FFN(x, W_1, W_2) = GELU(xW_1)W_2`.
    num_blocks: The number of feedforward blocks to stack. Each block contains
      a (gated) linear layer and a fully connected layer followed by dropout,
      layer norm and residual.
    dropout_position: Where to apply the dropout, the value can be either
      `before_residual` or `after_residual`. If `before_residual`, will apply
      `layer_output = layer_norm(dropout(layer_output) + layer_input)`; if
      `after_residual`, will apply
      `layer_output = dropout(layer_norm(layer_output + layer_input))`.
    kernel_initializer: Initializer for dense layer kernels.
    bias_initializer: Initializer for dense layer biases.
    kernel_regularizer: Regularizer for dense layer kernels.

@@ -124,8 +124,9 @@ class GatedFeedforward(tf.keras.layers.Layer):
              bias_axes="d",
              name="intermediate_%d" % i,
              **common_kwargs))
      self._intermediate_activation_layers.append(
          tf.keras.layers.Activation(
              self._intermediate_activation, dtype=activation_policy))
      if self._use_gate:
        self._gate_dense.append(
            tf.keras.layers.experimental.EinsumDense(

@@ -141,8 +142,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
              bias_axes="d",
              name="output_%d" % i,
              **common_kwargs))
      self._output_dropout.append(tf.keras.layers.Dropout(rate=self._dropout))
      # Use float32 in layernorm for numeric stability.
      self._output_layer_norm.append(tf.keras.layers.LayerNormalization(
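The two formulas in the docstring come from the gated-linear-unit family: the gated path computes two parallel projections and multiplies them elementwise before the output projection. A minimal functional sketch of both paths (plain matrices, bias omitted; `tf.nn.gelu` is available in recent TF releases):

    import tensorflow as tf

    def ffn(x, w1, w2):
      """FFN(x, W_1, W_2) = GELU(x W_1) W_2 — the ungated feedforward."""
      return tf.nn.gelu(x @ w1) @ w2

    def geglu(x, w, v, w2):
      """GEGLU(x, W, V, W_2) = (GELU(x W) * x V) W_2 — the gated variant."""
      return (tf.nn.gelu(x @ w) * (x @ v)) @ w2

    x = tf.random.normal((2, 8))     # (batch, hidden)
    w = tf.random.normal((8, 32))    # hidden -> intermediate
    v = tf.random.normal((8, 32))    # gate projection
    w2 = tf.random.normal((32, 8))   # intermediate -> hidden
    print(ffn(x, w, w2).shape, geglu(x, w, v, w2).shape)  # (2, 8) (2, 8)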
official/nlp/modeling/layers/gated_feedforward_test.py

@@ -123,5 +123,6 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(test_layer.get_config(), new_layer.get_config())


if __name__ == "__main__":
  tf.test.main()
official/nlp/modeling/layers/masked_lm_test.py

@@ -49,8 +49,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
    # Create a maskedLM from the transformer stack.
    test_layer = masked_lm.MaskedLM(
        embedding_table=xformer_stack.get_embedding_table(), output=output)
    return test_layer

  def test_layer_creation(self):

@@ -59,8 +58,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
    hidden_size = 64
    num_predictions = 21
    test_layer = self.create_layer(vocab_size=vocab_size,
                                   hidden_size=hidden_size)
    # Make sure that the output tensor of the masked LM is the right shape.
    lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))

@@ -127,8 +125,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
    hidden_size = 64
    num_predictions = 21
    test_layer = self.create_layer(vocab_size=vocab_size,
                                   hidden_size=hidden_size)
    # Create a model from the masked LM layer.
    lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))

@@ -147,8 +144,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
  def test_unknown_output_type_fails(self):
    with self.assertRaisesRegex(ValueError, 'Unknown `output` value "bad".*'):
      _ = self.create_layer(vocab_size=8, hidden_size=8, output='bad')


if __name__ == '__main__':
official/nlp/modeling/layers/on_device_embedding.py

@@ -92,5 +92,5 @@ class OnDeviceEmbedding(tf.keras.layers.Layer):
        tf.concat([tf.shape(inputs), [self._embedding_width]], axis=0))
    embeddings.set_shape(inputs.shape.as_list() + [self._embedding_width])
    if self._use_scale:
      embeddings *= self._embedding_width**0.5
    return embeddings
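For context: the `use_scale` branch above is the usual transformer trick of multiplying embedding outputs by sqrt(embedding_width) so their magnitude matches the positional encodings added on top. A quick numeric illustration:

    import tensorflow as tf

    embedding_width = 64
    embeddings = tf.random.normal((2, 10, embedding_width))  # ~unit-variance lookup

    scaled = embeddings * embedding_width ** 0.5  # multiply by sqrt(64) = 8.0
    print(float(tf.math.reduce_std(scaled)))      # roughly 8x the input's std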
official/nlp/modeling/layers/on_device_embedding_test.py

@@ -89,8 +89,7 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase):
    embedding_width = 27
    policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
    test_layer = on_device_embedding.OnDeviceEmbedding(
        vocab_size=vocab_size, embedding_width=embedding_width, dtype=policy)
    # Create a 2-dimensional input (the first dimension is implicit).
    sequence_length = 23
    input_tensor = tf.keras.Input(shape=(sequence_length), dtype=tf.int32)

@@ -214,5 +213,6 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase):
    output = model.predict(input_data)
    self.assertEqual(tf.float32, output.dtype)


if __name__ == "__main__":
  tf.test.main()
official/nlp/modeling/layers/position_embedding.py

@@ -171,22 +171,20 @@ class RelativePositionEmbedding(tf.keras.layers.Layer):
      inputs: A tensor whose second dimension will be used as `length`. If
        `None`, the other `length` argument must be specified.
      length: An optional integer specifying the number of positions. If both
        `inputs` and `length` are specified, `length` must be equal to the
        second dimension of `inputs`.

    Returns:
      A tensor in shape of [length, hidden_size].
    """
    if inputs is None and length is None:
      raise ValueError("If inputs is None, `length` must be set in "
                       "RelativePositionEmbedding().")
    if inputs is not None:
      input_shape = tf_utils.get_shape_list(inputs)
      if length is not None and length != input_shape[1]:
        raise ValueError(
            "If inputs is not None, `length` must equal to input_shape[1].")
      length = input_shape[1]
    position = tf.cast(tf.range(length), tf.float32)
    num_timescales = self._hidden_size // 2

@@ -197,8 +195,8 @@ class RelativePositionEmbedding(tf.keras.layers.Layer):
    inv_timescales = min_timescale * tf.exp(
        tf.cast(tf.range(num_timescales), tf.float32) *
        -log_timescale_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(
        inv_timescales, 0)
    position_embeddings = tf.concat(
        [tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
    return position_embeddings
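The code above is the Transformer's sinusoidal encoding: half the channels get sin, half cos, over geometrically spaced timescales. A self-contained NumPy sketch mirroring the computation (min/max timescales of 1 and 1e4 are the customary defaults, assumed here):

    import numpy as np

    def sinusoidal_embeddings(length, hidden_size,
                              min_timescale=1.0, max_timescale=1.0e4):
      position = np.arange(length, dtype=np.float32)
      num_timescales = hidden_size // 2
      log_timescale_increment = (np.log(max_timescale / min_timescale) /
                                 (num_timescales - 1))
      inv_timescales = min_timescale * np.exp(
          np.arange(num_timescales, dtype=np.float32) * -log_timescale_increment)
      scaled_time = position[:, None] * inv_timescales[None, :]
      return np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)

    emb = sinusoidal_embeddings(length=50, hidden_size=128)
    print(emb.shape)  # (50, 128)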
official/nlp/modeling/layers/position_embedding_test.py

@@ -127,5 +127,6 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
    expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]])
    self.assertAllEqual(output_tensor, expected_output_tensor)


if __name__ == "__main__":
  tf.test.main()
official/nlp/modeling/layers/rezero_transformer.py

@@ -161,7 +161,8 @@ class ReZeroTransformer(tf.keras.layers.Layer):
    self._rezero_a = self.add_weight(
        name="rezero_alpha",
        initializer=tf.keras.initializers.Zeros(),
        trainable=True,
        dtype=tf.float32)

    super(ReZeroTransformer, self).build(input_shape)
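`rezero_alpha` is the learned residual gate from ReZero (https://arxiv.org/abs/2003.04887): each sublayer contributes `x + alpha * sublayer(x)` with `alpha` initialized to zero, so the block starts as the identity and deep stacks train stably. A minimal hedged sketch of the pattern (the idea only, not this layer's actual implementation):

    import tensorflow as tf

    class ReZeroResidual(tf.keras.layers.Layer):
      """Wraps a sublayer as x + alpha * sublayer(x), with alpha starting at 0."""

      def __init__(self, sublayer, **kwargs):
        super().__init__(**kwargs)
        self._sublayer = sublayer

      def build(self, input_shape):
        # Scalar gate, zero-initialized: the block is the identity at init.
        self._rezero_a = self.add_weight(
            name="rezero_alpha",
            initializer=tf.keras.initializers.Zeros(),
            trainable=True,
            dtype=tf.float32)
        super().build(input_shape)

      def call(self, x):
        return x + self._rezero_a * self._sublayer(x)

    block = ReZeroResidual(tf.keras.layers.Dense(16))
    y = block(tf.random.normal((4, 16)))  # equals the input until alpha moves off 0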