ModelZoo / ResNet50_tensorflow / Commits

Commit a62c2bfc
Authored Jul 30, 2020 by Hongkun Yu
Committed by A. Unique TensorFlower on Jul 30, 2020

Internal change

PiperOrigin-RevId: 324137297
Parent: 76f760c4
Changes: 17 changed files, with 40 additions (+) and 79 deletions (-)

official/nlp/albert/tf2_albert_encoder_checkpoint_converter.py     +1  -1
official/nlp/bert/bert_models.py                                   +3  -4
official/nlp/bert/bert_models_test.py                              +4  -10
official/nlp/bert/tf2_encoder_checkpoint_converter.py              +1  -1
official/nlp/configs/encoders.py                                   +0  -2
official/nlp/modeling/layers/masked_lm_test.py                     +1  -8
official/nlp/modeling/models/bert_classifier_test.py               +3  -3
official/nlp/modeling/models/bert_pretrainer.py                    +2  -1
official/nlp/modeling/models/bert_span_labeler_test.py             +4  -5
official/nlp/modeling/models/electra_pretrainer.py                 +0  -4
official/nlp/modeling/models/electra_pretrainer_test.py            +6  -3
official/nlp/modeling/networks/albert_transformer_encoder.py       +4  -11
official/nlp/modeling/networks/albert_transformer_encoder_test.py  +0  -4
official/nlp/modeling/networks/encoder_scaffold.py                 +4  -3
official/nlp/modeling/networks/transformer_encoder.py              +7  -11
official/nlp/modeling/networks/transformer_encoder_test.py         +0  -7
official/nlp/nhnet/models.py                                       +0  -1
official/nlp/albert/tf2_albert_encoder_checkpoint_converter.py

@@ -86,7 +86,7 @@ def _create_albert_model(cfg):
       activation=activations.gelu,
       dropout_rate=cfg.hidden_dropout_prob,
       attention_dropout_rate=cfg.attention_probs_dropout_prob,
-      sequence_length=cfg.max_position_embeddings,
+      max_sequence_length=cfg.max_position_embeddings,
       type_vocab_size=cfg.type_vocab_size,
       initializer=tf.keras.initializers.TruncatedNormal(
           stddev=cfg.initializer_range))
official/nlp/bert/bert_models.py

@@ -104,14 +104,14 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer):
 @gin.configurable
 def get_transformer_encoder(bert_config,
-                            sequence_length,
+                            sequence_length=None,
                             transformer_encoder_cls=None,
                             output_range=None):
   """Gets a 'TransformerEncoder' object.

   Args:
     bert_config: A 'modeling.BertConfig' or 'modeling.AlbertConfig' object.
-    sequence_length: Maximum sequence length of the training data.
+    sequence_length: [Deprecated].
     transformer_encoder_cls: A EncoderScaffold class. If it is None, uses the
       default BERT encoder implementation.
     output_range: the sequence output range, [0, output_range). Default setting

@@ -120,13 +120,13 @@ def get_transformer_encoder(bert_config,
   Returns:
     A networks.TransformerEncoder object.
   """
+  del sequence_length
   if transformer_encoder_cls is not None:
     # TODO(hongkuny): evaluate if it is better to put cfg definition in gin.
     embedding_cfg = dict(
         vocab_size=bert_config.vocab_size,
         type_vocab_size=bert_config.type_vocab_size,
         hidden_size=bert_config.hidden_size,
-        seq_length=sequence_length,
         max_seq_length=bert_config.max_position_embeddings,
         initializer=tf.keras.initializers.TruncatedNormal(
             stddev=bert_config.initializer_range),

@@ -161,7 +161,6 @@ def get_transformer_encoder(bert_config,
       activation=tf_utils.get_activation(bert_config.hidden_act),
       dropout_rate=bert_config.hidden_dropout_prob,
       attention_dropout_rate=bert_config.attention_probs_dropout_prob,
-      sequence_length=sequence_length,
       max_sequence_length=bert_config.max_position_embeddings,
       type_vocab_size=bert_config.type_vocab_size,
       embedding_width=bert_config.embedding_size,
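Usage note (not part of the commit): a minimal call-site sketch of the new convention, assuming official.nlp.bert.configs.BertConfig and small, made-up hyperparameters. The second positional argument is now optional and, if passed, is simply discarded inside the function.

  from official.nlp.bert import bert_models
  from official.nlp.bert import configs

  # Tiny config purely for illustration; hidden_size must stay divisible
  # by num_attention_heads.
  cfg = configs.BertConfig(
      vocab_size=100,
      hidden_size=16,
      num_hidden_layers=2,
      num_attention_heads=2,
      intermediate_size=32)

  # Before this commit: bert_models.get_transformer_encoder(cfg, 128)
  # After it: sequence_length is deprecated (and deleted inside the function),
  # so the encoder is built with a dynamic sequence dimension.
  encoder = bert_models.get_transformer_encoder(cfg)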
official/nlp/bert/bert_models_test.py

@@ -56,8 +56,6 @@ class BertModelsTest(tf.test.TestCase):
     # Expect two output from encoder: sequence and classification output.
     self.assertIsInstance(encoder.output, list)
     self.assertLen(encoder.output, 2)
-    # shape should be [batch size, seq_length, hidden_size]
-    self.assertEqual(encoder.output[0].shape.as_list(), [None, 5, 16])
     # shape should be [batch size, hidden_size]
     self.assertEqual(encoder.output[1].shape.as_list(), [None, 16])

@@ -74,16 +72,12 @@ class BertModelsTest(tf.test.TestCase):
     # Expect two output from model: start positions and end positions
     self.assertIsInstance(model.output, list)
     self.assertLen(model.output, 2)
-    # shape should be [batch size, seq_length]
-    self.assertEqual(model.output[0].shape.as_list(), [None, 5])
-    # shape should be [batch size, seq_length]
-    self.assertEqual(model.output[1].shape.as_list(), [None, 5])

     # Expect two output from core_model: sequence and classification output.
     self.assertIsInstance(core_model.output, list)
     self.assertLen(core_model.output, 2)
-    # shape should be [batch size, seq_length, hidden_size]
-    self.assertEqual(core_model.output[0].shape.as_list(), [None, 5, 16])
+    # shape should be [batch size, None, hidden_size]
+    self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16])
     # shape should be [batch size, hidden_size]
     self.assertEqual(core_model.output[1].shape.as_list(), [None, 16])

@@ -104,8 +98,8 @@ class BertModelsTest(tf.test.TestCase):
     # Expect two output from core_model: sequence and classification output.
     self.assertIsInstance(core_model.output, list)
     self.assertLen(core_model.output, 2)
-    # shape should be [batch size, 1, hidden_size]
-    self.assertEqual(core_model.output[0].shape.as_list(), [None, 1, 16])
+    # shape should be [batch size, None, hidden_size]
+    self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16])
     # shape should be [batch size, hidden_size]
     self.assertEqual(core_model.output[1].shape.as_list(), [None, 16])
official/nlp/bert/tf2_encoder_checkpoint_converter.py

@@ -61,7 +61,7 @@ def _create_bert_model(cfg):
       activation=activations.gelu,
       dropout_rate=cfg.hidden_dropout_prob,
       attention_dropout_rate=cfg.attention_probs_dropout_prob,
-      sequence_length=cfg.max_position_embeddings,
+      max_sequence_length=cfg.max_position_embeddings,
       type_vocab_size=cfg.type_vocab_size,
       initializer=tf.keras.initializers.TruncatedNormal(
           stddev=cfg.initializer_range),
official/nlp/configs/encoders.py

@@ -54,7 +54,6 @@ def instantiate_encoder_from_cfg(
        vocab_size=config.vocab_size,
        type_vocab_size=config.type_vocab_size,
        hidden_size=config.hidden_size,
-       seq_length=None,
        max_seq_length=config.max_position_embeddings,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=config.initializer_range),

@@ -90,7 +89,6 @@ def instantiate_encoder_from_cfg(
        activation=tf_utils.get_activation(config.hidden_activation),
        dropout_rate=config.dropout_rate,
        attention_dropout_rate=config.attention_dropout_rate,
-       sequence_length=None,
        max_sequence_length=config.max_position_embeddings,
        type_vocab_size=config.type_vocab_size,
        initializer=tf.keras.initializers.TruncatedNormal(
official/nlp/modeling/layers/masked_lm_test.py

@@ -34,7 +34,6 @@ class MaskedLMTest(keras_parameterized.TestCase):
   def create_layer(self,
                    vocab_size,
-                   sequence_length,
                    hidden_size,
                    output='predictions',
                    xformer_stack=None):

@@ -44,7 +43,6 @@ class MaskedLMTest(keras_parameterized.TestCase):
       xformer_stack = transformer_encoder.TransformerEncoder(
           vocab_size=vocab_size,
           num_layers=1,
-          sequence_length=sequence_length,
           hidden_size=hidden_size,
           num_attention_heads=4,
       )

@@ -62,7 +60,6 @@ class MaskedLMTest(keras_parameterized.TestCase):
     num_predictions = 21
     test_layer = self.create_layer(
         vocab_size=vocab_size,
-        sequence_length=sequence_length,
         hidden_size=hidden_size)
     # Make sure that the output tensor of the masked LM is the right shape.

@@ -81,19 +78,16 @@ class MaskedLMTest(keras_parameterized.TestCase):
     xformer_stack = transformer_encoder.TransformerEncoder(
         vocab_size=vocab_size,
         num_layers=1,
-        sequence_length=sequence_length,
         hidden_size=hidden_size,
         num_attention_heads=4,
     )
     test_layer = self.create_layer(
         vocab_size=vocab_size,
-        sequence_length=sequence_length,
         hidden_size=hidden_size,
         xformer_stack=xformer_stack,
         output='predictions')
     logit_layer = self.create_layer(
         vocab_size=vocab_size,
-        sequence_length=sequence_length,
         hidden_size=hidden_size,
         xformer_stack=xformer_stack,
         output='logits')

@@ -134,7 +128,6 @@ class MaskedLMTest(keras_parameterized.TestCase):
     num_predictions = 21
     test_layer = self.create_layer(
         vocab_size=vocab_size,
-        sequence_length=sequence_length,
         hidden_size=hidden_size)
     # Create a model from the masked LM layer.

@@ -155,7 +148,7 @@ class MaskedLMTest(keras_parameterized.TestCase):
   def test_unknown_output_type_fails(self):
     with self.assertRaisesRegex(ValueError, 'Unknown `output` value "bad".*'):
       _ = self.create_layer(
-          vocab_size=8, sequence_length=8, hidden_size=8, output='bad')
+          vocab_size=8, hidden_size=8, output='bad')


 if __name__ == '__main__':
official/nlp/modeling/models/bert_classifier_test.py

@@ -38,7 +38,7 @@ class BertClassifierTest(keras_parameterized.TestCase):
     vocab_size = 100
     sequence_length = 512
     test_network = networks.TransformerEncoder(
-        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)
+        vocab_size=vocab_size, num_layers=2)
     # Create a BERT trainer with the created network.
     bert_trainer_model = bert_classifier.BertClassifier(

@@ -62,7 +62,7 @@ class BertClassifierTest(keras_parameterized.TestCase):
     # Build a transformer network to use within the BERT trainer. (Here, we use
     # a short sequence_length for convenience.)
     test_network = networks.TransformerEncoder(
-        vocab_size=100, num_layers=2, sequence_length=2)
+        vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network.
     bert_trainer_model = bert_classifier.BertClassifier(

@@ -83,7 +83,7 @@ class BertClassifierTest(keras_parameterized.TestCase):
     # Build a transformer network to use within the BERT trainer. (Here, we use
     # a short sequence_length for convenience.)
     test_network = networks.TransformerEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
+        vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network. (Note that all the args
     # are different, so we can catch any serialization mismatches.)
official/nlp/modeling/models/bert_pretrainer.py

@@ -94,7 +94,8 @@ class BertPretrainer(tf.keras.Model):
     if isinstance(cls_output, list):
       cls_output = cls_output[-1]
     sequence_output_length = sequence_output.shape.as_list()[1]
-    if sequence_output_length < num_token_predictions:
+    if sequence_output_length is not None and (sequence_output_length <
+                                               num_token_predictions):
       raise ValueError(
           "The passed network's output length is %s, which is less than the "
           'requested num_token_predictions %s.' %
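Why the extra None check matters (illustrative sketch, not from the repo): once the encoder's sequence dimension is dynamic, the static shape reports None for that axis, and comparing None with an int raises a TypeError in Python 3.

  import tensorflow as tf

  num_token_predictions = 20
  # A sequence output with a dynamic sequence axis: [batch, seq, hidden].
  sequence_output = tf.keras.layers.Input(shape=(None, 16))

  sequence_output_length = sequence_output.shape.as_list()[1]  # None, not an int
  # `None < num_token_predictions` would raise TypeError, hence the guard:
  if sequence_output_length is not None and (
      sequence_output_length < num_token_predictions):
    raise ValueError('Output length %s is less than requested predictions %s.' %
                     (sequence_output_length, num_token_predictions))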
official/nlp/modeling/models/bert_span_labeler_test.py

@@ -36,7 +36,7 @@ class BertSpanLabelerTest(keras_parameterized.TestCase):
     vocab_size = 100
     sequence_length = 512
     test_network = networks.TransformerEncoder(
-        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)
+        vocab_size=vocab_size, num_layers=2)
     # Create a BERT trainer with the created network.
     bert_trainer_model = bert_span_labeler.BertSpanLabeler(test_network)

@@ -59,9 +59,8 @@ class BertSpanLabelerTest(keras_parameterized.TestCase):
     """Validate compilation using explicit output names."""
     # Build a transformer network to use within the BERT trainer.
     vocab_size = 100
-    sequence_length = 512
     test_network = networks.TransformerEncoder(
-        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)
+        vocab_size=vocab_size, num_layers=2)
     # Create a BERT trainer with the created network.
     bert_trainer_model = bert_span_labeler.BertSpanLabeler(test_network)

@@ -81,7 +80,7 @@ class BertSpanLabelerTest(keras_parameterized.TestCase):
     # Build a transformer network to use within the BERT trainer. (Here, we use
     # a short sequence_length for convenience.)
     test_network = networks.TransformerEncoder(
-        vocab_size=100, num_layers=2, sequence_length=2)
+        vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network.
     bert_trainer_model = bert_span_labeler.BertSpanLabeler(test_network)

@@ -101,7 +100,7 @@ class BertSpanLabelerTest(keras_parameterized.TestCase):
     # Build a transformer network to use within the BERT trainer. (Here, we use
     # a short sequence_length for convenience.)
     test_network = networks.TransformerEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
+        vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network. (Note that all the args
     # are different, so we can catch any serialization mismatches.)
official/nlp/modeling/models/electra_pretrainer.py

@@ -50,7 +50,6 @@ class ElectraPretrainer(tf.keras.Model):
     vocab_size: Size of generator output vocabulary
     num_classes: Number of classes to predict from the classification network
       for the generator network (not used now)
-    sequence_length: Input sequence length
     num_token_predictions: Number of tokens to predict from the masked LM.
     mlm_activation: The activation (if any) to use in the masked LM and
       classification networks. If None, no activation will be used.

@@ -67,7 +66,6 @@ class ElectraPretrainer(tf.keras.Model):
                discriminator_network,
                vocab_size,
                num_classes,
-               sequence_length,
               num_token_predictions,
               mlm_activation=None,
               mlm_initializer='glorot_uniform',

@@ -80,7 +78,6 @@ class ElectraPretrainer(tf.keras.Model):
         'discriminator_network': discriminator_network,
         'vocab_size': vocab_size,
         'num_classes': num_classes,
-        'sequence_length': sequence_length,
         'num_token_predictions': num_token_predictions,
         'mlm_activation': mlm_activation,
         'mlm_initializer': mlm_initializer,

@@ -94,7 +91,6 @@ class ElectraPretrainer(tf.keras.Model):
     self.discriminator_network = discriminator_network
     self.vocab_size = vocab_size
     self.num_classes = num_classes
-    self.sequence_length = sequence_length
     self.num_token_predictions = num_token_predictions
     self.mlm_activation = mlm_activation
     self.mlm_initializer = mlm_initializer
official/nlp/modeling/models/electra_pretrainer_test.py

@@ -36,9 +36,13 @@ class ElectraPretrainerTest(keras_parameterized.TestCase):
     vocab_size = 100
     sequence_length = 512
     test_generator_network = networks.TransformerEncoder(
-        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)
+        vocab_size=vocab_size,
+        num_layers=2,
+        max_sequence_length=sequence_length)
     test_discriminator_network = networks.TransformerEncoder(
-        vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)
+        vocab_size=vocab_size,
+        num_layers=2,
+        max_sequence_length=sequence_length)
     # Create a ELECTRA trainer with the created network.
     num_classes = 3

@@ -48,7 +52,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase):
         discriminator_network=test_discriminator_network,
         vocab_size=vocab_size,
         num_classes=num_classes,
-        sequence_length=sequence_length,
         num_token_predictions=num_token_predictions,
         disallow_correct=True)
official/nlp/modeling/networks/albert_transformer_encoder.py

@@ -53,9 +53,6 @@ class AlbertTransformerEncoder(tf.keras.Model):
     num_layers: The number of transformer layers.
     num_attention_heads: The number of attention heads for each transformer. The
       hidden size must be divisible by the number of attention heads.
-    sequence_length: The sequence length that this encoder expects. If None, the
-      sequence length is dynamic; if an integer, the encoder will require
-      sequences padded to this length.
     max_sequence_length: The maximum sequence length that this encoder can
       consume. If None, max_sequence_length uses the value from sequence length.
       This determines the variable shape for positional embeddings.

@@ -74,8 +71,7 @@ class AlbertTransformerEncoder(tf.keras.Model):
                hidden_size=768,
                num_layers=12,
                num_attention_heads=12,
-               sequence_length=512,
-               max_sequence_length=None,
+               max_sequence_length=512,
                type_vocab_size=16,
                intermediate_size=3072,
                activation=activations.gelu,

@@ -86,8 +82,6 @@ class AlbertTransformerEncoder(tf.keras.Model):
     activation = tf.keras.activations.get(activation)
     initializer = tf.keras.initializers.get(initializer)

-    if not max_sequence_length:
-      max_sequence_length = sequence_length
     self._self_setattr_tracking = False
     self._config_dict = {
         'vocab_size': vocab_size,

@@ -95,7 +89,6 @@ class AlbertTransformerEncoder(tf.keras.Model):
         'hidden_size': hidden_size,
         'num_layers': num_layers,
         'num_attention_heads': num_attention_heads,
-        'sequence_length': sequence_length,
         'max_sequence_length': max_sequence_length,
         'type_vocab_size': type_vocab_size,
         'intermediate_size': intermediate_size,

@@ -106,11 +99,11 @@ class AlbertTransformerEncoder(tf.keras.Model):
     }

     word_ids = tf.keras.layers.Input(
-        shape=(sequence_length,), dtype=tf.int32, name='input_word_ids')
+        shape=(None,), dtype=tf.int32, name='input_word_ids')
     mask = tf.keras.layers.Input(
-        shape=(sequence_length,), dtype=tf.int32, name='input_mask')
+        shape=(None,), dtype=tf.int32, name='input_mask')
     type_ids = tf.keras.layers.Input(
-        shape=(sequence_length,), dtype=tf.int32, name='input_type_ids')
+        shape=(None,), dtype=tf.int32, name='input_type_ids')

     if embedding_width is None:
       embedding_width = hidden_size
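Call-site impact (hedged sketch with invented hyperparameters): unlike TransformerEncoder, which keeps a deprecated sequence_length=None keyword, AlbertTransformerEncoder drops the argument outright, so existing callers must switch to max_sequence_length.

  from official.nlp.modeling.networks import albert_transformer_encoder

  # The positional-embedding table is sized by max_sequence_length; the runtime
  # sequence length is now dynamic (inputs are declared with shape=(None,)).
  encoder = albert_transformer_encoder.AlbertTransformerEncoder(
      vocab_size=100,
      hidden_size=16,
      num_layers=2,
      num_attention_heads=2,
      max_sequence_length=64,
      intermediate_size=32)

  # AlbertTransformerEncoder(..., sequence_length=64) now fails with
  # TypeError: unexpected keyword argument 'sequence_length'.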
official/nlp/modeling/networks/albert_transformer_encoder_test.py

@@ -48,7 +48,6 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
     kwargs = dict(
         vocab_size=100,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         num_attention_heads=2,
         num_layers=3)
     if expected_dtype == tf.float16:

@@ -92,7 +91,6 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
         vocab_size=vocab_size,
         embedding_width=8,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         num_attention_heads=2,
         num_layers=3,
         type_vocab_size=num_types)

@@ -123,7 +121,6 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
         vocab_size=vocab_size,
         embedding_width=8,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         max_sequence_length=max_sequence_length,
         num_attention_heads=2,
         num_layers=3,

@@ -141,7 +138,6 @@ class AlbertTransformerEncoderTest(keras_parameterized.TestCase):
         hidden_size=32,
         num_layers=3,
         num_attention_heads=2,
-        sequence_length=21,
         max_sequence_length=21,
         type_vocab_size=12,
         intermediate_size=1223,
official/nlp/modeling/networks/encoder_scaffold.py

@@ -129,16 +129,17 @@ class EncoderScaffold(tf.keras.Model):
       embeddings, attention_mask = self._embedding_network(inputs)
     else:
       self._embedding_network = None
+      seq_length = embedding_cfg.get('seq_length', None)
       word_ids = tf.keras.layers.Input(
-          shape=(embedding_cfg['seq_length'],),
+          shape=(seq_length,),
           dtype=tf.int32,
           name='input_word_ids')
       mask = tf.keras.layers.Input(
-          shape=(embedding_cfg['seq_length'],),
+          shape=(seq_length,),
           dtype=tf.int32,
           name='input_mask')
       type_ids = tf.keras.layers.Input(
-          shape=(embedding_cfg['seq_length'],),
+          shape=(seq_length,),
           dtype=tf.int32,
           name='input_type_ids')
       inputs = [word_ids, mask, type_ids]
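A minimal sketch (not from the repo) of the backward-compatibility pattern used here: older callers still put 'seq_length' into embedding_cfg, newer callers omit it, and dict.get() maps the missing key to None, which Keras treats as a dynamic axis.

  import tensorflow as tf

  for embedding_cfg in ({'seq_length': 128}, {}):
    seq_length = embedding_cfg.get('seq_length', None)  # None -> dynamic axis
    word_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32)
    print(word_ids.shape)  # (None, 128) for the old config, (None, None) for the new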
official/nlp/modeling/networks/transformer_encoder.py

@@ -48,9 +48,8 @@ class TransformerEncoder(tf.keras.Model):
     num_layers: The number of transformer layers.
     num_attention_heads: The number of attention heads for each transformer. The
       hidden size must be divisible by the number of attention heads.
-    sequence_length: The sequence length that this encoder expects. If None, the
-      sequence length is dynamic; if an integer, the encoder will require
-      sequences padded to this length.
+    sequence_length: [Deprecated]. TODO(hongkuny): remove this argument once no
+      user is using it.
     max_sequence_length: The maximum sequence length that this encoder can
       consume. If None, max_sequence_length uses the value from sequence length.
       This determines the variable shape for positional embeddings.

@@ -83,8 +82,8 @@ class TransformerEncoder(tf.keras.Model):
                hidden_size=768,
                num_layers=12,
                num_attention_heads=12,
-               sequence_length=512,
-               max_sequence_length=None,
+               sequence_length=None,
+               max_sequence_length=512,
                type_vocab_size=16,
                intermediate_size=3072,
                activation=activations.gelu,

@@ -99,15 +98,12 @@ class TransformerEncoder(tf.keras.Model):
     activation = tf.keras.activations.get(activation)
     initializer = tf.keras.initializers.get(initializer)

-    if not max_sequence_length:
-      max_sequence_length = sequence_length
     self._self_setattr_tracking = False
     self._config_dict = {
         'vocab_size': vocab_size,
         'hidden_size': hidden_size,
         'num_layers': num_layers,
         'num_attention_heads': num_attention_heads,
-        'sequence_length': sequence_length,
         'max_sequence_length': max_sequence_length,
         'type_vocab_size': type_vocab_size,
         'intermediate_size': intermediate_size,

@@ -121,11 +117,11 @@ class TransformerEncoder(tf.keras.Model):
     }

     word_ids = tf.keras.layers.Input(
-        shape=(sequence_length,), dtype=tf.int32, name='input_word_ids')
+        shape=(None,), dtype=tf.int32, name='input_word_ids')
     mask = tf.keras.layers.Input(
-        shape=(sequence_length,), dtype=tf.int32, name='input_mask')
+        shape=(None,), dtype=tf.int32, name='input_mask')
     type_ids = tf.keras.layers.Input(
-        shape=(sequence_length,), dtype=tf.int32, name='input_type_ids')
+        shape=(None,), dtype=tf.int32, name='input_type_ids')

     if embedding_width is None:
       embedding_width = hidden_size
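Hedged usage sketch mirroring the updated tests (hyperparameters invented for illustration): with sequence_length gone from the build, one encoder instance should accept batches padded to different lengths, while max_sequence_length still bounds the positional-embedding table.

  import numpy as np
  from official.nlp.modeling import networks

  encoder = networks.TransformerEncoder(
      vocab_size=100,
      hidden_size=16,
      num_layers=2,
      num_attention_heads=2,
      max_sequence_length=64,
      intermediate_size=32)

  # Batches with different padded lengths now run through the same instance.
  for seq_len in (8, 24):
    word_ids = np.zeros((2, seq_len), dtype='int32')
    mask = np.ones((2, seq_len), dtype='int32')
    type_ids = np.zeros((2, seq_len), dtype='int32')
    sequence_output, cls_output = encoder([word_ids, mask, type_ids])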
official/nlp/modeling/networks/transformer_encoder_test.py

@@ -42,7 +42,6 @@ class TransformerEncoderTest(keras_parameterized.TestCase):
     test_network = transformer_encoder.TransformerEncoder(
         vocab_size=100,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         num_attention_heads=2,
         num_layers=3)
     # Create the inputs (note that the first dimension is implicit).

@@ -71,7 +70,6 @@ class TransformerEncoderTest(keras_parameterized.TestCase):
     test_network = transformer_encoder.TransformerEncoder(
         vocab_size=100,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         num_attention_heads=2,
         num_layers=3,
         return_all_encoder_outputs=True)

@@ -100,7 +98,6 @@ class TransformerEncoderTest(keras_parameterized.TestCase):
     test_network = transformer_encoder.TransformerEncoder(
         vocab_size=100,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         num_attention_heads=2,
         num_layers=3)
     # Create the inputs (note that the first dimension is implicit).

@@ -132,7 +129,6 @@ class TransformerEncoderTest(keras_parameterized.TestCase):
     test_network = transformer_encoder.TransformerEncoder(
         vocab_size=vocab_size,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         num_attention_heads=2,
         num_layers=3,
         type_vocab_size=num_types,

@@ -163,7 +159,6 @@ class TransformerEncoderTest(keras_parameterized.TestCase):
     test_network = transformer_encoder.TransformerEncoder(
         vocab_size=vocab_size,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         max_sequence_length=max_sequence_length,
         num_attention_heads=2,
         num_layers=3,

@@ -177,7 +172,6 @@ class TransformerEncoderTest(keras_parameterized.TestCase):
     test_network = transformer_encoder.TransformerEncoder(
         vocab_size=vocab_size,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         max_sequence_length=max_sequence_length,
         num_attention_heads=2,
         num_layers=3,

@@ -196,7 +190,6 @@ class TransformerEncoderTest(keras_parameterized.TestCase):
         hidden_size=32,
         num_layers=3,
         num_attention_heads=2,
-        sequence_length=21,
         max_sequence_length=21,
         type_vocab_size=12,
         intermediate_size=1223,
official/nlp/nhnet/models.py

@@ -413,7 +413,6 @@ def get_bert2bert_layers(params: configs.BERT2BERTConfig):
       activation=tf_utils.get_activation(bert_config.hidden_act),
       dropout_rate=bert_config.hidden_dropout_prob,
       attention_dropout_rate=bert_config.attention_probs_dropout_prob,
-      sequence_length=None,
       max_sequence_length=bert_config.max_position_embeddings,
       type_vocab_size=bert_config.type_vocab_size,
       initializer=tf.keras.initializers.TruncatedNormal(