"...data/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "1d32722f5b855c01c5079bf76db8d9dc3787692f"
Commit 36d44db9 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 388118554
parent 45e66fd4
@@ -87,10 +87,8 @@ class BertClassifierTest(keras_parameterized.TestCase):
           inner_dim=0, num_classes=4)))
   def test_serialize_deserialize(self, cls_head):
     """Validate that the BERT trainer can be serialized and deserialized."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
-    # a short sequence_length for convenience.)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
+    # Build a transformer network to use within the BERT trainer.
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network. (Note that all the args
     # are different, so we can catch any serialization mismatches.)
......
@@ -67,10 +67,8 @@ class BertPretrainerTest(keras_parameterized.TestCase):
   def test_bert_trainer_tensor_call(self):
     """Validate that the Keras object can be invoked."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
-    # a short sequence_length for convenience.)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=2)
+    # Build a transformer network to use within the BERT trainer.
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network.
     bert_trainer_model = bert_pretrainer.BertPretrainer(
@@ -213,10 +211,8 @@ class BertPretrainerV2Test(keras_parameterized.TestCase):
   def test_v2_serialize_deserialize(self):
     """Validate that the BERT trainer can be serialized and deserialized."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
-    # a short sequence_length for convenience.)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
+    # Build a transformer network to use within the BERT trainer.
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network. (Note that all the args
     # are different, so we can catch any serialization mismatches.)
......
@@ -93,10 +93,8 @@ class BertSpanLabelerTest(keras_parameterized.TestCase):
   def test_serialize_deserialize(self):
     """Validate that the BERT trainer can be serialized and deserialized."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
-    # a short sequence_length for convenience.)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
+    # Build a transformer network to use within the BERT trainer.
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
     # Create a BERT trainer with the created network. (Note that all the args
     # are different, so we can catch any serialization mismatches.)
......
@@ -37,7 +37,6 @@ class DualEncoderTest(keras_parameterized.TestCase):
         vocab_size=vocab_size,
         num_layers=2,
         hidden_size=hidden_size,
-        sequence_length=sequence_length,
         dict_outputs=True)
     # Create a dual encoder model with the created network.
@@ -72,11 +71,9 @@ class DualEncoderTest(keras_parameterized.TestCase):
   @parameterized.parameters((192, 'logits'), (768, 'predictions'))
   def test_dual_encoder_tensor_call(self, hidden_size, output):
     """Validate that the Keras object can be invoked."""
-    # Build a transformer network to use within the dual encoder model. (Here,
-    # we use a short sequence_length for convenience.)
+    # Build a transformer network to use within the dual encoder model.
     sequence_length = 2
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=sequence_length)
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
     # Create a dual encoder model with the created network.
     dual_encoder_model = dual_encoder.DualEncoder(
@@ -98,18 +95,16 @@ class DualEncoderTest(keras_parameterized.TestCase):
   def test_serialize_deserialize(self):
     """Validate that the dual encoder model can be serialized / deserialized."""
-    # Build a transformer network to use within the dual encoder model. (Here,
-    # we use a short sequence_length for convenience.)
+    # Build a transformer network to use within the dual encoder model.
     sequence_length = 32
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=sequence_length)
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
     # Create a dual encoder model with the created network. (Note that all the
     # args are different, so we can catch any serialization mismatches.)
     dual_encoder_model = dual_encoder.DualEncoder(
         test_network, max_seq_length=sequence_length, output='predictions')
-    # Create another dual encoder model via serialization and deserialization.
+    # Create another dual encoder moel via serialization and deserialization.
     config = dual_encoder_model.get_config()
     new_dual_encoder = dual_encoder.DualEncoder.from_config(config)
......
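For context, here is a minimal sketch of the get_config / from_config round trip these tests exercise. The constructor arguments are illustrative, and the final config comparison is an assumption about what the truncated test goes on to verify, not code from this commit.

# Hedged sketch of the serialize/deserialize round trip, assuming the
# `official.nlp.modeling` package from tensorflow/models is importable.
from official.nlp.modeling import networks
from official.nlp.modeling.models import dual_encoder

test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
dual_encoder_model = dual_encoder.DualEncoder(
    test_network, max_seq_length=32, output='predictions')

# Round-trip the model through its Keras config.
config = dual_encoder_model.get_config()
new_dual_encoder = dual_encoder.DualEncoder.from_config(config)

# A matching config suggests no constructor arguments were lost (assumption:
# the original test asserts config equality in a similar way).
assert new_dual_encoder.get_config() == config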
@@ -100,7 +100,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase):
         discriminator_network=test_discriminator_network,
         vocab_size=100,
         num_classes=2,
-        sequence_length=3,
         num_token_predictions=2)
     # Create a set of 2-dimensional data tensors to feed into the model.
@@ -138,7 +137,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase):
         discriminator_network=test_discriminator_network,
         vocab_size=100,
         num_classes=2,
-        sequence_length=3,
         num_token_predictions=2)
     # Create another BERT trainer via serialization and deserialization.
......
@@ -15,6 +15,8 @@
 """Transformer-based text encoder network."""
 # pylint: disable=g-classes-have-attributes
 import collections
+from absl import logging
 import tensorflow as tf
 from official.modeling import activations
@@ -47,8 +49,6 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
     num_layers: The number of transformer layers.
     num_attention_heads: The number of attention heads for each transformer. The
       hidden size must be divisible by the number of attention heads.
-    sequence_length: [Deprecated]. TODO(hongkuny): remove this argument once no
-      user is using it.
     max_sequence_length: The maximum sequence length that this encoder can
       consume. If None, max_sequence_length uses the value from sequence length.
       This determines the variable shape for positional embeddings.
@@ -87,7 +87,6 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
                hidden_size=768,
                num_layers=12,
                num_attention_heads=12,
-               sequence_length=None,
                max_sequence_length=512,
                type_vocab_size=16,
                intermediate_size=3072,
@@ -126,6 +125,11 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
         embedding_width=embedding_width,
         embedding_layer=embedding_layer,
         norm_first=norm_first)
+    if 'sequence_length' in kwargs:
+      kwargs.pop('sequence_length')
+      logging.warning('`sequence_length` is a deprecated argument to '
+                      '`BertEncoder`, which has no effect for a while. Please '
+                      'remove `sequence_length` argument.')
     self._embedding_layer_instance = embedding_layer
......
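For callers, the change above means the argument can simply be dropped. A minimal sketch of the resulting behavior, assuming the `official.nlp.modeling` package from tensorflow/models is on the path:

# Minimal sketch of the deprecation path added above (assumes the
# `official.nlp.modeling` package from tensorflow/models is installed).
from official.nlp.modeling import networks

# Preferred: construct the encoder without `sequence_length`.
encoder = networks.BertEncoder(vocab_size=100, num_layers=2)

# Legacy call sites that still pass `sequence_length` keep working: the
# argument is caught in **kwargs, popped, and only a deprecation warning
# is logged.
legacy_encoder = networks.BertEncoder(
    vocab_size=100, num_layers=2, sequence_length=5)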
@@ -458,7 +458,6 @@ def get_nhnet_layers(params: configs.NHNetConfig):
       activation=tf_utils.get_activation(bert_config.hidden_act),
       dropout_rate=bert_config.hidden_dropout_prob,
       attention_dropout_rate=bert_config.attention_probs_dropout_prob,
-      sequence_length=None,
       max_sequence_length=bert_config.max_position_embeddings,
       type_vocab_size=bert_config.type_vocab_size,
       initializer=tf.keras.initializers.TruncatedNormal(
......