Internal change

PiperOrigin-RevId: 388118554

Internal change
PiperOrigin-RevId: 388118554
461b3587 · Hongkun Yu · A. Unique TensorFlower · 992a864b · 461b3587 · 461b3587
Commit 461b3587, authored Aug 01, 2021 by Hongkun Yu; committed by A. Unique TensorFlower on Aug 01, 2021
7 changed files
--- a/official/nlp/modeling/models/bert_classifier_test.py
+++ b/official/nlp/modeling/models/bert_classifier_test.py
@@ -87,10 +87,8 @@ class BertClassifierTest(keras_parameterized.TestCase):
          inner_dim=0, num_classes=4)))
  def test_serialize_deserialize(self, cls_head):
    """Validate that the BERT trainer can be serialized and deserialized."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
+    # Build a transformer network to use within the BERT trainer.
-    # a short sequence_length for convenience.)
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
    # Create a BERT trainer with the created network. (Note that all the args
    # are different, so we can catch any serialization mismatches.)

--- a/official/nlp/modeling/models/bert_pretrainer_test.py
+++ b/official/nlp/modeling/models/bert_pretrainer_test.py
@@ -67,10 +67,8 @@ class BertPretrainerTest(keras_parameterized.TestCase):
  def test_bert_trainer_tensor_call(self):
    """Validate that the Keras object can be invoked."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
+    # Build a transformer network to use within the BERT trainer.
-    # a short sequence_length for convenience.)
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=2)
    # Create a BERT trainer with the created network.
    bert_trainer_model = bert_pretrainer.BertPretrainer(
@@ -213,10 +211,8 @@ class BertPretrainerV2Test(keras_parameterized.TestCase):
  def test_v2_serialize_deserialize(self):
    """Validate that the BERT trainer can be serialized and deserialized."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
+    # Build a transformer network to use within the BERT trainer.
-    # a short sequence_length for convenience.)
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
    # Create a BERT trainer with the created network. (Note that all the args
    # are different, so we can catch any serialization mismatches.)

--- a/official/nlp/modeling/models/bert_span_labeler_test.py
+++ b/official/nlp/modeling/models/bert_span_labeler_test.py
@@ -93,10 +93,8 @@ class BertSpanLabelerTest(keras_parameterized.TestCase):
  def test_serialize_deserialize(self):
    """Validate that the BERT trainer can be serialized and deserialized."""
-    # Build a transformer network to use within the BERT trainer. (Here, we use
+    # Build a transformer network to use within the BERT trainer.
-    # a short sequence_length for convenience.)
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
-    test_network = networks.BertEncoder(
-        vocab_size=100, num_layers=2, sequence_length=5)
    # Create a BERT trainer with the created network. (Note that all the args
    # are different, so we can catch any serialization mismatches.)

--- a/official/nlp/modeling/models/dual_encoder_test.py
+++ b/official/nlp/modeling/models/dual_encoder_test.py
@@ -37,7 +37,6 @@ class DualEncoderTest(keras_parameterized.TestCase):
        vocab_size=vocab_size,
        num_layers=2,
        hidden_size=hidden_size,
-        sequence_length=sequence_length,
        dict_outputs=True)
    # Create a dual encoder model with the created network.
@@ -72,11 +71,9 @@ class DualEncoderTest(keras_parameterized.TestCase):
  @parameterized.parameters((192, 'logits'), (768, 'predictions'))
  def test_dual_encoder_tensor_call(self, hidden_size, output):
    """Validate that the Keras object can be invoked."""
-    # Build a transformer network to use within the dual encoder model. (Here,
+    # Build a transformer network to use within the dual encoder model.
-    # we use # a short sequence_length for convenience.)
    sequence_length = 2
-    test_network = networks.BertEncoder(
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
-        vocab_size=100, num_layers=2, sequence_length=sequence_length)
    # Create a dual encoder model with the created network.
    dual_encoder_model = dual_encoder.DualEncoder(
@@ -98,18 +95,16 @@ class DualEncoderTest(keras_parameterized.TestCase):
  def test_serialize_deserialize(self):
    """Validate that the dual encoder model can be serialized / deserialized."""
-    # Build a transformer network to use within the dual encoder model. (Here,
+    # Build a transformer network to use within the dual encoder model.
-    # we use a short sequence_length for convenience.)
    sequence_length = 32
-    test_network = networks.BertEncoder(
+    test_network = networks.BertEncoder(vocab_size=100, num_layers=2)
-        vocab_size=100, num_layers=2, sequence_length=sequence_length)
    # Create a dual encoder model with the created network. (Note that all the
    # args are different, so we can catch any serialization mismatches.)
    dual_encoder_model = dual_encoder.DualEncoder(
        test_network, max_seq_length=sequence_length, output='predictions')
-    # Create another dual encoder model via serialization and deserialization.
+    # Create another dual encoder model via serialization and deserialization.
    config = dual_encoder_model.get_config()
    new_dual_encoder = dual_encoder.DualEncoder.from_config(config)

--- a/official/nlp/modeling/models/electra_pretrainer_test.py
+++ b/official/nlp/modeling/models/electra_pretrainer_test.py
@@ -100,7 +100,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase):
        discriminator_network=test_discriminator_network,
        vocab_size=100,
        num_classes=2,
-        sequence_length=3,
        num_token_predictions=2)
    # Create a set of 2-dimensional data tensors to feed into the model.
@@ -138,7 +137,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase):
        discriminator_network=test_discriminator_network,
        vocab_size=100,
        num_classes=2,
-        sequence_length=3,
        num_token_predictions=2)
    # Create another BERT trainer via serialization and deserialization.

--- a/official/nlp/modeling/networks/bert_encoder.py
+++ b/official/nlp/modeling/networks/bert_encoder.py
@@ -15,6 +15,8 @@
 """Transformer-based text encoder network."""
 # pylint: disable=g-classes-have-attributes
 import collections
+from absl import logging
 import tensorflow as tf
 from official.modeling import activations
@@ -47,8 +49,6 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
    num_layers: The number of transformer layers.
    num_attention_heads: The number of attention heads for each transformer. The
      hidden size must be divisible by the number of attention heads.
-    sequence_length: [Deprecated]. TODO(hongkuny): remove this argument once no
-      user is using it.
    max_sequence_length: The maximum sequence length that this encoder can
      consume. If None, max_sequence_length uses the value from sequence length.
      This determines the variable shape for positional embeddings.
@@ -87,7 +87,6 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
               hidden_size=768,
               num_layers=12,
               num_attention_heads=12,
-               sequence_length=None,
               max_sequence_length=512,
               type_vocab_size=16,
               intermediate_size=3072,
@@ -126,6 +125,11 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
        embedding_width=embedding_width,
        embedding_layer=embedding_layer,
        norm_first=norm_first)
+    if 'sequence_length' in kwargs:
+      kwargs.pop('sequence_length')
+      logging.warning('`sequence_length` is a deprecated argument to '
+                      '`BertEncoder`, which has no effect for a while. Please '
+                      'remove `sequence_length` argument.')
    self._embedding_layer_instance = embedding_layer

--- a/official/nlp/nhnet/models.py
+++ b/official/nlp/nhnet/models.py
@@ -458,7 +458,6 @@ def get_nhnet_layers(params: configs.NHNetConfig):
      activation=tf_utils.get_activation(bert_config.hidden_act),
      dropout_rate=bert_config.hidden_dropout_prob,
      attention_dropout_rate=bert_config.attention_probs_dropout_prob,
-      sequence_length=None,
      max_sequence_length=bert_config.max_position_embeddings,
      type_vocab_size=bert_config.type_vocab_size,
      initializer=tf.keras.initializers.TruncatedNormal(