"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "b658b16dc76f34e7d8db91ad2f37aaff3ea8f5dc"
Commit 71a2fc91 authored by Chen Chen, committed by A. Unique TensorFlower

Clean up keras_nlp.BertEncoder:

Remove the return_all_encoder_outputs argument from keras_nlp.BertEncoder: it is unused, and all encoder outputs are now always returned in the output dict.

PiperOrigin-RevId: 332802761
parent 1308ecdc
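
For context, a minimal sketch of how a caller picks up the per-layer outputs after this change. The `official.nlp.keras_nlp` import path, the list-style call signature, and the `sequence_output` / `pooled_output` / `encoder_outputs` dict keys are assumptions based on this repository's encoder, not shown in the diff below:

```python
import tensorflow as tf
from official.nlp import keras_nlp

# Build a small encoder; there is no longer a
# return_all_encoder_outputs argument to pass.
encoder = keras_nlp.encoders.BertEncoder(
    vocab_size=100,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=3)

# Dummy inputs: word ids, mask, and type ids, each of shape [batch, seq_length].
word_ids = tf.zeros((2, 16), dtype=tf.int32)
mask = tf.ones((2, 16), dtype=tf.int32)
type_ids = tf.zeros((2, 16), dtype=tf.int32)

outputs = encoder([word_ids, mask, type_ids])

# The returned dict always carries every transformer layer's activations.
all_layer_outputs = outputs['encoder_outputs']  # list of [batch, seq, hidden]
sequence_output = outputs['sequence_output']    # last layer only
pooled_output = outputs['pooled_output']        # [batch, hidden]
assert len(all_layer_outputs) == 3              # one entry per transformer layer
```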
@@ -56,8 +56,6 @@ class BertEncoder(tf.keras.Model):
     attention_dropout: The dropout rate to use for the attention layers
       within the transformer layers.
     initializer: The initializer to use for all weights in this encoder.
-    return_all_encoder_outputs: Whether to output sequence embedding outputs of
-      all encoder transformer layers.
     output_range: The sequence output range, [0, output_range), by slicing the
       target sequence of the last transformer layer. `None` means the entire
       target sequence will attend to the source sequence, which yields the full
@@ -82,7 +80,6 @@ class BertEncoder(tf.keras.Model):
                output_dropout=0.1,
                attention_dropout=0.1,
                initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
-               return_all_encoder_outputs=False,
                output_range=None,
                embedding_width=None,
                **kwargs):
@@ -102,7 +99,6 @@ class BertEncoder(tf.keras.Model):
         'output_dropout': output_dropout,
         'attention_dropout': attention_dropout,
         'initializer': tf.keras.initializers.serialize(initializer),
-        'return_all_encoder_outputs': return_all_encoder_outputs,
         'output_range': output_range,
         'embedding_width': embedding_width,
     }
...
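
A hedged sketch of what the config change above implies for serialization round trips; the assertion on the removed key follows from this diff, and the constructor arguments reuse values from the tests below rather than anything verified independently:

```python
from official.nlp import keras_nlp

encoder = keras_nlp.encoders.BertEncoder(
    vocab_size=100,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=3)

config = encoder.get_config()
# The key removed in the hunk above should no longer appear in the config.
assert 'return_all_encoder_outputs' not in config

# Rebuilding from the slimmed-down config should still work.
restored = keras_nlp.encoders.BertEncoder.from_config(config)
```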
@@ -69,8 +69,7 @@ class BertEncoderTest(keras_parameterized.TestCase):
         vocab_size=100,
         hidden_size=hidden_size,
         num_attention_heads=2,
-        num_layers=3,
-        return_all_encoder_outputs=True)
+        num_layers=3)
     # Create the inputs (note that the first dimension is implicit).
     word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
     mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
@@ -204,7 +203,6 @@ class BertEncoderTest(keras_parameterized.TestCase):
         output_dropout=0.05,
         attention_dropout=0.22,
         initializer="glorot_uniform",
-        return_all_encoder_outputs=False,
         output_range=-1,
         embedding_width=16)
     network = bert_encoder.BertEncoder(**kwargs)
...
@@ -60,7 +60,9 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
       within the transformer layers.
     initializer: The initializer to use for all weights in this encoder.
     return_all_encoder_outputs: Whether to output sequence embedding outputs of
-      all encoder transformer layers.
+      all encoder transformer layers. Note: when the following `dict_outputs`
+      argument is True, all encoder outputs are always returned in the dict,
+      keyed by `encoder_outputs`.
     output_range: The sequence output range, [0, output_range), by slicing the
       target sequence of the last transformer layer. `None` means the entire
       target sequence will attend to the source sequence, which yields the full
@@ -112,7 +114,6 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
         output_dropout=dropout_rate,
         attention_dropout=attention_dropout_rate,
         initializer=initializer,
-        return_all_encoder_outputs=return_all_encoder_outputs,
         output_range=output_range,
         embedding_width=embedding_width)
@@ -123,6 +124,7 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
     self._config_dict['attention_dropout_rate'] = self._config_dict.pop(
         'attention_dropout')
     self._config_dict['dict_outputs'] = dict_outputs
+    self._config_dict['return_all_encoder_outputs'] = return_all_encoder_outputs
     if dict_outputs:
       return
...
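
And a sketch of the compatibility path kept in the subclass above: the wrapper still accepts return_all_encoder_outputs and, per the last hunk, records it in its config even when dict_outputs is True. The import path and constructor arguments here are assumptions inferred from the surrounding diff, not verified against the repository:

```python
from official.nlp.modeling import networks

# The wrapper keeps the old argument for backward compatibility. With
# dict_outputs=True the underlying keras_nlp encoder returns every layer's
# output anyway; the flag is only recorded for config serialization.
encoder = networks.BertEncoder(
    vocab_size=100,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=3,
    return_all_encoder_outputs=True,
    dict_outputs=True)

config = encoder.get_config()
assert config['return_all_encoder_outputs'] is True
assert config['dict_outputs'] is True
```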