"vscode:/vscode.git/clone" did not exist on "fb41088f854abfdb3824adf9cd3d3569f9ebaabe"
Commit 71a2fc91 authored by Chen Chen, committed by A. Unique TensorFlower

Clean keras_nlp.BertEncoder:

Remove the return_all_encoder_outputs argument from keras_nlp.BertEncoder: it is unused, and all encoder outputs are always returned in the output dict.

PiperOrigin-RevId: 332802761
parent 1308ecdc
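
For illustration, a minimal sketch of what calling the encoder looks like after this change. The import path, the list-style input convention `[word_ids, mask, type_ids]`, and the `sequence_output`/`pooled_output` key names are assumptions based on the surrounding Model Garden code; only the `encoder_outputs` key is stated in the diff below. Constructor arguments mirror the updated test.

```python
import numpy as np

from official.nlp.keras_nlp.encoders import bert_encoder  # import path is an assumption

# Same small configuration as the updated test; note there is no
# return_all_encoder_outputs flag anymore.
encoder = bert_encoder.BertEncoder(
    vocab_size=100,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=3)

batch_size, sequence_length = 2, 8
word_ids = np.random.randint(0, 100, size=(batch_size, sequence_length), dtype=np.int32)
mask = np.ones((batch_size, sequence_length), dtype=np.int32)
type_ids = np.zeros((batch_size, sequence_length), dtype=np.int32)

# The list input convention [word_ids, mask, type_ids] is assumed here.
outputs = encoder([word_ids, mask, type_ids])

# Every transformer layer's sequence embedding is always in the dict,
# keyed by `encoder_outputs` (per the docstring note in this commit).
assert len(outputs['encoder_outputs']) == 3
print(outputs['sequence_output'].shape)  # (2, 8, 32); key name assumed
print(outputs['pooled_output'].shape)    # (2, 32); key name assumed
```

Callers that previously set return_all_encoder_outputs=True can simply read outputs['encoder_outputs'].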
@@ -56,8 +56,6 @@ class BertEncoder(tf.keras.Model):
 attention_dropout: The dropout rate to use for the attention layers
   within the transformer layers.
 initializer: The initializer to use for all weights in this encoder.
-return_all_encoder_outputs: Whether to output sequence embedding outputs of
-  all encoder transformer layers.
 output_range: The sequence output range, [0, output_range), by slicing the
   target sequence of the last transformer layer. `None` means the entire
   target sequence will attend to the source sequence, which yields the full
@@ -82,7 +80,6 @@ class BertEncoder(tf.keras.Model):
 output_dropout=0.1,
 attention_dropout=0.1,
 initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
-return_all_encoder_outputs=False,
 output_range=None,
 embedding_width=None,
 **kwargs):
@@ -102,7 +99,6 @@ class BertEncoder(tf.keras.Model):
 'output_dropout': output_dropout,
 'attention_dropout': attention_dropout,
 'initializer': tf.keras.initializers.serialize(initializer),
-'return_all_encoder_outputs': return_all_encoder_outputs,
 'output_range': output_range,
 'embedding_width': embedding_width,
 }
......
@@ -69,8 +69,7 @@ class BertEncoderTest(keras_parameterized.TestCase):
 vocab_size=100,
 hidden_size=hidden_size,
 num_attention_heads=2,
-num_layers=3,
-return_all_encoder_outputs=True)
+num_layers=3)
 # Create the inputs (note that the first dimension is implicit).
 word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
 mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
@@ -204,7 +203,6 @@ class BertEncoderTest(keras_parameterized.TestCase):
 output_dropout=0.05,
 attention_dropout=0.22,
 initializer="glorot_uniform",
-return_all_encoder_outputs=False,
 output_range=-1,
 embedding_width=16)
 network = bert_encoder.BertEncoder(**kwargs)
......
@@ -60,7 +60,9 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
   within the transformer layers.
 initializer: The initializer to use for all weights in this encoder.
 return_all_encoder_outputs: Whether to output sequence embedding outputs of
-  all encoder transformer layers.
+  all encoder transformer layers. Note: when the following `dict_outputs`
+  argument is True, all encoder outputs are always returned in the dict,
+  keyed by `encoder_outputs`.
 output_range: The sequence output range, [0, output_range), by slicing the
   target sequence of the last transformer layer. `None` means the entire
   target sequence will attend to the source sequence, which yields the full
@@ -112,7 +114,6 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
 output_dropout=dropout_rate,
 attention_dropout=attention_dropout_rate,
 initializer=initializer,
-return_all_encoder_outputs=return_all_encoder_outputs,
 output_range=output_range,
 embedding_width=embedding_width)
@@ -123,6 +124,7 @@ class BertEncoder(keras_nlp.encoders.BertEncoder):
 self._config_dict['attention_dropout_rate'] = self._config_dict.pop(
     'attention_dropout')
 self._config_dict['dict_outputs'] = dict_outputs
+self._config_dict['return_all_encoder_outputs'] = return_all_encoder_outputs
 if dict_outputs:
   return
......
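
The last two hunks keep return_all_encoder_outputs on the wrapper class only: it is no longer forwarded to keras_nlp.BertEncoder, but it is recorded in the wrapper's own config dict, and per the docstring note above it has no effect on the outputs when `dict_outputs` is True. A minimal sketch of that behavior, assuming the `official.nlp.modeling.networks` import path and the usual `get_config`/`from_config` pattern of these network classes:

```python
from official.nlp.modeling import networks  # import path is an assumption

encoder = networks.BertEncoder(
    vocab_size=100,
    hidden_size=32,
    num_attention_heads=2,
    num_layers=3,
    dict_outputs=True,                # dict outputs always include all layers
    return_all_encoder_outputs=True)  # kept only for config compatibility

config = encoder.get_config()
# The flag now lives in the wrapper's config dict instead of being passed
# down to keras_nlp.BertEncoder.
assert config['return_all_encoder_outputs'] is True

# Rebuilding from the config is assumed to work as for other Model Garden
# networks (cls(**config)).
restored = networks.BertEncoder.from_config(config)
```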