Unverified Commit 6b586a91 authored by Hongkun Yu, committed by GitHub

Merged commit includes the following changes: (#7370)

261393597 by hongkuny <hongkuny@google.com>:

    add an encoder mode for BertModel which returns all layers.

--

PiperOrigin-RevId: 261393597
parent 23c0017f
@@ -193,8 +193,17 @@ class BertModel(tf.keras.layers.Layer):
     inputs = pack_inputs([input_word_ids, input_mask, input_type_ids])
     return super(BertModel, self).__call__(inputs, **kwargs)
 
-  def call(self, inputs):
-    """Implements call() for the layer."""
+  def call(self, inputs, mode="bert"):
+    """Implements call() for the layer.
+
+    Args:
+      inputs: packed input tensors.
+      mode: string, `bert` or `encoder`.
+    Returns:
+      Output tensor of the last layer for BERT training (mode=`bert`) which
+      is a float Tensor of shape [batch_size, seq_length, hidden_size] or
+      a list of output tensors for encoder usage (mode=`encoder`).
+    """
     unpacked_inputs = unpack_inputs(inputs)
     input_word_ids = unpacked_inputs[0]
     input_mask = unpacked_inputs[1]
@@ -209,10 +218,13 @@ class BertModel(tf.keras.layers.Layer):
     if input_mask is not None:
       attention_mask = create_attention_mask_from_input_mask(
           input_word_ids, input_mask)
-    sequence_output = self.encoder(embedding_tensor, attention_mask)
-    first_token_tensor = tf.squeeze(sequence_output[:, 0:1, :], axis=1)
+    if mode == "encoder":
+      return self.encoder(
+          embedding_tensor, attention_mask, return_all_layers=True)
+
+    sequence_output = self.encoder(embedding_tensor, attention_mask)
+    first_token_tensor = tf.squeeze(sequence_output[:, 0:1, :], axis=1)
     pooled_output = self.pooler_transform(first_token_tensor)
     return (pooled_output, sequence_output)
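
A minimal usage sketch of the new mode (hypothetical: `bert_layer` and the three input tensors are assumed to be built elsewhere, not part of this commit). Since BertModel.__call__ forwards extra keyword arguments to call(), passing mode="encoder" switches the return value from the (pooled_output, sequence_output) pair to the list of per-layer encoder outputs:

    # Hypothetical names; assumes a constructed BertModel layer and int32
    # input tensors of shape [batch_size, seq_length].
    pooled_output, sequence_output = bert_layer(
        input_word_ids, input_mask, input_type_ids)  # default mode="bert"
    all_encoder_outputs = bert_layer(
        input_word_ids, input_mask, input_type_ids, mode="encoder")
    # all_encoder_outputs is a list with one tensor of shape
    # [batch_size, seq_length, hidden_size] per transformer layer; its
    # last element matches sequence_output above.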
@@ -803,16 +815,30 @@ class Transformer(tf.keras.layers.Layer):
     inputs = pack_inputs([input_tensor, attention_mask])
     return super(Transformer, self).__call__(inputs=inputs, **kwargs)
 
-  def call(self, inputs):
-    """Implements call() for the layer."""
+  def call(self, inputs, return_all_layers=False):
+    """Implements call() for the layer.
+
+    Args:
+      inputs: packed inputs.
+      return_all_layers: bool, whether to return outputs of all layers inside
+        encoders.
+    Returns:
+      Output tensor of the last layer or a list of output tensors.
+    """
     unpacked_inputs = unpack_inputs(inputs)
     input_tensor = unpacked_inputs[0]
     attention_mask = unpacked_inputs[1]
     output_tensor = input_tensor
+    all_layer_outputs = []
     for layer in self.layers:
       output_tensor = layer(output_tensor, attention_mask)
-    return output_tensor
+      all_layer_outputs.append(output_tensor)
+
+    if return_all_layers:
+      return all_layer_outputs
+
+    return all_layer_outputs[-1]
 
 
 def pack_inputs(inputs):
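
For reference, the all_layer_outputs pattern added to Transformer.call() can be sketched in a self-contained form (illustrative only: a small stack of Dense layers stands in for the real transformer blocks, and the class name is invented for this sketch):

    import tensorflow as tf

    class TinyStack(tf.keras.layers.Layer):
      """Toy stand-in for Transformer: a stack of Dense layers."""

      def __init__(self, num_layers=3, width=8):
        super(TinyStack, self).__init__()
        self.stack = [tf.keras.layers.Dense(width) for _ in range(num_layers)]

      def call(self, inputs, return_all_layers=False):
        output_tensor = inputs
        all_layer_outputs = []
        for layer in self.stack:
          output_tensor = layer(output_tensor)
          all_layer_outputs.append(output_tensor)
        if return_all_layers:
          return all_layer_outputs    # one tensor per layer, for encoder use
        return all_layer_outputs[-1]  # last layer only, as before this change

    layer = TinyStack()
    x = tf.ones([2, 4, 8])
    assert len(layer(x, return_all_layers=True)) == 3
    assert layer(x).shape == (2, 4, 8)

Accumulating every intermediate output and indexing [-1] for the default path keeps the single-output behavior identical to the pre-change code while exposing all layers to callers that ask for them.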