Enable XLA compilation using `@tf.function(experimental_compile=True)` for the transformer layer.

To debug the tf.function, this API can be used: https://www.tensorflow.org/api_docs/python/tf/config/experimental_run_functions_eagerly

PiperOrigin-RevId: 296458870

Enable XLA compilation using `@tf.function(experimental_compile=True)` for the transformer layer.
To debug the tf.function, this API can be used: https://www.tensorflow.org/api_docs/python/tf/config/experimental_run_functions_eagerly
PiperOrigin-RevId: 296458870
393c1399 · George Karpenkov · A. Unique TensorFlower · 867f0c47 · 393c1399
Commit 393c1399 authored Feb 21, 2020 by George Karpenkov Committed by A. Unique TensorFlower Feb 21, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 19 additions and 16 deletions

official/nlp/modeling/layers/transformer.py official/nlp/modeling/layers/transformer.py +19 -16

No files found.
--- a/official/nlp/modeling/layers/transformer.py
+++ b/official/nlp/modeling/layers/transformer.py
@@ -193,6 +193,7 @@ class Transformer(tf.keras.layers.Layer):
    base_config = super(Transformer, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
+  @tf.function(experimental_compile=True)
  def call(self, inputs):
    if isinstance(inputs, (list, tuple)) and len(inputs) == 2:
      input_tensor, attention_mask = inputs
@@ -204,19 +205,21 @@ class Transformer(tf.keras.layers.Layer):
    if attention_mask is not None:
      attention_inputs.append(attention_mask)
-    attention_output = self._attention_layer(attention_inputs)
+    with tf.name_scope(self.name):
-    attention_output = self._attention_output_dense(attention_output)
+      attention_output = self._attention_layer(attention_inputs)
-    attention_output = self._attention_dropout(attention_output)
+      attention_output = self._attention_output_dense(attention_output)
-    attention_output = self._attention_layer_norm(input_tensor +
+      attention_output = self._attention_dropout(attention_output)
-                                                  attention_output)
+      attention_output = self._attention_layer_norm(input_tensor +
-    intermediate_output = self._intermediate_dense(attention_output)
+                                                    attention_output)
-    intermediate_output = self._intermediate_activation_layer(
+      intermediate_output = self._intermediate_dense(attention_output)
-        intermediate_output)
+      intermediate_output = self._intermediate_activation_layer(
-    layer_output = self._output_dense(intermediate_output)
+          intermediate_output)
-    layer_output = self._output_dropout(layer_output)
+      layer_output = self._output_dense(intermediate_output)
-    # During mixed precision training, attention_output is from layer norm and
+      layer_output = self._output_dropout(layer_output)
-    # is always fp32 for now. cast layer_output to fp32 for the subsequent add.
+      # During mixed precision training, attention_output is from layer norm and
-    layer_output = tf.cast(layer_output, tf.float32)
+      # is always fp32 for now. Cast layer_output to fp32 for the subsequent
-    layer_output = self._output_layer_norm(layer_output + attention_output)
+      # add.
+      layer_output = tf.cast(layer_output, tf.float32)
-    return layer_output
+      layer_output = self._output_layer_norm(layer_output + attention_output)
+      return layer_output