"vscode:/vscode.git/clone" did not exist on "682d18efe133d074163bdc7de930940d654d4e65"
Commit 8849285f authored by A. Unique TensorFlower
Browse files

Use mixed precision for gelu intermediate activation in BERT SQuAD model

PiperOrigin-RevId: 303407939
parent da5860f2
@@ -142,10 +142,8 @@ class Transformer(tf.keras.layers.Layer):
         kernel_constraint=self._kernel_constraint,
         bias_constraint=self._bias_constraint,
         name="intermediate")
-    # Use float32 in intermediate gelu activation for numeric stability.
-    # TODO(b/149117297): investigate gelu numeric stability.
     self._intermediate_activation_layer = tf.keras.layers.Activation(
-        self._intermediate_activation, dtype=tf.float32)
+        self._intermediate_activation)
     self._output_dense = dense_einsum.DenseEinsum(
         output_shape=hidden_size,
         kernel_initializer=self._kernel_initializer,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment