Commit ef99be0b authored by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 315789209
parent dc9c75dd
@@ -63,7 +63,13 @@ class Classification(tf.keras.Model):
         kernel_initializer=initializer,
         name='predictions/transform/logits')(
             cls_output)
-    predictions = tf.keras.layers.Activation(tf.nn.log_softmax)(self.logits)
+    policy = tf.keras.mixed_precision.experimental.global_policy()
+    if policy.name == 'mixed_bfloat16':
+      # b/158514794: bf16 is not stable with post-softmax cross-entropy.
+      policy = tf.float32
+    predictions = tf.keras.layers.Activation(tf.nn.log_softmax,
+                                             dtype=policy)(self.logits)
     if output == 'logits':
       output_tensors = self.logits
...
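The hunk above pins the log-softmax activation to float32 whenever the global Keras mixed-precision policy is 'mixed_bfloat16', because the post-softmax cross-entropy is not numerically stable in bfloat16 (b/158514794). A minimal, illustrative sketch of that dtype-override pattern follows; the Dense layer, input shape, and set_policy call are assumptions for the example, not part of this commit.

# Illustrative sketch (not from this commit): under a 'mixed_bfloat16'
# global policy, a single layer can be pinned to float32 by passing `dtype`
# explicitly, so log_softmax stays numerically stable while the rest of the
# model computes in bfloat16.
import tensorflow as tf

tf.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')

inputs = tf.keras.Input(shape=(16,))
logits = tf.keras.layers.Dense(4)(inputs)  # computes and outputs in bf16

policy = tf.keras.mixed_precision.experimental.global_policy()
if policy.name == 'mixed_bfloat16':
  policy = tf.float32  # override just this activation (cf. b/158514794)
log_probs = tf.keras.layers.Activation(tf.nn.log_softmax,
                                       dtype=policy)(logits)

model = tf.keras.Model(inputs, log_probs)
print(model.output.dtype)  # float32, even though the global policy is bf16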
@@ -55,11 +55,16 @@ class MaskedLMTask(base_task.Task):
         weights=features['masked_lm_weights'])
     metrics['lm_example_loss'].update_state(mlm_loss)
     if 'next_sentence_labels' in features:
+      policy = tf.keras.mixed_precision.experimental.global_policy()
+      if policy.name == 'mixed_bfloat16':  # b/158514794: bf16 is not stable.
+        policy = tf.float32
+      predictions = tf.keras.layers.Activation(
+          tf.nn.log_softmax, dtype=policy)(model_outputs['next_sentence'])
       sentence_labels = features['next_sentence_labels']
       sentence_loss = loss_lib.weighted_sparse_categorical_crossentropy_loss(
           labels=sentence_labels,
-          predictions=tf.nn.log_softmax(
-              model_outputs['next_sentence'], axis=-1))
+          predictions=predictions)
       metrics['next_sentence_loss'].update_state(sentence_loss)
       total_loss = mlm_loss + sentence_loss
     else:
...
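This second hunk moves the log_softmax out of the loss call and into a float32 Activation layer, so loss_lib.weighted_sparse_categorical_crossentropy_loss now receives log-probabilities directly. As a hedged sketch of what consuming log-probabilities looks like, here is a stand-in with the same name and calling convention; it is an assumption for illustration, not the model garden's actual implementation.

# Stand-in sketch (not the model-garden implementation) for a loss that, like
# loss_lib.weighted_sparse_categorical_crossentropy_loss above, consumes
# log-probabilities rather than raw logits.
import tensorflow as tf

def weighted_sparse_categorical_crossentropy_loss(labels, predictions,
                                                  weights=None):
  """labels: [batch] int class ids; predictions: [batch, num_classes]
  log-probabilities (e.g. the float32 log_softmax output above)."""
  labels = tf.reshape(tf.cast(labels, tf.int32), [-1])
  one_hot = tf.one_hot(labels, tf.shape(predictions)[-1],
                       dtype=predictions.dtype)
  # Cross-entropy from log-probs: negative log-probability of the true class.
  per_example = -tf.reduce_sum(one_hot * predictions, axis=-1)
  if weights is None:
    return tf.reduce_mean(per_example)
  weights = tf.cast(weights, per_example.dtype)
  return tf.math.divide_no_nan(tf.reduce_sum(per_example * weights),
                               tf.reduce_sum(weights))

# Example: float32 log-probs keep -log p well-behaved even when the model
# body runs in bfloat16.
log_probs = tf.math.log(tf.constant([[0.9, 0.1], [0.2, 0.8]]))
print(weighted_sparse_categorical_crossentropy_loss(
    tf.constant([0, 1]), log_probs))  # mean of -log 0.9 and -log 0.8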