Unverified Commit 63af6ba5 authored by Ayushman Kumar, committed by GitHub

Merge pull request #2 from tensorflow/master

Updated
parents 7ebfc3dd 89dd9a4e
@@ -109,9 +109,9 @@ def squad_loss_fn(start_positions,
                   end_logits,
                   loss_factor=1.0):
   """Returns sparse categorical crossentropy for start/end logits."""
-  start_loss = tf.keras.backend.sparse_categorical_crossentropy(
+  start_loss = tf.keras.losses.sparse_categorical_crossentropy(
       start_positions, start_logits, from_logits=True)
-  end_loss = tf.keras.backend.sparse_categorical_crossentropy(
+  end_loss = tf.keras.losses.sparse_categorical_crossentropy(
       end_positions, end_logits, from_logits=True)
   total_loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2
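As a quick illustration of the updated loss path (a minimal sketch, not part of the commit; the shapes, values, and variable names below are made up), tf.keras.losses.sparse_categorical_crossentropy with from_logits=True returns one loss value per example, which squad_loss_fn then averages over the start and end positions:

import tensorflow as tf

start_positions = tf.constant([1, 3])    # gold start-token indices, batch of 2
start_logits = tf.random.normal([2, 4])  # per-token scores, sequence length 4

# from_logits=True applies the softmax internally; the result is one
# loss value per example rather than a scalar.
start_loss = tf.keras.losses.sparse_categorical_crossentropy(
    start_positions, start_logits, from_logits=True)
print(start_loss.shape)            # (2,)
print(tf.reduce_mean(start_loss))  # scalar, as averaged in squad_loss_fn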
@@ -137,10 +137,16 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
             use_locking=self._use_locking)
     return tf.no_op()
 
-  def apply_gradients(self, grads_and_vars, name=None):
+  def apply_gradients(self,
+                      grads_and_vars,
+                      name=None,
+                      all_reduce_sum_gradients=True):
     grads, tvars = list(zip(*grads_and_vars))
     (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
-    return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars))
+    return super(AdamWeightDecay, self).apply_gradients(
+        zip(grads, tvars),
+        name=name,
+        all_reduce_sum_gradients=all_reduce_sum_gradients)
 
   def _get_lr(self, var_device, var_dtype, apply_state):
     """Retrieves the learning rate with the given state."""
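To show the clip-then-delegate pattern the new apply_gradients follows, here is a hedged, version-agnostic sketch. ClippingAdam is a hypothetical stand-in class, not the AdamWeightDecay from this diff; in the TF release this commit targets, the keywords forwarded to the parent are name and all_reduce_sum_gradients (a flag from the optimizer API of that era, later renamed upstream), which the sketch passes through generically:

import tensorflow as tf

class ClippingAdam(tf.keras.optimizers.Adam):  # hypothetical stand-in class
  """Clips the global gradient norm, then defers to the parent optimizer."""

  def apply_gradients(self, grads_and_vars, *args, **kwargs):
    grads, tvars = list(zip(*grads_and_vars))
    grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
    # Forward any extra arguments (e.g. name, the aggregation flag)
    # unchanged, as the diff above does explicitly.
    return super().apply_gradients(zip(grads, tvars), *args, **kwargs)

v = tf.Variable(3.0)
opt = ClippingAdam(learning_rate=0.1)
with tf.GradientTape() as tape:
  loss = v * v  # gradient is 2*v = 6.0, clipped down to global norm 1.0
grads = tape.gradient(loss, [v])
opt.apply_gradients(zip(grads, [v]))
print(v.numpy())  # v has stepped toward zero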