Make the Python training loop handle OutOfRangeError properly with async eager.

PiperOrigin-RevId: 300127838

Make the Python training loop handle OutOfRangeError properly with async eager.
PiperOrigin-RevId: 300127838
ebc28058 · Ruoxin Sang · A. Unique TensorFlower · 6e070e77 · ebc28058
Commit ebc28058, authored Mar 10, 2020 by Ruoxin Sang; committed by A. Unique TensorFlower on Mar 10, 2020.
Hide whitespace changes
Inline Side-by-side

Showing with 10 additions and 6 deletions

official/staging/training/utils.py official/staging/training/utils.py +10 -6

No files found.
--- a/official/staging/training/utils.py
+++ b/official/staging/training/utils.py
@@ -53,13 +53,17 @@ def create_loop_fn(step_fn):
    """
    try:
      step = 0
-      while (num_steps == -1 or step < num_steps):
+      # To make sure the OutOfRangeError exception can be handled well with
-        outputs = step_fn(iterator)
+      # async remote eager, we need to wrap the loop body in a `async_scope`.
-        if reduce_fn is not None:
+      with tf.experimental.async_scope():
-          state = reduce_fn(state, outputs)
+        while (num_steps == -1 or step < num_steps):
-        step += 1
+          outputs = step_fn(iterator)
-      return state
+          if reduce_fn is not None:
+            state = reduce_fn(state, outputs)
+          step += 1
+        return state
    except (StopIteration, tf.errors.OutOfRangeError):
+      tf.experimental.async_clear_error()
      return state
  return loop_fn