Commit eb8384b5 authored by Thor Johnsen's avatar Thor Johnsen

Modify fused_adam to take advantage of undo feature

parent f1e565f5
...@@ -92,7 +92,7 @@ class FusedAdam(torch.optim.Optimizer):
                 stride,
                 1 if clear else 0)
-    def step(self, closure=None, grads=None, output_params=None, scale=1., grad_norms=None):
+    def step(self, closure=None, grads=None, output_params=None, scale=1., grad_norms=None, allow_undo=False):
         """Performs a single optimization step.
         Arguments:
...@@ -106,12 +106,15 @@ class FusedAdam(torch.optim.Optimizer):
                 updated weights. Have to be of same type as gradients. (default: None)
             scale (float, optional): factor to divide gradient tensor values
                 by before applying to weights. (default: 1)
+            allow_undo (bool, optional): allow use of undo feature. Internal buffers
+                will be restored to pre-step state if overflow is detected in gradient.
         """
         loss = None
         if closure is not None:
             loss = closure()
         self._step(grads, output_params, scale, grad_norms, False)
-        self.strided_check_finite(output_params, output_params.numel(), 0, output_params.numel())
-        if self.peek_overflow:
-            self._step(grads, output_params, scale, grad_norms, True)
+        if allow_undo:
+            self.strided_check_finite(output_params, output_params.numel(), 0, output_params.numel())
+            if self.peek_overflow:
+                self._step(grads, output_params, scale, grad_norms, True)
...
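The docstring above states the behaviour at a high level: take the Adam step first, then check for an overflow, and roll the optimizer's internal buffers back to their pre-step state if one is found (stepping first and undoing later presumably lets the update kernel launch without waiting on a synchronizing overflow check). The plain-PyTorch sketch below illustrates that idea with explicit copies; step_with_undo is a hypothetical helper written for this note, not part of apex, and the fused path in the diff achieves the same effect without the extra snapshots by calling _step a second time in undo mode when peek_overflow reports non-finite values.

# Plain-PyTorch sketch of the behaviour the allow_undo flag describes (hypothetical
# helper, not the fused CUDA implementation): snapshot state, take the step, and
# restore everything if the gradients turn out to contain inf/nan.
import copy
import torch

def step_with_undo(optimizer, params):
    params = list(params)
    # Snapshot weights and optimizer buffers (exp_avg, exp_avg_sq, step counts).
    saved_params = [p.detach().clone() for p in params]
    saved_state = copy.deepcopy(optimizer.state_dict())

    optimizer.step()

    # Overflow check mirrors the docstring: non-finite values in the gradient.
    overflow = any(p.grad is not None and not torch.isfinite(p.grad).all() for p in params)
    if overflow:
        # Undo the step: restore weights and internal buffers to their pre-step state.
        with torch.no_grad():
            for p, saved in zip(params, saved_params):
                p.copy_(saved)
        optimizer.load_state_dict(saved_state)
    return overflow

# Toy usage: a dynamic-loss-scaling loop would lower its loss scale and skip the
# batch whenever the step was undone, which is how a caller could use allow_undo.
model = torch.nn.Linear(16, 16)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
model(torch.randn(4, 16)).pow(2).mean().backward()
undone = step_with_undo(opt, model.parameters())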