Unverified Commit 31fe887e authored by Guolin Ke's avatar Guolin Ke Committed by GitHub
Browse files

support gradient checkpointing

parent 89ac1b7b
...@@ -290,6 +290,35 @@ def eval_bool(x, default=False): ...@@ -290,6 +290,35 @@ def eval_bool(x, default=False):
return default return default
def checkpoint_sequential(
    functions,
    input,
):
    """Apply ``functions`` in sequence to ``input`` with gradient checkpointing.

    When gradients are enabled, each function is wrapped in
    ``torch.utils.checkpoint.checkpoint`` so its intermediate activations are
    discarded after the forward pass and recomputed during backward, trading
    compute for memory.  When gradients are disabled there is nothing to
    checkpoint, so the functions are simply applied in order.

    Args:
        functions: iterable of callables; each is invoked with the unpacked
            tuple produced by the previous step.
        input: initial argument(s) for the first function; a non-tuple value
            is wrapped into a 1-tuple.

    Returns:
        tuple: the tuple-wrapped output of the final function.
    """
    def wrap_tuple(a):
        # Normalize every intermediate result to a tuple so it can be
        # star-unpacked into the next function.
        return a if isinstance(a, tuple) else (a,)

    # Named `_apply` instead of `exec` to avoid shadowing the builtin.
    def _apply(func, a):
        return wrap_tuple(func(*a))

    def get_wrap_exec(func):
        # Factory binds `func` so checkpoint() receives a plain *args callable.
        def wrap_exec(*a):
            return _apply(func, a)
        return wrap_exec

    input = wrap_tuple(input)
    if torch.is_grad_enabled():
        for func in functions:
            # Checkpointed call: activations are recomputed in backward.
            input = torch.utils.checkpoint.checkpoint(get_wrap_exec(func), *input)
    else:
        # Inference / no-grad path: run the functions directly.
        for func in functions:
            input = _apply(func, input)
    return input
def permute_final_dims(tensor: torch.Tensor, inds: List[int]): def permute_final_dims(tensor: torch.Tensor, inds: List[int]):
zero_index = -1 * len(inds) zero_index = -1 * len(inds)
first_inds = list(range(len(tensor.shape[:zero_index]))) first_inds = list(range(len(tensor.shape[:zero_index])))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment