[util] fixed activation checkpointing on torch 1.9 (#719)

2412429d · Frank Lee · GitHub · 04ff5ea5 · 2412429d
Unverified commit 2412429d, authored Apr 12, 2022 by Frank Lee; committed by GitHub on Apr 12, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 9 additions and 2 deletions

colossalai/utils/activation_checkpoint.py colossalai/utils/activation_checkpoint.py +9 -2

No files found.
--- a/colossalai/utils/activation_checkpoint.py
+++ b/colossalai/utils/activation_checkpoint.py
@@ -68,7 +68,10 @@ class CheckpointFunction(torch.autograd.Function):
            else:
                ctx.inputs.append(arg)

-        ctx.save_for_backward(*tensor_inputs)
+        if activation_offload:
+            ctx.tensor_inputs = tensor_inputs
+        else:
+            ctx.save_for_backward(*tensor_inputs)
        return outputs

    @staticmethod
@@ -79,7 +82,11 @@ class CheckpointFunction(torch.autograd.Function):
        # Copy the list to avoid modifying original list.
        inputs = list(ctx.inputs)
        tensor_indices = ctx.tensor_indices
-        tensors = ctx.saved_tensors
+
+        if ctx.activation_offload:
+            tensors = ctx.tensor_inputs
+        else:
+            tensors = ctx.saved_tensors

        # store the current states
        bwd_cpu_rng_state = torch.get_rng_state()