Commit 2c493fb3 authored by Jared Casper
Browse files

Merge branch 'activation-checkpoint-fix' into 'main'

fix bug in uniform activation recompute

See merge request ADLR/megatron-lm!551
parents 035cae2e fc2c81d3
......@@ -1036,9 +1036,10 @@ class ParallelTransformer(MegatronModule):
"""Forward method with activation checkpointing."""
def custom(start, end, is_transformer_engine=False):
def custom_forward(*args, **kwargs):
x_, *args = args
for index in range(start, end):
layer = self._get_layer(index)
-                x_ = layer(*args, **kwargs)
+                x_ = layer(x_, *args, **kwargs)
return x_
def custom_forward_transformer_engine(*args, **kwargs):
return custom_forward(*args, is_first_microbatch=is_first_microbatch, **kwargs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment