Unverified Commit 26ecb2f1 authored by Peter St. John, committed by GitHub

Don't serialize a None tensor if not using fp8 (#1749)


Signed-off-by: Peter St. John <pstjohn@nvidia.com>
Co-authored-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
parent 0c5e3a52
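
For context, PyTorch's `get_extra_state`/`set_extra_state` hooks let a module store arbitrary extra data in its `state_dict`, and a plain `None` round-trips through them cleanly. Below is a minimal sketch of the behavior this commit relies on; the module and attribute names are illustrative, not from Transformer Engine:

```python
import torch

class FP8AwareModule(torch.nn.Module):
    """Illustrative module using PyTorch's extra-state hooks."""

    def __init__(self):
        super().__init__()
        self.fp8 = False  # stand-in for the FP8 flags the real module checks

    def get_extra_state(self):
        # After this commit: return None directly when there is no FP8
        # state, instead of serializing a None placeholder.
        if not self.fp8:
            return None
        return torch.zeros(4)  # stand-in for the real serialized FP8 state

    def set_extra_state(self, state):
        if state is None:
            return  # nothing was saved, so nothing to restore
        # ... deserialize and restore FP8 state here ...

module = FP8AwareModule()
state_dict = module.state_dict()
# PyTorch stores the hook's return value under "<prefix>_extra_state".
print(state_dict["_extra_state"])  # None when FP8 is disabled
```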
@@ -592,7 +592,7 @@ class TransformerEngineBaseModule(torch.nn.Module, ABC):
         reset("scaling_fwd")
         reset("scaling_bwd")
 
-    def get_extra_state(self) -> torch.Tensor:
+    def get_extra_state(self) -> Optional[torch.Tensor]:
         """Save before checkpointing."""
 
         # This implementation is working around a few issues:
@@ -626,7 +626,8 @@ class TransformerEngineBaseModule(torch.nn.Module, ABC):
         # Store FP8 state if needed
         state = None
         fp8_checkpoint = self.fp8_meta["fp8_checkpoint"] or self.fp8 or self.fp8_calibration
-        if fp8_checkpoint:
+        if not fp8_checkpoint:
+            return None
 
         # Copy tensors to CPU and store
         state = {}
@@ -652,7 +653,7 @@ class TransformerEngineBaseModule(torch.nn.Module, ABC):
         state_serialized = torch.frombuffer(state_serialized, dtype=torch.uint8)
         return state_serialized
 
-    def set_extra_state(self, state: torch.Tensor) -> None:
+    def set_extra_state(self, state: Optional[torch.Tensor]) -> None:
         """Load previous state."""
         if state is None:
             return
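
Concretely, the old path still serialized the `None` placeholder into a one-off `uint8` tensor, so every non-FP8 checkpoint carried a small opaque blob. The sketch below assumes a `pickle.dumps` step feeding the `torch.frombuffer` call visible in the last hunk; that pairing is an assumption about the surrounding code, not shown in this diff:

```python
import pickle
import torch

# Before the fix: with FP8 disabled, state stayed None but was still
# pickled and wrapped in a uint8 tensor on the way into the checkpoint.
blob = torch.frombuffer(bytearray(pickle.dumps(None)), dtype=torch.uint8)
print(blob.numel())  # a few bytes that encode nothing but `None`

# After the fix: get_extra_state() returns None directly, so no blob is
# written; set_extra_state() already short-circuits on None when loading.
```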