"git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "df43ce9675a7f68b24a8d0a3a2b80cce0b57de17"
Unverified Commit 509bf877 authored by Kirthi Shankar Sivamani, committed by GitHub

Ensure contiguous inputs (#38)



ensure contiguous inputs
Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
parent 89f94ba2
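For context (not part of the commit itself): PyTorch operations such as transpose(), permute(), and slicing return strided views whose memory is no longer laid out contiguously, while custom CUDA kernels typically assume a dense, row-major buffer. Calling .contiguous() materializes such a view into a dense copy and is a no-op for tensors that are already contiguous. A minimal plain-PyTorch sketch of the behavior the patch relies on:

```python
import torch

x = torch.randn(4, 8)
assert x.is_contiguous()

# A transpose is a view with swapped strides; the underlying memory
# is unchanged, so the result is no longer contiguous.
xt = x.t()
assert not xt.is_contiguous()

# .contiguous() copies the data into a dense, row-major buffer.
xt_dense = xt.contiguous()
assert xt_dense.is_contiguous()

# For an already-contiguous tensor, .contiguous() returns the same
# tensor; no copy is made.
assert x.contiguous() is x
```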
@@ -329,7 +329,7 @@ class TransformerEngineBaseModule(torch.nn.Module, ABC):
         # Activation recomputation is used and this is the second forward phase.
         if self.fp8 and in_fp8_activation_recompute_phase():
             get_old_fp8_meta_tensors_for_recompute(self.fp8_meta)
-            return
+            return inp.contiguous()
 
         assert inp.is_cuda, "TransformerEngine needs CUDA."
@@ -371,6 +371,8 @@ class TransformerEngineBaseModule(torch.nn.Module, ABC):
         ):
             copy_forward_fp8_meta_tensors_for_recompute(self.fp8_meta)
 
+        return inp.contiguous()
+
     def post_forward(self) -> None:
         """This is needed because there isn't a way for a module to know
         if it's the last FP8 module in the forward autocast. It is useful
@@ -1089,7 +1091,7 @@ class LayerNormLinear(TransformerEngineBaseModule):
                       produced)
         """
-        self.pre_forward(inp)
+        inp = self.pre_forward(inp)
 
         bias_tensor = bias if bias is not None else self.bias
@@ -1615,7 +1617,7 @@ class Linear(TransformerEngineBaseModule):
                       produced)
         """
-        self.pre_forward(inp)
+        inp = self.pre_forward(inp)
 
         bias_tensor = bias if bias is not None else self.bias
@@ -2418,7 +2420,7 @@ class LayerNormMLP(TransformerEngineBaseModule):
                       produced)
         """
-        self.pre_forward(inp, num_gemms=2)
+        inp = self.pre_forward(inp, num_gemms=2)
 
         out = _LayerNormMLP.apply(
             inp,
...
@@ -1004,6 +1004,8 @@ class TransformerLayer(torch.nn.Module):
             backprop.
         """
+        hidden_states = hidden_states.contiguous()
+
         # For AMP
         if torch.is_autocast_enabled():
             hidden_states = cast_if_needed(
...
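A minimal sketch (hypothetical names, not the actual TransformerEngine code) of the call pattern these hunks introduce: pre_forward now returns the contiguous input, so every caller must rebind it, otherwise the contiguous copy would be discarded and downstream kernels would still receive the strided tensor.

```python
import torch

class ExampleModule:
    """Illustrative stand-in for a TransformerEngineBaseModule subclass."""

    def pre_forward(self, inp: torch.Tensor) -> torch.Tensor:
        # ... existing checks elided (CUDA assert, FP8 meta handling) ...
        # New behavior: always hand back a contiguous tensor.
        return inp.contiguous()

    def forward(self, inp: torch.Tensor) -> torch.Tensor:
        # Rebind to the returned value; the rest of forward() uses it.
        inp = self.pre_forward(inp)
        return inp  # ... GEMMs etc. would follow here ...

# A transposed input is non-contiguous; the module now normalizes it.
m = ExampleModule()
out = m.forward(torch.randn(4, 8).t())
assert out.is_contiguous()
```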