Unverified Commit 1f36c2c9 authored by Tim Moon's avatar Tim Moon Committed by GitHub
Browse files

[PyTorch] Fix linter warnings from unused args (#816)



* Fix linter warnings from unused args
Signed-off-by: Tim Moon <tmoon@nvidia.com>

* Update .gitignore
Co-authored-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com>

---------
Signed-off-by: Tim Moon <tmoon@nvidia.com>
Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com>
Co-authored-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
parent 071b9508
...@@ -1757,11 +1757,12 @@ class _PrepareQKVForFA(torch.autograd.Function): ...@@ -1757,11 +1757,12 @@ class _PrepareQKVForFA(torch.autograd.Function):
to separate contiguous q, k, v tensors in (b, s, ...) layout.""" to separate contiguous q, k, v tensors in (b, s, ...) layout."""
@staticmethod @staticmethod
def forward(ctx, def forward(
_ctx: torch.autograd.function.FunctionCtx, # unused
query_layer: torch.Tensor, query_layer: torch.Tensor,
key_layer: torch.Tensor, key_layer: torch.Tensor,
value_layer: torch.Tensor value_layer: torch.Tensor
) -> torch.Tensor: ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
# All inputs received are non-contiguous tensors. # All inputs received are non-contiguous tensors.
# The `query_layer` tensor is used to access the # The `query_layer` tensor is used to access the
# full memory region of the QKV tensor. # full memory region of the QKV tensor.
...@@ -1773,7 +1774,8 @@ class _PrepareQKVForFA(torch.autograd.Function): ...@@ -1773,7 +1774,8 @@ class _PrepareQKVForFA(torch.autograd.Function):
return query_layer, key_layer, value_layer return query_layer, key_layer, value_layer
@staticmethod @staticmethod
def backward(ctx, def backward(
_ctx: torch.autograd.function.FunctionCtx, # unused
dq: torch.Tensor, dq: torch.Tensor,
dk: torch.Tensor, dk: torch.Tensor,
dv: torch.Tensor dv: torch.Tensor
......
...@@ -46,7 +46,7 @@ class _FromFloat8Func(torch.autograd.Function): ...@@ -46,7 +46,7 @@ class _FromFloat8Func(torch.autograd.Function):
"""Cast from FP8 to other dtype""" """Cast from FP8 to other dtype"""
@staticmethod @staticmethod
def forward( def forward(
ctx, _ctx: torch.autograd.function.FunctionCtx, # unused
tensor: Float8Tensor, tensor: Float8Tensor,
dtype: Optional[torch.dtype] = None, dtype: Optional[torch.dtype] = None,
) -> torch.Tensor: ) -> torch.Tensor:
...@@ -63,7 +63,10 @@ class _FromFloat8Func(torch.autograd.Function): ...@@ -63,7 +63,10 @@ class _FromFloat8Func(torch.autograd.Function):
return out return out
@staticmethod @staticmethod
def backward(ctx, grad): def backward(
_ctx: torch.autograd.function.FunctionCtx, # unused
grad: torch.Tensor,
) -> Tuple[Optional[torch.Tensor], ...]:
# Assume that we want gradients in full precision # Assume that we want gradients in full precision
return grad, None return grad, None
...@@ -97,7 +100,7 @@ class _ToFloat8Func(torch.autograd.Function): ...@@ -97,7 +100,7 @@ class _ToFloat8Func(torch.autograd.Function):
"""Cast to FP8 from other dtype""" """Cast to FP8 from other dtype"""
@staticmethod @staticmethod
def forward( def forward(
ctx, _ctx: torch.autograd.function.FunctionCtx, # unused
tensor: torch.Tensor, tensor: torch.Tensor,
fp8_meta: Optional[Dict[str, Any]] = None, fp8_meta: Optional[Dict[str, Any]] = None,
fp8_meta_forward: bool = True, fp8_meta_forward: bool = True,
...@@ -106,7 +109,7 @@ class _ToFloat8Func(torch.autograd.Function): ...@@ -106,7 +109,7 @@ class _ToFloat8Func(torch.autograd.Function):
scale: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None,
amax: Optional[torch.Tensor] = None, amax: Optional[torch.Tensor] = None,
scale_inv: Optional[torch.Tensor] = None, scale_inv: Optional[torch.Tensor] = None,
): ) -> Float8Tensor:
# Manually compute scale-inverse if needed # Manually compute scale-inverse if needed
if scale is not None and scale_inv is None: if scale is not None and scale_inv is None:
...@@ -189,7 +192,10 @@ class _ToFloat8Func(torch.autograd.Function): ...@@ -189,7 +192,10 @@ class _ToFloat8Func(torch.autograd.Function):
) )
@staticmethod @staticmethod
def backward(ctx, grad): def backward(
_ctx: torch.autograd.function.FunctionCtx, # unused
grad: torch.Tensor,
) -> Tuple[Optional[torch.Tensor], ...]:
# Assume that we want gradients in full precision # Assume that we want gradients in full precision
return grad, None, None, None, None, None, None, None return grad, None, None, None, None, None, None, None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment