Unverified Commit 144e4888 authored by Jan Bielak, committed by GitHub

Fix some Pylance errors (#259)



* Ignore IDE files
Signed-off-by: Jan Bielak <jbielak@nvidia.com>

* Fix typing errors
Signed-off-by: Jan Bielak <jbielak@nvidia.com>

* Ignore devcontainer files
Signed-off-by: Jan Bielak <jbielak@nvidia.com>

* Avoid import from private module
Signed-off-by: Jan Bielak <jbielak@nvidia.com>

* Apply @timmoon10's suggestions
Signed-off-by: Jan Bielak <jbielak@nvidia.com>

---------
Signed-off-by: Jan Bielak <jbielak@nvidia.com>
parent 80825fde
@@ -16,6 +16,11 @@ build/
 __pycache__
 .ycm_extra_conf.py
 .vimrc
+.vs
+.vscode
+.cache
+.hypothesis
+.devcontainer.json
 tests/cpp/build/
 docs/_build
 .ipynb_checkpoints
...
@@ -1501,7 +1501,7 @@ class SoftmaxPrimitive(BasePrimitive):
         pow2 = 1 << (k_seqlen - 1).bit_length()
         warp_size = pow2 if pow2 < threads_per_warp else threads_per_warp
         batches_per_warp = 2 if pow2 <= 128 else 1
-        warps_per_block = threads_per_block / warp_size
+        warps_per_block = threads_per_block // warp_size
         batches_per_block = warps_per_block * batches_per_warp
         return batches_per_block
...
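This JAX helper and the PyTorch `FusedScaleMaskSoftmax` hunk further down get the same one-character fix: true division (`/`) produces a `float` even when the operands divide evenly, which Pylance flags because the result feeds integer block-size arithmetic. A minimal sketch of the difference, using illustrative values rather than the actual kernel configuration:

```python
# Illustrative values only -- not taken from the kernels.
THREADS_PER_BLOCK = 128
warp_size = 32

# True division always yields a float, so downstream integer
# arithmetic (and Pylance's int annotations) break:
assert THREADS_PER_BLOCK / warp_size == 4.0
assert isinstance(THREADS_PER_BLOCK / warp_size, float)

# Floor division keeps the computation in int, matching the intent:
assert THREADS_PER_BLOCK // warp_size == 4
assert isinstance(THREADS_PER_BLOCK // warp_size, int)
```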
@@ -281,7 +281,7 @@ class FP8Helper:
         return jnp.vstack([fp8_max_per_gemm] * num_of_gemm)
     @staticmethod
-    def get_fp8_meta_indices(gemm_idx: int) -> Tuple[int]:
+    def get_fp8_meta_indices(gemm_idx: int) -> Tuple[int, int, int]:
         """
         Obtain the index about FP8 metas by the given GEMM index.
         """
@@ -453,7 +453,7 @@ def get_delayed_scaling():
     """
     amax_compute_algo = "max" if FP8Helper.AMAX_COMPUTE_ALGO is AmaxComputeAlgo.MAX \
                         else "most_recent"
-    return DelayedScaling(margin=FP8Helper.MARGIN,
+    return DelayedScaling(margin=int(FP8Helper.MARGIN),
                           interval=FP8Helper.UPDATE_FP8META_INTERVAL,
                           fp8_format=FP8Helper.FP8_FORMAT,
                           amax_history_len=FP8Helper.AMAX_HISTORY_LEN,
...
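The `int(...)` cast silences a type mismatch rather than changing behavior: `DelayedScaling.margin` is declared as an `int`, so passing a class attribute with a wider inferred numeric type makes Pylance complain at the call site. A contrived sketch of the pattern, with hypothetical names:

```python
from dataclasses import dataclass

@dataclass
class Recipe:
    margin: int = 0  # the parameter is declared as int

MARGIN = 0.0  # hypothetical attribute inferred as float

# The explicit cast narrows float -> int for the type checker.
recipe = Recipe(margin=int(MARGIN))
```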
@@ -4,6 +4,7 @@
 """Enums for e2e transformer"""
 import torch
+import torch.distributed
 import transformer_engine_extensions as tex
@@ -29,4 +30,4 @@ LayerTypes = ("encoder", "decoder")
 GemmParallelModes = ("row", "column", None)
-dist_group_type = torch._C._distributed_c10d.ProcessGroup
+dist_group_type = torch.distributed.ProcessGroup
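Reaching into `torch._C._distributed_c10d` worked, but private modules carry no stability guarantees and type checkers often cannot resolve them. The public `torch.distributed.ProcessGroup` alias refers to the same class and is part of the supported API, so it can be used anywhere a process-group annotation is needed. A sketch of such usage; the helper itself is hypothetical, not part of this commit:

```python
from typing import Optional
import torch
import torch.distributed

def maybe_all_reduce(
    t: torch.Tensor,
    group: Optional[torch.distributed.ProcessGroup] = None,
) -> torch.Tensor:
    # All-reduce across the given process group, if one is running.
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        torch.distributed.all_reduce(t, group=group)
    return t
```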
@@ -7,7 +7,7 @@ import os
 import pickle
 import warnings
 from abc import ABC, abstractmethod
-from typing import Union, Optional, Tuple, Dict, Any, List
+from typing import Generator, Union, Optional, Tuple, Dict, Any, List
 from functools import partial
 from contextlib import contextmanager
@@ -86,7 +86,7 @@ def _prepare_backward(
     tp_group: dist_group_type,
     tp_size: int,
     name: str = ""
-) -> None:
+) -> Generator[None, None, None]:
     """Checks and prep for BWD."""
     if fp8:
         global _amax_reduce_handle_bwd
@@ -542,7 +542,7 @@ class TransformerEngineBaseModule(torch.nn.Module, ABC):
         inp: torch.Tensor,
         is_first_microbatch: Union[bool, None],
         num_gemms: int = 1,
-    ) -> None:
+    ) -> Generator[torch.Tensor, None, None]:
         """Checks and prep for FWD.
         The context manager is needed because there isn't a way for a module to know
         if it's the last FP8 module in the forward autocast. It is useful
...
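`_prepare_backward` and the forward-prep method above are `@contextmanager` generator functions, so the annotated return type should describe the generator the decorator consumes, `Generator[YieldType, SendType, ReturnType]`, rather than `None`. A self-contained sketch of the pattern:

```python
from contextlib import contextmanager
from typing import Generator

@contextmanager
def prepared(name: str = "") -> Generator[None, None, None]:
    # Setup runs before the yield; the with-block body runs at the yield.
    print(f"enter {name}")
    try:
        yield
    finally:
        # Cleanup runs when the with-block exits, even on error.
        print(f"exit {name}")

with prepared("demo"):
    pass  # prints "enter demo", then "exit demo"
```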
@@ -342,6 +342,6 @@ class FusedScaleMaskSoftmax(nn.Module):
         pow2 = 1 << (key_seq_len - 1).bit_length()
         warp_size = pow2 if pow2 < THREADS_PER_WARP else THREADS_PER_WARP
         batches_per_warp = 2 if pow2 <= 128 else 1
-        warps_per_block = THREADS_PER_BLOCK / warp_size
+        warps_per_block = THREADS_PER_BLOCK // warp_size
         batches_per_block = warps_per_block * batches_per_warp
         return batches_per_block
...
@@ -4,7 +4,7 @@
 """FP8 utilies for TransformerEngine"""
 from contextlib import contextmanager
-from typing import Optional, Dict, Any
+from typing import Generator, Optional, Dict, Any
 import tensorflow as tf
 import transformer_engine_tensorflow as tex
@@ -69,7 +69,7 @@ def get_default_fp8_recipe():
 def fp8_autocast(
     enabled: bool = False,
     fp8_recipe: Optional[DelayedScaling] = None,
-) -> None:
+) -> Generator[None, None, None]:
     """
     Context manager for FP8 usage.
...