"git@developer.sourcefind.cn:modelzoo/qwen3-omni_vllm.git" did not exist on "21c5b5da7fe1db87ef2df762dbafd5c33ee1d5f2"
Unverified commit bcbd4be0 authored by tcherckez-nvidia, committed by GitHub

Fix FlashAttention tests (#99)
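This change replaces the NVTE_FLASH_ATTN=0 workaround in the ONNX test script with a dedicated export mode: a new `onnx_export` context manager (module `transformer_engine/pytorch/export.py`, per the `.export` imports below) tracks a global flag, and `DotProductAttention` now consults `is_in_onnx_export_mode()` to fall back from FlashAttention to the unfused path while exporting.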


Signed-off-by: Tal Cherckez <tcherckez@nvidia.com>
Signed-off-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
Co-authored-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
parent f56e4fd0
@@ -26,3 +26,5 @@ pyTorch
.. autoapifunction:: transformer_engine.pytorch.fp8_autocast
.. autoapifunction:: transformer_engine.pytorch.checkpoint
+.. autoapifunction:: transformer_engine.pytorch.onnx_export
@@ -9,4 +9,4 @@ set -e
pip install pytest==6.2.5 onnxruntime==1.13.1
pytest -v -s $TE_PATH/tests/pytorch/test_sanity.py
PYTORCH_JIT=0 NVTE_ALLOW_NONDETERMINISTIC_ALGO=0 pytest -v -s $TE_PATH/tests/pytorch/test_numerics.py
-NVTE_FLASH_ATTN=0 pytest -v -s $TE_PATH/tests/pytorch/test_onnx_export.py
+pytest -v -s $TE_PATH/tests/pytorch/test_onnx_export.py
@@ -32,7 +32,7 @@ from transformer_engine.pytorch.module import get_workspace
import transformer_engine.pytorch.cpp_extensions as texcpp
import transformer_engine.pytorch.softmax as softmax_defs
from transformer_engine.pytorch.utils import get_default_init_method
+from transformer_engine.pytorch.export import is_in_onnx_export_mode
# Global test configuration knobs.
@@ -89,15 +89,16 @@ def do_export(
    os.makedirs(TEST_ARTIFACTS_DIR, exist_ok=True)
    fname = os.path.join(TEST_ARTIFACTS_DIR, fname)
    inps = inp if isinstance(inp, list) or isinstance(inp, tuple) else (inp,)
-    torch.onnx.export(model,
-                      inps,
-                      fname,
-                      verbose=False,
-                      opset_version=opset,
-                      input_names=input_names,
-                      output_names=output_names,
-                      do_constant_folding=False,
-                      operator_export_type=torch.onnx.OperatorExportTypes.ONNX_FALLTHROUGH)
+    with te.onnx_export(True):
+        torch.onnx.export(model,
+                          inps,
+                          fname,
+                          verbose=False,
+                          opset_version=opset,
+                          input_names=input_names,
+                          output_names=output_names,
+                          do_constant_folding=False,
+                          operator_export_type=torch.onnx.OperatorExportTypes.ONNX_FALLTHROUGH)
def to_numpy(tensor):
@@ -1003,3 +1004,10 @@ def test_export_transformer_layer(
        validate_result(fname, inp, model, atol=1e-3)
    elif precision != torch.float16:
        validate_result(fname, inp, model, atol=5e-1, is_fp8=use_fp8)

+@pytest.mark.parametrize("enabled", [True, False])
+def test_export_ctx_manager(enabled):
+    assert is_in_onnx_export_mode() == False
+    with te.onnx_export(enabled):
+        assert is_in_onnx_export_mode() == enabled
+    assert is_in_onnx_export_mode() == False
@@ -10,6 +10,7 @@ from .module import LayerNorm
from .transformer import DotProductAttention
from .transformer import TransformerLayer
from .fp8 import fp8_autocast
+from .export import onnx_export
from .distributed import checkpoint
# Register custom op symbolic ONNX functions
from .te_onnx_extensions import (
new file: transformer_engine/pytorch/export.py
# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.

"""Export utilities for TransformerEngine"""
from contextlib import contextmanager

_IN_ONNX_EXPORT_MODE = False


@contextmanager
def onnx_export(
    enabled: bool = False,
) -> None:
    """
    Context manager for exporting to ONNX.

    .. code-block:: python

        with onnx_export(enabled=True):
            torch.onnx.export(model)

    Parameters
    ----------
    enabled: bool, default = `False`
             whether or not to enable export
    """
    global _IN_ONNX_EXPORT_MODE
    onnx_export_state = _IN_ONNX_EXPORT_MODE
    try:
        _IN_ONNX_EXPORT_MODE = enabled
        yield
    finally:
        _IN_ONNX_EXPORT_MODE = onnx_export_state


def is_in_onnx_export_mode() -> bool:
    """Returns True if ONNX export mode is enabled, False otherwise."""
    return _IN_ONNX_EXPORT_MODE
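For reference, a minimal usage sketch of the new context manager. The `torch.nn.Linear` model, input shapes, output file name, and opset version here are illustrative placeholders, not part of the commit:

import torch
import transformer_engine.pytorch as te
from transformer_engine.pytorch.export import is_in_onnx_export_mode

# Placeholder model and input; any torch.nn.Module illustrates the pattern.
model = torch.nn.Linear(16, 16).eval()
inp = torch.randn(4, 16)

assert not is_in_onnx_export_mode()
with te.onnx_export(True):
    # While the flag is set, TE modules select ONNX-compatible code paths.
    assert is_in_onnx_export_mode()
    torch.onnx.export(model, (inp,), "model.onnx", opset_version=15)
assert not is_in_onnx_export_mode()  # previous state restored on exit

The try/finally save-and-restore in the context manager guarantees the flag is reset even if `torch.onnx.export` raises, mirroring the pattern used by `fp8_autocast`.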
@@ -2,7 +2,7 @@
#
# See LICENSE for license information.
"""FP8 utilies for TransformerEngine"""
"""FP8 utilities for TransformerEngine"""
from contextlib import contextmanager
from collections import deque
from typing import Callable, List, Optional, Dict, Any, Tuple, Union
@@ -41,6 +41,7 @@ from transformer_engine.pytorch.distributed import (
    get_distributed_world_size,
    checkpoint,
)
+from transformer_engine.pytorch.export import is_in_onnx_export_mode
_flash_attn_version = version("flash-attn")
warnings.filterwarnings("module", category=DeprecationWarning, module="transformer")
@@ -442,6 +443,9 @@ class DotProductAttention(torch.nn.Module):
        ):
            use_flash_attention = False

+        if is_in_onnx_export_mode():
+            use_flash_attention = False
+
        if use_flash_attention:
            if checkpoint_core_attention:
                return self._checkpointed_attention_forward(self.flash_attention,
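This gating is why the NVTE_FLASH_ATTN=0 override could be dropped from the QA script: FlashAttention's fused CUDA kernel has no ONNX symbolic, so export mode must route attention through a traceable path. A simplified sketch of the idea follows; the `fused_attention`, `unfused_attention`, and `attention_forward` helpers are hypothetical stand-ins, not TE's actual functions:

import torch
from transformer_engine.pytorch.export import is_in_onnx_export_mode

def unfused_attention(q, k, v):
    # Plain-PyTorch path: every op here maps to a standard ONNX operator.
    scores = torch.matmul(q, k.transpose(-2, -1)) / q.shape[-1] ** 0.5
    return torch.matmul(torch.softmax(scores, dim=-1), v)

def fused_attention(q, k, v):
    # Stand-in for a fused CUDA kernel such as FlashAttention, which
    # torch.onnx.export cannot trace; here it just reuses the math above.
    return unfused_attention(q, k, v)

def attention_forward(q, k, v):
    # Mirrors the gating in DotProductAttention: the fused kernel is
    # skipped whenever ONNX export mode is active.
    use_fused = q.is_cuda and not is_in_onnx_export_mode()
    if use_fused:
        return fused_attention(q, k, v)
    return unfused_attention(q, k, v)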