"tools/vscode:/vscode.git/clone" did not exist on "5e4e0e51f4fbb6d6fbf7a94feef07bd3617bfa5f"
Unverified commit 99e5539a, authored by Artem Perevedentsev, committed by GitHub
Browse files

[Perf][GDN] Align TMA usage with upstream FLA (#38981)


Signed-off-by: Artem Perevedentsev <aperevedents@nvidia.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent a88ce94b
...@@ -154,9 +154,13 @@ is_nvidia_hopper = is_nvidia and ( ...@@ -154,9 +154,13 @@ is_nvidia_hopper = is_nvidia and (
) )
use_cuda_graph = is_nvidia and os.environ.get("FLA_USE_CUDA_GRAPH", "0") == "1" use_cuda_graph = is_nvidia and os.environ.get("FLA_USE_CUDA_GRAPH", "0") == "1"
is_gather_supported = hasattr(triton.language, "gather") is_gather_supported = hasattr(triton.language, "gather")
is_tma_supported = (is_nvidia and torch.cuda.get_device_capability(0)[0] >= 9) and ( is_tma_supported = (
is_nvidia_hopper
and os.getenv("FLA_USE_TMA", "0") == "1"
and (
hasattr(triton.language, "_experimental_make_tensor_descriptor") hasattr(triton.language, "_experimental_make_tensor_descriptor")
or hasattr(triton.language, "make_tensor_descriptor") or hasattr(triton.language, "make_tensor_descriptor")
)
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment