Unverified Commit 64deead7, authored by vllmellm and committed by GitHub
Browse files

[Bugfix] [ROCm] [UX]: revert Flex attention backend (#29371)


Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
parent 7992324f
...@@ -36,6 +36,12 @@ def mock_on_gfx9(): ...@@ -36,6 +36,12 @@ def mock_on_gfx9():
@pytest.mark.parametrize( @pytest.mark.parametrize(
"env_vars, selected_backend, expected_backend_path", "env_vars, selected_backend, expected_backend_path",
[ [
# Test Case: Explicit FLEX_ATTENTION backend
(
{},
"FLEX_ATTENTION",
AttentionBackendEnum.FLEX_ATTENTION.get_path(),
),
# Test Case 1: Default (no env vars, no explicit backend) # Test Case 1: Default (no env vars, no explicit backend)
( (
{}, {},
......
...@@ -262,6 +262,10 @@ class RocmPlatform(Platform): ...@@ -262,6 +262,10 @@ class RocmPlatform(Platform):
f"is not MLA type while requested for MLA backend." f"is not MLA type while requested for MLA backend."
) )
if selected_backend == AttentionBackendEnum.FLEX_ATTENTION:
logger.info("Using FlexAttention backend.")
return AttentionBackendEnum.FLEX_ATTENTION.get_path()
if selected_backend == AttentionBackendEnum.TRITON_ATTN: if selected_backend == AttentionBackendEnum.TRITON_ATTN:
logger.info("Using Triton Attention backend on V1 engine.") logger.info("Using Triton Attention backend on V1 engine.")
return AttentionBackendEnum.TRITON_ATTN.get_path() return AttentionBackendEnum.TRITON_ATTN.get_path()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment