Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e79a12fc
Unverified
Commit
e79a12fc
authored
Aug 05, 2025
by
Michael Goin
Committed by
GitHub
Aug 04, 2025
Browse files
[UX] Fail if an invalid attention backend is specified (#22217)
Signed-off-by:
mgoin
<
michael@neuralmagic.com
>
parent
cdfd6871
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
15 deletions
+9
-15
tests/kernels/attention/test_attention_selector.py
tests/kernels/attention/test_attention_selector.py
+5
-15
vllm/attention/selector.py
vllm/attention/selector.py
+4
-0
No files found.
tests/kernels/attention/test_attention_selector.py
View file @
e79a12fc
...
...
@@ -278,23 +278,13 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
@
pytest
.
mark
.
parametrize
(
"use_v1"
,
[
True
,
False
])
def
test_invalid_env
(
use_v1
:
bool
,
monkeypatch
:
pytest
.
MonkeyPatch
):
"""Test that invalid attention backend names raise ValueError."""
with
monkeypatch
.
context
()
as
m
,
patch
(
"vllm.attention.selector.current_platform"
,
CudaPlatform
()):
m
.
setenv
(
"VLLM_USE_V1"
,
"1"
if
use_v1
else
"0"
)
m
.
setenv
(
STR_BACKEND_ENV_VAR
,
STR_INVALID_VAL
)
# Test with head size 32
backend
=
get_attn_backend
(
32
,
torch
.
float16
,
None
,
16
,
False
)
EXPECTED
=
"FLASH_ATTN_VLLM_V1"
if
use_v1
else
"FLASH_ATTN"
assert
backend
.
get_name
()
==
EXPECTED
# when block size == 16, backend will fall back to XFORMERS
# this behavior is not yet supported on V1.
if
use_v1
:
# TODO: support fallback on V1!
# https://github.com/vllm-project/vllm/issues/14524
pass
else
:
backend
=
get_attn_backend
(
16
,
torch
.
float16
,
None
,
16
,
False
)
assert
backend
.
get_name
()
==
"XFORMERS"
# Should raise ValueError for invalid backend
with
pytest
.
raises
(
ValueError
)
as
exc_info
:
get_attn_backend
(
32
,
torch
.
float16
,
None
,
16
,
False
)
assert
"Invalid attention backend: 'INVALID'"
in
str
(
exc_info
.
value
)
vllm/attention/selector.py
View file @
e79a12fc
...
...
@@ -193,6 +193,10 @@ def _cached_get_attn_backend(
backend_by_env_var
:
Optional
[
str
]
=
envs
.
VLLM_ATTENTION_BACKEND
if
backend_by_env_var
is
not
None
:
selected_backend
=
backend_name_to_enum
(
backend_by_env_var
)
if
selected_backend
is
None
:
raise
ValueError
(
f
"Invalid attention backend: '
{
backend_by_env_var
}
'. "
f
"Valid backends are:
{
list
(
_Backend
.
__members__
.
keys
())
}
"
)
# get device-specific attn_backend
attention_cls
=
current_platform
.
get_attn_backend_cls
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment