Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9ad0a458
Unverified
Commit
9ad0a458
authored
Jul 14, 2025
by
Pavani Majety
Committed by
GitHub
Jul 15, 2025
Browse files
[Bugfix] Switch bailout logic for kv-cache-dtype with SM100 Flashinfer (#20934)
Signed-off-by:
Pavani Majety
<
pmajety@nvidia.com
>
parent
016b8d1b
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
3 deletions
+4
-3
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+4
-3
No files found.
vllm/engine/arg_utils.py
View file @
9ad0a458
...
@@ -1418,14 +1418,15 @@ class EngineArgs:
...
@@ -1418,14 +1418,15 @@ class EngineArgs:
and
not
envs
.
is_set
(
"VLLM_ATTENTION_BACKEND"
)
and
not
envs
.
is_set
(
"VLLM_ATTENTION_BACKEND"
)
)
or
envs
.
VLLM_ATTENTION_BACKEND
==
"FLASH_ATTN_VLLM_V1"
)
or
envs
.
VLLM_ATTENTION_BACKEND
==
"FLASH_ATTN_VLLM_V1"
supported
=
False
supported
=
False
if
current_platform
.
is_rocm
():
if
current_platform
.
is_rocm
()
or
(
current_platform
.
is_cuda
()
and
current_platform
.
is_device_capability
(
100
)):
supported
=
True
supported
=
True
elif
fp8_attention
and
will_use_fa
:
elif
fp8_attention
and
will_use_fa
:
from
vllm.attention.utils.fa_utils
import
(
from
vllm.attention.utils.fa_utils
import
(
flash_attn_supports_fp8
)
flash_attn_supports_fp8
)
supported
=
flash_attn_supports_fp8
()
supported
=
flash_attn_supports_fp8
()
elif
envs
.
VLLM_USE_TRTLLM_DECODE_ATTENTION
:
supported
=
True
if
not
supported
:
if
not
supported
:
_raise_or_fallback
(
feature_name
=
"--kv-cache-dtype"
,
_raise_or_fallback
(
feature_name
=
"--kv-cache-dtype"
,
recommend_to_remove
=
False
)
recommend_to_remove
=
False
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment