Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0032903a
Unverified
Commit
0032903a
authored
Mar 20, 2025
by
Travis Johnson
Committed by
GitHub
Mar 20, 2025
Browse files
[Bugfix] detect alibi and revert to FA2 (#15231)
Signed-off-by:
Travis Johnson
<
tsjohnso@us.ibm.com
>
parent
47195057
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
4 deletions
+11
-4
vllm/attention/backends/flash_attn.py
vllm/attention/backends/flash_attn.py
+2
-1
vllm/fa_utils.py
vllm/fa_utils.py
+9
-3
No files found.
vllm/attention/backends/flash_attn.py
View file @
0032903a
...
...
@@ -630,7 +630,8 @@ class FlashAttentionImpl(AttentionImpl):
self
.
sliding_window
=
((
sliding_window
-
1
,
0
)
if
sliding_window
is
not
None
else
(
-
1
,
-
1
))
self
.
kv_cache_dtype
=
kv_cache_dtype
self
.
vllm_flash_attn_version
=
get_flash_attn_version
()
self
.
vllm_flash_attn_version
=
get_flash_attn_version
(
requires_alibi
=
self
.
alibi_slopes
is
not
None
)
if
(
is_quantized_kv_cache
(
self
.
kv_cache_dtype
)
and
self
.
vllm_flash_attn_version
!=
3
):
raise
NotImplementedError
(
...
...
vllm/fa_utils.py
View file @
0032903a
...
...
@@ -7,7 +7,7 @@ from vllm.logger import init_logger
logger
=
init_logger
(
__name__
)
def
get_flash_attn_version
()
->
Optional
[
int
]:
def
get_flash_attn_version
(
requires_alibi
:
bool
=
False
)
->
Optional
[
int
]:
# import here to avoid circular dependencies
from
vllm.platforms
import
current_platform
try
:
...
...
@@ -28,8 +28,14 @@ def get_flash_attn_version() -> Optional[int]:
# 3. fallback for unsupported combinations
if
device_capability
.
major
==
10
and
fa_version
==
3
:
logger
.
warning
(
"Cannot use FA version 3 on Blackwell platform"
,
"defaulting to FA version 2."
)
logger
.
warning_once
(
"Cannot use FA version 3 on Blackwell platform "
"defaulting to FA version 2."
)
fa_version
=
2
if
requires_alibi
and
fa_version
==
3
:
logger
.
warning_once
(
"Cannot use FA version 3 with ALiBi, "
"defaulting to FA version 2."
)
fa_version
=
2
if
not
is_fa_version_supported
(
fa_version
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment