Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7cf5d5c4
Commit
7cf5d5c4
authored
Apr 16, 2025
by
zhuwenwen
Browse files
update fa interface
parent
6b5ea53c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
6 deletions
+6
-6
vllm/attention/backends/mla/common.py
vllm/attention/backends/mla/common.py
+6
-6
No files found.
vllm/attention/backends/mla/common.py
View file @
7cf5d5c4
...
@@ -215,7 +215,7 @@ from vllm.multimodal import MultiModalPlaceholderMap
...
@@ -215,7 +215,7 @@ from vllm.multimodal import MultiModalPlaceholderMap
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.triton_utils
import
HAS_TRITON
from
vllm.triton_utils
import
HAS_TRITON
from
vllm.utils
import
async_tensor_h2d
,
cdiv
,
make_tensor_with_pad
,
round_down
from
vllm.utils
import
async_tensor_h2d
,
cdiv
,
make_tensor_with_pad
,
round_down
from
vllm.vllm_flash_attn.fa_utils
import
get_flash_attn_version
#
from vllm.vllm_flash_attn.fa_utils import get_flash_attn_version
if
HAS_TRITON
:
if
HAS_TRITON
:
from
vllm.attention.ops.triton_flash_attention
import
triton_attention
from
vllm.attention.ops.triton_flash_attention
import
triton_attention
...
@@ -1050,11 +1050,11 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
...
@@ -1050,11 +1050,11 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
# and the one from vllm_flash_attn. The former is used on RoCM and the
# and the one from vllm_flash_attn. The former is used on RoCM and the
# latter has an additional parameter to control FA2 vs FA3
# latter has an additional parameter to control FA2 vs FA3
self
.
flash_attn_varlen_func
=
flash_attn_varlen_func
self
.
flash_attn_varlen_func
=
flash_attn_varlen_func
self
.
vllm_flash_attn_version
=
get_flash_attn_version
()
#
self.vllm_flash_attn_version = get_flash_attn_version()
if
self
.
vllm_flash_attn_version
is
not
None
:
#
if self.vllm_flash_attn_version is not None:
self
.
flash_attn_varlen_func
=
\
#
self.flash_attn_varlen_func = \
functools
.
partial
(
flash_attn_varlen_func
,
#
functools.partial(flash_attn_varlen_func,
fa_version
=
self
.
vllm_flash_attn_version
)
#
fa_version=self.vllm_flash_attn_version)
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment