Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
55137e8e
Unverified
Commit
55137e8e
authored
Oct 26, 2024
by
ErkinSagiroglu
Committed by
GitHub
Oct 26, 2024
Browse files
Fix: MI100 Support By Bypassing Custom Paged Attention (#9560)
parent
5cbdccd1
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
2 deletions
+6
-2
vllm/attention/backends/rocm_flash_attn.py
vllm/attention/backends/rocm_flash_attn.py
+6
-2
No files found.
vllm/attention/backends/rocm_flash_attn.py
View file @
55137e8e
...
@@ -21,7 +21,10 @@ if TYPE_CHECKING:
...
@@ -21,7 +21,10 @@ if TYPE_CHECKING:
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
_PARTITION_SIZE_ROCM
=
512
_PARTITION_SIZE_ROCM
=
512
_ON_NAVI
=
"gfx1"
in
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
_GPU_ARCH
=
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
_ON_NAVI
=
"gfx1"
in
_GPU_ARCH
_ON_MI250_MI300
=
any
(
arch
in
_GPU_ARCH
for
arch
in
[
"gfx90a"
,
"gfx940"
,
"gfx941"
,
"gfx942"
])
class
ROCmFlashAttentionBackend
(
AttentionBackend
):
class
ROCmFlashAttentionBackend
(
AttentionBackend
):
...
@@ -662,7 +665,8 @@ def _use_rocm_custom_paged_attention(qtype: torch.dtype, head_size: int,
...
@@ -662,7 +665,8 @@ def _use_rocm_custom_paged_attention(qtype: torch.dtype, head_size: int,
block_size
:
int
,
gqa_ratio
:
int
,
block_size
:
int
,
gqa_ratio
:
int
,
max_seq_len
:
int
)
->
bool
:
max_seq_len
:
int
)
->
bool
:
# ROCm custom paged attention is not supported on Navi (gfx1*)
# ROCm custom paged attention is not supported on Navi (gfx1*)
return
(
not
_ON_NAVI
and
(
qtype
==
torch
.
half
or
qtype
==
torch
.
bfloat16
)
return
(
_ON_MI250_MI300
and
not
_ON_NAVI
and
(
qtype
==
torch
.
half
or
qtype
==
torch
.
bfloat16
)
and
(
head_size
==
64
or
head_size
==
128
)
and
(
head_size
==
64
or
head_size
==
128
)
and
(
block_size
==
16
or
block_size
==
32
)
and
(
block_size
==
16
or
block_size
==
32
)
and
(
gqa_ratio
>=
1
and
gqa_ratio
<=
16
)
and
max_seq_len
<=
32768
)
and
(
gqa_ratio
>=
1
and
gqa_ratio
<=
16
)
and
max_seq_len
<=
32768
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment