Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
43a52016
"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "739b61a348afa5da297a80ff15f4e39d6e524b53"
Commit
43a52016
authored
Apr 29, 2025
by
zhuwenwen
Browse files
update rocm.py
parent
dcec1db7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
29 additions
and
32 deletions
+29
-32
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+29
-32
No files found.
vllm/platforms/rocm.py
View file @
43a52016
...
...
@@ -140,7 +140,6 @@ class RocmPlatform(Platform):
kv_cache_dtype
,
block_size
,
use_v1
,
use_mla
)
->
str
:
if
use_mla
:
<<<<<<<
HEAD
if
selected_backend
==
_Backend
.
TRITON_MLA
or
block_size
!=
64
:
if
use_v1
:
logger
.
info_once
(
"Using Triton MLA backend on V1 engine."
)
...
...
@@ -174,40 +173,38 @@ class RocmPlatform(Platform):
"flashmla.FlashMLABackend"
)
else
:
logger
.
info
(
"Using Triton MLA backend (block size 64)."
)
return
"vllm.attention.backends.triton_mla.TritonMLABackend"
=======
from
vllm.attention.backends.rocm_aiter_mla
import
(
is_aiter_mla_enabled
)
if
selected_backend
is
None
:
selected_backend
=
(
_Backend
.
ROCM_AITER_MLA
if
is_aiter_mla_enabled
()
or
block_size
==
1
else
_Backend
.
TRITON_MLA
)
if
selected_backend
==
_Backend
.
TRITON_MLA
:
if
block_size
!=
1
:
logger
.
info
(
"Using Triton MLA backend."
)
return
"vllm.attention.backends.triton_mla.TritonMLABackend"
# noqa: E501
else
:
raise
ValueError
(
f
" The selected backend,
{
selected_backend
.
name
}
,"
f
"does not support block size
{
block_size
}
."
)
elif
selected_backend
==
_Backend
.
ROCM_AITER_MLA
:
if
block_size
==
1
:
logger
.
info
(
"Using AITER MLA backend."
)
return
"vllm.attention.backends.rocm_aiter_mla.AiterMLABackend"
# noqa: E501
else
:
raise
ValueError
(
f
" The selected backend,
{
selected_backend
.
name
}
,"
f
"does not support block size
{
block_size
}
."
"(currently only supports block size 1)"
)
else
:
return
"vllm.attention.backends.triton_mla.TritonMLABackend"
# from vllm.attention.backends.rocm_aiter_mla import (
# is_aiter_mla_enabled)
# if selected_backend is None:
# selected_backend = (_Backend.ROCM_AITER_MLA if
# is_aiter_mla_enabled() or block_size == 1
# else _Backend.TRITON_MLA)
# if selected_backend == _Backend.TRITON_MLA:
# if block_size != 1:
# logger.info("Using Triton MLA backend.")
# return "vllm.attention.backends.triton_mla.TritonMLABackend" # noqa: E501
# else:
# raise ValueError(
# f" The selected backend, {selected_backend.name},"
# f"does not support block size {block_size}.")
# elif selected_backend == _Backend.ROCM_AITER_MLA:
# if block_size == 1:
# logger.info("Using AITER MLA backend.")
# return "vllm.attention.backends.rocm_aiter_mla.AiterMLABackend" # noqa: E501
# else:
# raise ValueError(
# f" The selected backend, {selected_backend.name},"
# f"does not support block size {block_size}."
# "(currently only supports block size 1)")
# else:
raise
ValueError
(
f
" The selected backend,
{
selected_backend
.
name
}
,"
f
"is not MLA type while requested for MLA backend."
)
>>>>>>>
v0
.
8.5
selected_backend
=
(
_Backend
.
ROCM_FLASH
if
selected_backend
==
_Backend
.
FLASH_ATTN
else
selected_backend
)
if
envs
.
VLLM_USE_V1
:
...
...
@@ -384,4 +381,4 @@ class RocmPlatform(Platform):
@
classmethod
def
get_cu_count
(
cls
,
device_id
:
int
=
0
)
->
int
:
return
torch
.
cuda
.
get_device_properties
(
device_id
).
multi_processor_count
device_id
).
multi_processor_count
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment