Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
10a836fb
Commit
10a836fb
authored
Oct 23, 2025
by
zhuwenwen
Browse files
Merge branch 'v0.9.2-dev' into v0.9.2-step3v
parents
d9934804
86d92eb9
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
17 additions
and
4 deletions
+17
-4
tests/kernels/moe/test_moe.py
tests/kernels/moe/test_moe.py
+2
-2
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+4
-0
vllm/model_executor/models/vision.py
vllm/model_executor/models/vision.py
+3
-0
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+7
-2
vllm/utils/__init__.py
vllm/utils/__init__.py
+1
-0
No files found.
tests/kernels/moe/test_moe.py
View file @
10a836fb
...
...
@@ -114,9 +114,9 @@ def run_moe_test(
return
baseline_output
@
pytest
.
mark
.
parametrize
(
"m"
,
[
1
,
33
,
64
,
222
,
32768
,
40000
])
@
pytest
.
mark
.
parametrize
(
"m"
,
[
1
,
33
,
64
,
32768
,
40000
])
@
pytest
.
mark
.
parametrize
(
"n"
,
[
128
,
1024
,
2048
])
@
pytest
.
mark
.
parametrize
(
"k"
,
[
128
,
51
1
,
1024
])
@
pytest
.
mark
.
parametrize
(
"k"
,
[
128
,
51
2
,
1024
])
@
pytest
.
mark
.
parametrize
(
"e"
,
NUM_EXPERTS
)
@
pytest
.
mark
.
parametrize
(
"topk"
,
TOP_KS
)
@
pytest
.
mark
.
parametrize
(
"ep_size"
,
EP_SIZE
)
...
...
vllm/model_executor/model_loader/utils.py
View file @
10a836fb
...
...
@@ -260,6 +260,10 @@ def get_model_architecture(
os
.
environ
[
'VLLM_USE_LIGHTOP'
]
=
'1'
if
not
envs
.
is_set
(
"VLLM_USE_OPT_CAT"
):
os
.
environ
[
'VLLM_USE_OPT_CAT'
]
=
'1'
if
not
envs
.
is_set
(
"USE_FUSED_RMS_QUANT"
):
os
.
environ
[
'USE_FUSED_RMS_QUANT'
]
=
'1'
if
not
envs
.
is_set
(
"USE_FUSED_SILU_MUL_QUANT"
):
os
.
environ
[
'USE_FUSED_SILU_MUL_QUANT'
]
=
'1'
# awq相关配置
try
:
if
os
.
getenv
(
'AWQ_MOE_SZ'
)
==
None
:
...
...
vllm/model_executor/models/vision.py
View file @
10a836fb
...
...
@@ -12,6 +12,7 @@ from vllm.attention.selector import (backend_name_to_enum,
get_global_forced_attn_backend
)
from
vllm.logger
import
init_logger
from
vllm.platforms
import
_Backend
,
current_platform
from
vllm.utils
import
SUPPORT_TC
logger
=
init_logger
(
__name__
)
...
...
@@ -82,6 +83,8 @@ def get_vit_attn_backend(support_fa: bool = False) -> _Backend:
selected_backend
=
backend_name_to_enum
(
backend_by_env_var
)
if
selected_backend
is
None
:
if
current_platform
.
is_cuda
()
or
current_platform
.
is_rocm
():
if
not
SUPPORT_TC
:
selected_backend
=
_Backend
.
TORCH_SDPA
device_available
=
current_platform
.
has_device_capability
(
80
)
if
device_available
and
support_fa
:
from
transformers.utils
import
is_flash_attn_2_available
...
...
vllm/platforms/rocm.py
View file @
10a836fb
...
...
@@ -16,14 +16,17 @@ from vllm.utils import cuda_device_count_stateless
from
.interface
import
DeviceCapability
,
Platform
,
PlatformEnum
,
_Backend
from
vllm.utils
import
SUPPORT_TC
from
vllm.utils
import
is_kme
,
SUPPORT_TC
if
not
SUPPORT_TC
:
os
.
environ
[
'VLLM_USE_V1'
]
=
'0'
os
.
environ
[
'VLLM_USE_FLASH_ATTN_PA'
]
=
'0'
os
.
environ
[
'VLLM_USE_FLASH_MLA'
]
=
'0'
if
is_kme
:
os
.
environ
[
'VLLM_USE_FLASH_ATTN_PA'
]
=
'0'
if
TYPE_CHECKING
:
from
vllm.config
import
ModelConfig
,
VllmConfig
...
...
@@ -301,6 +304,8 @@ class RocmPlatform(Platform):
logger
.
info
(
"flash_attn is not supported on NAVI GPUs."
)
else
:
logger
.
info
(
"%s is not supported in AMD GPUs."
,
selected_backend
)
if
is_kme
:
os
.
environ
[
'VLLM_USE_TRITON_FLASH_ATTN'
]
=
'1'
logger
.
info
(
"Using ROCmFlashAttention backend."
)
return
"vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend"
# noqa: E501
...
...
vllm/utils/__init__.py
View file @
10a836fb
...
...
@@ -85,6 +85,7 @@ POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768
MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS
=
5120
gpuname
=
torch
.
cuda
.
get_device_properties
(
torch
.
cuda
.
current_device
()).
name
is_kme
=
gpuname
.
startswith
(
'K100_AI'
)
or
gpuname
.
startswith
(
'K500SM_AI'
)
SUPPORT_TC
=
gpuname
.
startswith
(
'K100_AI'
)
or
gpuname
.
startswith
(
'K500SM_AI'
)
or
gpuname
.
startswith
(
'BW'
)
def
_generate_random_int8
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment