Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a3695a2b
Commit
a3695a2b
authored
Nov 03, 2025
by
zhuwenwen
Browse files
use apply_rotary_emb_torch for z100l&k100
parent
c77f17b9
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
16 additions
and
9 deletions
+16
-9
vllm/envs.py
vllm/envs.py
+1
-1
vllm/model_executor/layers/rotary_embedding.py
vllm/model_executor/layers/rotary_embedding.py
+7
-3
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye.py
+6
-4
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+2
-1
No files found.
vllm/envs.py
View file @
a3695a2b
...
...
@@ -1116,7 +1116,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
(
"true"
,
"1"
)),
# vLLM will use lightop moe_sum_mul_add
"VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"
,
"True"
).
lower
()
in
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_LIGHTOP_MOE_SUM_MUL_ADD"
,
"False"
).
lower
()
in
(
"true"
,
"1"
)),
# vLLM will use lightop moe_sum
"VLLM_USE_LIGHTOP_MOE_SUM"
:
...
...
vllm/model_executor/layers/rotary_embedding.py
View file @
a3695a2b
...
...
@@ -39,10 +39,11 @@ from vllm.model_executor.custom_op import CustomOp
from
vllm.platforms
import
current_platform
import
vllm.envs
as
envs
from
vllm.utils
import
direct_register_custom_op
from
vllm.utils
import
SUPPORT_TC
if
current_platform
.
is_cuda
():
from
vllm.vllm_flash_attn.layers.rotary
import
apply_rotary_emb
if
current_platform
.
is_rocm
():
if
current_platform
.
is_rocm
()
and
SUPPORT_TC
:
from
flash_attn.layers.rotary
import
apply_rotary_emb
...
...
@@ -91,10 +92,13 @@ def _apply_rotary_emb(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor,
positional embeddings.
"""
if
current_platform
.
is_cuda
():
if
SUPPORT_TC
:
return
apply_rotary_emb
(
x
.
unsqueeze
(
0
),
cos
,
sin
,
not
is_neox_style
).
squeeze
(
0
)
else
:
return
_apply_rotary_emb_torch
(
x
,
cos
,
sin
,
is_neox_style
)
else
:
return
_apply_rotary_emb_torch
(
x
,
cos
,
sin
,
is_neox_style
)
@
CustomOp
.
register
(
"rotary_embedding"
)
...
...
vllm/model_executor/models/keye.py
View file @
a3695a2b
...
...
@@ -55,6 +55,7 @@ from .utils import (AutoWeightsLoader, WeightsMapper,
maybe_prefix
,
merge_multimodal_embeddings
)
from
.vision
import
get_vit_attn_backend
from
vllm.platforms
import
current_platform
from
vllm.utils
import
SUPPORT_TC
logger
=
init_logger
(
__name__
)
...
...
@@ -331,6 +332,7 @@ def apply_rotary_pos_emb_flashatt(
cos
=
cos
.
chunk
(
2
,
dim
=-
1
)[
0
].
contiguous
()
sin
=
sin
.
chunk
(
2
,
dim
=-
1
)[
0
].
contiguous
()
if
SUPPORT_TC
:
if
not
current_platform
.
is_rocm
():
from
vllm.vllm_flash_attn.layers.rotary
import
apply_rotary_emb
else
:
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
a3695a2b
...
...
@@ -85,6 +85,7 @@ import re
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.utils
import
pad_weight
,
gemm_bank_conf
from
vllm.platforms
import
current_platform
from
vllm.utils
import
SUPPORT_TC
logger
=
init_logger
(
__name__
)
...
...
@@ -246,7 +247,7 @@ def apply_rotary_pos_emb_vision(t: torch.Tensor,
apply_rotary_emb
=
apply_rotary_emb_torch
if
current_platform
.
is_cuda
():
from
vllm.vllm_flash_attn.layers.rotary
import
apply_rotary_emb
if
current_platform
.
is_rocm
():
if
current_platform
.
is_rocm
()
and
SUPPORT_TC
:
from
flash_attn.layers.rotary
import
apply_rotary_emb
output
=
apply_rotary_emb
(
t_
,
cos
,
sin
).
type_as
(
t
)
return
output
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment