Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0ff29dbf
Commit
0ff29dbf
authored
Oct 05, 2025
by
zhuwenwen
Browse files
Merge branch 'v0.9.2-dev' into v0.9.2-dev-ds
parents
e0ba23b5
8c0143db
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
3 deletions
+10
-3
vllm/model_executor/layers/rotary_embedding.py
vllm/model_executor/layers/rotary_embedding.py
+2
-3
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+8
-0
No files found.
vllm/model_executor/layers/rotary_embedding.py
View file @
0ff29dbf
...
@@ -916,7 +916,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
...
@@ -916,7 +916,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
direct_register_custom_op
(
direct_register_custom_op
(
op_name
=
"rotary_embedding_deepseek_fuse"
,
op_name
=
"rotary_embedding_deepseek_fuse"
,
op_func
=
rotary_embedding_deepseek_fuse
,
op_func
=
rotary_embedding_deepseek_fuse
,
mutates_args
=
[],
mutates_args
=
[
"query"
,
"key"
],
fake_impl
=
rotary_embedding_deepseek_fuse_fake
,
fake_impl
=
rotary_embedding_deepseek_fuse_fake
,
)
)
...
@@ -958,8 +958,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
...
@@ -958,8 +958,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
BLOCK_SIZE
=
BLOCK_SIZE
,
BLOCK_SIZE
=
BLOCK_SIZE
,
num_warps
=
1
)
num_warps
=
1
)
# if envs.VLLM_USE_LIGHTOP:
if
envs
.
VLLM_USE_LIGHTOP
:
if
False
:
torch
.
ops
.
vllm
.
rotary_embedding_deepseek_fuse
(
positions
,
query
,
key
,
self
.
head_size
,
self
.
cos_sin_cache
,
self
.
is_neox_style
)
torch
.
ops
.
vllm
.
rotary_embedding_deepseek_fuse
(
positions
,
query
,
key
,
self
.
head_size
,
self
.
cos_sin_cache
,
self
.
is_neox_style
)
else
:
else
:
call
(
query
)
call
(
query
)
...
...
vllm/platforms/rocm.py
View file @
0ff29dbf
...
@@ -16,6 +16,14 @@ from vllm.utils import cuda_device_count_stateless
...
@@ -16,6 +16,14 @@ from vllm.utils import cuda_device_count_stateless
from
.interface
import
DeviceCapability
,
Platform
,
PlatformEnum
,
_Backend
from
.interface
import
DeviceCapability
,
Platform
,
PlatformEnum
,
_Backend
from
vllm.utils
import
SUPPORT_TC
if
not
SUPPORT_TC
:
os
.
environ
[
'VLLM_USE_V1'
]
=
'0'
os
.
environ
[
'VLLM_USE_FLASH_ATTN_PA'
]
=
'0'
os
.
environ
[
'VLLM_USE_FLASH_MLA'
]
=
'0'
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
VllmConfig
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment