Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0d5dd2da
Commit
0d5dd2da
authored
Jan 21, 2026
by
zhuwenwen
Browse files
update VLLM_USE_FUSED_RMS_ROPE=0 (default)
for qwen3, VLLM_USE_FUSED_RMS_ROPE=1 (default)
parent
25e16eea
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
1 deletion
+5
-1
vllm/envs.py
vllm/envs.py
+1
-1
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+4
-0
No files found.
vllm/envs.py
View file @
0d5dd2da
...
@@ -1309,7 +1309,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -1309,7 +1309,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# vLLM will use fused RMS + RoPE kernel
# vLLM will use fused RMS + RoPE kernel
"VLLM_USE_FUSED_RMS_ROPE"
:
"VLLM_USE_FUSED_RMS_ROPE"
:
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_FUSED_RMS_ROPE"
,
"
Tru
e"
).
lower
()
in
lambda
:
(
os
.
environ
.
get
(
"VLLM_USE_FUSED_RMS_ROPE"
,
"
Fals
e"
).
lower
()
in
(
"true"
,
"1"
)),
(
"true"
,
"1"
)),
# vLLM will use Marlin W16A16 kernel for MoE experts
# vLLM will use Marlin W16A16 kernel for MoE experts
"VLLM_USE_MARLIN_W16A16_MOE"
:
"VLLM_USE_MARLIN_W16A16_MOE"
:
...
...
vllm/model_executor/model_loader/utils.py
View file @
0d5dd2da
...
@@ -287,6 +287,8 @@ def get_model_architecture(
...
@@ -287,6 +287,8 @@ def get_model_architecture(
os
.
environ
[
'VLLM_USE_FUSE_SILU_AND_MUL'
]
=
'1'
os
.
environ
[
'VLLM_USE_FUSE_SILU_AND_MUL'
]
=
'1'
if
not
envs
.
is_set
(
"VLLM_USE_OPT_RESHAPE_AND_CACHE"
):
if
not
envs
.
is_set
(
"VLLM_USE_OPT_RESHAPE_AND_CACHE"
):
os
.
environ
[
'VLLM_USE_OPT_RESHAPE_AND_CACHE'
]
=
'1'
os
.
environ
[
'VLLM_USE_OPT_RESHAPE_AND_CACHE'
]
=
'1'
if
not
envs
.
is_set
(
"VLLM_USE_FUSED_RMS_ROPE"
):
os
.
environ
[
'VLLM_USE_FUSED_RMS_ROPE'
]
=
'1'
if
architectures
in
[[
'DeepseekV32ForCausalLM'
]]:
if
architectures
in
[[
'DeepseekV32ForCausalLM'
]]:
if
not
envs
.
is_set
(
"VLLM_USE_V32_ENCODE"
):
if
not
envs
.
is_set
(
"VLLM_USE_V32_ENCODE"
):
...
@@ -334,6 +336,8 @@ def get_model_architecture(
...
@@ -334,6 +336,8 @@ def get_model_architecture(
os
.
environ
[
'VLLM_USE_FUSE_SILU_AND_MUL'
]
=
'1'
os
.
environ
[
'VLLM_USE_FUSE_SILU_AND_MUL'
]
=
'1'
if
not
envs
.
is_set
(
"VLLM_USE_OPT_RESHAPE_AND_CACHE"
):
if
not
envs
.
is_set
(
"VLLM_USE_OPT_RESHAPE_AND_CACHE"
):
os
.
environ
[
'VLLM_USE_OPT_RESHAPE_AND_CACHE'
]
=
'1'
os
.
environ
[
'VLLM_USE_OPT_RESHAPE_AND_CACHE'
]
=
'1'
if
not
envs
.
is_set
(
"VLLM_USE_FUSED_RMS_ROPE"
):
os
.
environ
[
'VLLM_USE_FUSED_RMS_ROPE'
]
=
'1'
if
architectures
in
[[
'DeepseekV32ForCausalLM'
]]:
if
architectures
in
[[
'DeepseekV32ForCausalLM'
]]:
if
not
envs
.
is_set
(
"VLLM_USE_V32_ENCODE"
):
if
not
envs
.
is_set
(
"VLLM_USE_V32_ENCODE"
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment