Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a014d6a5
Commit
a014d6a5
authored
Nov 12, 2025
by
zhuwenwen
Browse files
update qwen3_moe of layernorm and activation
parent
8d6b0b0a
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
8 additions
and
8 deletions
+8
-8
vllm/attention/layer.py
vllm/attention/layer.py
+3
-3
vllm/envs.py
vllm/envs.py
+1
-1
vllm/model_executor/layers/activation.py
vllm/model_executor/layers/activation.py
+1
-1
vllm/model_executor/layers/layernorm.py
vllm/model_executor/layers/layernorm.py
+1
-1
vllm/model_executor/models/qwen3_moe.py
vllm/model_executor/models/qwen3_moe.py
+2
-2
No files found.
vllm/attention/layer.py
View file @
a014d6a5
vllm/envs.py
View file @
a014d6a5
vllm/model_executor/layers/activation.py
View file @
a014d6a5
...
...
@@ -77,7 +77,7 @@ class SiluAndMul(CustomOp):
"""PyTorch-native implementation equivalent to forward()."""
if
not
torch
.
compiler
.
is_compiling
()
and
envs
.
VLLM_ENABLE_TBO
:
return
self
.
forward_cuda
(
x
)
elif
envs
.
VLLM_USE_OPT_OP
:
elif
not
torch
.
compiler
.
is_compiling
()
and
envs
.
VLLM_USE_OPT_OP
:
return
self
.
forward_cuda
(
x
)
else
:
d
=
x
.
shape
[
-
1
]
//
2
...
...
vllm/model_executor/layers/layernorm.py
View file @
a014d6a5
...
...
@@ -167,7 +167,7 @@ class RMSNorm(CustomOp):
)
->
Union
[
torch
.
Tensor
,
tuple
[
torch
.
Tensor
,
torch
.
Tensor
]]:
if
not
torch
.
compiler
.
is_compiling
()
and
envs
.
VLLM_ENABLE_TBO
:
return
self
.
forward_cuda
(
x
,
residual
)
elif
envs
.
VLLM_USE_OPT_OP
:
elif
not
torch
.
compiler
.
is_compiling
()
and
envs
.
VLLM_USE_OPT_OP
:
return
self
.
forward_cuda
(
x
,
residual
)
else
:
orig_dtype
=
x
.
dtype
...
...
vllm/model_executor/models/qwen3_moe.py
View file @
a014d6a5
...
...
@@ -234,7 +234,7 @@ class Qwen3MoeAttention(nn.Module):
if
envs
.
VLLM_USE_APEX_RN
:
q_by_head
=
self
.
q_norm
.
forward_apex
(
q_by_head
)
else
:
q_by_head
=
self
.
q_norm
(
q_by_head
)
q_by_head
=
self
.
q_norm
.
forward_cuda
(
q_by_head
)
q
=
q_by_head
.
view
(
q
.
shape
)
k_by_head
=
k
.
view
(
*
k
.
shape
[:
-
1
],
k
.
shape
[
-
1
]
//
self
.
head_dim
,
...
...
@@ -242,7 +242,7 @@ class Qwen3MoeAttention(nn.Module):
if
envs
.
VLLM_USE_APEX_RN
:
k_by_head
=
self
.
k_norm
.
forward_apex
(
k_by_head
)
else
:
k_by_head
=
self
.
k_norm
(
k_by_head
)
k_by_head
=
self
.
k_norm
.
forward_cuda
(
k_by_head
)
k
=
k_by_head
.
view
(
k
.
shape
)
q
,
k
=
self
.
rotary_emb
(
positions
,
q
,
k
)
attn_output
=
self
.
attn
(
q
,
k
,
v
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment