Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
89361181
Unverified
Commit
89361181
authored
Apr 20, 2026
by
Tao He
Committed by
GitHub
Apr 20, 2026
Browse files
[Qwen][Bugfix] Fixes sigmoid activation in torch impl of RMSNormGated. (#40245)
Signed-off-by:
Tao He
<
linzhu.ht@alibaba-inc.com
>
parent
67ed01c3
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
2 deletions
+13
-2
vllm/model_executor/layers/layernorm.py
vllm/model_executor/layers/layernorm.py
+5
-2
vllm/model_executor/layers/mamba/gdn_linear_attn.py
vllm/model_executor/layers/mamba/gdn_linear_attn.py
+8
-0
No files found.
vllm/model_executor/layers/layernorm.py
View file @
89361181
...
...
@@ -478,9 +478,12 @@ class RMSNormGated(CustomOp):
weight
=
self
.
weight
.
float
()
z
=
z
.
float
()
if
z
is
not
None
else
None
assert
self
.
activation
in
[
"silu"
,
"sigmoid"
,
"swish"
]
act_fn
=
F
.
sigmoid
if
self
.
activation
==
"sigmoid"
else
F
.
silu
# Apply gating before normalization if needed
if
z
is
not
None
and
not
self
.
norm_before_gate
:
x
=
x
*
F
.
silu
(
z
)
x
=
x
*
act_fn
(
z
)
# RMS Normalization
if
self
.
group_size
is
None
:
...
...
@@ -499,7 +502,7 @@ class RMSNormGated(CustomOp):
# Apply gating after normalization if needed
if
z
is
not
None
and
self
.
norm_before_gate
:
out
=
out
*
F
.
silu
(
z
)
out
=
out
*
act_fn
(
z
)
return
out
.
to
(
orig_dtype
)
...
...
vllm/model_executor/layers/mamba/gdn_linear_attn.py
View file @
89361181
...
...
@@ -357,11 +357,19 @@ class GatedDeltaNetAttention(PluggableLayer, MambaBase):
set_weight_attrs
(
self
.
A_log
,
{
"weight_loader"
:
sharded_weight_loader
(
0
)})
set_weight_attrs
(
self
.
dt_bias
,
{
"weight_loader"
:
sharded_weight_loader
(
0
)})
output_gate_type
=
getattr
(
config
,
"output_gate_type"
,
"silu"
)
if
output_gate_type
==
"swish"
:
output_gate_type
=
"silu"
assert
output_gate_type
in
[
"silu"
,
"swish"
,
"sigmoid"
],
(
f
"unsupported
{
output_gate_type
=
}
"
)
self
.
norm
=
RMSNormGated
(
self
.
head_v_dim
,
eps
=
self
.
layer_norm_epsilon
,
group_size
=
None
,
norm_before_gate
=
True
,
activation
=
output_gate_type
,
device
=
current_platform
.
current_device
(),
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment