Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c4768dcf
Unverified
Commit c4768dcf authored Nov 10, 2025 by Jiangyun Zhu
Committed by GitHub Nov 09, 2025
Browse files
[Kernel] Fix fused_gdn_gating (#28343)
Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
parent
a65a934e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 5 additions and 3 deletions
+5
-3
vllm/model_executor/models/qwen3_next.py
vllm/model_executor/models/qwen3_next.py
+5
-3
No files found.
vllm/model_executor/models/qwen3_next.py
View file @
c4768dcf
...
...
@@ -1367,8 +1367,10 @@ def fused_gdn_gating_kernel(
    blk_g = -tl.exp(blk_A_log.to(tl.float32)) * softplus_x
    tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask)
# compute beta_output = sigmoid(b)
    blk_beta = 1.0 / (1.0 + tl.exp(-blk_b.to(tl.float32)))
    tl.store(beta_output + off, blk_beta.to(beta_output.dtype.element_ty), mask=mask)
    blk_beta_output = tl.sigmoid(blk_b.to(tl.float32))
    tl.store(beta_output + off, blk_beta_output.to(beta_output.dtype.element_ty), mask=mask)
def fused_gdn_gating(
...
...
@@ -1389,7 +1391,7 @@ def fused_gdn_gating(
    seq_len = 1
    grid = (batch, seq_len, triton.cdiv(num_heads, 8))
    g = torch.empty(1, batch, num_heads, dtype=torch.float32, device=a.device)
    beta_output = torch.empty(1, batch, num_heads, dtype=torch.float32, device=b.device)
    beta_output = torch.empty(1, batch, num_heads, dtype=b.dtype, device=b.device)
    fused_gdn_gating_kernel[grid](
        g,
        beta_output,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment