Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0dd5dee9
Unverified
Commit
0dd5dee9
authored
Jan 07, 2026
by
xuebwang-amd
Committed by
GitHub
Jan 07, 2026
Browse files
[Bugfix][Kernel] fix bias adding in triton kernel implemented fused moe (#31676)
Signed-off-by:
xuebwang-amd
<
xuebwang@amd.com
>
parent
4614c5a5
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
5 deletions
+8
-5
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+8
-5
No files found.
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
0dd5dee9
...
...
@@ -518,11 +518,7 @@ def fused_moe_kernel(
# Advance the ptrs to the next K block.
a_ptrs
+=
BLOCK_SIZE_K
*
stride_ak
b_ptrs
+=
BLOCK_SIZE_K
*
stride_bk
if
HAS_BIAS
:
accumulator
=
accumulator
+
bias
[
None
,
:]
if
MUL_ROUTED_WEIGHT
:
moe_weight
=
tl
.
load
(
topk_weights_ptr
+
offs_token
,
mask
=
token_mask
,
other
=
0
)
accumulator
=
accumulator
*
moe_weight
[:,
None
]
if
use_int8_w8a16
:
accumulator
=
(
accumulator
*
b_scale
).
to
(
compute_type
)
elif
use_fp8_w8a8
or
use_int8_w8a8
:
...
...
@@ -533,6 +529,13 @@ def fused_moe_kernel(
else
:
accumulator
=
accumulator
.
to
(
compute_type
)
# Since bias is typically not quantized, it's added after dequantization.
if
HAS_BIAS
:
accumulator
=
accumulator
+
bias
[
None
,
:]
if
MUL_ROUTED_WEIGHT
:
moe_weight
=
tl
.
load
(
topk_weights_ptr
+
offs_token
,
mask
=
token_mask
,
other
=
0
)
accumulator
=
accumulator
*
moe_weight
[:,
None
]
# -----------------------------------------------------------
# Write back the block of the output
offs_cn
=
pid_n
*
BLOCK_SIZE_N
+
tl
.
arange
(
0
,
BLOCK_SIZE_N
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment