Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d4dbb7af
Unverified
Commit
d4dbb7af
authored
Jan 24, 2026
by
yugong333
Committed by
GitHub
Jan 24, 2026
Browse files
Using max_loras + 1 to construct grid in fused_moe_lora (#32277)
Signed-off-by:
Yu Gong
<
yu3.gong@gmail.com
>
parent
203d0bc0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
3 deletions
+8
-3
vllm/lora/ops/triton_ops/fused_moe_lora_op.py
vllm/lora/ops/triton_ops/fused_moe_lora_op.py
+8
-3
No files found.
vllm/lora/ops/triton_ops/fused_moe_lora_op.py
View file @
d4dbb7af
...
...
@@ -104,7 +104,10 @@ def _fused_moe_lora_kernel(
if
moe_enabled
==
0
:
# Early exit for the no moe lora case.
return
max_loras
=
tl
.
num_programs
(
axis
=
2
)
# The grid size on axis 2 is (max_loras + 1) to handle the no-lora case
# (lora_id == -1), but sorted_token_ids and expert_ids are allocated with
# shape (max_loras, ...). Use (num_programs - 1) for correct bounds checking.
max_loras
=
tl
.
num_programs
(
axis
=
2
)
-
1
grid_k
=
tl
.
cdiv
(
K
,
BLOCK_SIZE_K
*
SPLIT_K
)
# calculate pid_m,pid_n
...
...
@@ -255,7 +258,8 @@ def _fused_moe_lora_shrink(
*
triton
.
cdiv
(
EM
,
META
[
"BLOCK_SIZE_M"
])
*
triton
.
cdiv
(
N
,
META
[
"BLOCK_SIZE_N"
]),
len
(
lora_a_stacked
),
lora_a_stacked
[
0
].
shape
[
0
],
## max_loras + 1 to handle the no-lora case (lora_id == -1)
lora_a_stacked
[
0
].
shape
[
0
]
+
1
,
)
_fused_moe_lora_kernel
[
grid
](
qcurr_hidden_states
,
...
...
@@ -355,7 +359,8 @@ def _fused_moe_lora_expand(
grid
=
lambda
META
:
(
triton
.
cdiv
(
EM
,
META
[
"BLOCK_SIZE_M"
])
*
triton
.
cdiv
(
N
,
META
[
"BLOCK_SIZE_N"
]),
len
(
lora_b_stacked
),
lora_b_stacked
[
0
].
shape
[
0
],
## max_loras + 1 to handle the no-lora case (lora_id == -1)
lora_b_stacked
[
0
].
shape
[
0
]
+
1
,
)
_fused_moe_lora_kernel
[
grid
](
a_intermediate_cache1
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment