Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d5503ca7
Unverified
Commit
d5503ca7
authored
Jan 05, 2026
by
Jee Jee Li
Committed by
GitHub
Jan 05, 2026
Browse files
[LoRA] LoRA PDL improvement (#31660)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
a2ad15c0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
2 deletions
+4
-2
vllm/lora/ops/triton_ops/fused_moe_lora_op.py
vllm/lora/ops/triton_ops/fused_moe_lora_op.py
+4
-2
No files found.
vllm/lora/ops/triton_ops/fused_moe_lora_op.py
View file @
d5503ca7
...
@@ -163,15 +163,17 @@ def _fused_moe_lora_kernel(
...
@@ -163,15 +163,17 @@ def _fused_moe_lora_kernel(
# accumulator
# accumulator
accumulator
=
tl
.
zeros
((
BLOCK_SIZE_M
,
BLOCK_SIZE_N
),
dtype
=
tl
.
float32
)
accumulator
=
tl
.
zeros
((
BLOCK_SIZE_M
,
BLOCK_SIZE_N
),
dtype
=
tl
.
float32
)
# GDC wait waits for ALL programs in the prior kernel to complete
# before continuing.
if
USE_GDC
and
not
IS_PRIMARY
:
if
USE_GDC
and
not
IS_PRIMARY
:
tl
.
extra
.
cuda
.
gdc_wait
()
tl
.
extra
.
cuda
.
gdc_wait
()
for
k
in
range
(
0
,
grid_k
):
for
k
in
range
(
0
,
grid_k
):
k_remaining
=
K
-
k
*
(
BLOCK_SIZE_K
*
SPLIT_K
)
k_remaining
=
K
-
k
*
(
BLOCK_SIZE_K
*
SPLIT_K
)
# GDC wait waits for ALL programs in the prior kernel to complete
# before continuing.
# pre-fetch lora weight
# pre-fetch lora weight
b
=
tl
.
load
(
b_ptrs
,
mask
=
offs_k
[:,
None
]
<
k_remaining
,
other
=
0.0
)
b
=
tl
.
load
(
b_ptrs
,
mask
=
offs_k
[:,
None
]
<
k_remaining
,
other
=
0.0
)
if
USE_GDC
and
not
IS_PRIMARY
:
tl
.
extra
.
cuda
.
gdc_wait
()
a
=
tl
.
load
(
a
=
tl
.
load
(
a_ptrs
,
a_ptrs
,
mask
=
token_mask
[:,
None
]
&
(
offs_k
[
None
,
:]
<
k_remaining
),
mask
=
token_mask
[:,
None
]
&
(
offs_k
[
None
,
:]
<
k_remaining
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment