Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
48291484
Unverified
Commit
48291484
authored
Jan 07, 2026
by
ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟
Committed by
GitHub
Jan 07, 2026
Browse files
[BugFix] LoRA: Support loading base_layer of experts (#31104)
Signed-off-by:
Hollow Man
<
hollowman@opensuse.org
>
parent
efeaac92
Changes
35
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
31 additions
and
3 deletions
+31
-3
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+10
-3
vllm/model_executor/models/afmoe.py
vllm/model_executor/models/afmoe.py
+1
-0
vllm/model_executor/models/bailing_moe.py
vllm/model_executor/models/bailing_moe.py
+1
-0
vllm/model_executor/models/deepseek_eagle.py
vllm/model_executor/models/deepseek_eagle.py
+1
-0
vllm/model_executor/models/deepseek_mtp.py
vllm/model_executor/models/deepseek_mtp.py
+1
-0
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+2
-0
vllm/model_executor/models/dots1.py
vllm/model_executor/models/dots1.py
+1
-0
vllm/model_executor/models/ernie45_moe.py
vllm/model_executor/models/ernie45_moe.py
+1
-0
vllm/model_executor/models/ernie45_vl_moe.py
vllm/model_executor/models/ernie45_vl_moe.py
+1
-0
vllm/model_executor/models/glm4_moe.py
vllm/model_executor/models/glm4_moe.py
+1
-0
vllm/model_executor/models/glm4_moe_mtp.py
vllm/model_executor/models/glm4_moe_mtp.py
+1
-0
vllm/model_executor/models/gpt_oss.py
vllm/model_executor/models/gpt_oss.py
+1
-0
vllm/model_executor/models/granitemoe.py
vllm/model_executor/models/granitemoe.py
+1
-0
vllm/model_executor/models/grok1.py
vllm/model_executor/models/grok1.py
+1
-0
vllm/model_executor/models/hunyuan_v1.py
vllm/model_executor/models/hunyuan_v1.py
+1
-0
vllm/model_executor/models/jamba.py
vllm/model_executor/models/jamba.py
+1
-0
vllm/model_executor/models/kimi_linear.py
vllm/model_executor/models/kimi_linear.py
+1
-0
vllm/model_executor/models/kimi_vl.py
vllm/model_executor/models/kimi_vl.py
+1
-0
vllm/model_executor/models/lfm2_moe.py
vllm/model_executor/models/lfm2_moe.py
+1
-0
vllm/model_executor/models/llama4.py
vllm/model_executor/models/llama4.py
+2
-0
No files found.
vllm/model_executor/layers/fused_moe/layer.py
View file @
48291484
...
...
@@ -2007,6 +2007,7 @@ class FusedMoE(CustomOp):
@
classmethod
def
make_expert_params_mapping
(
cls
,
model
:
torch
.
nn
.
Module
,
ckpt_gate_proj_name
:
str
,
ckpt_down_proj_name
:
str
,
ckpt_up_proj_name
:
str
,
...
...
@@ -2025,13 +2026,19 @@ class FusedMoE(CustomOp):
)
)
base_layer
=
(
"base_layer."
if
any
(
".base_layer."
in
name
for
name
,
_
in
model
.
named_parameters
())
else
""
)
return
[
# (param_name, weight_name, expert_id, shard_id)
(
"experts.w13_"
f
"experts.
{
base_layer
}
w13_"
if
weight_name
in
[
ckpt_gate_proj_name
,
ckpt_up_proj_name
]
else
"experts.w2_"
,
f
"experts.
{
physical_to_logical_map
[
expert_id
]
}
.
{
weight_name
}
."
,
else
f
"experts.
{
base_layer
}
w2_"
,
f
"experts.
{
physical_to_logical_map
[
expert_id
]
}
.
{
weight_name
}
.
{
base_layer
}
"
,
expert_id
,
shard_id
,
)
...
...
vllm/model_executor/models/afmoe.py
View file @
48291484
...
...
@@ -475,6 +475,7 @@ class AfmoeModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/bailing_moe.py
View file @
48291484
...
...
@@ -476,6 +476,7 @@ class BailingMoeModel(nn.Module):
def
get_expert_mapping
(
self
)
->
list
[
tuple
[
str
,
str
,
int
,
str
]]:
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/deepseek_eagle.py
View file @
48291484
...
...
@@ -106,6 +106,7 @@ class DeepseekV2Model(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/deepseek_mtp.py
View file @
48291484
...
...
@@ -245,6 +245,7 @@ class DeepSeekMTP(nn.Module, SupportsPP, DeepseekV2MixtureOfExperts):
]
expert_params_mapping
=
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
48291484
...
...
@@ -1486,6 +1486,7 @@ class DeepseekV2ForCausalLM(
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
@@ -1519,6 +1520,7 @@ class DeepseekV2ForCausalLM(
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/dots1.py
View file @
48291484
...
...
@@ -424,6 +424,7 @@ class Dots1Model(nn.Module):
def
get_expert_mapping
(
self
)
->
list
[
tuple
[
str
,
str
,
int
,
str
]]:
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/ernie45_moe.py
View file @
48291484
...
...
@@ -497,6 +497,7 @@ class Ernie4_5_MoeModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/ernie45_vl_moe.py
View file @
48291484
...
...
@@ -675,6 +675,7 @@ class Ernie4_5_VLMoeForCausalLM(nn.Module, SupportsPP):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/glm4_moe.py
View file @
48291484
...
...
@@ -496,6 +496,7 @@ class Glm4MoeModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/glm4_moe_mtp.py
View file @
48291484
...
...
@@ -248,6 +248,7 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/gpt_oss.py
View file @
48291484
...
...
@@ -729,6 +729,7 @@ class GptOssForCausalLM(nn.Module, SupportsPP, SupportsEagle3, SupportsLoRA):
# Params for weights, weight scales, activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/granitemoe.py
View file @
48291484
...
...
@@ -353,6 +353,7 @@ class GraniteMoeModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"w1"
,
ckpt_down_proj_name
=
"w2"
,
ckpt_up_proj_name
=
"w3"
,
...
...
vllm/model_executor/models/grok1.py
View file @
48291484
...
...
@@ -369,6 +369,7 @@ class Grok1Model(nn.Module):
# Grok1 uses "num_experts" in its config
num_experts
=
getattr
(
self
.
config
,
"num_experts"
,
8
)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"linear"
,
# Grok1 specific
ckpt_down_proj_name
=
"linear_1"
,
# Grok1 specific
ckpt_up_proj_name
=
"linear_v"
,
# Grok1 specific
...
...
vllm/model_executor/models/hunyuan_v1.py
View file @
48291484
...
...
@@ -706,6 +706,7 @@ class HunYuanModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/jamba.py
View file @
48291484
...
...
@@ -378,6 +378,7 @@ class JambaModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/kimi_linear.py
View file @
48291484
...
...
@@ -560,6 +560,7 @@ class KimiLinearForCausalLM(
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"w1"
,
ckpt_down_proj_name
=
"w2"
,
ckpt_up_proj_name
=
"w3"
,
...
...
vllm/model_executor/models/kimi_vl.py
View file @
48291484
...
...
@@ -462,6 +462,7 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/lfm2_moe.py
View file @
48291484
...
...
@@ -486,6 +486,7 @@ class Lfm2MoeModel(nn.Module):
def
get_expert_mapping
(
self
)
->
list
[
tuple
[
str
,
str
,
int
,
str
]]:
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"w1"
,
ckpt_down_proj_name
=
"w2"
,
ckpt_up_proj_name
=
"w3"
,
...
...
vllm/model_executor/models/llama4.py
View file @
48291484
...
...
@@ -539,6 +539,7 @@ class Llama4Model(LlamaModel):
# Expert parameter mapping for the case where the expert weights are
# not fused into a single weight tensor.
expert_params_mapping
=
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
@@ -548,6 +549,7 @@ class Llama4Model(LlamaModel):
# Expert parameter mapping for the case where the expert weights are
# fused into a single weight tensor.
expert_params_mapping_fused
=
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_up_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"gate_up_proj"
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment