Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
48291484
Unverified
Commit
48291484
authored
Jan 07, 2026
by
ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟
Committed by
GitHub
Jan 07, 2026
Browse files
[BugFix] LoRA: Support loading base_layer of experts (#31104)
Signed-off-by:
Hollow Man
<
hollowman@opensuse.org
>
parent
efeaac92
Changes
35
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
15 additions
and
0 deletions
+15
-0
vllm/model_executor/models/longcat_flash.py
vllm/model_executor/models/longcat_flash.py
+1
-0
vllm/model_executor/models/mimo_v2_flash.py
vllm/model_executor/models/mimo_v2_flash.py
+1
-0
vllm/model_executor/models/minimax_m2.py
vllm/model_executor/models/minimax_m2.py
+1
-0
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral.py
+1
-0
vllm/model_executor/models/mllama4.py
vllm/model_executor/models/mllama4.py
+1
-0
vllm/model_executor/models/nemotron_h.py
vllm/model_executor/models/nemotron_h.py
+1
-0
vllm/model_executor/models/olmoe.py
vllm/model_executor/models/olmoe.py
+1
-0
vllm/model_executor/models/openpangu.py
vllm/model_executor/models/openpangu.py
+1
-0
vllm/model_executor/models/openpangu_mtp.py
vllm/model_executor/models/openpangu_mtp.py
+1
-0
vllm/model_executor/models/phimoe.py
vllm/model_executor/models/phimoe.py
+1
-0
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_moe.py
+1
-0
vllm/model_executor/models/qwen3_moe.py
vllm/model_executor/models/qwen3_moe.py
+1
-0
vllm/model_executor/models/qwen3_next.py
vllm/model_executor/models/qwen3_next.py
+1
-0
vllm/model_executor/models/qwen3_next_mtp.py
vllm/model_executor/models/qwen3_next_mtp.py
+1
-0
vllm/model_executor/models/transformers/moe.py
vllm/model_executor/models/transformers/moe.py
+1
-0
No files found.
vllm/model_executor/models/longcat_flash.py
View file @
48291484
...
...
@@ -626,6 +626,7 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/mimo_v2_flash.py
View file @
48291484
...
...
@@ -512,6 +512,7 @@ class MiMoV2Model(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/minimax_m2.py
View file @
48291484
...
...
@@ -392,6 +392,7 @@ class MiniMaxM2Model(nn.Module):
def
get_expert_mapping
(
self
)
->
list
[
tuple
[
str
,
str
,
int
,
str
]]:
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"w1"
,
ckpt_down_proj_name
=
"w2"
,
ckpt_up_proj_name
=
"w3"
,
...
...
vllm/model_executor/models/mixtral.py
View file @
48291484
...
...
@@ -366,6 +366,7 @@ class MixtralModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"w1"
,
ckpt_down_proj_name
=
"w2"
,
ckpt_up_proj_name
=
"w3"
,
...
...
vllm/model_executor/models/mllama4.py
View file @
48291484
...
...
@@ -1084,6 +1084,7 @@ class Llama4ForConditionalGeneration(
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/nemotron_h.py
View file @
48291484
...
...
@@ -636,6 +636,7 @@ class NemotronHModel(nn.Module):
# what the activation is applied to
# - FusedMoe.w3 (aka up_proj) should be ignored since we're
# using non-gated MoE
self
,
ckpt_gate_proj_name
=
"up_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
""
,
...
...
vllm/model_executor/models/olmoe.py
View file @
48291484
...
...
@@ -338,6 +338,7 @@ class OlmoeModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/openpangu.py
View file @
48291484
...
...
@@ -1161,6 +1161,7 @@ class OpenPanguModel(nn.Module):
has_experts
=
hasattr
(
self
.
config
,
"n_routed_experts"
)
if
has_experts
:
expert_merge_mapping
=
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/openpangu_mtp.py
View file @
48291484
...
...
@@ -149,6 +149,7 @@ class OpenPanguMTP(nn.Module, SupportsPP):
]
expert_params_mapping
=
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/phimoe.py
View file @
48291484
...
...
@@ -516,6 +516,7 @@ class PhiMoEModel(nn.Module):
def
get_expert_mapping
(
self
)
->
list
[
tuple
[
str
,
str
,
int
,
str
]]:
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"w1"
,
ckpt_down_proj_name
=
"w2"
,
ckpt_up_proj_name
=
"w3"
,
...
...
vllm/model_executor/models/qwen2_moe.py
View file @
48291484
...
...
@@ -423,6 +423,7 @@ class Qwen2MoeModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/qwen3_moe.py
View file @
48291484
...
...
@@ -470,6 +470,7 @@ class Qwen3MoeModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/qwen3_next.py
View file @
48291484
...
...
@@ -1031,6 +1031,7 @@ class Qwen3NextModel(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
return
SharedFusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/qwen3_next_mtp.py
View file @
48291484
...
...
@@ -147,6 +147,7 @@ class Qwen3NextMultiTokenPredictor(nn.Module):
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping
=
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
"gate_proj"
,
ckpt_down_proj_name
=
"down_proj"
,
ckpt_up_proj_name
=
"up_proj"
,
...
...
vllm/model_executor/models/transformers/moe.py
View file @
48291484
...
...
@@ -165,6 +165,7 @@ class MoEMixin(MixtureOfExperts):
for
gate_proj
,
down_proj
,
up_proj
in
ckpt_names
:
expert_mapping
.
extend
(
FusedMoE
.
make_expert_params_mapping
(
self
,
ckpt_gate_proj_name
=
gate_proj
,
ckpt_down_proj_name
=
down_proj
,
ckpt_up_proj_name
=
up_proj
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment