Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a3a6c695
Unverified
Commit
a3a6c695
authored
Jul 18, 2025
by
Jee Jee Li
Committed by
GitHub
Jul 17, 2025
Browse files
[Misc] Qwen MoE model supports LoRA (#20932)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
90bd2ab6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
8 deletions
+20
-8
docs/models/supported_models.md
docs/models/supported_models.md
+2
-2
vllm/lora/models.py
vllm/lora/models.py
+13
-0
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_moe.py
+3
-4
vllm/model_executor/models/qwen3_moe.py
vllm/model_executor/models/qwen3_moe.py
+2
-2
No files found.
docs/models/supported_models.md
View file @
a3a6c695
...
...
@@ -380,9 +380,9 @@ Specified using `--task generate`.
|
`Plamo2ForCausalLM`
| PLaMo2 |
`pfnet/plamo-2-1b`
,
`pfnet/plamo-2-8b`
, etc. | | | |
|
`QWenLMHeadModel`
| Qwen |
`Qwen/Qwen-7B`
,
`Qwen/Qwen-7B-Chat`
, etc. | ✅︎ | ✅︎ | ✅︎ |
|
`Qwen2ForCausalLM`
| QwQ, Qwen2 |
`Qwen/QwQ-32B-Preview`
,
`Qwen/Qwen2-7B-Instruct`
,
`Qwen/Qwen2-7B`
, etc. | ✅︎ | ✅︎ | ✅︎ |
|
`Qwen2MoeForCausalLM`
| Qwen2MoE |
`Qwen/Qwen1.5-MoE-A2.7B`
,
`Qwen/Qwen1.5-MoE-A2.7B-Chat`
, etc. | | ✅︎ | ✅︎ |
|
`Qwen2MoeForCausalLM`
| Qwen2MoE |
`Qwen/Qwen1.5-MoE-A2.7B`
,
`Qwen/Qwen1.5-MoE-A2.7B-Chat`
, etc. |
✅︎
| ✅︎ | ✅︎ |
|
`Qwen3ForCausalLM`
| Qwen3 |
`Qwen/Qwen3-8B`
, etc. | ✅︎ | ✅︎ | ✅︎ |
|
`Qwen3MoeForCausalLM`
| Qwen3MoE |
`Qwen/Qwen3-30B-A3B`
, etc. | | ✅︎ | ✅︎ |
|
`Qwen3MoeForCausalLM`
| Qwen3MoE |
`Qwen/Qwen3-30B-A3B`
, etc. |
✅︎
| ✅︎ | ✅︎ |
|
`StableLmForCausalLM`
| StableLM |
`stabilityai/stablelm-3b-4e1t`
,
`stabilityai/stablelm-base-alpha-7b-v2`
, etc. | | | ✅︎ |
|
`Starcoder2ForCausalLM`
| Starcoder2 |
`bigcode/starcoder2-3b`
,
`bigcode/starcoder2-7b`
,
`bigcode/starcoder2-15b`
, etc. | | ✅︎ | ✅︎ |
|
`SolarForCausalLM`
| Solar Pro |
`upstage/solar-pro-preview-instruct`
, etc. | ✅︎ | ✅︎ | ✅︎ |
...
...
vllm/lora/models.py
View file @
a3a6c695
...
...
@@ -29,6 +29,7 @@ from vllm.lora.utils import (from_layer, from_layer_logits_processor,
get_supported_lora_modules
,
is_regex_target_modules
,
parse_fine_tuned_lora_name
,
replace_submodule
)
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.model_loader.tensorizer
import
TensorizerConfig
from
vllm.model_executor.models
import
SupportsLoRA
,
supports_multimodal
from
vllm.model_executor.models.interfaces
import
is_pooling_model
...
...
@@ -60,6 +61,17 @@ def get_lora_id():
return
_GLOBAL_LORA_ID
def is_moe_model(model: nn.Module) -> bool:
    """Report whether ``model`` contains at least one ``FusedMoE`` layer.

    When such a layer is found, a warning is emitted through
    ``logger.warning_once`` telling the user that fused MoE LoRA inference
    is not supported and that the loaded LoRA model should not contain
    expert weights.

    Args:
        model: Module whose submodules (as yielded by ``model.modules()``)
            are inspected.

    Returns:
        ``True`` if any submodule is a ``FusedMoE`` instance, otherwise
        ``False``.
    """
    for submodule in model.modules():
        if not isinstance(submodule, FusedMoE):
            continue
        # The first FusedMoE layer settles the answer; warn and stop scanning.
        logger.warning_once(
            "For MoE models, vLLM currently does not support fused MoE LoRA "
            "inference. Please ensure that the loaded LoRA model does not "
            "contain expert weights."
        )
        return True
    return False
class
LoRAModel
(
AdapterModel
):
"""A LoRA fine-tuned model."""
...
...
@@ -375,6 +387,7 @@ class LoRAModelManager(AdapterModelManager):
# text modules (e.g. ChatGLM)
and
hasattr
(
self
.
model
,
"get_mm_mapping"
))
self
.
is_pooling_model
=
is_pooling_model
(
self
.
model
)
self
.
is_moe_model
=
is_moe_model
(
self
.
model
)
self
.
packed_modules
:
dict
[
str
,
list
[
str
]]
=
{}
self
.
modules
:
dict
[
str
,
BaseLayerWithLoRA
]
=
{}
# Dict instead of a set for compatibility with LRUCache.
...
...
vllm/model_executor/models/qwen2_moe.py
View file @
a3a6c695
...
...
@@ -53,7 +53,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.sequence
import
IntermediateTensors
from
.interfaces
import
SupportsPP
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.utils
import
(
AutoWeightsLoader
,
extract_layer_index
,
is_pp_missing_parameter
,
make_empty_intermediate_tensors_factory
,
make_layers
,
...
...
@@ -448,8 +448,7 @@ class Qwen2MoeModel(nn.Module):
if
weight_name
not
in
name
:
continue
name
=
name
.
replace
(
weight_name
,
param_name
)
if
"layers.13.mlp.experts.w2_weight"
in
name
:
pass
# Skip layers on other devices.
if
is_pp_missing_parameter
(
name
,
self
):
continue
...
...
@@ -494,7 +493,7 @@ class Qwen2MoeModel(nn.Module):
return
loaded_params
class
Qwen2MoeForCausalLM
(
nn
.
Module
,
SupportsPP
):
class
Qwen2MoeForCausalLM
(
nn
.
Module
,
SupportsPP
,
SupportsLoRA
):
fall_back_to_pt_during_load
=
False
packed_modules_mapping
=
{
...
...
vllm/model_executor/models/qwen3_moe.py
View file @
a3a6c695
...
...
@@ -50,7 +50,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.sequence
import
IntermediateTensors
from
.interfaces
import
SupportsPP
from
.interfaces
import
SupportsLoRA
,
SupportsPP
from
.utils
import
(
AutoWeightsLoader
,
extract_layer_index
,
is_pp_missing_parameter
,
make_empty_intermediate_tensors_factory
,
make_layers
,
...
...
@@ -482,7 +482,7 @@ class Qwen3MoeModel(nn.Module):
return
loaded_params
class
Qwen3MoeForCausalLM
(
nn
.
Module
,
SupportsPP
):
class
Qwen3MoeForCausalLM
(
nn
.
Module
,
SupportsPP
,
SupportsLoRA
):
packed_modules_mapping
=
{
"qkv_proj"
:
[
"q_proj"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment