Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
92feb999
Unverified
Commit
92feb999
authored
Apr 11, 2026
by
ShubyM
Committed by
GitHub
Apr 11, 2026
Browse files
[Gemma4][Bugfix]: Enable Gemma4ForCausalLM to load LoRA adapters correctly (#38844)
Signed-off-by:
ShubyM
<
shubymishra20@gmail.com
>
parent
d4cb783c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
40 additions
and
0 deletions
+40
-0
tests/lora/test_lora_checkpoints.py
tests/lora/test_lora_checkpoints.py
+23
-0
vllm/model_executor/models/gemma4.py
vllm/model_executor/models/gemma4.py
+17
-0
No files found.
tests/lora/test_lora_checkpoints.py
View file @
92feb999
...
@@ -5,7 +5,9 @@ import pytest
...
@@ -5,7 +5,9 @@ import pytest
from
vllm.lora.lora_model
import
LoRAModel
from
vllm.lora.lora_model
import
LoRAModel
from
vllm.lora.peft_helper
import
PEFTHelper
from
vllm.lora.peft_helper
import
PEFTHelper
from
vllm.lora.utils
import
parse_fine_tuned_lora_name
from
vllm.model_executor.models.baichuan
import
BaiChuanBaseForCausalLM
from
vllm.model_executor.models.baichuan
import
BaiChuanBaseForCausalLM
from
vllm.model_executor.models.gemma4
import
Gemma4ForCausalLM
from
vllm.model_executor.models.utils
import
WeightsMapper
from
vllm.model_executor.models.utils
import
WeightsMapper
lora_lst
=
[
"baichuan7B"
,
"baichuan7B-zero"
,
"baichuan7B-zero-regex"
,
"chatglm3-6b"
]
lora_lst
=
[
"baichuan7B"
,
"baichuan7B-zero"
,
"baichuan7B-zero-regex"
,
"chatglm3-6b"
]
...
@@ -128,3 +130,24 @@ def test_lora_weights_mapping(baichuan_lora_files):
...
@@ -128,3 +130,24 @@ def test_lora_weights_mapping(baichuan_lora_files):
for
name
in
lora_model
.
loras
:
for
name
in
lora_model
.
loras
:
assert
name
.
startswith
(
hf_to_vllm_mapper
.
orig_to_new_prefix
[
"model."
])
assert
name
.
startswith
(
hf_to_vllm_mapper
.
orig_to_new_prefix
[
"model."
])
assert
".baichuan_layers."
in
name
assert
".baichuan_layers."
in
name
def test_gemma4_lora_weights_mapping():
    """Check LoRA key parsing for a dense Gemma4 MLP projection.

    Feeds a PEFT-style adapter key rooted at ``model.language_model.`` through
    ``parse_fine_tuned_lora_name`` together with
    ``Gemma4ForCausalLM.hf_to_vllm_mapper`` and expects the parsed module path
    to be rooted at ``model.`` instead.
    """
    weights_mapper = Gemma4ForCausalLM.hf_to_vllm_mapper
    adapter_key = (
        "base_model.model.model.language_model.layers.9.mlp.down_proj.lora_A.weight"
    )
    # The second tuple element is True for this ``lora_A`` key; presumably it
    # flags the A matrix -- confirm against parse_fine_tuned_lora_name.
    expected = ("model.layers.9.mlp.down_proj", True)

    parsed = parse_fine_tuned_lora_name(adapter_key, weights_mapper)

    assert parsed == expected
def test_gemma4_moe_lora_weights_mapping():
    """Check LoRA key parsing for a Gemma4 MoE expert projection.

    Feeds a PEFT-style adapter key containing ``.moe.experts.gate_up_proj``
    through ``parse_fine_tuned_lora_name`` together with
    ``Gemma4ForCausalLM.hf_to_vllm_mapper`` and expects the parsed module path
    to be rooted at ``model.`` with the ``experts`` segment dropped.
    """
    weights_mapper = Gemma4ForCausalLM.hf_to_vllm_mapper
    adapter_key = (
        "base_model.model.model.language_model.layers.9.moe.experts."
        "gate_up_proj.lora_B.weight"
    )
    # The second tuple element is False for this ``lora_B`` key; presumably it
    # flags the A matrix -- confirm against parse_fine_tuned_lora_name.
    expected = ("model.layers.9.moe.gate_up_proj", False)

    parsed = parse_fine_tuned_lora_name(adapter_key, weights_mapper)

    assert parsed == expected
vllm/model_executor/models/gemma4.py
View file @
92feb999
...
@@ -69,6 +69,7 @@ from .interfaces import (
...
@@ -69,6 +69,7 @@ from .interfaces import (
)
)
from
.utils
import
(
from
.utils
import
(
AutoWeightsLoader
,
AutoWeightsLoader
,
WeightsMapper
,
extract_layer_index
,
extract_layer_index
,
is_pp_missing_parameter
,
is_pp_missing_parameter
,
make_layers
,
make_layers
,
...
@@ -1397,6 +1398,22 @@ class Gemma4Model(nn.Module, EagleModelMixin):
...
@@ -1397,6 +1398,22 @@ class Gemma4Model(nn.Module, EagleModelMixin):
class
Gemma4ForCausalLM
(
class
Gemma4ForCausalLM
(
nn
.
Module
,
SupportsLoRA
,
SupportsPP
,
MixtureOfExperts
,
SupportsEagle3
nn
.
Module
,
SupportsLoRA
,
SupportsPP
,
MixtureOfExperts
,
SupportsEagle3
):
):
hf_to_vllm_mapper
=
WeightsMapper
(
orig_to_new_prefix
=
{
# Gemma4ForConditionalGeneration already loads the text stack
# from `model.language_model.*`. We reuse that same checkpoint
# and adapter naming for the text-only Gemma4ForCausalLM path,
# so LoRA keys from the conditional wrapper map onto `model.*`.
"model.language_model."
:
"model."
,
},
orig_to_new_substr
=
{
# Gemma4ForConditionalGeneration names MoE adapter targets under
# `...moe.experts.*`, while the text-only model exposes them
# under `...moe.*`.
".moe.experts.gate_up_proj"
:
".moe.gate_up_proj"
,
".moe.experts.down_proj"
:
".moe.down_proj"
,
},
)
# Note: qkv_proj packing applies to non-k_eq_v layers (sliding
# Note: qkv_proj packing applies to non-k_eq_v layers (sliding
# attention and full attention without k_eq_v). k_eq_v layers use
# attention and full attention without k_eq_v). k_eq_v layers use
# separate q_proj + k_proj without packing.
# separate q_proj + k_proj without packing.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment