[Gemma4][Bugfix]: Enable Gemma4ForCausalLM to load LoRA adapters correctly (#38844)

Signed-off-by: ShubyM <shubymishra20@gmail.com>

[Gemma4][Bugfix]: Enable Gemma4ForCausalLM to load LoRA adapters correctly (#38844)
Signed-off-by: ShubyM <shubymishra20@gmail.com>
92feb999 · ShubyM · GitHub · d4cb783c · 92feb999 · 92feb999
Unverified Commit 92feb999 authored Apr 11, 2026 by ShubyM Committed by GitHub Apr 11, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 40 additions and 0 deletions

tests/lora/test_lora_checkpoints.py tests/lora/test_lora_checkpoints.py +23 -0

vllm/model_executor/models/gemma4.py vllm/model_executor/models/gemma4.py +17 -0

No files found.
--- a/tests/lora/test_lora_checkpoints.py
+++ b/tests/lora/test_lora_checkpoints.py
@@ -5,7 +5,9 @@ import pytest

 from vllm.lora.lora_model import LoRAModel
 from vllm.lora.peft_helper import PEFTHelper
+from vllm.lora.utils import parse_fine_tuned_lora_name
 from vllm.model_executor.models.baichuan import BaiChuanBaseForCausalLM
+from vllm.model_executor.models.gemma4 import Gemma4ForCausalLM
 from vllm.model_executor.models.utils import WeightsMapper

 lora_lst = ["baichuan7B", "baichuan7B-zero", "baichuan7B-zero-regex", "chatglm3-6b"]
@@ -128,3 +130,24 @@ def test_lora_weights_mapping(baichuan_lora_files):
    for name in lora_model.loras:
        assert name.startswith(hf_to_vllm_mapper.orig_to_new_prefix["model."])
        assert ".baichuan_layers." in name
+
+
+def test_gemma4_lora_weights_mapping():
+    mapper = Gemma4ForCausalLM.hf_to_vllm_mapper
+    name = "base_model.model.model.language_model.layers.9.mlp.down_proj.lora_A.weight"
+    assert parse_fine_tuned_lora_name(name, mapper) == (
+        "model.layers.9.mlp.down_proj",
+        True,
+    )
+
+
+def test_gemma4_moe_lora_weights_mapping():
+    mapper = Gemma4ForCausalLM.hf_to_vllm_mapper
+    name = (
+        "base_model.model.model.language_model.layers.9.moe.experts."
+        "gate_up_proj.lora_B.weight"
+    )
+    assert parse_fine_tuned_lora_name(name, mapper) == (
+        "model.layers.9.moe.gate_up_proj",
+        False,
+    )
--- a/vllm/model_executor/models/gemma4.py
+++ b/vllm/model_executor/models/gemma4.py
@@ -69,6 +69,7 @@ from .interfaces import (
 )
 from .utils import (
    AutoWeightsLoader,
+    WeightsMapper,
    extract_layer_index,
    is_pp_missing_parameter,
    make_layers,
@@ -1397,6 +1398,22 @@ class Gemma4Model(nn.Module, EagleModelMixin):
 class Gemma4ForCausalLM(
    nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts, SupportsEagle3
 ):
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_prefix={
+            # Gemma4ForConditionalGeneration already loads the text stack
+            # from `model.language_model.*`. We reuse that same checkpoint
+            # and adapter naming for the text-only Gemma4ForCausalLM path,
+            # so LoRA keys from the conditional wrapper map onto `model.*`.
+            "model.language_model.": "model.",
+        },
+        orig_to_new_substr={
+            # Gemma4ForConditionalGeneration names MoE adapter targets under
+            # `...moe.experts.*`, while the text-only model exposes them
+            # under `...moe.*`.
+            ".moe.experts.gate_up_proj": ".moe.gate_up_proj",
+            ".moe.experts.down_proj": ".moe.down_proj",
+        },
+    )
    # Note: qkv_proj packing applies to non-k_eq_v layers (sliding
    # attention and full attention without k_eq_v). k_eq_v layers use
    # separate q_proj + k_proj without packing.