Unverified commit 92feb999, authored by ShubyM, committed by GitHub
Browse files

[Gemma4][Bugfix]: Enable Gemma4ForCausalLM to load LoRA adapters correctly (#38844)


Signed-off-by: ShubyM <shubymishra20@gmail.com>
parent d4cb783c
......@@ -5,7 +5,9 @@ import pytest
from vllm.lora.lora_model import LoRAModel
from vllm.lora.peft_helper import PEFTHelper
from vllm.lora.utils import parse_fine_tuned_lora_name
from vllm.model_executor.models.baichuan import BaiChuanBaseForCausalLM
from vllm.model_executor.models.gemma4 import Gemma4ForCausalLM
from vllm.model_executor.models.utils import WeightsMapper
lora_lst = ["baichuan7B", "baichuan7B-zero", "baichuan7B-zero-regex", "chatglm3-6b"]
......@@ -128,3 +130,24 @@ def test_lora_weights_mapping(baichuan_lora_files):
for name in lora_model.loras:
assert name.startswith(hf_to_vllm_mapper.orig_to_new_prefix["model."])
assert ".baichuan_layers." in name
def test_gemma4_lora_weights_mapping():
    """Verify Gemma4 name remapping for a dense-MLP LoRA adapter key.

    The adapter key below is written under
    ``base_model.model.model.language_model.layers...``. Parsing it with
    ``Gemma4ForCausalLM.hf_to_vllm_mapper`` is expected to yield the module
    name ``model.layers.9.mlp.down_proj`` together with the flag ``True``
    for this ``lora_A`` weight.
    """
    hf_lora_key = (
        "base_model.model.model.language_model.layers.9.mlp.down_proj"
        ".lora_A.weight"
    )
    expected = ("model.layers.9.mlp.down_proj", True)

    parsed = parse_fine_tuned_lora_name(
        hf_lora_key, Gemma4ForCausalLM.hf_to_vllm_mapper
    )
    assert parsed == expected
def test_gemma4_moe_lora_weights_mapping():
    """Verify Gemma4 name remapping for a MoE expert LoRA adapter key.

    The adapter key below addresses ``...moe.experts.gate_up_proj`` under
    ``base_model.model.model.language_model.layers...``. Parsing it with
    ``Gemma4ForCausalLM.hf_to_vllm_mapper`` is expected to yield the module
    name ``model.layers.9.moe.gate_up_proj`` (no ``experts`` segment)
    together with the flag ``False`` for this ``lora_B`` weight.
    """
    hf_lora_key = "".join(
        [
            "base_model.model.model.language_model.layers.9.moe.experts.",
            "gate_up_proj.lora_B.weight",
        ]
    )
    expected = ("model.layers.9.moe.gate_up_proj", False)

    parsed = parse_fine_tuned_lora_name(
        hf_lora_key, Gemma4ForCausalLM.hf_to_vllm_mapper
    )
    assert parsed == expected
......@@ -69,6 +69,7 @@ from .interfaces import (
)
from .utils import (
AutoWeightsLoader,
WeightsMapper,
extract_layer_index,
is_pp_missing_parameter,
make_layers,
......@@ -1397,6 +1398,22 @@ class Gemma4Model(nn.Module, EagleModelMixin):
class Gemma4ForCausalLM(
nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts, SupportsEagle3
):
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
# Gemma4ForConditionalGeneration already loads the text stack
# from `model.language_model.*`. We reuse that same checkpoint
# and adapter naming for the text-only Gemma4ForCausalLM path,
# so LoRA keys from the conditional wrapper map onto `model.*`.
"model.language_model.": "model.",
},
orig_to_new_substr={
# Gemma4ForConditionalGeneration names MoE adapter targets under
# `...moe.experts.*`, while the text-only model exposes them
# under `...moe.*`.
".moe.experts.gate_up_proj": ".moe.gate_up_proj",
".moe.experts.down_proj": ".moe.down_proj",
},
)
# Note: qkv_proj packing applies to non-k_eq_v layers (sliding
# attention and full attention without k_eq_v). k_eq_v layers use
# separate q_proj + k_proj without packing.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment