Unverified Commit 201dc98a authored by Seungduk Kim's avatar Seungduk Kim Committed by GitHub
Browse files

Fix hard-coded parameter name in gemma3n.py (#27946)


Signed-off-by: default avatarSeungduk Kim <seungduk.kim@yanolja.com>
Signed-off-by: default avatarBiswa Panda <biswa.panda@gmail.com>
Co-authored-by: default avatargemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: default avatarBiswa Panda <biswa.panda@gmail.com>
Co-authored-by: default avatarNicolò Lucchesi <nlucches@redhat.com>
parent a404e2c0
......@@ -357,8 +357,27 @@ class Gemma3nAttention(nn.Module):
offset = 2 if self.sliding_window is not None else 1
kv_shared_layer_index = first_kv_shared_layer_idx - offset
if kv_shared_layer_index >= 0:
# Different model wrappers expose layer parameters under
# different parent attributes.
# For example:
# - Gemma3nForCausalLM → parameters live under "model.layers"
# - Gemma3nForConditionalGeneration →
# under "language_model.model.layers"
# This logic extracts the portion of the parameter name
# *before* ".layers."
# so downstream code can consistently reference the correct
# model root regardless of which wrapper class was used.
if ".layers." in prefix:
param_name_before_layers = prefix.split(".layers.")[0]
else:
raise ValueError(
"Unexpected prefix format for Gemma3nAttention: "
f"'{prefix}'. The prefix is expected to contain "
"'.layers.' to correctly determine the KV sharing "
"target layer."
)
# Only the greater layer is required to specify sharing.
kv_sharing_target_layer_name = f"language_model.model.layers.{kv_shared_layer_index}.self_attn.attn" # noqa: E501
kv_sharing_target_layer_name = f"{param_name_before_layers}.layers.{kv_shared_layer_index}.self_attn.attn" # noqa: E501
self.rotary_emb = get_rope(
self.head_dim,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment