gemma: fix rope scaling for qat models (#12348)

* gemma: fix rope scaling for qat models
* gofumpt yourself

gemma: fix rope scaling for qat models (#12348)
* gemma: fix rope scaling for qat models
* gofumpt yourself
dba39b2e · Patrick Devine · GitHub · 9f3a37fd · dba39b2e · dba39b2e
Unverified Commit dba39b2e authored Sep 19, 2025 by Patrick Devine Committed by GitHub Sep 19, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 3 deletions

model/models/gemma2/model.go model/models/gemma2/model.go +1 -1

model/models/gemma3/model_text.go model/models/gemma3/model_text.go +5 -2

No files found.
--- a/model/models/gemma2/model.go
+++ b/model/models/gemma2/model.go
@@ -128,7 +128,7 @@ func (sa *SelfAttention) Forward(ctx ml.Context, hiddenState, positionIDs ml.Ten
 }

 func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return fast.RoPE(ctx, key, shift, m.Options.attnKeyLen, m.Options.ropeBase, m.Options.ropeScale, rope.WithTypeNeoX()), nil
+	return fast.RoPE(ctx, key, shift, m.Options.attnKeyLen, m.Options.ropeBase, 1/m.Options.ropeScale, rope.WithTypeNeoX()), nil
 }

 type MLP struct {

--- a/model/models/gemma3/model_text.go
+++ b/model/models/gemma3/model_text.go
@@ -53,7 +53,10 @@ func newTextModel(c fs.Config) *TextModel {
 			eps:            c.Float("attention.layer_norm_rms_epsilon", 1e-06),
 			ropeLocalBase:  c.Float("rope.local.freq_base", 10000.0),
 			ropeGlobalBase: c.Float("rope.global.freq_base", 1000000.0),
-			ropeScale:      c.Float("rope.scaling.factor", 1.0),
+			ropeScale:      1,
+			// NOTE: the rope.scaling.factor is set incorrectly in the official QAT weights
+			//       (8 instead of 1)
+			// ropeScale:      c.Float("rope.scaling.factor", 1.0),
 		},
 	}

@@ -113,7 +116,7 @@ func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.T
 		ropeBase = m.TextConfig.ropeGlobalBase
 	}

-	return fast.RoPE(ctx, key, shift, m.TextConfig.attnKeyLen, ropeBase, m.TextConfig.ropeScale, rope.WithTypeNeoX()), nil
+	return fast.RoPE(ctx, key, shift, m.TextConfig.attnKeyLen, ropeBase, 1/m.TextConfig.ropeScale, rope.WithTypeNeoX()), nil
 }

 type TextMLP struct {