OpenDAS / ollama · Commits · 1b308e1d

Unverified commit 1b308e1d, authored Dec 12, 2025 by Jeffrey Morgan, committed by GitHub on Dec 12, 2025
Parent: bd6c1d6b

model: fix global layer rope scale values for gemma 3 (#13452)

Showing 1 changed file with 13 additions and 12 deletions:
model/models/gemma3/model_text.go (+13, -12)
model/models/gemma3/model_text.go
@@ -28,10 +28,10 @@ type TextConfig struct {
 	finalLogitSoftcap float32
 }
 
-func (o TextConfig) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions ml.Tensor, base float32) ml.Tensor {
+func (o TextConfig) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions ml.Tensor, base, scale float32) ml.Tensor {
 	ropeOpts := []func(*rope.Options){rope.WithTypeNeoX()}
 	if o.ropeType == "yarn" {
-		attnFactor := float32(1.0 / (1.0 + 0.1*math.Log(float64(o.ropeScale))))
+		attnFactor := float32(1.0 / (1.0 + 0.1*math.Log(float64(scale))))
 		ropeOpts = append(ropeOpts,
 			rope.WithOriginalContextLength(o.ropeOriginalContext),
 			rope.WithExtrapolationFactor(o.ropeExtrapolation),
@@ -41,7 +41,7 @@ func (o TextConfig) applyRotaryPositionEmbeddings(ctx ml.Context, states, positi
 		)
 	}
 
-	return nn.RoPE(ctx, states, positions, o.attnKeyLen, base, 1./o.ropeScale, ropeOpts...)
+	return nn.RoPE(ctx, states, positions, o.attnKeyLen, base, 1./scale, ropeOpts...)
 }
 
 type TextModel struct {
@@ -83,7 +83,7 @@ func newTextModel(c fs.Config) *TextModel {
 			ropeExtrapolation: c.Float("rope.scaling.extrapolation_factor", 1.0),
 			ropeBetaFast:      c.Float("rope.scaling.beta_fast", 64.0),
 			ropeBetaSlow:      c.Float("rope.scaling.beta_slow", 1.0),
-			ropeScale:         c.Float("rope.scaling.factor", 1.0),
+			ropeScale:         c.Float("rope.scaling.factor", 8.0),
 			finalLogitSoftcap: c.Float("final_logit_softcapping", 0.0),
 		},
 	}
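The default change above matters for checkpoints whose GGUF metadata omits "rope.scaling.factor": they now fall back to 8.0 (Gemma 3's global-layer rope scaling) instead of 1.0. A rough sketch of that lookup-with-default behaviour, using a hypothetical map-backed helper rather than ollama's fs.Config:

// floatOr is a hypothetical stand-in for a metadata lookup with a default;
// it is not part of ollama, only an illustration of the fallback.
func floatOr(meta map[string]float32, key string, def float32) float32 {
	if v, ok := meta[key]; ok {
		return v
	}
	return def
}

// With metadata that has no "rope.scaling.factor" entry,
// floatOr(meta, "rope.scaling.factor", 8.0) returns 8.0, so global
// layers are scaled even when the value is absent from the model file.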
@@ -117,31 +117,31 @@ type TextSelfAttention struct {
 	Output    *nn.Linear `gguf:"attn_output"`
 }
 
-func (opts *TextConfig) ropeBaseForLayer(layer int) float32 {
+func (opts *TextConfig) ropeValuesForLayer(layer int) (base float32, scale float32) {
 	if opts.slidingWindowPattern != nil && opts.slidingWindowPattern[layer] {
-		return opts.ropeLocalBase
+		return opts.ropeLocalBase, 1.0
 	}
 
 	// Standard Gemma3: only every n-th layer is global,
 	// where n = gemmaGlobalCacheCount, otherwise use
 	// the local rope base
 	if (layer+1)%gemmaGlobalCacheCount > 0 {
-		return opts.ropeLocalBase
+		return opts.ropeLocalBase, 1.0
 	}
 
 	// default to global rope base
-	return opts.ropeBase
+	return opts.ropeBase, opts.ropeScale
 }
 
 func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, positionIDs ml.Tensor, cache kvcache.Cache, opts *TextConfig) ml.Tensor {
 	batchSize := hiddenState.Dim(1)
-	ropeBase := opts.ropeBaseForLayer(layer)
+	ropeBase, ropeScale := opts.ropeValuesForLayer(layer)
 
 	q := sa.Query.Forward(ctx, hiddenState)
 	q = q.Reshape(ctx, opts.attnKeyLen, opts.numHeads, batchSize)
 	q = sa.QueryNorm.Forward(ctx, q, opts.eps)
-	q = opts.applyRotaryPositionEmbeddings(ctx, q, positionIDs, ropeBase)
+	q = opts.applyRotaryPositionEmbeddings(ctx, q, positionIDs, ropeBase, ropeScale)
 
 	if opts.largeModelScaling {
 		q = q.Scale(ctx, 1.0/math.Sqrt(float64(opts.hiddenSize/opts.numHeads)))
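The renamed helper above is the core of the fix: local (sliding-window) layers keep a rope scale of 1.0, and only the global layers pick up opts.ropeScale. Below is a self-contained sketch of that selection for a model without an explicit sliding-window pattern, assuming the usual Gemma 3 arrangement of one global layer per six (the value of gemmaGlobalCacheCount and the sample base values are assumptions for illustration, not taken from this diff).

package main

import "fmt"

// ropeValues mimics ropeValuesForLayer: every globalEvery-th layer is
// global and gets the global base and scale; all other layers stay
// local with scale 1.
func ropeValues(layer, globalEvery int, localBase, globalBase, globalScale float32) (base, scale float32) {
	if (layer+1)%globalEvery > 0 {
		return localBase, 1.0
	}
	return globalBase, globalScale
}

func main() {
	for layer := 0; layer < 12; layer++ {
		base, scale := ropeValues(layer, 6, 10000, 1000000, 8)
		fmt.Printf("layer %2d: base=%g scale=%g\n", layer, base, scale)
	}
	// Layers 5 and 11 print base=1e+06 scale=8; all others print
	// base=10000 scale=1, matching the 5-local / 1-global pattern.
}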
@@ -152,7 +152,7 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
 	k := sa.Key.Forward(ctx, hiddenState)
 	k = k.Reshape(ctx, opts.attnKeyLen, opts.numKVHeads, batchSize)
 	k = sa.KeyNorm.Forward(ctx, k, opts.eps)
-	k = opts.applyRotaryPositionEmbeddings(ctx, k, positionIDs, ropeBase)
+	k = opts.applyRotaryPositionEmbeddings(ctx, k, positionIDs, ropeBase, ropeScale)
 
 	v := sa.Value.Forward(ctx, hiddenState)
 	v = v.Reshape(ctx, opts.attnValLen, opts.numKVHeads, batchSize)
@@ -165,7 +165,8 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
 }
 
 func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
-	return m.applyRotaryPositionEmbeddings(ctx, key, shift, m.TextConfig.ropeBaseForLayer(layer)), nil
+	ropeBase, ropeScale := m.TextConfig.ropeValuesForLayer(layer)
+	return m.applyRotaryPositionEmbeddings(ctx, key, shift, ropeBase, ropeScale), nil
 }
 
 type TextMLP struct {
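The Shift change keeps KV-cache shifting consistent with the forward pass: when cached keys are re-rotated after a context shift, they must use the same per-layer base and scale that produced them, otherwise a global layer's keys would be shifted with scale 1.0 even though they were originally rotated with scale 8.0. A small sketch of that invariant, with hypothetical names standing in for the per-layer values:

// sameRopeParams expresses the invariant the Shift change preserves:
// for every layer, Forward and Shift must see identical (base, scale)
// pairs from ropeValuesForLayer.
func sameRopeParams(forwardBase, forwardScale, shiftBase, shiftScale float32) bool {
	return forwardBase == shiftBase && forwardScale == shiftScale
}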