OpenDAS / ollama
"vscode:/vscode.git/clone" did not exist on "76144badd130504abb519175a52c2f87c424bcba"
Commit 9b54267e
Authored Mar 08, 2025 by Patrick Devine; committed by Michael Yang, Mar 11, 2025

fix configs
Parent: 46bb0169
Showing 2 changed files with 45 additions and 15 deletions (+45 -15):

convert/convert_gemma3.go           +35 -7
model/models/gemma3/model_text.go   +10 -8
convert/convert_gemma3.go (view file @ 9b54267e)

@@ -10,6 +10,7 @@ type gemma3Model struct {
 	gemmaModel
 	Architecture string
 	TextModel    struct {
+		HeadDim          uint32 `json:"head_dim"`
 		HiddenSize       uint32 `json:"hidden_size"`
 		HiddenLayers     uint32 `json:"num_hidden_layers"`
 		IntermediateSize uint32 `json:"intermediate_size"`
@@ -36,15 +37,45 @@ type gemma3Model struct {
 		SlidingWindow    uint32 `json:"sliding_window"`
 	}
 }
 
+const (
+	gemma4BLayerCount  = 34
+	gemma12BLayerCount = 48
+	gemma27BLayerCount = 62
+)
+
 func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma3"
+	numBlocks := cmp.Or(p.HiddenLayers, p.TextModel.HiddenLayers)
+	kv["gemma3.block_count"] = numBlocks
+
+	var (
+		numHeads   uint32
+		numKVHeads uint32
+	)
+
+	switch numBlocks {
+	case gemma4BLayerCount:
+		numHeads = 8
+		numKVHeads = 4
+	case gemma12BLayerCount:
+		numHeads = 16
+		numKVHeads = 8
+	case gemma27BLayerCount:
+		numHeads = 32
+		numKVHeads = 16
+	default:
+		numHeads = p.NumAttentionHeads
+		numKVHeads = p.NumKeyValueHeads
+	}
+
+	kv["gemma3.attention.head_count"] = numHeads
+	kv["gemma3.attention.head_count_kv"] = numKVHeads
+
 	switch p.Architecture {
 	case "Gemma3ForCausalLM":
 		kv["gemma3.context_length"] = p.MaxPositionEmbeddings
-		kv["gemma3.attention.head_count"] = p.NumAttentionHeads
-		kv["gemma3.attention.head_count_kv"] = p.NumKeyValueHeads
 		kv["gemma3.text.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
 		kv["gemma3.attention.key_length"] = p.HeadDim
 		kv["gemma3.attention.value_length"] = p.HeadDim
@@ -53,11 +84,9 @@ func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
 		kv["gemma3.text.rope.local.freq_base"] = p.RopeLocalTheta
 		kv["gemma3.text.rope.global.freq_base"] = p.RopeGlobalTheta
 		kv["gemma3.embedding_length"] = p.HiddenSize
-		kv["gemma3.block_count"] = p.HiddenLayers
 		kv["gemma3.text.feed_forward_length"] = p.IntermediateSize
 	default:
 		kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
-		kv["gemma3.block_count"] = p.TextModel.HiddenLayers
 		kv["gemma3.text.feed_forward_length"] = p.TextModel.IntermediateSize
 		kv["gemma3.text.attention.sliding_window"] = p.TextModel.SlidingWindow
 		kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
@@ -68,11 +97,10 @@ func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
 		kv["gemma3.vision.num_channels"] = cmp.Or(p.VisionModel.NumChannels, 3)
 		kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
 		kv["gemma3.vision.attention.layer_norm_epsilon"] = cmp.Or(p.VisionModel.LayerNormEpsilon, 1e-6)
+		kv["gemma3.attention.key_length"] = cmp.Or(p.TextModel.HeadDim, 256)
+		kv["gemma3.attention.value_length"] = cmp.Or(p.TextModel.HeadDim, 256)
 	}
 
-	kv["tokenizer.ggml.bos_token_id"] = uint32(2)
-	kv["tokenizer.ggml.eot_token_id"] = uint32(1)
-
 	return kv
 }
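The substance of this file's change is that the converter now writes gemma3.block_count, gemma3.attention.head_count, and gemma3.attention.head_count_kv for every architecture, inferring the head counts from the layer count when it matches a known model size. A minimal standalone sketch of that inference (not part of the commit; the layer counts are the ones the commit hardcodes for the 4B/12B/27B text models):

package main

import (
	"cmp"
	"fmt"
)

// Layer counts the commit hardcodes for the Gemma 3 text models.
const (
	gemma4BLayerCount  = 34
	gemma12BLayerCount = 48
	gemma27BLayerCount = 62
)

// inferHeads mirrors the switch added in (p *gemma3Model).KV: map a known
// block count to its attention head and KV head counts, falling back to
// whatever the config declares for unknown sizes.
func inferHeads(numBlocks, cfgHeads, cfgKVHeads uint32) (uint32, uint32) {
	switch numBlocks {
	case gemma4BLayerCount:
		return 8, 4
	case gemma12BLayerCount:
		return 16, 8
	case gemma27BLayerCount:
		return 32, 16
	default:
		return cfgHeads, cfgKVHeads
	}
}

func main() {
	// cmp.Or returns its first non-zero argument. The converter uses it to
	// accept either config layout: text-only checkpoints carry the layer
	// count at the top level, multimodal ones nest it in the text model.
	var topLevel, nested uint32 = 0, 48
	numBlocks := cmp.Or(topLevel, nested)

	heads, kvHeads := inferHeads(numBlocks, 0, 0)
	fmt.Println(numBlocks, heads, kvHeads) // prints: 48 16 8
}

The same cmp.Or pattern supplies the literal defaults, e.g. cmp.Or(p.TextModel.HeadDim, 256) writes 256 when head_dim is absent from the config.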
model/models/gemma3/model_text.go (view file @ 9b54267e)

@@ -33,7 +33,7 @@ type TextModel struct {
 
 const (
 	gemmaGlobalCacheCount = 6
-	gemma27BLayerCount    = 46
+	gemma27BLayerCount    = 62
 )
 
 const (
@@ -42,6 +42,8 @@ const (
 )
 
 func newTextModel(c ml.Config) *TextModel {
+	numBlocks := int(c.Uint("block_count"))
+
 	m := TextModel{
 		SentencePieceModel: model.NewSentencePieceModel(
 			c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
@@ -53,11 +55,11 @@ func newTextModel(c ml.Config) *TextModel {
 				EOS: int32(c.Uint("tokenizer.ggml.eos_token_id")),
 			},
 		),
-		Layers: make([]TextLayer, c.Uint("block_count")),
+		Layers: make([]TextLayer, numBlocks),
 		TextOptions: &TextOptions{
 			hiddenSize: int(c.Uint("embedding_length")),
-			numHeads:   int(c.Uint("attention.head_count", 8)),
-			numKVHeads: int(c.Uint("attention.head_count_kv", 4)),
+			numHeads:   int(c.Uint("attention.head_count")),
+			numKVHeads: int(c.Uint("attention.head_count_kv")),
 			attnKeyLen: int(c.Uint("attention.key_length", 256)),
 			attnValLen: int(c.Uint("attention.value_length", 256)),
 			eps:        c.Float("text.attention.layer_norm_rms_epsilon", 1e-06),
@@ -68,6 +70,10 @@ func newTextModel(c ml.Config) *TextModel {
 		},
 	}
 
+	if numBlocks == gemma27BLayerCount {
+		m.largeModelScaling = true
+	}
+
 	return &m
 }
@@ -177,10 +183,6 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
 			hiddenState = hiddenState.Set(ctx, visionOutputs, offset*hiddenState.Stride(0))
 		}
 	}
 
-	if len(m.Layers) == gemma27BLayerCount {
-		m.TextOptions.largeModelScaling = true
-	}
-
 	for i, layer := range m.Layers {
 		// gemma alternates between the sliding window (local) and causal (global)
 		// kv cache every 6 layers
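The model-side changes mirror the converter: the head counts no longer get silent defaults (the old 8 and 4 were only correct for the 4B model), the 27B layer count is corrected from 46 to 62, and the largeModelScaling flag is now set once in newTextModel instead of being re-derived on every Forward call. A rough sketch of that construction-time pattern, using a hypothetical map-backed stand-in for the subset of ml.Config exercised here:

package main

import "fmt"

// config is a hypothetical stand-in for ml.Config; only Uint is sketched.
type config map[string]uint32

// Uint mimics the Config.Uint usage in the diff: return the stored value,
// or the optional default when the key is absent.
func (c config) Uint(key string, defaultValue ...uint32) uint32 {
	if v, ok := c[key]; ok {
		return v
	}
	if len(defaultValue) > 0 {
		return defaultValue[0]
	}
	return 0
}

const gemma27BLayerCount = 62

type textModel struct {
	numLayers         int
	largeModelScaling bool
}

// newTextModel reads block_count once and derives everything that depends
// on it, the same shape as the commit's version of the constructor.
func newTextModel(c config) *textModel {
	numBlocks := int(c.Uint("block_count"))
	m := &textModel{numLayers: numBlocks}
	if numBlocks == gemma27BLayerCount {
		m.largeModelScaling = true
	}
	return m
}

func main() {
	m := newTextModel(config{"block_count": 62})
	fmt.Println(m.numLayers, m.largeModelScaling) // prints: 62 true
}

Deciding the flag at construction also means Forward no longer mutates m.TextOptions during inference.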
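For context on the loop where the removed block used to sit: per the comment and the gemmaGlobalCacheCount constant, the layers alternate between a sliding-window (local) KV cache and a causal (global) one every six layers. A hedged sketch of one plausible selection predicate follows; the actual check lives outside the shown hunk, so treat the modulo form as an assumption:

package main

import "fmt"

const gemmaGlobalCacheCount = 6

// isGlobalLayer is an assumed predicate: every sixth layer uses the global
// (causal) cache, the rest use the local (sliding window) cache.
func isGlobalLayer(i int) bool {
	return (i+1)%gemmaGlobalCacheCount == 0
}

func main() {
	for i := 0; i < 12; i++ {
		kind := "local (sliding window)"
		if isGlobalLayer(i) {
			kind = "global (causal)"
		}
		fmt.Printf("layer %2d: %s\n", i, kind)
	}
}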