OpenDAS / ollama / Commits / c62861f4

Commit c62861f4
Authored Mar 07, 2025 by Patrick Devine; committed by Michael Yang, Mar 11, 2025

fix conversion

Parent: 0df18004

Showing 3 changed files with 57 additions and 42 deletions (+57 −42)
Files changed:

convert/convert.go                   +5  −2
convert/convert_gemma3.go            +41 −30
model/models/gemma3/model_text.go    +11 −10
convert/convert.go (view file @ c62861f4)
@@ -190,8 +190,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 		conv = &gemmaModel{}
 	case "Gemma2ForCausalLM":
 		conv = &gemma2Model{}
-	case "Gemma3ForConditionalGeneration":
-		conv = &gemma3Model{}
+	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
+		conv = &gemma3Model{Architecture: p.Architectures[0]}
 	case "Phi3ForCausalLM":
 		conv = &phi3Model{}
 	case "Qwen2ForCausalLM":
@@ -226,6 +226,9 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 	}

 	switch {
+	case vocabSize == 0:
+		slog.Warn("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
+		vocabSize = len(t.Vocabulary.Tokens)
 	case vocabSize > len(t.Vocabulary.Tokens):
 		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
 		for i := range vocabSize - len(t.Vocabulary.Tokens) {
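The new vocabSize == 0 case means a model that never declares its vocabulary size falls back to the tokenizer's actual token count instead of being padded from zero. A rough standalone sketch of that behaviour (simplified names and a placeholder token format chosen here for illustration; the real code works on the converter's Vocabulary struct and logs through slog):

package main

import "fmt"

// padVocab pads tokens out to vocabSize with dummy entries, defaulting
// vocabSize to len(tokens) when the model config never set it.
func padVocab(vocabSize int, tokens []string) []string {
	if vocabSize == 0 {
		vocabSize = len(tokens)
	}
	for i := len(tokens); i < vocabSize; i++ {
		tokens = append(tokens, fmt.Sprintf("[PAD%d]", i)) // dummy token; name is illustrative only
	}
	return tokens
}

func main() {
	fmt.Println(len(padVocab(0, []string{"a", "b"}))) // 2: falls back to the real token count
	fmt.Println(len(padVocab(4, []string{"a", "b"}))) // 4: padded with two dummy tokens
}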
convert/convert_gemma3.go (view file @ c62861f4)
@@ -4,7 +4,13 @@ import "github.com/ollama/ollama/fs/ggml"

 type gemma3Model struct {
 	gemmaModel
-	TextModel    gemma3TextModel `json:"text_config"`
+	Architecture string
+	TextModel    struct {
+		HiddenSize       uint32 `json:"hidden_size"`
+		HiddenLayers     uint32 `json:"num_hidden_layers"`
+		IntermediateSize uint32 `json:"intermediate_size"`
+		SlidingWindow    uint32 `json:"sliding_window"`
+	} `json:"text_config"`
 	VisionModel struct {
 		NumAttentionHeads uint32  `json:"num_attention_heads"` // attention.head_count 16
 		LayerNormEpsilon  float32 `json:"layer_norm_eps"`      // attention.layer_norm_epsilon 1e-05
@@ -15,41 +21,41 @@ type gemma3Model struct {
 		NumChannels       uint32  `json:"num_channels"`        // num_channels 3
 		PatchSize         uint32  `json:"patch_size"`          // patch_size 14
 	} `json:"vision_config"`
-}
-
-type gemma3TextModel struct {
 	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	HiddenLayers          uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
 	NumAttentionHeads     uint32  `json:"num_attention_heads"`
 	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
 	RMSNormEPS            float32 `json:"rms_norm_eps"`
 	HeadDim               uint32  `json:"head_dim"`
-	SlidingWindow         uint32  `json:"sliding_window"`
-	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
 	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
 	RopeLocalTheta        float32 `json:"rope_local_base_freq"`
 	RopeGlobalTheta       float32 `json:"rope_global_base_freq"`
+	SlidingWindow         uint32  `json:"sliding_window"`
 }

 func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma3"
-	kv["gemma3.context_length"] = p.TextModel.MaxPositionEmbeddings
+	switch p.Architecture {
+	case "Gemma3ForCausalLM":
+		kv["gemma3.context_length"] = p.MaxPositionEmbeddings
+		kv["gemma3.attention.head_count"] = p.NumAttentionHeads
+		kv["gemma3.attention.head_count_kv"] = p.NumKeyValueHeads
+		kv["gemma3.text.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
+		kv["gemma3.attention.key_length"] = p.HeadDim
+		kv["gemma3.attention.value_length"] = p.HeadDim
+		kv["gemma3.text.attention.sliding_window"] = p.SlidingWindow
+		kv["gemma3.text.final_logit_softcapping"] = p.FinalLogitSoftcap
+		kv["gemma3.text.rope.local.freq_base"] = p.RopeLocalTheta
+		kv["gemma3.text.rope.global.freq_base"] = p.RopeGlobalTheta
+		kv["gemma3.embedding_length"] = p.HiddenSize
+		kv["gemma3.block_count"] = p.HiddenLayers
+		kv["gemma3.text.feed_forward_length"] = p.IntermediateSize
+	default:
 	kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
 	kv["gemma3.block_count"] = p.TextModel.HiddenLayers
 	kv["gemma3.text.feed_forward_length"] = p.TextModel.IntermediateSize
-	kv["gemma3.attention.head_count"] = p.TextModel.NumAttentionHeads
-	kv["gemma3.attention.head_count_kv"] = p.TextModel.NumKeyValueHeads
-	kv["gemma3.text.attention.layer_norm_rms_epsilon"] = p.TextModel.RMSNormEPS
-	kv["gemma3.attention.key_length"] = p.TextModel.HeadDim
-	kv["gemma3.attention.value_length"] = p.TextModel.HeadDim
 	kv["gemma3.text.attention.sliding_window"] = p.TextModel.SlidingWindow
-	kv["gemma3.text.final_logit_softcapping"] = p.TextModel.FinalLogitSoftcap
-	kv["gemma3.text.rope.local.freq_base"] = p.TextModel.RopeLocalTheta
-	kv["gemma3.text.rope.global.freq_base"] = p.TextModel.RopeGlobalTheta
 	kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
 	kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
 	kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
@@ -58,6 +64,11 @@ func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
 	kv["gemma3.vision.num_channels"] = p.VisionModel.NumChannels
 	kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
 	kv["gemma3.vision.attention.layer_norm_epsilon"] = p.VisionModel.LayerNormEpsilon
+	}

 	kv["tokenizer.ggml.bos_token_id"] = uint32(2)
 	kv["tokenizer.ggml.eot_token_id"] = uint32(1)

 	return kv
 }
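The reason for branching on Architecture is that the two Gemma 3 checkpoint families lay out config.json differently: text-only "Gemma3ForCausalLM" models keep hidden_size, num_attention_heads, rope_*_base_freq and the rest at the top level, while multimodal "Gemma3ForConditionalGeneration" models nest the text parameters under "text_config" (and the image parameters under "vision_config"). A minimal, hypothetical sketch of why one Go struct with both flat fields and a nested text_config struct can absorb either shape (the type and values are cut down for illustration and are not the converter's real gemma3Model):

package main

import (
	"encoding/json"
	"fmt"
)

// cfg is a stand-in for gemma3Model: a flat field for the text-only layout,
// plus a nested struct for the multimodal "text_config" layout.
type cfg struct {
	Architecture string // filled in by the caller, not by config.json
	HiddenSize   uint32 `json:"hidden_size"`
	TextModel    struct {
		HiddenSize uint32 `json:"hidden_size"`
	} `json:"text_config"`
}

func main() {
	flat := []byte(`{"hidden_size": 1152}`)                    // Gemma3ForCausalLM style
	nested := []byte(`{"text_config": {"hidden_size": 2560}}`) // Gemma3ForConditionalGeneration style

	var a, b cfg
	_ = json.Unmarshal(flat, &a)   // errors ignored for brevity
	_ = json.Unmarshal(nested, &b)

	fmt.Println(a.HiddenSize, a.TextModel.HiddenSize) // 1152 0
	fmt.Println(b.HiddenSize, b.TextModel.HiddenSize) // 0 2560
}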
model/models/gemma3/model_text.go (view file @ c62861f4)
@@ -32,6 +32,7 @@ type TextModel struct {
 }

 const (
+	gemmaGlobalCacheCount = 6
 	gemma27BLayerCount    = 46
 )
@@ -55,15 +56,15 @@ func newTextModel(c ml.Config) *TextModel {
 		Layers: make([]TextLayer, c.Uint("block_count")),
 		TextOptions: &TextOptions{
 			hiddenSize:        int(c.Uint("embedding_length")),
-			numHeads:          int(c.Uint("attention.head_count")),
-			numKVHeads:        int(c.Uint("attention.head_count_kv")),
-			attnKeyLen:        int(c.Uint("attention.key_length")),
-			attnValLen:        int(c.Uint("attention.value_length")),
-			eps:               c.Float("text.attention.layer_norm_rms_epsilon"),
+			numHeads:          int(c.Uint("attention.head_count", 8)),
+			numKVHeads:        int(c.Uint("attention.head_count_kv", 4)),
+			attnKeyLen:        int(c.Uint("attention.key_length", 256)),
+			attnValLen:        int(c.Uint("attention.value_length", 256)),
+			eps:               c.Float("text.attention.layer_norm_rms_epsilon", 1e-06),
 			ropeLocalBase:     c.Float("text.rope.local.freq_base", 10000.0),
 			ropeGlobalBase:    c.Float("text.rope.global.freq_base", 1000000.0),
 			ropeScale:         c.Float("text.rope.freq_scale", 1.0),
-			finalLogitSoftcap: c.Float("text.final_logit_softcapping"),
+			finalLogitSoftcap: c.Float("text.final_logit_softcapping", 30.0),
 		},
 	}
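The extra argument added to these c.Uint / c.Float calls supplies a fallback that only applies when the key is missing from the converted model's metadata, so checkpoints that never carry, say, attention.head_count still load with sensible values. A small map-backed sketch of that lookup pattern (a hypothetical config type written for this example, not ollama's ml.Config):

package main

import "fmt"

// config mimics a metadata store where some keys may simply be absent.
type config map[string]uint32

// Uint returns the stored value, or the provided default when the key is missing.
func (c config) Uint(key string, defaultValue ...uint32) uint32 {
	if v, ok := c[key]; ok {
		return v
	}
	if len(defaultValue) > 0 {
		return defaultValue[0]
	}
	return 0
}

func main() {
	c := config{"embedding_length": 2560} // head counts deliberately not written

	fmt.Println(c.Uint("embedding_length"))           // 2560: key present, default unused
	fmt.Println(c.Uint("attention.head_count", 8))    // 8: key missing, falls back to the default
	fmt.Println(c.Uint("attention.head_count_kv", 4)) // 4: same fallback behaviour
}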
@@ -84,7 +85,7 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
 	ropeType := uint32(2)

 	ropeBase := opts.ropeLocalBase
-	if (layer+1)%6 == 0 {
+	if (layer+1)%gemmaGlobalCacheCount == 0 {
 		ropeBase = opts.ropeGlobalBase
 	}

@@ -116,7 +117,7 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
 func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
 	ropeBase := m.TextOptions.ropeLocalBase
-	if (layer+1)%6 == 0 {
+	if (layer+1)%gemmaGlobalCacheCount == 0 {
 		ropeBase = m.TextOptions.ropeGlobalBase
 	}

@@ -184,7 +185,7 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
 		// gemma alternates between the sliding window (local) and causal (global)
 		// kv cache every 6 layers
 		cacheType := cacheTypeSWA
-		if (i+1)%6 == 0 {
+		if (i+1)%gemmaGlobalCacheCount == 0 {
 			cacheType = cacheTypeCausal
 		}
 		cache.SetLayer(i)
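Replacing the literal 6 with gemmaGlobalCacheCount makes the alternation explicit: every sixth layer uses the global RoPE base frequency and the causal (global) KV cache, and all other layers use the sliding-window cache, exactly as the comment above describes. A quick sketch of how that plays out for a 46-layer model (the gemma27BLayerCount constant), using the same modulo test:

package main

import "fmt"

const (
	gemmaGlobalCacheCount = 6
	gemma27BLayerCount    = 46
)

func main() {
	var global []int
	for i := 0; i < gemma27BLayerCount; i++ {
		if (i+1)%gemmaGlobalCacheCount == 0 {
			global = append(global, i) // this layer gets the causal (global) cache
		}
	}
	// Layers 5, 11, 17, 23, 29, 35 and 41 use the global cache; the remaining
	// layers use the sliding-window (local) cache.
	fmt.Println(global)
}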