OpenDAS / ollama
Commit c62861f4
authored Mar 07, 2025 by Patrick Devine
committed by Michael Yang, Mar 11, 2025

fix conversion

Parent: 0df18004

Showing 3 changed files with 57 additions and 42 deletions (+57 -42)
convert/convert.go                   +5  -2
convert/convert_gemma3.go            +41 -30
model/models/gemma3/model_text.go    +11 -10
convert/convert.go  (view file @ c62861f4)

@@ -190,8 +190,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 		conv = &gemmaModel{}
 	case "Gemma2ForCausalLM":
 		conv = &gemma2Model{}
-	case "Gemma3ForConditionalGeneration":
-		conv = &gemma3Model{}
+	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
+		conv = &gemma3Model{Architecture: p.Architectures[0]}
 	case "Phi3ForCausalLM":
 		conv = &phi3Model{}
 	case "Qwen2ForCausalLM":

@@ -226,6 +226,9 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 	}

 	switch {
+	case vocabSize == 0:
+		slog.Warn("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
+		vocabSize = len(t.Vocabulary.Tokens)
 	case vocabSize > len(t.Vocabulary.Tokens):
 		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
 		for i := range vocabSize - len(t.Vocabulary.Tokens) {
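The vocabulary handling added in the second hunk can be read in isolation. Below is a minimal, self-contained sketch of that branch logic (Go 1.22+ for the integer range); the vocabulary type, the padding-token format, and the score/type values are illustrative stand-ins, not code from this repository — only the switch structure mirrors the diff.

package main

import (
	"fmt"
	"log/slog"
)

// vocabulary is an illustrative stand-in for the converter's tokenizer vocabulary.
type vocabulary struct {
	Tokens []string
	Scores []float32
	Types  []int32
}

// resolveVocabSize mirrors the switch added in this hunk: if the model config
// never set a vocab size, fall back to the tokenizer's token count; if it
// declared more tokens than the tokenizer provides, pad with dummy tokens.
func resolveVocabSize(vocabSize int, v *vocabulary) int {
	switch {
	case vocabSize == 0:
		slog.Warn("vocabulary size was not explicitly set by the model",
			"default size", len(v.Tokens))
		vocabSize = len(v.Tokens)
	case vocabSize > len(v.Tokens):
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens",
			"expect", vocabSize, "actual", len(v.Tokens))
		for i := range vocabSize - len(v.Tokens) {
			v.Tokens = append(v.Tokens, fmt.Sprintf("[PAD%d]", i)) // assumed dummy-token format
			v.Scores = append(v.Scores, -1000.0)                   // assumed placeholder score
			v.Types = append(v.Types, 1)                           // assumed placeholder token type
		}
	}
	return vocabSize
}

func main() {
	v := &vocabulary{Tokens: []string{"<bos>", "<eos>", "hello"}}
	fmt.Println(resolveVocabSize(0, v)) // 3: falls back to tokenizer size
	fmt.Println(resolveVocabSize(5, v)) // 5: two dummy tokens appended
}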
convert/convert_gemma3.go  (view file @ c62861f4)

@@ -4,7 +4,13 @@ import "github.com/ollama/ollama/fs/ggml"
 
 type gemma3Model struct {
 	gemmaModel
-	TextModel   gemma3TextModel `json:"text_config"`
+	Architecture string
+	TextModel    struct {
+		HiddenSize       uint32 `json:"hidden_size"`
+		HiddenLayers     uint32 `json:"num_hidden_layers"`
+		IntermediateSize uint32 `json:"intermediate_size"`
+		SlidingWindow    uint32 `json:"sliding_window"`
+	} `json:"text_config"`
 	VisionModel struct {
 		NumAttentionHeads uint32  `json:"num_attention_heads"` // attention.head_count 16
 		LayerNormEpsilon  float32 `json:"layer_norm_eps"`      // attention.layer_norm_epsilon 1e-05
@@ -15,41 +21,41 @@ type gemma3Model struct {
 		NumChannels uint32 `json:"num_channels"` // num_channels 3
 		PatchSize   uint32 `json:"patch_size"`   // patch_size 14
 	} `json:"vision_config"`
-}
-
-type gemma3TextModel struct {
-	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
-	HiddenSize            uint32  `json:"hidden_size"`
-	HiddenLayers          uint32  `json:"num_hidden_layers"`
-	IntermediateSize      uint32  `json:"intermediate_size"`
-	NumAttentionHeads     uint32  `json:"num_attention_heads"`
-	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
-	RMSNormEPS            float32 `json:"rms_norm_eps"`
-	HeadDim               uint32  `json:"head_dim"`
-	SlidingWindow         uint32  `json:"sliding_window"`
-	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
-	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
-	RopeLocalTheta        float32 `json:"rope_local_base_freq"`
-	RopeGlobalTheta       float32 `json:"rope_global_base_freq"`
+	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
+	NumAttentionHeads     uint32  `json:"num_attention_heads"`
+	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
+	RMSNormEPS            float32 `json:"rms_norm_eps"`
+	HeadDim               uint32  `json:"head_dim"`
+	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
+	RopeLocalTheta        float32 `json:"rope_local_base_freq"`
+	RopeGlobalTheta       float32 `json:"rope_global_base_freq"`
+	SlidingWindow         uint32  `json:"sliding_window"`
 }
 
 func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
 	kv := p.ModelParameters.KV(t)
 	kv["general.architecture"] = "gemma3"
-	kv["gemma3.context_length"] = p.TextModel.MaxPositionEmbeddings
-	kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
-	kv["gemma3.block_count"] = p.TextModel.HiddenLayers
-	kv["gemma3.text.feed_forward_length"] = p.TextModel.IntermediateSize
-	kv["gemma3.attention.head_count"] = p.TextModel.NumAttentionHeads
-	kv["gemma3.attention.head_count_kv"] = p.TextModel.NumKeyValueHeads
-	kv["gemma3.text.attention.layer_norm_rms_epsilon"] = p.TextModel.RMSNormEPS
-	kv["gemma3.attention.key_length"] = p.TextModel.HeadDim
-	kv["gemma3.attention.value_length"] = p.TextModel.HeadDim
-	kv["gemma3.text.attention.sliding_window"] = p.TextModel.SlidingWindow
-	kv["gemma3.text.final_logit_softcapping"] = p.TextModel.FinalLogitSoftcap
-	kv["gemma3.text.rope.local.freq_base"] = p.TextModel.RopeLocalTheta
-	kv["gemma3.text.rope.global.freq_base"] = p.TextModel.RopeGlobalTheta
-	kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
-	kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
-	kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
+
+	switch p.Architecture {
+	case "Gemma3ForCausalLM":
+		kv["gemma3.context_length"] = p.MaxPositionEmbeddings
+		kv["gemma3.attention.head_count"] = p.NumAttentionHeads
+		kv["gemma3.attention.head_count_kv"] = p.NumKeyValueHeads
+		kv["gemma3.text.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
+		kv["gemma3.attention.key_length"] = p.HeadDim
+		kv["gemma3.attention.value_length"] = p.HeadDim
+		kv["gemma3.text.attention.sliding_window"] = p.SlidingWindow
+		kv["gemma3.text.final_logit_softcapping"] = p.FinalLogitSoftcap
+		kv["gemma3.text.rope.local.freq_base"] = p.RopeLocalTheta
+		kv["gemma3.text.rope.global.freq_base"] = p.RopeGlobalTheta
+		kv["gemma3.embedding_length"] = p.HiddenSize
+		kv["gemma3.block_count"] = p.HiddenLayers
+		kv["gemma3.text.feed_forward_length"] = p.IntermediateSize
+	default:
+		kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
+		kv["gemma3.block_count"] = p.TextModel.HiddenLayers
+		kv["gemma3.text.feed_forward_length"] = p.TextModel.IntermediateSize
+		kv["gemma3.text.attention.sliding_window"] = p.TextModel.SlidingWindow
+		kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
+		kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
+		kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
@@ -58,6 +64,11 @@ func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
-	kv["gemma3.vision.num_channels"] = p.VisionModel.NumChannels
-	kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
-	kv["gemma3.vision.attention.layer_norm_epsilon"] = p.VisionModel.LayerNormEpsilon
+		kv["gemma3.vision.num_channels"] = p.VisionModel.NumChannels
+		kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
+		kv["gemma3.vision.attention.layer_norm_epsilon"] = p.VisionModel.LayerNormEpsilon
+	}
+
+	kv["tokenizer.ggml.bos_token_id"] = uint32(2)
+	kv["tokenizer.ggml.eot_token_id"] = uint32(1)
 
 	return kv
 }
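The reshaped struct and the new switch on p.Architecture exist because the two Gemma 3 checkpoint flavors lay out config.json differently: Gemma3ForCausalLM keeps its text hyperparameters at the top level, while Gemma3ForConditionalGeneration nests them under text_config. A hedged sketch of that idea follows; the pared-down hfConfig type and the embeddingLength helper are illustrative only, not types from the convert package.

package main

import (
	"encoding/json"
	"fmt"
)

// hfConfig is a minimal, illustrative view of the two config.json shapes
// this commit has to handle.
type hfConfig struct {
	Architectures []string `json:"architectures"`
	HiddenSize    uint32   `json:"hidden_size"` // present for text-only checkpoints
	TextConfig    struct {
		HiddenSize uint32 `json:"hidden_size"` // present for multimodal checkpoints
	} `json:"text_config"`
}

// embeddingLength picks the field that actually exists for the reported
// architecture, the same idea as the switch on p.Architecture in KV().
func embeddingLength(raw []byte) (uint32, error) {
	var c hfConfig
	if err := json.Unmarshal(raw, &c); err != nil {
		return 0, err
	}
	switch c.Architectures[0] {
	case "Gemma3ForCausalLM":
		return c.HiddenSize, nil
	default:
		return c.TextConfig.HiddenSize, nil
	}
}

func main() {
	textOnly := []byte(`{"architectures":["Gemma3ForCausalLM"],"hidden_size":1152}`)
	multimodal := []byte(`{"architectures":["Gemma3ForConditionalGeneration"],"text_config":{"hidden_size":2560}}`)
	fmt.Println(embeddingLength(textOnly))   // 1152 <nil>
	fmt.Println(embeddingLength(multimodal)) // 2560 <nil>
}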
model/models/gemma3/model_text.go  (view file @ c62861f4)

@@ -32,6 +32,7 @@ type TextModel struct {
 }
 
 const (
+	gemmaGlobalCacheCount = 6
 	gemma27BLayerCount = 46
 )
@@ -55,15 +56,15 @@ func newTextModel(c ml.Config) *TextModel {
 		Layers: make([]TextLayer, c.Uint("block_count")),
 		TextOptions: &TextOptions{
 			hiddenSize:        int(c.Uint("embedding_length")),
-			numHeads:          int(c.Uint("attention.head_count")),
-			numKVHeads:        int(c.Uint("attention.head_count_kv")),
-			attnKeyLen:        int(c.Uint("attention.key_length")),
-			attnValLen:        int(c.Uint("attention.value_length")),
-			eps:               c.Float("text.attention.layer_norm_rms_epsilon"),
+			numHeads:          int(c.Uint("attention.head_count", 8)),
+			numKVHeads:        int(c.Uint("attention.head_count_kv", 4)),
+			attnKeyLen:        int(c.Uint("attention.key_length", 256)),
+			attnValLen:        int(c.Uint("attention.value_length", 256)),
+			eps:               c.Float("text.attention.layer_norm_rms_epsilon", 1e-06),
 			ropeLocalBase:     c.Float("text.rope.local.freq_base", 10000.0),
 			ropeGlobalBase:    c.Float("text.rope.global.freq_base", 1000000.0),
 			ropeScale:         c.Float("text.rope.freq_scale", 1.0),
-			finalLogitSoftcap: c.Float("text.final_logit_softcapping"),
+			finalLogitSoftcap: c.Float("text.final_logit_softcapping", 30.0),
 		},
 	}
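The defaults added to newTextModel cover keys the converter no longer writes for multimodal checkpoints, whose text_config only carries hidden_size, num_hidden_layers, intermediate_size, and sliding_window. A rough sketch of how such defaulted lookups behave, using a toy kvConfig map in place of the real ml.Config (the type, method, and stored values here are assumptions for illustration):

package main

import "fmt"

// kvConfig stands in for ml.Config: GGUF metadata with lookup helpers that
// take an optional default, which is how the values added in this hunk
// (head_count 8, key_length 256, ...) kick in when the converter did not
// write the key.
type kvConfig map[string]any

func (c kvConfig) Uint(key string, defaultValue ...uint32) uint32 {
	if v, ok := c[key].(uint32); ok {
		return v
	}
	if len(defaultValue) > 0 {
		return defaultValue[0]
	}
	return 0
}

func main() {
	// A multimodal text_config only yields a few keys; the rest fall back.
	c := kvConfig{"embedding_length": uint32(2560)}
	fmt.Println(c.Uint("embedding_length"))          // 2560 (from metadata)
	fmt.Println(c.Uint("attention.head_count", 8))   // 8 (default)
	fmt.Println(c.Uint("attention.key_length", 256)) // 256 (default)
}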
@@ -84,7 +85,7 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
 	ropeType := uint32(2)
 	ropeBase := opts.ropeLocalBase
-	if (layer+1)%6 == 0 {
+	if (layer+1)%gemmaGlobalCacheCount == 0 {
 		ropeBase = opts.ropeGlobalBase
 	}
@@ -116,7 +117,7 @@ func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, pos
 func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
 	ropeBase := m.TextOptions.ropeLocalBase
-	if (layer+1)%6 == 0 {
+	if (layer+1)%gemmaGlobalCacheCount == 0 {
 		ropeBase = m.TextOptions.ropeGlobalBase
 	}
@@ -184,7 +185,7 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
 		// gemma alternates between the sliding window (local) and causal (global)
 		// kv cache every 6 layers
 		cacheType := cacheTypeSWA
-		if (i+1)%6 == 0 {
+		if (i+1)%gemmaGlobalCacheCount == 0 {
 			cacheType = cacheTypeCausal
 		}
 		cache.SetLayer(i)
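The new gemmaGlobalCacheCount constant names the layer pattern that was previously a bare %6 in three places: every sixth layer uses the global (causal) KV cache and the global rope base, while the rest use the sliding-window cache and the local base. A small standalone sketch of that selection, with the rope-base values taken from the defaults in this diff:

package main

import "fmt"

const gemmaGlobalCacheCount = 6

// isGlobalLayer mirrors the (layer+1)%gemmaGlobalCacheCount == 0 checks that
// this file now shares via a named constant.
func isGlobalLayer(layer int) bool {
	return (layer+1)%gemmaGlobalCacheCount == 0
}

func main() {
	for layer := 0; layer < 12; layer++ {
		kind, ropeBase := "sliding-window", 10000.0
		if isGlobalLayer(layer) {
			kind, ropeBase = "global/causal", 1000000.0
		}
		fmt.Printf("layer %2d: %-14s ropeBase=%g\n", layer, kind, ropeBase)
	}
}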