convert_gemma3.go
package convert

import "github.com/ollama/ollama/fs/ggml"

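// gemma3Model holds the parsed Hugging Face config.json for Gemma 3. It
// embeds the shared gemmaModel fields and adds the text_config and
// vision_config sections; the trailing comments on the vision fields note
// the corresponding GGUF keys and sample values.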
type gemma3Model struct {
	gemmaModel
	TextModel   gemma3TextModel `json:"text_config"`
	VisionModel struct {
		NumAttentionHeads uint32  `json:"num_attention_heads"` // attention.head_count 16
		LayerNormEpsilon  float32 `json:"layer_norm_eps"`      // attention.layer_norm_epsilon 1e-05
		NumHiddenLayers   uint32  `json:"num_hidden_layers"`   // block_count 32
		HiddenSize        uint32  `json:"hidden_size"`         // embedding_length 1280
		IntermediateSize  uint32  `json:"intermediate_size"`   // feed_forward_length 5120
		ImageSize         uint32  `json:"image_size"`          // image_size 560
		NumChannels       uint32  `json:"num_channels"`        // num_channels 3
		PatchSize         uint32  `json:"patch_size"`          // patch_size 14
	} `json:"vision_config"`
}

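// gemma3TextModel mirrors the text_config section of the configuration;
// each field maps directly onto the JSON key named in its struct tag.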
type gemma3TextModel struct {
	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
	HiddenSize            uint32  `json:"hidden_size"`
	HiddenLayers          uint32  `json:"num_hidden_layers"`
	IntermediateSize      uint32  `json:"intermediate_size"`
	NumAttentionHeads     uint32  `json:"num_attention_heads"`
	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
	RMSNormEPS            float32 `json:"rms_norm_eps"`
	HeadDim               uint32  `json:"head_dim"`
	SlidingWindow         uint32  `json:"sliding_window"`
	AttentionLogitSoftcap float32 `json:"attn_logit_softcapping"`
	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
	RopeLocalTheta        float32 `json:"rope_local_base_freq"`
	RopeGlobalTheta       float32 `json:"rope_global_base_freq"`
}

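// KV converts the parsed configuration into GGUF key-value metadata,
// starting from the common model parameters and layering on the
// gemma3-specific text, vision, and tokenizer entries.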
func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma3"
	kv["gemma3.context_length"] = p.TextModel.MaxPositionEmbeddings
	kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
	kv["gemma3.block_count"] = p.TextModel.HiddenLayers
	kv["gemma3.text.feed_forward_length"] = p.TextModel.IntermediateSize
	kv["gemma3.attention.head_count"] = p.TextModel.NumAttentionHeads
	kv["gemma3.attention.head_count_kv"] = p.TextModel.NumKeyValueHeads
	kv["gemma3.text.attention.layer_norm_rms_epsilon"] = p.TextModel.RMSNormEPS
	kv["gemma3.attention.key_length"] = p.TextModel.HeadDim
	kv["gemma3.attention.value_length"] = p.TextModel.HeadDim
	kv["gemma3.text.attention.sliding_window"] = p.TextModel.SlidingWindow
	kv["gemma3.text.final_logit_softcapping"] = p.TextModel.FinalLogitSoftcap
	kv["gemma3.text.rope.local.freq_base"] = p.TextModel.RopeLocalTheta
	kv["gemma3.text.rope.global.freq_base"] = p.TextModel.RopeGlobalTheta

	kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
	kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
	kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
	kv["gemma3.vision.image_size"] = p.VisionModel.ImageSize
	kv["gemma3.vision.patch_size"] = p.VisionModel.PatchSize
	kv["gemma3.vision.num_channels"] = p.VisionModel.NumChannels
	kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
	kv["gemma3.vision.attention.layer_norm_epsilon"] = p.VisionModel.LayerNormEpsilon

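	// Gemma 3 uses fixed special token ids: BOS is token 2 and end of turn is token 1.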
	kv["tokenizer.ggml.bos_token_id"] = uint32(2)
	kv["tokenizer.ggml.eot_token_id"] = uint32(1)
	return kv
}

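// Replacements returns (old, new) substring pairs used to rewrite Hugging
// Face tensor names into their GGUF equivalents.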
func (p *gemma3Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
		"vision_tower.vision_model.embeddings", "v",
		"vision_tower.vision_model", "v",
		"language_model.", "",
		"model.layers", "blk",
		"encoder.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.q_proj", "attn_q",
		"self_attn.q_norm", "attn_q_norm",
		"self_attn.k_proj", "attn_k",
		"self_attn.k_norm", "attn_k_norm",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
		"self_attn.out_proj", "attn_output",
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"post_attention_layernorm", "post_attention_norm",
		"pre_feedforward_layernorm", "ffn_norm",
		"post_feedforward_layernorm", "post_ffw_norm",
		"input_projection_weight", "input_projection.weight",
		"multi_modal_projector", "mm",
	}
}
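
// Example usage (a minimal sketch, not this package's actual entry point;
// configJSON and tokenizer are assumed to come from the surrounding
// converter, and the encoding/json and strings imports would be needed):
//
//	var p gemma3Model
//	if err := json.Unmarshal(configJSON, &p); err != nil {
//		return err
//	}
//	kv := p.KV(tokenizer) // GGUF metadata for the converted model
//	r := strings.NewReplacer(p.Replacements()...)
//	name := r.Replace("model.layers.0.self_attn.q_proj.weight") // "blk.0.attn_q.weight"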