package convert

import (
	"cmp"

	"github.com/ollama/ollama/fs/ggml"
)

type gemma3Model struct {
	gemmaModel
Patrick Devine's avatar
Patrick Devine committed
11
12
13
14
15
16
17
	Architecture string
	TextModel    struct {
		HiddenSize       uint32 `json:"hidden_size"`
		HiddenLayers     uint32 `json:"num_hidden_layers"`
		IntermediateSize uint32 `json:"intermediate_size"`
		SlidingWindow    uint32 `json:"sliding_window"`
	} `json:"text_config"`
Michael Yang's avatar
Michael Yang committed
18
19
20
21
22
23
24
25
26
27
	VisionModel struct {
		NumAttentionHeads uint32  `json:"num_attention_heads"` // attention.head_count 16
		LayerNormEpsilon  float32 `json:"layer_norm_eps"`      // attention.layer_norm_epsilon 1e-05
		NumHiddenLayers   uint32  `json:"num_hidden_layers"`   // block_count 32
		HiddenSize        uint32  `json:"hidden_size"`         // embedding_length 1280
		IntermediateSize  uint32  `json:"intermediate_size"`   // feed_forward_length 5120
		ImageSize         uint32  `json:"image_size"`          // image_size 560
		NumChannels       uint32  `json:"num_channels"`        // num_channels 3
		PatchSize         uint32  `json:"patch_size"`          // patch_size 14
	} `json:"vision_config"`
Patrick Devine's avatar
Patrick Devine committed
28
29
30
31
32
33
34
35
	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
	NumAttentionHeads     uint32  `json:"num_attention_heads"`
	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
	RMSNormEPS            float32 `json:"rms_norm_eps"`
	HeadDim               uint32  `json:"head_dim"`
	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
	RopeLocalTheta        float32 `json:"rope_local_base_freq"`
	RopeGlobalTheta       float32 `json:"rope_global_base_freq"`
Patrick Devine's avatar
Patrick Devine committed
36
	SlidingWindow         uint32  `json:"sliding_window"`
Patrick Devine's avatar
Patrick Devine committed
37
38
39
40
41
}

func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma3"
Michael Yang's avatar
Michael Yang committed
42

Patrick Devine's avatar
Patrick Devine committed
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
	switch p.Architecture {
	case "Gemma3ForCausalLM":
		kv["gemma3.context_length"] = p.MaxPositionEmbeddings
		kv["gemma3.attention.head_count"] = p.NumAttentionHeads
		kv["gemma3.attention.head_count_kv"] = p.NumKeyValueHeads
		kv["gemma3.text.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
		kv["gemma3.attention.key_length"] = p.HeadDim
		kv["gemma3.attention.value_length"] = p.HeadDim
		kv["gemma3.text.attention.sliding_window"] = p.SlidingWindow
		kv["gemma3.text.final_logit_softcapping"] = p.FinalLogitSoftcap
		kv["gemma3.text.rope.local.freq_base"] = p.RopeLocalTheta
		kv["gemma3.text.rope.global.freq_base"] = p.RopeGlobalTheta
		kv["gemma3.embedding_length"] = p.HiddenSize
		kv["gemma3.block_count"] = p.HiddenLayers
		kv["gemma3.text.feed_forward_length"] = p.IntermediateSize
	default:
		kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
		kv["gemma3.block_count"] = p.TextModel.HiddenLayers
		kv["gemma3.text.feed_forward_length"] = p.TextModel.IntermediateSize
		kv["gemma3.text.attention.sliding_window"] = p.TextModel.SlidingWindow
		kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
		kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
		kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
		kv["gemma3.vision.image_size"] = p.VisionModel.ImageSize
		kv["gemma3.vision.patch_size"] = p.VisionModel.PatchSize
Michael Yang's avatar
Michael Yang committed
68
		kv["gemma3.vision.num_channels"] = cmp.Or(p.VisionModel.NumChannels, 3)
Patrick Devine's avatar
Patrick Devine committed
69
		kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
Michael Yang's avatar
Michael Yang committed
70
		kv["gemma3.vision.attention.layer_norm_epsilon"] = cmp.Or(p.VisionModel.LayerNormEpsilon, 1e-6)
Patrick Devine's avatar
Patrick Devine committed
71
72
73
74
75
	}

	kv["tokenizer.ggml.bos_token_id"] = uint32(2)
	kv["tokenizer.ggml.eot_token_id"] = uint32(1)

Patrick Devine's avatar
Patrick Devine committed
76
77
78
79
80
81
82
83
	return kv
}

func (p *gemma3Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
Michael Yang's avatar
Michael Yang committed
84
85
		"vision_tower.vision_model.embeddings", "v",
		"vision_tower.vision_model", "v",
Patrick Devine's avatar
Patrick Devine committed
86
87
88
89
90
91
92
93
94
95
		"language_model.", "",
		"model.layers", "blk",
		"encoder.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.q_proj", "attn_q",
		"self_attn.q_norm", "attn_q_norm",
		"self_attn.k_proj", "attn_k",
		"self_attn.k_norm", "attn_k_norm",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
Michael Yang's avatar
Michael Yang committed
96
		"self_attn.out_proj", "attn_output",
Patrick Devine's avatar
Patrick Devine committed
97
98
99
100
101
102
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"post_attention_layernorm", "post_attention_norm",
		"pre_feedforward_layernorm", "ffn_norm",
		"post_feedforward_layernorm", "post_ffw_norm",
Michael Yang's avatar
Michael Yang committed
103
104
		"input_projection_weight", "input_projection.weight",
		"multi_modal_projector", "mm",
Patrick Devine's avatar
Patrick Devine committed
105
106
	}
}