// Gemma 3 checkpoint conversion.
package convert

import (
	"cmp"

	"github.com/ollama/ollama/fs/ggml"
)
Patrick Devine's avatar
Patrick Devine committed
8
9
10

type gemma3Model struct {
	gemmaModel
Patrick Devine's avatar
Patrick Devine committed
11
12
	Architecture string
	TextModel    struct {
Patrick Devine's avatar
Patrick Devine committed
13
		HeadDim          uint32 `json:"head_dim"`
Patrick Devine's avatar
Patrick Devine committed
14
15
16
17
18
		HiddenSize       uint32 `json:"hidden_size"`
		HiddenLayers     uint32 `json:"num_hidden_layers"`
		IntermediateSize uint32 `json:"intermediate_size"`
		SlidingWindow    uint32 `json:"sliding_window"`
	} `json:"text_config"`
Michael Yang's avatar
Michael Yang committed
19
20
21
22
23
24
25
26
27
28
	VisionModel struct {
		NumAttentionHeads uint32  `json:"num_attention_heads"` // attention.head_count 16
		LayerNormEpsilon  float32 `json:"layer_norm_eps"`      // attention.layer_norm_epsilon 1e-05
		NumHiddenLayers   uint32  `json:"num_hidden_layers"`   // block_count 32
		HiddenSize        uint32  `json:"hidden_size"`         // embedding_length 1280
		IntermediateSize  uint32  `json:"intermediate_size"`   // feed_forward_length 5120
		ImageSize         uint32  `json:"image_size"`          // image_size 560
		NumChannels       uint32  `json:"num_channels"`        // num_channels 3
		PatchSize         uint32  `json:"patch_size"`          // patch_size 14
	} `json:"vision_config"`
Patrick Devine's avatar
Patrick Devine committed
29
30
31
32
33
34
35
36
	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
	NumAttentionHeads     uint32  `json:"num_attention_heads"`
	NumKeyValueHeads      uint32  `json:"num_key_value_heads"`
	RMSNormEPS            float32 `json:"rms_norm_eps"`
	HeadDim               uint32  `json:"head_dim"`
	FinalLogitSoftcap     float32 `json:"final_logit_softcapping"`
	RopeLocalTheta        float32 `json:"rope_local_base_freq"`
	RopeGlobalTheta       float32 `json:"rope_global_base_freq"`
Patrick Devine's avatar
Patrick Devine committed
37
	SlidingWindow         uint32  `json:"sliding_window"`
Patrick Devine's avatar
Patrick Devine committed
38
39
}

// Hidden-layer counts that uniquely identify each Gemma 3 model size.
// KV uses them to infer attention head counts when the checkpoint's
// config omits those fields.
const (
	gemma4BLayerCount  = 34
	gemma12BLayerCount = 48
	gemma27BLayerCount = 62
)

Patrick Devine's avatar
Patrick Devine committed
46
47
48
func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma3"
Michael Yang's avatar
Michael Yang committed
49

Patrick Devine's avatar
Patrick Devine committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
	numBlocks := cmp.Or(p.HiddenLayers, p.TextModel.HiddenLayers)
	kv["gemma3.block_count"] = numBlocks

	var (
		numHeads   uint32
		numKVHeads uint32
	)

	switch numBlocks {
	case gemma4BLayerCount:
		numHeads = 8
		numKVHeads = 4
	case gemma12BLayerCount:
		numHeads = 16
		numKVHeads = 8
	case gemma27BLayerCount:
		numHeads = 32
		numKVHeads = 16
	default:
		numHeads = p.NumAttentionHeads
		numKVHeads = p.NumKeyValueHeads
	}

	kv["gemma3.attention.head_count"] = numHeads
	kv["gemma3.attention.head_count_kv"] = numKVHeads

Patrick Devine's avatar
Patrick Devine committed
76
77
78
	switch p.Architecture {
	case "Gemma3ForCausalLM":
		kv["gemma3.context_length"] = p.MaxPositionEmbeddings
Michael Yang's avatar
Michael Yang committed
79
		kv["gemma3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
Patrick Devine's avatar
Patrick Devine committed
80
81
		kv["gemma3.attention.key_length"] = p.HeadDim
		kv["gemma3.attention.value_length"] = p.HeadDim
Michael Yang's avatar
Michael Yang committed
82
		kv["gemma3.attention.sliding_window"] = p.SlidingWindow
Patrick Devine's avatar
Patrick Devine committed
83
84
85
		kv["gemma3.final_logit_softcapping"] = cmp.Or(p.FinalLogitSoftcap, 30)
		kv["gemma3.rope.local.freq_base"] = cmp.Or(p.RopeLocalTheta, 10000.0)
		kv["gemma3.rope.global.freq_base"] = cmp.Or(p.RopeGlobalTheta, 1000000.0)
Patrick Devine's avatar
Patrick Devine committed
86
		kv["gemma3.embedding_length"] = p.HiddenSize
Michael Yang's avatar
Michael Yang committed
87
		kv["gemma3.feed_forward_length"] = p.IntermediateSize
Patrick Devine's avatar
Patrick Devine committed
88
	default:
Patrick Devine's avatar
Patrick Devine committed
89
		kv["gemma3.context_length"] = cmp.Or(p.MaxPositionEmbeddings, 8192)
Patrick Devine's avatar
Patrick Devine committed
90
		kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
Michael Yang's avatar
Michael Yang committed
91
92
		kv["gemma3.feed_forward_length"] = p.TextModel.IntermediateSize
		kv["gemma3.attention.sliding_window"] = p.TextModel.SlidingWindow
Patrick Devine's avatar
Patrick Devine committed
93
94
95
96
97
		kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
		kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
		kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
		kv["gemma3.vision.image_size"] = p.VisionModel.ImageSize
		kv["gemma3.vision.patch_size"] = p.VisionModel.PatchSize
Michael Yang's avatar
Michael Yang committed
98
		kv["gemma3.vision.num_channels"] = cmp.Or(p.VisionModel.NumChannels, 3)
Patrick Devine's avatar
Patrick Devine committed
99
		kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
Michael Yang's avatar
Michael Yang committed
100
		kv["gemma3.vision.attention.layer_norm_epsilon"] = cmp.Or(p.VisionModel.LayerNormEpsilon, 1e-6)
Patrick Devine's avatar
Patrick Devine committed
101
102
		kv["gemma3.attention.key_length"] = cmp.Or(p.TextModel.HeadDim, 256)
		kv["gemma3.attention.value_length"] = cmp.Or(p.TextModel.HeadDim, 256)
Patrick Devine's avatar
Patrick Devine committed
103
104
	}

Patrick Devine's avatar
Patrick Devine committed
105
106
107
108
109
110
111
112
	return kv
}

func (p *gemma3Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
Michael Yang's avatar
Michael Yang committed
113
114
		"vision_tower.vision_model.embeddings", "v",
		"vision_tower.vision_model", "v",
Patrick Devine's avatar
Patrick Devine committed
115
116
		"vision_model.vision_model.embeddings", "v",
		"vision_model.vision_model", "v",
Patrick Devine's avatar
Patrick Devine committed
117
118
119
120
121
122
123
124
125
126
		"language_model.", "",
		"model.layers", "blk",
		"encoder.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.q_proj", "attn_q",
		"self_attn.q_norm", "attn_q_norm",
		"self_attn.k_proj", "attn_k",
		"self_attn.k_norm", "attn_k_norm",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
Michael Yang's avatar
Michael Yang committed
127
		"self_attn.out_proj", "attn_output",
Patrick Devine's avatar
Patrick Devine committed
128
129
130
131
132
133
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"post_attention_layernorm", "post_attention_norm",
		"pre_feedforward_layernorm", "ffn_norm",
		"post_feedforward_layernorm", "post_ffw_norm",
Michael Yang's avatar
Michael Yang committed
134
135
		"input_projection_weight", "input_projection.weight",
		"multi_modal_projector", "mm",
Patrick Devine's avatar
Patrick Devine committed
136
137
	}
}