"research/neural_programmer/neural_programmer.py" did not exist on "2fa6057af2bba62e434fc3205827ba585971d180"
convert_gemma3.go 6.92 KB
Newer Older
Patrick Devine's avatar
Patrick Devine committed
1
2
package convert

Michael Yang's avatar
Michael Yang committed
3
4
import (
	"cmp"
5
	"slices"
Michael Yang's avatar
Michael Yang committed
6
7
8

	"github.com/ollama/ollama/fs/ggml"
)
Patrick Devine's avatar
Patrick Devine committed
9
10
11

type gemma3Model struct {
	gemmaModel
Patrick Devine's avatar
Patrick Devine committed
12
13
	Architecture string
	TextModel    struct {
Patrick Devine's avatar
Patrick Devine committed
14
		HeadDim          uint32 `json:"head_dim"`
Patrick Devine's avatar
Patrick Devine committed
15
16
17
18
19
		HiddenSize       uint32 `json:"hidden_size"`
		HiddenLayers     uint32 `json:"num_hidden_layers"`
		IntermediateSize uint32 `json:"intermediate_size"`
		SlidingWindow    uint32 `json:"sliding_window"`
	} `json:"text_config"`
Michael Yang's avatar
Michael Yang committed
20
21
22
23
24
25
26
27
28
29
	VisionModel struct {
		NumAttentionHeads uint32  `json:"num_attention_heads"` // attention.head_count 16
		LayerNormEpsilon  float32 `json:"layer_norm_eps"`      // attention.layer_norm_epsilon 1e-05
		NumHiddenLayers   uint32  `json:"num_hidden_layers"`   // block_count 32
		HiddenSize        uint32  `json:"hidden_size"`         // embedding_length 1280
		IntermediateSize  uint32  `json:"intermediate_size"`   // feed_forward_length 5120
		ImageSize         uint32  `json:"image_size"`          // image_size 560
		NumChannels       uint32  `json:"num_channels"`        // num_channels 3
		PatchSize         uint32  `json:"patch_size"`          // patch_size 14
	} `json:"vision_config"`
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
	MaxPositionEmbeddings    uint32   `json:"max_position_embeddings"`
	NumAttentionHeads        uint32   `json:"num_attention_heads"`
	NumKeyValueHeads         uint32   `json:"num_key_value_heads"`
	RMSNormEPS               float32  `json:"rms_norm_eps"`
	HeadDim                  uint32   `json:"head_dim"`
	FinalLogitSoftcap        float32  `json:"final_logit_softcapping"`
	RopeLocalTheta           float32  `json:"rope_local_base_freq"`
	RopeTheta                float32  `json:"rope_theta"`
	SlidingWindow            uint32   `json:"sliding_window"`
	SlidingWindowPattern     *uint32  `json:"sliding_window_pattern"`
	LayerTypes               []string `json:"layer_types"`
	MultiModalTokensPerImage uint32   `json:"mm_tokens_per_image"`
	RopeScaling              *struct {
		Type                          string  `json:"rope_type"`
		Factor                        float32 `json:"factor"`
		OriginalMaxPositionEmbeddings uint32  `json:"original_max_position_embeddings"`
		ExtrapolationFactor           float32 `json:"extrapolation_factor"`
		BetaFast                      float32 `json:"beta_fast"`
		BetaSlow                      float32 `json:"beta_slow"`
	} `json:"rope_scaling"`
Patrick Devine's avatar
Patrick Devine committed
50
51
}

Patrick Devine's avatar
Patrick Devine committed
52
53
54
55
56
57
const (
	gemma4BLayerCount  = 34
	gemma12BLayerCount = 48
	gemma27BLayerCount = 62
)

Patrick Devine's avatar
Patrick Devine committed
58
59
60
func (p *gemma3Model) KV(t *Tokenizer) ggml.KV {
	kv := p.ModelParameters.KV(t)
	kv["general.architecture"] = "gemma3"
Michael Yang's avatar
Michael Yang committed
61

Patrick Devine's avatar
Patrick Devine committed
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
	numBlocks := cmp.Or(p.HiddenLayers, p.TextModel.HiddenLayers)
	kv["gemma3.block_count"] = numBlocks

	var (
		numHeads   uint32
		numKVHeads uint32
	)

	switch numBlocks {
	case gemma4BLayerCount:
		numHeads = 8
		numKVHeads = 4
	case gemma12BLayerCount:
		numHeads = 16
		numKVHeads = 8
	case gemma27BLayerCount:
		numHeads = 32
		numKVHeads = 16
	default:
		numHeads = p.NumAttentionHeads
		numKVHeads = p.NumKeyValueHeads
	}

	kv["gemma3.attention.head_count"] = numHeads
	kv["gemma3.attention.head_count_kv"] = numKVHeads

Patrick Devine's avatar
Patrick Devine committed
88
89
90
	switch p.Architecture {
	case "Gemma3ForCausalLM":
		kv["gemma3.context_length"] = p.MaxPositionEmbeddings
Michael Yang's avatar
Michael Yang committed
91
		kv["gemma3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
Patrick Devine's avatar
Patrick Devine committed
92
93
		kv["gemma3.attention.key_length"] = p.HeadDim
		kv["gemma3.attention.value_length"] = p.HeadDim
Michael Yang's avatar
Michael Yang committed
94
		kv["gemma3.attention.sliding_window"] = p.SlidingWindow
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

		// The sliding window pattern is either provided as the sliding_window_pattern
		// key (an int) or as the layer_types key (a list of strings).
		if p.SlidingWindowPattern != nil || len(p.LayerTypes) > 0 {
			kv["gemma3.attention.sliding_window_pattern"] = slices.Collect(func(yield func(bool) bool) {
				for i := range numBlocks {
					var isLocal bool
					if len(p.LayerTypes) > 0 && int(i) < len(p.LayerTypes) {
						isLocal = p.LayerTypes[i] == "sliding_attention"
					} else if p.SlidingWindowPattern != nil && *p.SlidingWindowPattern > 0 {
						isLocal = (i+1)%*p.SlidingWindowPattern != 0
					}
					if !yield(isLocal) {
						break
					}
				}
			})
		}
		if p.FinalLogitSoftcap > 0 {
			kv["gemma3.final_logit_softcapping"] = p.FinalLogitSoftcap
		}
Patrick Devine's avatar
Patrick Devine committed
116
		kv["gemma3.rope.local.freq_base"] = cmp.Or(p.RopeLocalTheta, 10000.0)
117
118
119
120
121
122
123
124
125
126
		kv["gemma3.rope.freq_base"] = cmp.Or(p.RopeTheta, 1000000.0)
		if p.RopeScaling != nil && p.RopeScaling.Type == "yarn" && p.RopeScaling.Factor > 0 {
			kv["gemma3.rope.scaling.type"] = "yarn"
			kv["gemma3.rope.scaling.factor"] = p.RopeScaling.Factor
			kv["gemma3.rope.scaling.original_context_length"] = p.RopeScaling.OriginalMaxPositionEmbeddings
			kv["gemma3.rope.scaling.extrapolation_factor"] = cmp.Or(p.RopeScaling.ExtrapolationFactor, float32(1.0))
			kv["gemma3.rope.scaling.beta_fast"] = cmp.Or(p.RopeScaling.BetaFast, float32(64.0))
			kv["gemma3.rope.scaling.beta_slow"] = cmp.Or(p.RopeScaling.BetaSlow, float32(1.0))
		}

Patrick Devine's avatar
Patrick Devine committed
127
		kv["gemma3.embedding_length"] = p.HiddenSize
Michael Yang's avatar
Michael Yang committed
128
		kv["gemma3.feed_forward_length"] = p.IntermediateSize
Patrick Devine's avatar
Patrick Devine committed
129
	default:
130
		kv["gemma3.context_length"] = cmp.Or(p.MaxPositionEmbeddings, 131072)
Patrick Devine's avatar
Patrick Devine committed
131
		kv["gemma3.embedding_length"] = p.TextModel.HiddenSize
Michael Yang's avatar
Michael Yang committed
132
133
		kv["gemma3.feed_forward_length"] = p.TextModel.IntermediateSize
		kv["gemma3.attention.sliding_window"] = p.TextModel.SlidingWindow
Patrick Devine's avatar
Patrick Devine committed
134
135
136
137
138
		kv["gemma3.vision.block_count"] = p.VisionModel.NumHiddenLayers
		kv["gemma3.vision.embedding_length"] = p.VisionModel.HiddenSize
		kv["gemma3.vision.feed_forward_length"] = p.VisionModel.IntermediateSize
		kv["gemma3.vision.image_size"] = p.VisionModel.ImageSize
		kv["gemma3.vision.patch_size"] = p.VisionModel.PatchSize
Michael Yang's avatar
Michael Yang committed
139
		kv["gemma3.vision.num_channels"] = cmp.Or(p.VisionModel.NumChannels, 3)
Patrick Devine's avatar
Patrick Devine committed
140
		kv["gemma3.vision.attention.head_count"] = p.VisionModel.NumAttentionHeads
Michael Yang's avatar
Michael Yang committed
141
		kv["gemma3.vision.attention.layer_norm_epsilon"] = cmp.Or(p.VisionModel.LayerNormEpsilon, 1e-6)
Patrick Devine's avatar
Patrick Devine committed
142
143
		kv["gemma3.attention.key_length"] = cmp.Or(p.TextModel.HeadDim, 256)
		kv["gemma3.attention.value_length"] = cmp.Or(p.TextModel.HeadDim, 256)
Patrick Devine's avatar
Patrick Devine committed
144
145
	}

Michael Yang's avatar
Michael Yang committed
146
147
148
149
	if p.MultiModalTokensPerImage > 0 {
		kv["gemma3.mm.tokens_per_image"] = p.MultiModalTokensPerImage
	}

Patrick Devine's avatar
Patrick Devine committed
150
151
152
153
154
155
156
157
	return kv
}

func (p *gemma3Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
Michael Yang's avatar
Michael Yang committed
158
159
		"vision_tower.vision_model.embeddings", "v",
		"vision_tower.vision_model", "v",
Patrick Devine's avatar
Patrick Devine committed
160
161
		"vision_model.vision_model.embeddings", "v",
		"vision_model.vision_model", "v",
Patrick Devine's avatar
Patrick Devine committed
162
163
164
165
166
167
168
169
170
171
		"language_model.", "",
		"model.layers", "blk",
		"encoder.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.q_proj", "attn_q",
		"self_attn.q_norm", "attn_q_norm",
		"self_attn.k_proj", "attn_k",
		"self_attn.k_norm", "attn_k_norm",
		"self_attn.v_proj", "attn_v",
		"self_attn.o_proj", "attn_output",
Michael Yang's avatar
Michael Yang committed
172
		"self_attn.out_proj", "attn_output",
Patrick Devine's avatar
Patrick Devine committed
173
174
175
176
177
178
		"mlp.gate_proj", "ffn_gate",
		"mlp.down_proj", "ffn_down",
		"mlp.up_proj", "ffn_up",
		"post_attention_layernorm", "post_attention_norm",
		"pre_feedforward_layernorm", "ffn_norm",
		"post_feedforward_layernorm", "post_ffw_norm",
Michael Yang's avatar
Michael Yang committed
179
180
		"input_projection_weight", "input_projection.weight",
		"multi_modal_projector", "mm",
Patrick Devine's avatar
Patrick Devine committed
181
182
	}
}