package convert

import (
	"slices"
	"strings"

	"github.com/ollama/ollama/fs/ggml"
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"
)

// qwen3Model holds the configuration parsed for a Qwen3 (or Qwen3-MoE)
// model. The JSON tags presumably mirror the upstream HuggingFace
// config.json keys — verify against the source checkpoints.
type qwen3Model struct {
	ModelParameters
	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"` // maximum context length
	HiddenSize            uint32  `json:"hidden_size"`             // embedding width
	HiddenLayers          uint32  `json:"num_hidden_layers"`       // transformer block count
	IntermediateSize      uint32  `json:"intermediate_size"`       // feed-forward width
	NumAttentionHeads     uint32  `json:"num_attention_heads"`
	NumKeyValueHeads      uint32  `json:"num_key_value_heads"` // GQA key/value head count
	HeadDim               uint32  `json:"head_dim"`            // per-head key/value length
	NumExperts            uint32  `json:"num_experts"`         // >0 selects the MoE variant (see KV)
	NumExpertsPerToken    uint32  `json:"num_experts_per_tok"`
	NormTopkProb          bool    `json:"norm_topk_prob"`
	RopeTheta             float32 `json:"rope_theta"` // RoPE base frequency
	// RopeScaling describes optional RoPE extension; only the fields
	// relevant to the configured Type are expected to be populated.
	RopeScaling struct {
		Type                          string     `json:"type"` // "", "yarn", "mrope", or "default"
		Factor                        ropeFactor `json:"factor"`
		OriginalMaxPositionEmbeddings uint32     `json:"original_max_position_embeddings"`
		MropeSection                  []int32    `json:"mrope_section"`
	} `json:"rope_scaling"`
	RMSNormEPS float32 `json:"rms_norm_eps"` // RMSNorm epsilon
}

// KV implements ModelConverter.
35
func (q *qwen3Model) KV(t *Tokenizer) KV {
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
	arch := "qwen3"
	if q.NumExperts > 0 {
		arch += "moe"
	}

	kv := q.ModelParameters.KV(t)
	kv["general.architecture"] = arch
	kv["block_count"] = q.HiddenLayers
	kv["context_length"] = q.MaxPositionEmbeddings
	kv["embedding_length"] = q.HiddenSize
	kv["feed_forward_length"] = q.IntermediateSize
	kv["attention.head_count"] = q.NumAttentionHeads
	kv["attention.head_count_kv"] = q.NumKeyValueHeads
	kv["attention.key_length"] = q.HeadDim
	kv["attention.value_length"] = q.HeadDim

	if q.NumExperts > 0 {
		kv["expert_count"] = q.NumExperts
		kv["expert_used_count"] = q.NumExpertsPerToken
		kv["norm_top_k_prob"] = q.NormTopkProb
	}

	kv["rope.freq_base"] = q.RopeTheta
	kv["attention.layer_norm_rms_epsilon"] = q.RMSNormEPS

	switch q.RopeScaling.Type {
	case "":
		// no scaling
	case "yarn":
		kv["rope.scaling.type"] = q.RopeScaling.Type
		kv["rope.scaling.factor"] = q.RopeScaling.Factor
	case "mrope", "default":
		kv["rope.mrope_section"] = q.RopeScaling.MropeSection
	default:
		panic("unknown rope scaling type")
	}
	return kv
}

// Tensors implements ModelConverter. It converts source tensors into
// ggml tensors, special-casing the packed expert tensors, which are
// split and/or transposed to match the expected GGUF layout.
func (q *qwen3Model) Tensors(ts []Tensor) []*ggml.Tensor {
	var out []*ggml.Tensor

	// TODO: handle split experts

	for _, t := range ts {
		switch {
		case strings.Contains(t.Name(), "ffn_gate_up_exps"):
			// The fused gate/up expert tensor is split along dim 2 into a
			// "gate" half and an "up" half; afterFunc transposes dims 1 and 2
			// of each half after the split. NOTE(review): assumes a rank-3
			// tensor — confirm against the checkpoint layout.
			afterFunc := func(t tensor.Tensor) (tensor.Tensor, error) { return tensor.Transpose(t, 0, 2, 1) }
			for t := range splitDim(t, 2,
				split{Replacer: strings.NewReplacer("gate_up", "gate"), afterFunc: afterFunc},
				split{Replacer: strings.NewReplacer("gate_up", "up"), afterFunc: afterFunc},
			) {
				// Swap the recorded shape to match the transpose performed in afterFunc.
				t.Shape[1], t.Shape[2] = t.Shape[2], t.Shape[1]
				out = append(out, t)
			}
		case strings.Contains(t.Name(), "ffn_down_exps"):
			// Record the transposed shape now; the repacker below performs
			// the actual data transpose lazily when the tensor is written.
			shape := slices.Clone(t.Shape())
			shape[1], shape[2] = shape[2], shape[1]
			t.SetRepacker(func(_ string, data []float32, shape []uint64) ([]float32, error) {
				// Convert the uint64 shape into the int dims the tensor
				// package expects.
				dims := make([]int, len(shape))
				for i := range shape {
					dims[i] = int(shape[i])
				}

				var tt tensor.Tensor = tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
				tt, err := tensor.Transpose(tt, 0, 2, 1)
				if err != nil {
					return nil, err
				}

				// flatten tensor so it can be written as a vector
				if err := tt.Reshape(tt.Shape().TotalSize()); err != nil {
					return nil, err
				}

				return native.VectorF32(tt.(*tensor.Dense))
			})
			out = append(out, &ggml.Tensor{
				Name:     t.Name(),
				Kind:     t.Kind(),
				Shape:    shape,
				WriterTo: t,
			})
		default:
			// All other tensors pass through with their original shape.
			out = append(out, &ggml.Tensor{
				Name:     t.Name(),
				Kind:     t.Kind(),
				Shape:    t.Shape(),
				WriterTo: t,
			})
		}
	}

	return out
}

// Replacements implements ModelConverter. The returned slice is a flat
// list of (old, new) substring pairs mapping source tensor names to
// their GGUF equivalents. NOTE(review): presumably consumed by a
// strings.Replacer elsewhere, in which case pair order can matter —
// keep the order as-is.
func (q *qwen3Model) Replacements() []string {
	return []string{
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.k_proj", "attn_k",
		"self_attn.k_norm", "attn_k_norm",
		"self_attn.v_proj", "attn_v",
		"self_attn.q_proj", "attn_q",
		"self_attn.q_norm", "attn_q_norm",
		"self_attn.o_proj", "attn_output",
		"mlp.down_proj", "ffn_down",
		"mlp.gate_proj", "ffn_gate",
		"mlp.up_proj", "ffn_up",
		"mlp.gate.weight", "ffn_gate_inp.weight",
		"mlp.experts.down_proj", "ffn_down_exps.weight",
		"mlp.experts.gate_up_proj", "ffn_gate_up_exps.weight",
		"post_attention_layernorm", "ffn_norm",
		"model.norm", "output_norm",
	}
}

// Compile-time check that qwen3Model satisfies ModelConverter.
var _ ModelConverter = (*qwen3Model)(nil)