convert.go 9.79 KB
Newer Older
1
2
3
package convert

import (
4
	"cmp"
5
	"encoding/json"
Michael Yang's avatar
Michael Yang committed
6
	"errors"
7
	"fmt"
8
	"io/fs"
9
	"iter"
10
	"log/slog"
11
	"maps"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"slices"
Michael Yang's avatar
Michael Yang committed
14
	"strings"
15

16
	ofs "github.com/ollama/ollama/fs"
Michael Yang's avatar
Michael Yang committed
17
	"github.com/ollama/ollama/fs/ggml"
18
19
)

20
// ModelParameters holds the fields of a model's config.json that
// LoadModelMetadata inspects before handing the same JSON to an
// architecture-specific converter.
//
// Architectures is the "architectures" array; LoadModelMetadata uses its first
// element to select the converter. VocabSize is the top-level "vocab_size".
// TextModel mirrors the nested "text_config" object; its VocabSize is used as
// the expected vocabulary size when the top-level VocabSize is zero.
type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`

	// TODO is this needed?
	ModelType string `json:"model_type"`

	TextModel struct {
		VocabSize  uint32 `json:"vocab_size"`
		HiddenSize uint32 `json:"hidden_size"`
		ModelType  string `json:"model_type"`
	} `json:"text_config"`
}

34
35
36
37
38
39
40
41
42
43
// AdapterParameters holds the LoRA adapter settings that ConvertAdapter
// decodes from adapter_config.json.
//
// Two alpha values can be present: the top-level "lora_alpha" (Alpha) and the
// nested "lora_parameters.alpha" (LoraParameters.Alpha). AdapterParameters.KV
// uses the nested value when it is non-zero and falls back to the top-level
// one otherwise.
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
type KV map[string]any

func (kv KV) Architecture() string {
	return kv.String("general.architecture", "unknown")
}

type valueTypes interface {
	uint8 | int8 | uint16 | int16 |
		uint32 | int32 | uint64 | int64 |
		string | float32 | float64 | bool
}

type arrayValueTypes interface {
	[]uint8 | []int8 | []uint16 | []int16 |
		[]uint32 | []int32 | []uint64 | []int64 |
		[]string | []float32 | []float64 | []bool
}

func keyValue[T valueTypes | arrayValueTypes](kv KV, key string, defaultValue ...T) (T, bool) {
	if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
		key = kv.Architecture() + "." + key
	}

	if val, ok := kv[key].(T); ok {
		return val, true
	}
	return defaultValue[0], false
}

func (kv KV) String(key string, defaultValue ...string) string {
	val, _ := keyValue(kv, key, append(defaultValue, "")...)
	return val
}

func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
	val, _ := keyValue(kv, key, append(defaultValue, 0)...)
	return val
}

func (kv KV) Float(key string, defaultValue ...float32) float32 {
	val, _ := keyValue(kv, key, append(defaultValue, 0)...)
	return val
}

func (kv KV) Bool(key string, defaultValue ...bool) bool {
	val, _ := keyValue(kv, key, append(defaultValue, false)...)
	return val
}

func (kv KV) Strings(key string, defaultValue ...[]string) []string {
	val, _ := keyValue(kv, key, append(defaultValue, []string{""})...)
	return val
}

func (kv KV) Ints(key string, defaultValue ...[]int32) []int32 {
	val, _ := keyValue(kv, key, append(defaultValue, []int32{0})...)
	return val
}

func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
	val, _ := keyValue(kv, key, append(defaultValue, []uint32{0})...)
	return val
}

func (kv KV) Floats(key string, defaultValue ...[]float32) []float32 {
	val, _ := keyValue(kv, key, append(defaultValue, []float32{0})...)
	return val
}

func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
	val, _ := keyValue(kv, key, append(defaultValue, []bool{false})...)
	return val
}

func (kv KV) Len() int {
	return len(kv)
}

func (kv KV) Keys() iter.Seq[string] {
	return maps.Keys(kv)
}

func (kv KV) Value(key string) any {
	return kv[key]
}

func (ModelParameters) KV(t *Tokenizer) KV {
	kv := KV{
Michael Yang's avatar
Michael Yang committed
132
133
134
135
136
137
138
139
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}
140

141
142
143
144
	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

Michael Yang's avatar
Michael Yang committed
145
146
147
	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}
148

Michael Yang's avatar
Michael Yang committed
149
150
	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
151
152
153
154
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		if len(sv.IDs) > 0 {
			kv[fmt.Sprintf("tokenizer.ggml.%s_token_ids", sv.Key())] = sv.IDs
		}
Michael Yang's avatar
Michael Yang committed
155
	}
156

Michael Yang's avatar
Michael Yang committed
157
	return kv
158
159
}

160
func (p AdapterParameters) KV() KV {
161
162
163
164
165
166
167
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

168
	kv := KV{
169
170
171
172
173
174
175
176
177
178
179
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
Michael Yang's avatar
Michael Yang committed
180
181
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
182
	}
Michael Yang's avatar
Michael Yang committed
183
}
184

185
type ModelKV interface {
Michael Yang's avatar
Michael Yang committed
186
	// KV maps parameters to LLM key-values
187
188
189
190
191
192
	KV(*Tokenizer) KV
}

type ModelConverter interface {
	ModelKV

Michael Yang's avatar
Michael Yang committed
193
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
194
	Tensors([]Tensor) []*ggml.Tensor
Michael Yang's avatar
Michael Yang committed
195
196
197
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
Michael Yang's avatar
Michael Yang committed
198

Michael Yang's avatar
Michael Yang committed
199
200
	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
201
202
}

Michael Yang's avatar
bert  
Michael Yang committed
203
204
205
206
// moreParser is an optional interface a model converter may implement.
// LoadModelMetadata calls parseMore with the model's file system after
// decoding config.json into the converter and before parsing the tokenizer;
// a non-nil error aborts loading.
type moreParser interface {
	parseMore(fs.FS) error
}

207
208
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
209
	KV(ofs.Config) KV
210
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
211
	Tensors([]Tensor) []*ggml.Tensor
212
213
214
215
216
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
}

217
// ConvertAdapter converts the adapter found in fsys and writes the result to
// f through writeFile.
//
// The adapter settings are read from adapter_config.json in fsys. baseKV is
// the configuration of the base model; its architecture selects the adapter
// converter. Only the "llama" and "gemma2" architectures are supported.
//
// An error is returned when adapter_config.json cannot be read or decoded,
// when the base model has no architecture set or an unsupported one, or when
// parsing tensors or writing the output fails.
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ofs.Config) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	// Decode into the generic parameters first so a malformed config is
	// rejected before any tensors are parsed. The decoded value itself is not
	// used; the selected converter decodes the same bytes again below.
	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch := baseKV.Architecture()

	var conv AdapterConverter
	switch arch {
	case "":
		return errors.New("architecture not set for the base model")
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		// Name the offending architecture, as LoadModelMetadata does.
		return fmt.Errorf("unsupported architecture %q", arch)
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
}

255
func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
256
	bts, err := fs.ReadFile(fsys, "config.json")
257
	if err != nil {
258
		return nil, nil, err
259
260
	}

261
	var p ModelParameters
Michael Yang's avatar
Michael Yang committed
262
	if err := json.Unmarshal(bts, &p); err != nil {
263
		return nil, nil, err
264
265
	}

Michael Yang's avatar
Michael Yang committed
266
	if len(p.Architectures) < 1 {
267
		return nil, nil, errors.New("unknown architecture")
268
269
	}

270
	var conv ModelConverter
Michael Yang's avatar
Michael Yang committed
271
	switch p.Architectures[0] {
272
	case "LlamaForCausalLM":
273
		conv = &llamaModel{}
274
275
	case "MllamaForConditionalGeneration":
		conv = &mllamaModel{}
Michael Yang's avatar
llama4  
Michael Yang committed
276
277
	case "Llama4ForConditionalGeneration":
		conv = &llama4Model{}
278
279
	case "Mistral3ForConditionalGeneration":
		conv = &mistral3Model{}
280
281
	case "Ministral3ForCausalLM":
		conv = &mistral3CausalModel{}
Michael Yang's avatar
Michael Yang committed
282
	case "MixtralForCausalLM":
283
		conv = &mixtralModel{}
Michael Yang's avatar
Michael Yang committed
284
	case "GemmaForCausalLM":
285
		conv = &gemmaModel{}
Michael Yang's avatar
Michael Yang committed
286
	case "Gemma2ForCausalLM":
287
		conv = &gemma2Model{}
Patrick Devine's avatar
Patrick Devine committed
288
289
	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
		conv = &gemma3Model{Architecture: p.Architectures[0]}
Michael Yang's avatar
Michael Yang committed
290
291
	case "Gemma3nForConditionalGeneration":
		conv = &gemma3nModel{}
292
	case "Phi3ForCausalLM":
293
		conv = &phi3Model{}
294
295
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
296
297
	case "Qwen2_5_VLForConditionalGeneration":
		conv = &qwen25VLModel{}
298
299
	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
		conv = &qwen3VLModel{}
300
301
	case "Olmo3ForCausalLM":
		conv = &olmoModel{}
Michael Yang's avatar
bert  
Michael Yang committed
302
	case "BertModel":
303
		conv = &bertModel{}
304
305
	case "NomicBertModel", "NomicBertMoEModel":
		conv = &nomicbertModel{}
306
307
	case "CohereForCausalLM":
		conv = &commandrModel{}
Michael Yang's avatar
Michael Yang committed
308
309
	case "GptOssForCausalLM":
		conv = &gptossModel{}
Michael Yang's avatar
Michael Yang committed
310
311
	case "DeepseekOCRForCausalLM":
		conv = &deepseekocr{}
312
313
	case "DeepseekV3ForCausalLM":
		conv = &deepseek2Model{}
Michael Yang's avatar
Michael Yang committed
314
	default:
315
		return nil, nil, fmt.Errorf("unsupported architecture %q", p.Architectures[0])
316
317
	}

Michael Yang's avatar
Michael Yang committed
318
	if err := json.Unmarshal(bts, conv); err != nil {
319
		return nil, nil, err
320
321
	}

Michael Yang's avatar
bert  
Michael Yang committed
322
323
	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
324
			return nil, nil, err
Michael Yang's avatar
bert  
Michael Yang committed
325
326
327
		}
	}

328
	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
Michael Yang's avatar
Michael Yang committed
329
	if err != nil {
330
		return nil, nil, err
331
332
	}

333
	vocabSize := int(cmp.Or(p.VocabSize, p.TextModel.VocabSize))
Patrick Devine's avatar
Patrick Devine committed
334

335
	switch {
Patrick Devine's avatar
Patrick Devine committed
336
	case vocabSize == 0:
337
		slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
338
	case vocabSize > len(t.Vocabulary.Tokens):
339
		slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
Michael Yang's avatar
Michael Yang committed
340
341
342
343
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
344
		}
345
	case vocabSize < len(t.Vocabulary.Tokens):
346
347
348
		slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Vocabulary.Tokens))
		p.VocabSize = uint32(len(t.Vocabulary.Tokens))
		p.TextModel.VocabSize = uint32(len(t.Vocabulary.Tokens))
349
	default:
Michael Yang's avatar
Michael Yang committed
350
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
351
	}
352
353
354
355
356
357
358
359
360
361
362
363
364
	return conv, t, nil
}

// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, f *os.File) error {
	kv, t, err := LoadModelMetadata(fsys)
	if err != nil {
		return err
	}
	conv := kv.(ModelConverter)
Michael Yang's avatar
Michael Yang committed
365

Michael Yang's avatar
Michael Yang committed
366
	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
Michael Yang's avatar
Michael Yang committed
367
368
	if err != nil {
		return err
369
370
	}

371
	return writeFile(f, conv.KV(t), conv.Tensors(ts))
Michael Yang's avatar
Michael Yang committed
372
373
}

374
func writeFile(f *os.File, kv KV, ts []*ggml.Tensor) error {
Michael Yang's avatar
Michael Yang committed
375
376
377
378
	for i := range ts {
		ts[i].Shape = slices.Clone(ts[i].Shape)
		slices.Reverse(ts[i].Shape)
	}
379
	return ggml.WriteGGUF(f, kv, ts)
380
}