convert.go 7.2 KB
Newer Older
1
2
3
package convert

import (
4
	"cmp"
5
	"encoding/json"
Michael Yang's avatar
Michael Yang committed
6
	"errors"
7
	"fmt"
8
	"io/fs"
9
	"log/slog"
10
	"os"
Michael Yang's avatar
Michael Yang committed
11
	"slices"
Michael Yang's avatar
Michael Yang committed
12
	"strings"
13

Michael Yang's avatar
Michael Yang committed
14
	"github.com/ollama/ollama/fs/ggml"
15
16
)

17
// ModelParameters holds the config.json fields that ConvertModel reads
// directly, before handing the same JSON to an architecture-specific converter.
//
// Architectures is decoded from "architectures"; ConvertModel picks the
// converter from its first entry. VocabSize is the top-level "vocab_size" and
// TextModel.VocabSize is "text_config.vocab_size"; ConvertModel uses the first
// of the two that is non-zero as the expected vocabulary size.
type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`

	TextModel struct {
		VocabSize uint32 `json:"vocab_size"`
	} `json:"text_config"`
}

26
27
28
29
30
31
32
33
34
35
// AdapterParameters holds the fields decoded from an adapter's
// adapter_config.json.
//
// Alpha ("lora_alpha") and LoraParameters.Alpha ("lora_parameters.alpha") are
// two places the alpha value may appear; AdapterParameters.KV prefers
// LoraParameters.Alpha when it is non-zero and falls back to Alpha otherwise.
// LoraLayers, LoraParameters.Rank and LoraParameters.Scale are decoded from
// "lora_layers", "lora_parameters.rank" and "lora_parameters.scale".
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

Michael Yang's avatar
Michael Yang committed
36
37
func (ModelParameters) KV(t *Tokenizer) ggml.KV {
	kv := ggml.KV{
Michael Yang's avatar
Michael Yang committed
38
39
40
41
42
43
44
45
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}
46

47
48
49
50
	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

Michael Yang's avatar
Michael Yang committed
51
52
53
	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}
54

Michael Yang's avatar
Michael Yang committed
55
56
	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
57
58
59
60
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		if len(sv.IDs) > 0 {
			kv[fmt.Sprintf("tokenizer.ggml.%s_token_ids", sv.Key())] = sv.IDs
		}
Michael Yang's avatar
Michael Yang committed
61
	}
62

Michael Yang's avatar
Michael Yang committed
63
	return kv
64
65
}

Michael Yang's avatar
Michael Yang committed
66
func (p AdapterParameters) KV() ggml.KV {
67
68
69
70
71
72
73
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

Michael Yang's avatar
Michael Yang committed
74
	kv := ggml.KV{
75
76
77
78
79
80
81
82
83
84
85
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
Michael Yang's avatar
Michael Yang committed
86
87
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
88
	}
Michael Yang's avatar
Michael Yang committed
89
}
90

91
type ModelConverter interface {
Michael Yang's avatar
Michael Yang committed
92
	// KV maps parameters to LLM key-values
Michael Yang's avatar
Michael Yang committed
93
	KV(*Tokenizer) ggml.KV
Michael Yang's avatar
Michael Yang committed
94
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
95
	Tensors([]Tensor) []*ggml.Tensor
Michael Yang's avatar
Michael Yang committed
96
97
98
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
Michael Yang's avatar
Michael Yang committed
99

Michael Yang's avatar
Michael Yang committed
100
101
	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
102
103
}

Michael Yang's avatar
bert  
Michael Yang committed
104
105
106
107
// moreParser is an optional interface for converters that need to read
// additional files. When the selected converter implements it, ConvertModel
// calls parseMore with the input file system after decoding config.json and
// before parsing the tokenizer; a non-nil error aborts the conversion.
type moreParser interface {
	parseMore(fs.FS) error
}

108
109
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
Michael Yang's avatar
Michael Yang committed
110
	KV(ggml.KV) ggml.KV
111
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
112
	Tensors([]Tensor) []*ggml.Tensor
113
114
115
116
117
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
}

118
// ConvertAdapter reads adapter_config.json and the adapter tensors from fsys,
// converts them with the AdapterConverter that matches the
// "general.architecture" entry of baseKV, and passes the result to writeFile
// together with f.
//
// It returns an error when the config cannot be read or decoded, when baseKV
// has no "general.architecture" entry, when that architecture is not one of
// "llama" or "gemma2", or when parsing the tensors or writing fails.
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	// Decode once into the generic parameters so a malformed config is
	// rejected before any tensors are read. The decoded value is not used
	// further; the converter decodes the same bytes again below.
	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		// Name the rejected value, as ConvertModel does. arch is an untyped
		// map value, so render it with fmt.Sprint before quoting.
		return fmt.Errorf("unsupported architecture %q", fmt.Sprint(arch))
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
}

Michael Yang's avatar
Michael Yang committed
156
157
158
159
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
160
func ConvertModel(fsys fs.FS, f *os.File) error {
161
	bts, err := fs.ReadFile(fsys, "config.json")
162
	if err != nil {
Michael Yang's avatar
Michael Yang committed
163
		return err
164
165
	}

166
	var p ModelParameters
Michael Yang's avatar
Michael Yang committed
167
	if err := json.Unmarshal(bts, &p); err != nil {
Michael Yang's avatar
Michael Yang committed
168
		return err
169
170
	}

Michael Yang's avatar
Michael Yang committed
171
172
	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
173
174
	}

175
	var conv ModelConverter
Michael Yang's avatar
Michael Yang committed
176
	switch p.Architectures[0] {
177
	case "LlamaForCausalLM":
178
		conv = &llamaModel{}
179
180
	case "MllamaForConditionalGeneration":
		conv = &mllamaModel{}
Michael Yang's avatar
llama4  
Michael Yang committed
181
182
	case "Llama4ForConditionalGeneration":
		conv = &llama4Model{}
183
184
	case "Mistral3ForConditionalGeneration":
		conv = &mistral3Model{}
185
186
	case "Ministral3ForCausalLM":
		conv = &mistral3CausalModel{}
Michael Yang's avatar
Michael Yang committed
187
	case "MixtralForCausalLM":
188
		conv = &mixtralModel{}
Michael Yang's avatar
Michael Yang committed
189
	case "GemmaForCausalLM":
190
		conv = &gemmaModel{}
Michael Yang's avatar
Michael Yang committed
191
	case "Gemma2ForCausalLM":
192
		conv = &gemma2Model{}
Patrick Devine's avatar
Patrick Devine committed
193
194
	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
		conv = &gemma3Model{Architecture: p.Architectures[0]}
Michael Yang's avatar
Michael Yang committed
195
196
	case "Gemma3nForConditionalGeneration":
		conv = &gemma3nModel{}
197
	case "Phi3ForCausalLM":
198
		conv = &phi3Model{}
199
200
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
201
202
	case "Qwen2_5_VLForConditionalGeneration":
		conv = &qwen25VLModel{}
203
204
	case "Qwen3VLForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration":
		conv = &qwen3VLModel{}
205
206
	case "Olmo3ForCausalLM":
		conv = &olmoModel{}
Michael Yang's avatar
bert  
Michael Yang committed
207
	case "BertModel":
208
		conv = &bertModel{}
209
210
	case "NomicBertModel", "NomicBertMoEModel":
		conv = &nomicbertModel{}
211
212
	case "CohereForCausalLM":
		conv = &commandrModel{}
Michael Yang's avatar
Michael Yang committed
213
214
	case "GptOssForCausalLM":
		conv = &gptossModel{}
Michael Yang's avatar
Michael Yang committed
215
216
	case "DeepseekOCRForCausalLM":
		conv = &deepseekocr{}
217
218
	case "DeepseekV3ForCausalLM":
		conv = &deepseek2Model{}
Michael Yang's avatar
Michael Yang committed
219
	default:
220
		return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
221
222
	}

Michael Yang's avatar
Michael Yang committed
223
	if err := json.Unmarshal(bts, conv); err != nil {
Michael Yang's avatar
Michael Yang committed
224
		return err
225
226
	}

Michael Yang's avatar
bert  
Michael Yang committed
227
228
229
230
231
232
	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

233
	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
Michael Yang's avatar
Michael Yang committed
234
235
	if err != nil {
		return err
236
237
	}

238
	vocabSize := int(cmp.Or(p.VocabSize, p.TextModel.VocabSize))
Patrick Devine's avatar
Patrick Devine committed
239

240
	switch {
Patrick Devine's avatar
Patrick Devine committed
241
	case vocabSize == 0:
242
		slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
243
	case vocabSize > len(t.Vocabulary.Tokens):
244
		slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
Michael Yang's avatar
Michael Yang committed
245
246
247
248
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
249
		}
250
	case vocabSize < len(t.Vocabulary.Tokens):
251
252
253
		slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Vocabulary.Tokens))
		p.VocabSize = uint32(len(t.Vocabulary.Tokens))
		p.TextModel.VocabSize = uint32(len(t.Vocabulary.Tokens))
254
	default:
Michael Yang's avatar
Michael Yang committed
255
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
256
	}
Michael Yang's avatar
Michael Yang committed
257

Michael Yang's avatar
Michael Yang committed
258
	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
Michael Yang's avatar
Michael Yang committed
259
260
	if err != nil {
		return err
261
262
	}

263
	return writeFile(f, conv.KV(t), conv.Tensors(ts))
Michael Yang's avatar
Michael Yang committed
264
265
}

266
func writeFile(f *os.File, kv ggml.KV, ts []*ggml.Tensor) error {
Michael Yang's avatar
Michael Yang committed
267
268
269
270
	for i := range ts {
		ts[i].Shape = slices.Clone(ts[i].Shape)
		slices.Reverse(ts[i].Shape)
	}
271
	return ggml.WriteGGUF(f, kv, ts)
272
}