convert.go 6.6 KB
Newer Older
1
2
3
package convert

import (
4
	"cmp"
5
	"encoding/json"
Michael Yang's avatar
Michael Yang committed
6
	"errors"
7
	"fmt"
8
	"io/fs"
9
	"log/slog"
10
	"os"
Michael Yang's avatar
Michael Yang committed
11
	"slices"
Michael Yang's avatar
Michael Yang committed
12
	"strings"
13

Michael Yang's avatar
Michael Yang committed
14
	"github.com/ollama/ollama/fs/ggml"
15
16
)

17
// ModelParameters holds the fields of a model's config.json that the
// conversion entry point itself needs. ConvertModel uses the first entry of
// Architectures to pick a converter and takes the expected vocabulary size
// from VocabSize, falling back to TextModel.VocabSize when the former is zero.
type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`

	// TextModel mirrors the nested "text_config" object of config.json.
	TextModel struct {
		VocabSize uint32 `json:"vocab_size"`
	} `json:"text_config"`
}

26
27
28
29
30
31
32
33
34
35
// AdapterParameters holds the LoRA settings decoded from an adapter's
// adapter_config.json (see ConvertAdapter).
//
// Two alpha values can be present: the top-level "lora_alpha" (Alpha) and the
// nested "lora_parameters.alpha" (LoraParameters.Alpha). AdapterParameters.KV
// prefers the nested value and only falls back to the top-level one when the
// nested value is zero.
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

Michael Yang's avatar
Michael Yang committed
36
37
func (ModelParameters) KV(t *Tokenizer) ggml.KV {
	kv := ggml.KV{
Michael Yang's avatar
Michael Yang committed
38
39
40
41
42
43
44
45
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}
46

47
48
49
50
	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

Michael Yang's avatar
Michael Yang committed
51
52
53
	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}
54

Michael Yang's avatar
Michael Yang committed
55
56
57
58
	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}
59

Michael Yang's avatar
Michael Yang committed
60
	return kv
61
62
}

Michael Yang's avatar
Michael Yang committed
63
func (p AdapterParameters) KV() ggml.KV {
64
65
66
67
68
69
70
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

Michael Yang's avatar
Michael Yang committed
71
	kv := ggml.KV{
72
73
74
75
76
77
78
79
80
81
82
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
Michael Yang's avatar
Michael Yang committed
83
84
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
85
	}
Michael Yang's avatar
Michael Yang committed
86
}
87

88
type ModelConverter interface {
Michael Yang's avatar
Michael Yang committed
89
	// KV maps parameters to LLM key-values
Michael Yang's avatar
Michael Yang committed
90
	KV(*Tokenizer) ggml.KV
Michael Yang's avatar
Michael Yang committed
91
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
92
	Tensors([]Tensor) []*ggml.Tensor
Michael Yang's avatar
Michael Yang committed
93
94
95
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
Michael Yang's avatar
Michael Yang committed
96

Michael Yang's avatar
Michael Yang committed
97
98
	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
99
100
}

Michael Yang's avatar
bert  
Michael Yang committed
101
102
103
104
// moreParser is an optional interface a model converter may implement.
// ConvertModel checks for it after decoding config.json into the converter
// and, when present, calls parseMore with the model's file system before the
// tokenizer is parsed; a returned error aborts the conversion.
type moreParser interface {
	parseMore(fs.FS) error
}

105
106
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
Michael Yang's avatar
Michael Yang committed
107
	KV(ggml.KV) ggml.KV
108
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
109
	Tensors([]Tensor) []*ggml.Tensor
110
111
112
113
114
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
}

115
// ConvertAdapter converts the LoRA adapter found in fsys and writes the
// result to f.
//
// It reads adapter_config.json from fsys, selects an adapter converter from
// the "general.architecture" entry of baseKV, parses the adapter tensors using
// the converter's name replacements, decodes the config into the converter and
// finally writes the converter's key-values and tensors to f.
//
// An error is returned when the config cannot be read or decoded, when baseKV
// carries no architecture, when the architecture has no adapter converter, or
// when parsing the tensors or writing the output fails.
func ConvertAdapter(fsys fs.FS, f *os.File, baseKV ggml.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	// p is not read again below, so this decode only serves to reject a
	// malformed config early. NOTE(review): confirm that is the intent.
	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		// Name the offending architecture, as ConvertModel does.
		return fmt.Errorf("unsupported architecture %v", arch)
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return writeFile(f, conv.KV(baseKV), conv.Tensors(ts))
}

Michael Yang's avatar
Michael Yang committed
153
154
155
156
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
157
func ConvertModel(fsys fs.FS, f *os.File) error {
158
	bts, err := fs.ReadFile(fsys, "config.json")
159
	if err != nil {
Michael Yang's avatar
Michael Yang committed
160
		return err
161
162
	}

163
	var p ModelParameters
Michael Yang's avatar
Michael Yang committed
164
	if err := json.Unmarshal(bts, &p); err != nil {
Michael Yang's avatar
Michael Yang committed
165
		return err
166
167
	}

Michael Yang's avatar
Michael Yang committed
168
169
	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
170
171
	}

172
	var conv ModelConverter
Michael Yang's avatar
Michael Yang committed
173
	switch p.Architectures[0] {
174
	case "LlamaForCausalLM":
175
		conv = &llamaModel{}
176
177
	case "MllamaForConditionalGeneration":
		conv = &mllamaModel{}
Michael Yang's avatar
llama4  
Michael Yang committed
178
179
	case "Llama4ForConditionalGeneration":
		conv = &llama4Model{}
180
181
	case "Mistral3ForConditionalGeneration":
		conv = &mistral3Model{}
Michael Yang's avatar
Michael Yang committed
182
	case "MixtralForCausalLM":
183
		conv = &mixtralModel{}
Michael Yang's avatar
Michael Yang committed
184
	case "GemmaForCausalLM":
185
		conv = &gemmaModel{}
Michael Yang's avatar
Michael Yang committed
186
	case "Gemma2ForCausalLM":
187
		conv = &gemma2Model{}
Patrick Devine's avatar
Patrick Devine committed
188
189
	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
		conv = &gemma3Model{Architecture: p.Architectures[0]}
190
	case "Phi3ForCausalLM":
191
		conv = &phi3Model{}
192
193
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
194
195
	case "Qwen2_5_VLForConditionalGeneration":
		conv = &qwen25VLModel{}
Michael Yang's avatar
bert  
Michael Yang committed
196
	case "BertModel":
197
		conv = &bertModel{}
198
199
	case "CohereForCausalLM":
		conv = &commandrModel{}
Michael Yang's avatar
Michael Yang committed
200
	default:
201
		return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
202
203
	}

Michael Yang's avatar
Michael Yang committed
204
	if err := json.Unmarshal(bts, conv); err != nil {
Michael Yang's avatar
Michael Yang committed
205
		return err
206
207
	}

Michael Yang's avatar
bert  
Michael Yang committed
208
209
210
211
212
213
	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

214
	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
Michael Yang's avatar
Michael Yang committed
215
216
	if err != nil {
		return err
217
218
	}

219
	vocabSize := int(cmp.Or(p.VocabSize, p.TextModel.VocabSize))
Patrick Devine's avatar
Patrick Devine committed
220

221
	switch {
Patrick Devine's avatar
Patrick Devine committed
222
	case vocabSize == 0:
223
		slog.Debug("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
224
	case vocabSize > len(t.Vocabulary.Tokens):
225
		slog.Debug("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
Michael Yang's avatar
Michael Yang committed
226
227
228
229
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
230
		}
231
	case vocabSize < len(t.Vocabulary.Tokens):
232
233
234
		slog.Debug("vocabulary is larger than expected", "want", vocabSize, "got", len(t.Vocabulary.Tokens))
		p.VocabSize = uint32(len(t.Vocabulary.Tokens))
		p.TextModel.VocabSize = uint32(len(t.Vocabulary.Tokens))
235
	default:
Michael Yang's avatar
Michael Yang committed
236
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
237
	}
Michael Yang's avatar
Michael Yang committed
238

Michael Yang's avatar
Michael Yang committed
239
	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
Michael Yang's avatar
Michael Yang committed
240
241
	if err != nil {
		return err
242
243
	}

244
	return writeFile(f, conv.KV(t), conv.Tensors(ts))
Michael Yang's avatar
Michael Yang committed
245
246
}

247
func writeFile(f *os.File, kv ggml.KV, ts []*ggml.Tensor) error {
Michael Yang's avatar
Michael Yang committed
248
249
250
251
	for i := range ts {
		ts[i].Shape = slices.Clone(ts[i].Shape)
		slices.Reverse(ts[i].Shape)
	}
252
	return ggml.WriteGGUF(f, kv, ts)
253
}