package convert

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/fs/ggml"
)

// ModelParameters holds the subset of a model's config.json needed to drive
// conversion: the architecture list and the vocabulary sizing fields.
type ModelParameters struct {
	Architectures []string       `json:"architectures"`
	VocabSize     uint32         `json:"vocab_size"`
	TextModel     TextParameters `json:"text_config"`
}

// TextParameters mirrors the nested "text_config" object some (e.g.
// multimodal) configs use to carry the text model's vocabulary size.
type TextParameters struct {
	VocabSize uint32 `json:"vocab_size"`
}

// AdapterParameters holds the fields read from a LoRA adapter's
// adapter_config.json that influence conversion.
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

func (ModelParameters) KV(t *Tokenizer) ggml.KV {
	kv := ggml.KV{
Michael Yang's avatar
Michael Yang committed
37
38
39
40
41
42
43
44
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}
45

46
47
48
49
	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

Michael Yang's avatar
Michael Yang committed
50
51
52
	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}
53

Michael Yang's avatar
Michael Yang committed
54
55
56
57
	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}
58

Michael Yang's avatar
Michael Yang committed
59
	return kv
60
61
}

Michael Yang's avatar
Michael Yang committed
62
func (p AdapterParameters) KV() ggml.KV {
63
64
65
66
67
68
69
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

Michael Yang's avatar
Michael Yang committed
70
	kv := ggml.KV{
71
72
73
74
75
76
77
78
79
80
81
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
Michael Yang's avatar
Michael Yang committed
82
83
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
84
	}
Michael Yang's avatar
Michael Yang committed
85
}
86

Michael Yang's avatar
Michael Yang committed
87
88
// writeFile serializes the model's key-values and tensors as GGUF to ws.
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
	return ggml.WriteGGUF(ws, kv, ts)
}

// writeFile serializes the adapter's key-values and tensors as GGUF to ws.
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
	return ggml.WriteGGUF(ws, kv, ts)
}

// ModelConverter is implemented once per supported model architecture to
// translate that architecture's source layout into GGUF.
type ModelConverter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) ggml.KV
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

// moreParser is an optional extension for converters that must read
// additional configuration files from the input beyond config.json.
type moreParser interface {
	parseMore(fs.FS) error
}

// AdapterConverter is implemented once per supported base architecture to
// translate a LoRA adapter's tensors into GGUF.
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(ggml.KV) ggml.KV
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
	Tensors([]Tensor) []ggml.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// writeFile writes the adapter to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}

// ConvertAdapter writes an Ollama compatible LoRA adapter to the provided
// io.WriteSeeker based on the adapter_config.json and tensor files found in
// fsys. baseKV carries the base model's metadata; its general.architecture
// key selects which adapter converter to use.
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		// Include the rejected architecture, matching ConvertModel's error shape.
		return fmt.Errorf("unsupported architecture %q", arch)
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	// Unmarshal the raw config into the converter as well so any
	// adapter-specific fields it declares are populated.
	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}

Michael Yang's avatar
Michael Yang committed
164
165
166
167
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
168
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
169
	bts, err := fs.ReadFile(fsys, "config.json")
170
	if err != nil {
Michael Yang's avatar
Michael Yang committed
171
		return err
172
173
	}

174
	var p ModelParameters
Michael Yang's avatar
Michael Yang committed
175
	if err := json.Unmarshal(bts, &p); err != nil {
Michael Yang's avatar
Michael Yang committed
176
		return err
177
178
	}

Michael Yang's avatar
Michael Yang committed
179
180
	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
181
182
	}

183
	var conv ModelConverter
Michael Yang's avatar
Michael Yang committed
184
	switch p.Architectures[0] {
185
	case "LlamaForCausalLM":
186
		conv = &llamaModel{}
187
188
	case "Mistral3ForConditionalGeneration":
		conv = &mistral3Model{}
Michael Yang's avatar
Michael Yang committed
189
	case "MixtralForCausalLM":
190
		conv = &mixtralModel{}
Michael Yang's avatar
Michael Yang committed
191
	case "GemmaForCausalLM":
192
		conv = &gemmaModel{}
Michael Yang's avatar
Michael Yang committed
193
	case "Gemma2ForCausalLM":
194
		conv = &gemma2Model{}
Patrick Devine's avatar
Patrick Devine committed
195
196
	case "Gemma3ForCausalLM", "Gemma3ForConditionalGeneration":
		conv = &gemma3Model{Architecture: p.Architectures[0]}
197
	case "Phi3ForCausalLM":
198
		conv = &phi3Model{}
199
200
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
Michael Yang's avatar
bert  
Michael Yang committed
201
	case "BertModel":
202
		conv = &bertModel{}
203
204
	case "CohereForCausalLM":
		conv = &commandrModel{}
Michael Yang's avatar
Michael Yang committed
205
	default:
206
		return fmt.Errorf("unsupported architecture %q", p.Architectures[0])
207
208
	}

Michael Yang's avatar
Michael Yang committed
209
	if err := json.Unmarshal(bts, conv); err != nil {
Michael Yang's avatar
Michael Yang committed
210
		return err
211
212
	}

Michael Yang's avatar
bert  
Michael Yang committed
213
214
215
216
217
218
	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

219
	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
Michael Yang's avatar
Michael Yang committed
220
221
	if err != nil {
		return err
222
223
	}

224
	vocabSize := int(p.VocabSize)
Patrick Devine's avatar
Patrick Devine committed
225
226
227
228
229
	if vocabSize == 0 {
		tVocabSize := int(p.TextModel.VocabSize)
		vocabSize = tVocabSize
	}

230
	switch {
Patrick Devine's avatar
Patrick Devine committed
231
232
	case vocabSize == 0:
		slog.Warn("vocabulary size was not explicitly set by the model", "default size", len(t.Vocabulary.Tokens))
233
234
	case vocabSize > len(t.Vocabulary.Tokens):
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
Michael Yang's avatar
Michael Yang committed
235
236
237
238
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
239
		}
240
241
242
	case vocabSize < len(t.Vocabulary.Tokens):
		return fmt.Errorf("vocabulary is larger than expected '%d' instead of '%d'", len(t.Vocabulary.Tokens), vocabSize)
	default:
Michael Yang's avatar
Michael Yang committed
243
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
244
	}
Michael Yang's avatar
Michael Yang committed
245

Michael Yang's avatar
Michael Yang committed
246
	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
Michael Yang's avatar
Michael Yang committed
247
248
	if err != nil {
		return err
249
250
	}

Michael Yang's avatar
Michael Yang committed
251
	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
252
}