package convert

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/llm"
)

type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`
}

type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

func (ModelParameters) KV(t *Tokenizer) llm.KV {
	kv := llm.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}

	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}

	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}

	return kv
}

func (p AdapterParameters) KV() llm.KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

	kv := llm.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
	}
}

func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) llm.KV
	// Tensors maps input tensors to LLM tensors. Model-specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}
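// Illustrative sketch only (not one of the converters shipped in this
// package): a hypothetical architecture could satisfy ModelConverter by
// embedding ModelParameters, which already supplies specialTokenTypes and
// writeFile, and adding the architecture-specific pieces. The "example.*"
// keys and the hidden_size JSON field below are assumptions for illustration.
//
//	type exampleModel struct {
//		ModelParameters
//		HiddenSize uint32 `json:"hidden_size"`
//	}
//
//	func (m *exampleModel) KV(t *Tokenizer) llm.KV {
//		kv := m.ModelParameters.KV(t)
//		kv["general.architecture"] = "example"
//		kv["example.embedding_length"] = m.HiddenSize
//		return kv
//	}
//
//	func (m *exampleModel) Tensors(ts []Tensor) []llm.Tensor {
//		out := make([]llm.Tensor, 0, len(ts))
//		for _, t := range ts {
//			out = append(out, llm.Tensor{Name: t.Name(), Kind: t.Kind(), Shape: t.Shape(), WriterTo: t})
//		}
//		return out
//	}
//
//	func (m *exampleModel) Replacements() []string {
//		return []string{"model.layers", "blk"}
//	}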

type moreParser interface {
	parseMore(fs.FS) error
}

type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(llm.KV) llm.KV
	// Tensors maps input tensors to LLM tensors. Adapter-specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		return errors.New("unsupported architecture")
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}
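// For reference, a caller might drive ConvertAdapter roughly like this
// (illustrative only; the paths, error handling, and how baseKV is read from
// the base model are assumptions, not part of this package):
//
//	fsys := os.DirFS("/path/to/adapter") // directory with adapter_config.json and its tensors
//	out, _ := os.Create("adapter.gguf")  // destination io.WriteSeeker
//	defer out.Close()
//	err := ConvertAdapter(fsys, out, baseKV) // baseKV: llm.KV taken from the base model's GGUF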

// ConvertModel writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
	bts, err := fs.ReadFile(fsys, "config.json")
	if err != nil {
		return err
	}

	var p ModelParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
	}

	var conv ModelConverter
	switch p.Architectures[0] {
	case "LlamaForCausalLM", "MistralForCausalLM":
		conv = &llamaModel{}
	case "MixtralForCausalLM":
		conv = &mixtralModel{}
	case "GemmaForCausalLM":
		conv = &gemmaModel{}
	case "Gemma2ForCausalLM":
		conv = &gemma2Model{}
	case "Phi3ForCausalLM":
		conv = &phi3Model{}
	case "Qwen2ForCausalLM":
		conv = &qwen2Model{}
	case "BertModel":
		conv = &bertModel{}
	default:
		return errors.New("unsupported architecture")
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
	if err != nil {
		return err
	}

	vocabSize := int(p.VocabSize)
	switch {
	case vocabSize > len(t.Vocabulary.Tokens):
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
		}
	case vocabSize < len(t.Vocabulary.Tokens):
		return fmt.Errorf("vocabulary is larger than expected '%d' instead of '%d'", len(t.Vocabulary.Tokens), vocabSize)
	default:
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
}
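// For reference, ConvertModel might be driven roughly like this (illustrative
// only; paths and error handling are assumptions):
//
//	fsys := os.DirFS("/path/to/hf-model") // directory with config.json, tokenizer.json, *.safetensors
//	out, _ := os.Create("model.gguf")     // destination io.WriteSeeker
//	defer out.Close()
//	err := ConvertModel(fsys, out)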