convert.go 5.84 KB
Newer Older
1
2
3
4
package convert

import (
	"encoding/json"
Michael Yang's avatar
Michael Yang committed
5
	"errors"
6
	"fmt"
Michael Yang's avatar
Michael Yang committed
7
	"io"
8
	"io/fs"
9
	"log/slog"
Michael Yang's avatar
Michael Yang committed
10
	"strings"
11

12
	"github.com/ollama/ollama/llm"
13
14
)

15
// ModelParameters holds the config.json fields that ConvertModel decodes
// before it knows which architecture-specific converter to use.
type ModelParameters struct {
	// Architectures is decoded from the "architectures" key. ConvertModel
	// selects the converter from its first entry.
	Architectures []string `json:"architectures"`
	// VocabSize is decoded from the "vocab_size" key. ConvertModel pads the
	// tokenizer vocabulary up to this size when the tokenizer has fewer tokens.
	VocabSize uint32 `json:"vocab_size"`
}

20
21
22
23
24
25
26
27
28
29
30
// AdapterParameters holds the adapter settings that ConvertAdapter decodes
// from adapter_config.json.
//
// Alpha is read from the top-level "lora_alpha" key and LoraParameters.Alpha
// from the "alpha" key nested under "lora_parameters". KV uses the nested
// value when it is non-zero and falls back to Alpha otherwise.
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

func (ModelParameters) KV(t *Tokenizer) llm.KV {
Michael Yang's avatar
Michael Yang committed
31
32
33
34
35
36
37
38
39
	kv := llm.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}
40

41
42
43
44
	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

Michael Yang's avatar
Michael Yang committed
45
46
47
	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}
48

Michael Yang's avatar
Michael Yang committed
49
50
51
52
	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}
53

Michael Yang's avatar
Michael Yang committed
54
	return kv
55
56
}

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// KV builds the adapter key-values. The alpha nested under lora_parameters is
// used when it is non-zero; otherwise the top-level lora_alpha is converted to
// float32 and used instead.
func (p AdapterParameters) KV() llm.KV {
	alpha := p.LoraParameters.Alpha
	if alpha == 0 {
		alpha = float32(p.Alpha)
	}

	return llm.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}
}

func (ModelParameters) specialTokenTypes() []string {
Michael Yang's avatar
Michael Yang committed
77
78
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
79
	}
Michael Yang's avatar
Michael Yang committed
80
}
81

82
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
Michael Yang's avatar
Michael Yang committed
83
	return llm.WriteGGUF(ws, kv, ts)
84
85
}

86
87
88
89
90
// writeFile writes the key-values kv and tensors ts to ws by delegating to
// llm.WriteGGUF, returning whatever error that call reports.
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
Michael Yang's avatar
Michael Yang committed
91
92
93
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) llm.KV
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
Michael Yang's avatar
Michael Yang committed
94
	Tensors([]Tensor) []llm.Tensor
Michael Yang's avatar
Michael Yang committed
95
96
97
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string
Michael Yang's avatar
Michael Yang committed
98

Michael Yang's avatar
Michael Yang committed
99
100
	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
Michael Yang's avatar
Michael Yang committed
101
	// writeFile writes the model to the provided io.WriteSeeker
Michael Yang's avatar
Michael Yang committed
102
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
103
104
}

Michael Yang's avatar
bert  
Michael Yang committed
105
106
107
108
// moreParser is an optional interface for converters. When the converter
// selected by ConvertModel implements it, parseMore is called with the input
// file system after config.json has been decoded and before the tokenizer is
// parsed; a non-nil error aborts the conversion.
type moreParser interface {
	parseMore(fs.FS) error
}

109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// AdapterConverter is implemented by the architecture-specific adapter types
// that ConvertAdapter chooses between based on the base model's architecture.
type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(llm.KV) llm.KV
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// writeFile writes the adapter key-values and tensors to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

// ConvertAdapter converts the adapter found in fsys and writes it to ws.
//
// It reads adapter_config.json from fsys, selects the converter from the
// "general.architecture" entry of baseKV (the base model's key-values), parses
// the adapter tensors using the converter's name replacements, and writes the
// result through the converter's writeFile.
//
// An error is returned when adapter_config.json cannot be read or decoded,
// when baseKV has no "general.architecture" entry, when that architecture has
// no adapter converter, or when parsing tensors or writing fails.
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	// Decode into the shared parameters first so that a config which does not
	// match them is rejected before any architecture-specific work happens.
	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		// Name the rejected value so the caller can see what was not supported.
		return fmt.Errorf("unsupported architecture %v", arch)
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	// Decode the same config into the selected converter to populate its fields.
	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}

Michael Yang's avatar
Michael Yang committed
159
160
161
162
// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model.
163
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
164
	bts, err := fs.ReadFile(fsys, "config.json")
165
	if err != nil {
Michael Yang's avatar
Michael Yang committed
166
		return err
167
168
	}

169
	var p ModelParameters
Michael Yang's avatar
Michael Yang committed
170
	if err := json.Unmarshal(bts, &p); err != nil {
Michael Yang's avatar
Michael Yang committed
171
		return err
172
173
	}

Michael Yang's avatar
Michael Yang committed
174
175
	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
176
177
	}

178
	var conv ModelConverter
Michael Yang's avatar
Michael Yang committed
179
180
	switch p.Architectures[0] {
	case "LlamaForCausalLM", "MistralForCausalLM":
181
		conv = &llamaModel{}
Michael Yang's avatar
Michael Yang committed
182
	case "MixtralForCausalLM":
183
		conv = &mixtralModel{}
Michael Yang's avatar
Michael Yang committed
184
	case "GemmaForCausalLM":
185
		conv = &gemmaModel{}
Michael Yang's avatar
Michael Yang committed
186
	case "Gemma2ForCausalLM":
187
		conv = &gemma2Model{}
188
	case "Phi3ForCausalLM":
189
		conv = &phi3Model{}
Michael Yang's avatar
bert  
Michael Yang committed
190
	case "BertModel":
191
		conv = &bertModel{}
Michael Yang's avatar
Michael Yang committed
192
193
	default:
		return errors.New("unsupported architecture")
194
195
	}

Michael Yang's avatar
Michael Yang committed
196
	if err := json.Unmarshal(bts, conv); err != nil {
Michael Yang's avatar
Michael Yang committed
197
		return err
198
199
	}

Michael Yang's avatar
bert  
Michael Yang committed
200
201
202
203
204
205
	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

206
	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
Michael Yang's avatar
Michael Yang committed
207
208
	if err != nil {
		return err
209
210
	}

Michael Yang's avatar
Michael Yang committed
211
212
213
214
215
216
	if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
217
		}
Michael Yang's avatar
Michael Yang committed
218
219
	} else {
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
220
	}
Michael Yang's avatar
Michael Yang committed
221

Michael Yang's avatar
Michael Yang committed
222
	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
Michael Yang's avatar
Michael Yang committed
223
224
	if err != nil {
		return err
225
226
	}

Michael Yang's avatar
Michael Yang committed
227
	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
228
}