"networks/MPN.py" did not exist on "33c0366d219c556fdabff8e601eeb2f91650db1f"
convert.go 6.01 KB
Newer Older
mashun1's avatar
v1  
mashun1 committed
1
2
3
4
package convert

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"strings"

	"github.com/ollama/ollama/llm"
)

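// ModelParameters holds the subset of a model's config.json that the
// converter itself needs: the architecture list that selects a
// ModelConverter and the vocabulary size used to validate the converted
// tokenizer.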
type ModelParameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`
}

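// AdapterParameters holds the subset of an adapter_config.json that the
// converter needs. Alpha is the top-level lora_alpha; LoraParameters carries
// the per-adapter rank, alpha, and scale, and its alpha takes precedence
// when set (see KV).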
type AdapterParameters struct {
	Alpha          uint32 `json:"lora_alpha"`
	LoraLayers     uint32 `json:"lora_layers"`
	LoraParameters struct {
		Rank  uint32  `json:"rank"`
		Alpha float32 `json:"alpha"`
		Scale float32 `json:"scale"`
	} `json:"lora_parameters"`
}

// KV builds the GGUF key-value metadata common to every supported model
// architecture from the parsed tokenizer.
func (ModelParameters) KV(t *Tokenizer) llm.KV {
	kv := llm.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}

	if len(t.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = t.Merges
	}

	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}

	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}

	return kv
}

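// KV builds the GGUF key-value metadata for a LoRA adapter, falling back to
// the top-level lora_alpha when lora_parameters.alpha is unset.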
func (p AdapterParameters) KV() llm.KV {
	var alpha float32
	if p.LoraParameters.Alpha == 0 {
		alpha = float32(p.Alpha)
	} else {
		alpha = p.LoraParameters.Alpha
	}

	kv := llm.KV{
		"adapter.lora.alpha": alpha,
		"adapter.type":       "lora",
		"general.file_type":  uint32(1),
		"general.type":       "adapter",
		"general.version":    "v0.2",
	}

	return kv
}

func (ModelParameters) specialTokenTypes() []string {
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
	}
}

func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

type ModelConverter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) llm.KV
	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

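// moreParser is implemented by converters that need to parse additional
// files from the input before conversion (see ConvertModel).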
type moreParser interface {
	parseMore(fs.FS) error
}

type AdapterConverter interface {
	// KV maps parameters to LLM key-values
	KV(llm.KV) llm.KV
	// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
	Tensors([]Tensor) []llm.Tensor
	// Replacements returns a list of string pairs to replace in tensor names.
	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
	Replacements() []string

	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}

// ConvertAdapter writes a GGUF-formatted LoRA adapter to ws from the
// adapter_config.json and tensor files found in fsys. baseKV must carry the
// base model's key-value metadata; its general.architecture selects the
// adapter converter.
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
	bts, err := fs.ReadFile(fsys, "adapter_config.json")
	if err != nil {
		return err
	}

	var p AdapterParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	arch, ok := baseKV["general.architecture"]
	if !ok {
		return errors.New("architecture not set for the base model")
	}

	var conv AdapterConverter
	switch arch {
	case "llama":
		conv = &llamaAdapter{}
	case "gemma2":
		conv = &gemma2Adapter{}
	default:
		return errors.New("unsupported architecture")
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	// Unmarshal adapter_config.json a second time, now into the
	// architecture-specific converter so it can pick up its own fields.
	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
}

// ConvertModel writes an Ollama-compatible model to the provided io.WriteSeeker
// based on configurations and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
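//
// A minimal usage sketch (illustrative only; the paths here are hypothetical):
//
//	fsys := os.DirFS("/path/to/hf-model") // contains config.json, *.safetensors, tokenizer.json
//	f, err := os.Create("model.gguf")     // *os.File satisfies io.WriteSeeker
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//	return ConvertModel(fsys, f)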
func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
	bts, err := fs.ReadFile(fsys, "config.json")
	if err != nil {
		return err
	}

	var p ModelParameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
	}

	var conv ModelConverter
	switch p.Architectures[0] {
	case "LlamaForCausalLM", "MistralForCausalLM":
		conv = &llamaModel{}
	case "MixtralForCausalLM":
		conv = &mixtralModel{}
	case "GemmaForCausalLM":
		conv = &gemmaModel{}
	case "Gemma2ForCausalLM":
		conv = &gemma2Model{}
	case "Phi3ForCausalLM":
		conv = &phi3Model{}
	case "BertModel":
		conv = &bertModel{}
	default:
		return errors.New("unsupported architecture")
	}

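	// Unmarshal config.json a second time, now into the selected
	// architecture-specific converter so it can pick up its own fields.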
	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	if t, ok := conv.(moreParser); ok {
		if err := t.parseMore(fsys); err != nil {
			return err
		}
	}

	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
	if err != nil {
		return err
	}

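	// Reconcile the declared vocab_size with the tokens the tokenizer
	// actually provides: pad with [PADn] placeholders when the tokenizer
	// has fewer tokens than declared, and fail when it has more.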
	vocabSize := int(p.VocabSize)
	switch {
	case vocabSize > len(t.Vocabulary.Tokens):
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", vocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
		}
	case vocabSize < len(t.Vocabulary.Tokens):
		return fmt.Errorf("vocabulary is larger than expected: got %d tokens, expected %d", len(t.Vocabulary.Tokens), vocabSize)
	default:
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
	}

	ts, err := parseTensors(fsys, strings.NewReplacer(conv.Replacements()...))
	if err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
}