package convert

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"

	"github.com/ollama/ollama/llm"
)

// Parameters holds the model configuration fields common to every
// architecture, decoded from the model's config.json.
type Parameters struct {
	Architectures []string `json:"architectures"`
	VocabSize     uint32   `json:"vocab_size"`
}
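
// These fields are decoded from the model's config.json, e.g. (a hypothetical
// minimal example):
//
//	{
//		"architectures": ["LlamaForCausalLM"],
//		"vocab_size": 32000
//	}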

// KV returns the base GGUF key-value metadata shared by all converters,
// populated from the parsed tokenizer.
func (Parameters) KV(t *Tokenizer) llm.KV {
	kv := llm.KV{
		"general.file_type":            uint32(1),
		"general.quantization_version": uint32(2),
		"tokenizer.ggml.pre":           t.Pre,
		"tokenizer.ggml.model":         t.Vocabulary.Model,
		"tokenizer.ggml.tokens":        t.Vocabulary.Tokens,
		"tokenizer.ggml.scores":        t.Vocabulary.Scores,
		"tokenizer.ggml.token_type":    t.Vocabulary.Types,
	}

	if t.Template != "" {
		kv["tokenizer.chat_template"] = t.Template
	}

	for _, sv := range t.SpecialVocabulary {
		kv[fmt.Sprintf("tokenizer.ggml.%s_token_id", sv.Key())] = uint32(sv.ID)
		kv[fmt.Sprintf("tokenizer.ggml.add_%s_token", sv.Key())] = sv.AddToken
	}

	return kv
}

// specialTokenTypes returns the default set of special token types to look
// for when parsing the tokenizer.
func (Parameters) specialTokenTypes() []string {
	return []string{
		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
	}
}

// writeFile writes the converted model to ws in GGUF format.
func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
	return llm.WriteGGUF(ws, kv, ts)
}

type Converter interface {
	// KV maps parameters to LLM key-values
	KV(*Tokenizer) llm.KV
	// Tensors maps input tensors to LLM tensors. Model-specific modifications can be made here.
	Tensors([]Tensor) []llm.Tensor

	// tensorName returns the LLM tensor name for a specific input name
	tensorName(string) string
	// specialTokenTypes returns any special token types the model uses
	specialTokenTypes() []string
	// writeFile writes the converted model to the provided io.WriteSeeker
	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
}
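
// As an illustrative sketch (not part of this package), a converter for a
// hypothetical "FooForCausalLM" architecture could embed Parameters to pick
// up the default specialTokenTypes and writeFile implementations, then
// extend KV with its own fields; the names below are placeholders:
//
//	type foo struct {
//		Parameters
//		HiddenSize uint32 `json:"hidden_size"`
//	}
//
//	func (p *foo) KV(t *Tokenizer) llm.KV {
//		kv := p.Parameters.KV(t)
//		kv["general.architecture"] = "foo"
//		kv["foo.embedding_length"] = p.HiddenSize
//		return kv
//	}
//
// Tensors and tensorName, which map input tensor names to their GGUF
// equivalents, would complete the interface.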

// Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations
// and files it finds in the input path.
// Supported input model formats include safetensors.
// Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
func Convert(fsys fs.FS, ws io.WriteSeeker) error {
	bts, err := fs.ReadFile(fsys, "config.json")
	if err != nil {
		return err
	}

	var p Parameters
	if err := json.Unmarshal(bts, &p); err != nil {
		return err
	}

	if len(p.Architectures) < 1 {
		return errors.New("unknown architecture")
	}

	var conv Converter
	switch p.Architectures[0] {
	case "LlamaForCausalLM", "MistralForCausalLM":
		conv = &llama{}
	case "MixtralForCausalLM":
		conv = &mixtral{}
	case "GemmaForCausalLM":
		conv = &gemma{}
	default:
		return errors.New("unsupported architecture")
	}

	if err := json.Unmarshal(bts, conv); err != nil {
		return err
	}

	t, err := parseTokenizer(fsys, conv.specialTokenTypes())
	if err != nil {
		return err
	}

	if vocabSize := int(p.VocabSize); vocabSize > len(t.Vocabulary.Tokens) {
		slog.Warn("vocabulary is smaller than expected, padding with dummy tokens", "expect", p.VocabSize, "actual", len(t.Vocabulary.Tokens))
		for i := range vocabSize - len(t.Vocabulary.Tokens) {
			t.Vocabulary.Tokens = append(t.Vocabulary.Tokens, fmt.Sprintf("[PAD%d]", i))
			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
		}
	} else {
		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
	}

	ts, err := parseTensors(fsys)
	if err != nil {
		return err
	}

	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
}
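
// A minimal usage sketch (paths are placeholders): os.DirFS satisfies fs.FS
// and *os.File satisfies io.WriteSeeker, so a converted model can be written
// with:
//
//	f, err := os.Create("model.gguf")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer f.Close()
//
//	if err := Convert(os.DirFS("/path/to/model"), f); err != nil {
//		log.Fatal(err)
//	}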