package convert

import (
	"cmp"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"strings"

	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"

	"github.com/ollama/ollama/llm"
)

// LlamaModel converts a Llama-architecture model to GGUF. It embeds
// ModelData, which carries the model path, parameters, vocabulary, and
// the tensors collected for writing.
type LlamaModel struct {
	ModelData
}

func (m *LlamaModel) GetTensors() error {
	t, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}

	for _, l := range t {
		matches := re.FindAllStringSubmatch(l.Name, -1)
		if len(matches) > 0 {
Patrick Devine's avatar
Patrick Devine committed
38
39
			switch m.Format.(type) {
			case *TorchFormat:
Patrick Devine's avatar
Patrick Devine committed
40
				wt := l.WriterTo.(torchWriterTo)
41
				wt.repacker = m.Repack
Patrick Devine's avatar
Patrick Devine committed
42
				l.WriterTo = wt
Patrick Devine's avatar
Patrick Devine committed
43
			case *SafetensorFormat:
Patrick Devine's avatar
Patrick Devine committed
44
				wt := l.WriterTo.(safetensorWriterTo)
45
				wt.repacker = m.Repack
Patrick Devine's avatar
Patrick Devine committed
46
47
				l.WriterTo = wt
			}
48
49
50
51
52
53
54
		}
		m.Tensors = append(m.Tensors, l)
	}

	return nil
}

Michael Yang's avatar
cleanup  
Michael Yang committed
55
56
func (m *LlamaModel) LoadVocab() (err error) {
	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
Michael Yang's avatar
Michael Yang committed
57
	if errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
cleanup  
Michael Yang committed
58
		return nil
Michael Yang's avatar
Michael Yang committed
59
60
	} else if err != nil {
		return err
Patrick Devine's avatar
Patrick Devine committed
61
	}
Michael Yang's avatar
Michael Yang committed
62

Michael Yang's avatar
cleanup  
Michael Yang committed
63
64
65
66
67
	m.Vocab = &Vocab{}
	for _, t := range ts {
		m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
		m.Vocab.Types = append(m.Vocab.Types, t.Type())
	}
Patrick Devine's avatar
Patrick Devine committed
68

Michael Yang's avatar
cleanup  
Michael Yang committed
69
70
	m.Vocab.Merges = merges
	m.Params.PreTokenizer = pre
71
72
73
	return nil
}

func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
75
76
77
78
79
80
81
82
	kv := llm.KV{
		"general.architecture":                   "llama",
		"general.name":                           m.Name,
		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
		"llama.context_length":                   uint32(m.Params.ContextSize),
		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
		"llama.block_count":                      uint32(m.Params.HiddenLayers),
		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
Patrick Devine's avatar
Patrick Devine committed
83
		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
84
85
86
87
		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
Michael Yang's avatar
cleanup  
Michael Yang committed
88
		"general.file_type":                      uint32(1),
Patrick Devine's avatar
Patrick Devine committed
89
		"tokenizer.ggml.model":                   "gpt2",
90

Michael Yang's avatar
Michael Yang committed
91
		"tokenizer.ggml.pre":        m.Params.PreTokenizer,
92
93
94
95
96
97
		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.unknown_token_id": uint32(0),
Patrick Devine's avatar
Patrick Devine committed
98
99
100
101
102
103
	}

	if len(m.Vocab.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = m.Vocab.Merges
	} else {
		kv["tokenizer.ggml.scores"] = m.Vocab.Scores
104
105
	}

Michael Yang's avatar
Michael Yang committed
106
	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
107
}

// Repack permutes an attention q/k weight tensor into the layout GGUF
// consumers expect; it delegates to llamaRepack with this model's params.
func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
	return llamaRepack(name, m.Params, data, shape)
}

func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
	var dims []int
	for _, dim := range shape {
		if dim != 0 {
			dims = append(dims, int(dim))
		}
	}

	var heads int
Michael Yang's avatar
Michael Yang committed
122
123
	switch {
	case strings.HasSuffix(name, "attn_q.weight"):
124
		heads = params.AttentionHeads
Michael Yang's avatar
Michael Yang committed
125
	case strings.HasSuffix(name, "attn_k.weight"):
126
		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
Michael Yang's avatar
Michael Yang committed
127
	default:
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
		return nil, fmt.Errorf("unknown tensor name: %s", name)
	}

	n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
	if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
		return nil, err
	}

	if err := n.T(0, 2, 1, 3); err != nil {
		return nil, err
	}

	if err := n.Reshape(dims...); err != nil {
		return nil, err
	}

	if err := n.Transpose(); err != nil {
		return nil, err
	}

	ts, err := native.SelectF32(n, 1)
	if err != nil {
		return nil, err
	}

	var f32s []float32
	for _, t := range ts {
		f32s = append(f32s, t...)
	}

	return f32s, nil
}