package convert

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"strings"

	"github.com/nlpodyssey/gopickle/pytorch"
	"github.com/pdevine/tensor"
	"github.com/pdevine/tensor/native"
	"github.com/x448/float16"

	"github.com/ollama/ollama/llm"
)

// LlamaModel converts a llama-architecture checkpoint (torch or
// safetensors, per the format handling in GetTensors) to gguf.
// All conversion state lives in the embedded ModelData.
type LlamaModel struct {
	ModelData
}

func llamaTorchLayerHandler(w io.Writer, r torchWriterTo) error {
26

Patrick Devine's avatar
Patrick Devine committed
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
	var tData []uint16
	switch r.storage.(type) {
	case *pytorch.HalfStorage:
		data := r.storage.(*pytorch.HalfStorage).Data
		tData = make([]uint16, len(data))
		for cnt, v := range data {
			tData[cnt] = uint16(float16.Fromfloat32(v))
		}
	case *pytorch.BFloat16Storage:
		data := r.storage.(*pytorch.BFloat16Storage).Data
		tData = make([]uint16, len(data))

		for cnt, v := range data {
			tData[cnt] = uint16(float16.Fromfloat32(v))
		}
	default:
		return fmt.Errorf("unknown storage type for torch")
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
	}

	var err error
	var heads uint32
	if strings.Contains(r.t.Name, "attn_q") {
		heads = uint32(r.params.AttentionHeads)
	} else if strings.Contains(r.t.Name, "attn_k") {
		heads = uint32(r.params.KeyValHeads)
		if heads == 0 {
			heads = uint32(r.params.AttentionHeads)
		}
	} else {
		return fmt.Errorf("unknown layer type")
	}

	tData, err = llamaRepack(tData, int(heads), r.t.Shape)
	if err != nil {
		return err
	}

	if err = binary.Write(w, r.bo, tData); err != nil {
		return err
	}
	return nil
}

// llamaRepack reorders the rotary-interleaved rows of an attention
// weight tensor (given as raw float16 bits with its 2-D shape) into the
// contiguous per-head layout gguf expects.
func llamaRepack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
	t := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
	dims := t.Shape().Clone()

	// Unpack the interleaving: view the rows as (heads, 2, rows/heads/2)
	// groups, swap the two middle axes, then restore the original 2-D
	// shape and transpose for row extraction.
	if err := t.Reshape(heads, 2, dims[0]/heads/2, dims[1]); err != nil {
		return nil, err
	}
	if err := t.T(0, 2, 1, 3); err != nil {
		return nil, err
	}
	if err := t.Reshape(dims...); err != nil {
		return nil, err
	}
	if err := t.Transpose(); err != nil {
		return nil, err
	}

	rows, err := native.SelectU16(t, 1)
	if err != nil {
		return nil, err
	}

	// Flatten the row views back into one contiguous slice.
	out := make([]uint16, 0, len(data))
	for _, row := range rows {
		out = append(out, row...)
	}
	return out, nil
}

func (m *LlamaModel) GetTensors() error {
	t, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}

	for _, l := range t {
		matches := re.FindAllStringSubmatch(l.Name, -1)
		if len(matches) > 0 {
Patrick Devine's avatar
Patrick Devine committed
117
118
			switch m.Format.(type) {
			case *TorchFormat:
Patrick Devine's avatar
Patrick Devine committed
119
120
121
				wt := l.WriterTo.(torchWriterTo)
				wt.handler = llamaTorchLayerHandler
				l.WriterTo = wt
Patrick Devine's avatar
Patrick Devine committed
122
			case *SafetensorFormat:
Patrick Devine's avatar
Patrick Devine committed
123
124
125
126
				wt := l.WriterTo.(safetensorWriterTo)
				wt.handler = mistralLayerHandler
				l.WriterTo = wt
			}
127
128
129
130
131
132
133
		}
		m.Tensors = append(m.Tensors, l)
	}

	return nil
}

func (m *LlamaModel) LoadVocab() (err error) {
	pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
Michael Yang's avatar
Michael Yang committed
136
	if errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
cleanup  
Michael Yang committed
137
		return nil
Michael Yang's avatar
Michael Yang committed
138
139
	} else if err != nil {
		return err
Patrick Devine's avatar
Patrick Devine committed
140
	}
Michael Yang's avatar
Michael Yang committed
141

Michael Yang's avatar
cleanup  
Michael Yang committed
142
143
144
145
146
	m.Vocab = &Vocab{}
	for _, t := range ts {
		m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
		m.Vocab.Types = append(m.Vocab.Types, t.Type())
	}
Patrick Devine's avatar
Patrick Devine committed
147

Michael Yang's avatar
cleanup  
Michael Yang committed
148
149
	m.Vocab.Merges = merges
	m.Params.PreTokenizer = pre
150
151
152
	return nil
}

func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
154
155
156
157
158
159
160
161
	kv := llm.KV{
		"general.architecture":                   "llama",
		"general.name":                           m.Name,
		"llama.vocab_size":                       uint32(len(m.Vocab.Tokens)),
		"llama.context_length":                   uint32(m.Params.ContextSize),
		"llama.embedding_length":                 uint32(m.Params.HiddenSize),
		"llama.block_count":                      uint32(m.Params.HiddenLayers),
		"llama.feed_forward_length":              uint32(m.Params.IntermediateSize),
Patrick Devine's avatar
Patrick Devine committed
162
		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
163
164
165
166
		"llama.rope.dimension_count":             uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
Michael Yang's avatar
cleanup  
Michael Yang committed
167
		"general.file_type":                      uint32(1),
Patrick Devine's avatar
Patrick Devine committed
168
		"tokenizer.ggml.model":                   "gpt2",
169

Michael Yang's avatar
Michael Yang committed
170
		"tokenizer.ggml.pre":        m.Params.PreTokenizer,
171
172
173
174
175
176
		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.unknown_token_id": uint32(0),
Patrick Devine's avatar
Patrick Devine committed
177
178
179
180
181
182
	}

	if len(m.Vocab.Merges) > 0 {
		kv["tokenizer.ggml.merges"] = m.Vocab.Merges
	} else {
		kv["tokenizer.ggml.scores"] = m.Vocab.Scores
183
184
	}

Michael Yang's avatar
Michael Yang committed
185
	return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
186
}