gguf.go 9.33 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
5
6
7
package llm

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
8
9

	"github.com/jmorganca/ollama/format"
Bruce MacDonald's avatar
Bruce MacDonald committed
10
11
12
)

// containerGGUF holds the header of a GGUF file: the byte order used to
// decode it, the format version, and the version-specific entry counts.
type containerGGUF struct {
	// bo is the byte order handed to every binary.Read call.
	bo binary.ByteOrder

	// Version is the format version read from the start of the header.
	Version uint32

	// V1 is the header layout read when Version is 1 (32-bit counts).
	V1 struct {
		NumTensor, NumKV uint32
	}

	// V2 is the header layout read for every other version (64-bit counts).
	V2 struct {
		NumTensor, NumKV uint64
	}
}

// Name reports the container format identifier, which is always "gguf".
func (*containerGGUF) Name() string {
	const name = "gguf"
	return name
}

Michael Yang's avatar
Michael Yang committed
32
33
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
	binary.Read(rso, c.bo, &c.Version)
Bruce MacDonald's avatar
Bruce MacDonald committed
34
35
36

	switch c.Version {
	case 1:
Michael Yang's avatar
Michael Yang committed
37
		binary.Read(rso, c.bo, &c.V1)
Bruce MacDonald's avatar
Bruce MacDonald committed
38
	default:
Michael Yang's avatar
Michael Yang committed
39
		binary.Read(rso, c.bo, &c.V2)
Bruce MacDonald's avatar
Bruce MacDonald committed
40
41
42
	}

	model := newGGUFModel(c)
Michael Yang's avatar
Michael Yang committed
43
	if err := model.Decode(rso); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
		return nil, err
	}

	return model, nil
}

// Type codes used in the GGUF key-value section. Each metadata value (and
// each array) is preceded by one of these codes; the numeric values follow
// declaration order starting at zero.
const (
	ggufTypeUint8   = uint32(iota) // 0
	ggufTypeInt8                   // 1
	ggufTypeUint16                 // 2
	ggufTypeInt16                  // 3
	ggufTypeUint32                 // 4
	ggufTypeInt32                  // 5
	ggufTypeFloat32                // 6
	ggufTypeBool                   // 7
	ggufTypeString                 // 8
	ggufTypeArray                  // 9
	ggufTypeUint64                 // 10
	ggufTypeInt64                  // 11
	ggufTypeFloat64                // 12
)

// kv holds decoded GGUF metadata entries, keyed by the metadata key string.
// Values are stored with the Go type matching their GGUF type code.
type kv map[string]any

68
69
70
71
72
73
74
75
76
77
// tensor describes one entry of the GGUF tensor table.
type tensor struct {
	name string

	// kind is the numeric tensor type code read from the file.
	kind uint32

	// offset is read from the file; size is computed while decoding from the
	// element count and the block layout of kind.
	offset, size uint64

	// shape is the number of elements in each dimension
	shape [4]uint64
}

Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
type ggufModel struct {
	*containerGGUF
80

Bruce MacDonald's avatar
Bruce MacDonald committed
81
	kv
82
83
84
	tensors []tensor

	parameters uint64
Bruce MacDonald's avatar
Bruce MacDonald committed
85
86
87
88
89
90
91
92
93
}

// newGGUFModel returns a ggufModel bound to container with an empty,
// ready-to-write metadata map.
func newGGUFModel(container *containerGGUF) *ggufModel {
	m := new(ggufModel)
	m.containerGGUF = container
	m.kv = kv{}
	return m
}

94
95
96
97
98
99
100
101
// NumTensor returns the tensor count from the header, widening the 32-bit
// version 1 field to uint64.
func (llm *ggufModel) NumTensor() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumTensor)
	default:
		return llm.V2.NumTensor
	}
}

Bruce MacDonald's avatar
Bruce MacDonald committed
102
103
104
105
106
107
108
109
// NumKV returns the key-value count from the header, widening the 32-bit
// version 1 field to uint64.
func (llm *ggufModel) NumKV() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumKV)
	default:
		return llm.V2.NumKV
	}
}

Michael Yang's avatar
Michael Yang committed
110
func (llm *ggufModel) ModelFamily() string {
Michael Yang's avatar
Michael Yang committed
111
	if t, ok := llm.kv["general.architecture"].(string); ok {
Michael Yang's avatar
Michael Yang committed
112
		return t
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
	}

Michael Yang's avatar
Michael Yang committed
115
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
116
117
}

Michael Yang's avatar
Michael Yang committed
118
func (llm *ggufModel) ModelType() string {
119
120
121
122
	if llm.parameters > 0 {
		return format.HumanNumber(llm.parameters)
	}

Michael Yang's avatar
Michael Yang committed
123
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
124
125
}

Michael Yang's avatar
Michael Yang committed
126
func (llm *ggufModel) FileType() string {
Michael Yang's avatar
Michael Yang committed
127
	if t, ok := llm.kv["general.file_type"].(uint32); ok {
Michael Yang's avatar
Michael Yang committed
128
		return fileType(t)
Bruce MacDonald's avatar
Bruce MacDonald committed
129
130
	}

Michael Yang's avatar
Michael Yang committed
131
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
132
133
}

Michael Yang's avatar
Michael Yang committed
134
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
135
	// decode key-values
Bruce MacDonald's avatar
Bruce MacDonald committed
136
	for i := 0; uint64(i) < llm.NumKV(); i++ {
Michael Yang's avatar
Michael Yang committed
137
		k, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
138
139
140
141
		if err != nil {
			return err
		}

Michael Yang's avatar
Michael Yang committed
142
		vtype := llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
143
144
145
146

		var v any
		switch vtype {
		case ggufTypeUint8:
Michael Yang's avatar
Michael Yang committed
147
			v = llm.readU8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
148
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
149
			v = llm.readI8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
150
		case ggufTypeUint16:
Michael Yang's avatar
Michael Yang committed
151
			v = llm.readU16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
152
		case ggufTypeInt16:
Michael Yang's avatar
Michael Yang committed
153
			v = llm.readI16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
154
		case ggufTypeUint32:
Michael Yang's avatar
Michael Yang committed
155
			v = llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
156
		case ggufTypeInt32:
Michael Yang's avatar
Michael Yang committed
157
			v = llm.readI32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
158
		case ggufTypeUint64:
Michael Yang's avatar
Michael Yang committed
159
			v = llm.readU64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
160
		case ggufTypeInt64:
Michael Yang's avatar
Michael Yang committed
161
			v = llm.readI64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
162
		case ggufTypeFloat32:
Michael Yang's avatar
Michael Yang committed
163
			v = llm.readF32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
164
		case ggufTypeFloat64:
Michael Yang's avatar
Michael Yang committed
165
			v = llm.readF64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
166
		case ggufTypeBool:
Michael Yang's avatar
Michael Yang committed
167
			v = llm.readBool(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
168
		case ggufTypeString:
Michael Yang's avatar
Michael Yang committed
169
			s, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
170
171
172
173
174
175
			if err != nil {
				return err
			}

			v = s
		case ggufTypeArray:
Michael Yang's avatar
Michael Yang committed
176
			a, err := llm.readArray(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
177
178
179
180
181
182
183
184
185
186
187
188
			if err != nil {
				return err
			}

			v = a
		default:
			return fmt.Errorf("invalid type: %d", vtype)
		}

		llm.kv[k] = v
	}

189
190
	// decode tensors
	for i := 0; uint64(i) < llm.NumTensor(); i++ {
Michael Yang's avatar
Michael Yang committed
191
		name, err := llm.readString(rso)
192
		if err != nil {
193
194
195
			return err
		}

Michael Yang's avatar
Michael Yang committed
196
		// dims is the number of dimensions in the tensor
Michael Yang's avatar
Michael Yang committed
197
		dims := llm.readU32(rso)
198
199
200

		shape := [4]uint64{1, 1, 1, 1}
		for i := 0; uint32(i) < dims; i++ {
Michael Yang's avatar
Michael Yang committed
201
			shape[i] = llm.readU64(rso)
202
203
		}

Michael Yang's avatar
Michael Yang committed
204
205
		kind := llm.readU32(rso)
		offset := llm.readU64(rso)
206
207
208
209
210
211
212
213
214
215

		var blockSize uint64
		switch {
		case kind < 2:
			blockSize = 1
		case kind < 10:
			blockSize = 32
		default:
			blockSize = 256
		}
216

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
		var typeSize uint64
		switch kind {
		case 0: // FP32
			typeSize = 4
		case 1: // FP16
			typeSize = 2
		case 2: // Q4_0
			typeSize = 2 + blockSize/2
		case 3: // Q4_1
			typeSize = 2 + 2 + blockSize/2
		case 6: // Q5_0
			typeSize = 2 + 4 + blockSize/2
		case 7: // Q5_1
			typeSize = 2 + 2 + 4 + blockSize/2
		case 8: // Q8_0
			typeSize = 2 + blockSize
		case 9: // Q8_1
			typeSize = 4 + 4 + blockSize
		case 10: // Q2_K
			typeSize = blockSize/16 + blockSize/4 + 2 + 2
		case 11: // Q3_K
			typeSize = blockSize/8 + blockSize/4 + 12 + 2
		case 12: // Q4_K
			typeSize = 2 + 2 + 12 + blockSize/2
		case 13: // Q5_K
			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
		case 14: // Q6_K
			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
245
246
		}

247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
		parameters := shape[0] * shape[1] * shape[2] * shape[3]
		size := parameters * typeSize / blockSize

		llm.tensors = append(llm.tensors, tensor{
			name:   name,
			kind:   kind,
			offset: offset,
			size:   size,
			shape:  shape,
		})

		llm.parameters += parameters
	}

	alignment, ok := llm.kv["general.alignment"].(uint32)
	if !ok {
		alignment = 32
	}
265

Michael Yang's avatar
Michael Yang committed
266
	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
267
268
	for _, tensor := range llm.tensors {
		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
Michael Yang's avatar
Michael Yang committed
269
		rso.Seek(padded, io.SeekCurrent)
270
271
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
272
273
274
	return nil
}

275
// NumLayers returns the "<family>.block_count" metadata value, where <family>
// is the result of ModelFamily. It returns 0 when the key is absent or when
// the stored value is not a uint32, rather than panicking on the latter.
func (llm *ggufModel) NumLayers() uint32 {
	value, ok := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())].(uint32)
	if !ok {
		return 0
	}

	return value
}

// NumHead returns the "<family>.attention.head_count" metadata value, where
// <family> is the result of ModelFamily. It returns 0 when the key is absent
// or when the stored value is not a uint32, rather than panicking on the
// latter.
func (llm *ggufModel) NumHead() uint32 {
	value, ok := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())].(uint32)
	if !ok {
		return 0
	}

	return value
}

// NumEmbed returns the "<family>.embedding_length" metadata value, where
// <family> is the result of ModelFamily. It returns 0 when the key is absent
// or when the stored value is not a uint32, rather than panicking on the
// latter.
func (llm *ggufModel) NumEmbed() uint32 {
	value, ok := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())].(uint32)
	if !ok {
		return 0
	}

	return value
}

// NumHeadKv returns the "<family>.attention.head_count_kv" metadata value,
// where <family> is the result of ModelFamily. It returns 0 when the key is
// absent or when the stored value is not a uint32, rather than panicking on
// the latter.
func (llm *ggufModel) NumHeadKv() uint32 {
	value, ok := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())].(uint32)
	if !ok {
		return 0
	}

	return value
}

// NumGQA returns NumHead divided by NumHeadKv using integer division, or 0
// when NumHeadKv is 0.
func (llm *ggufModel) NumGQA() uint32 {
	if kvHeads := llm.NumHeadKv(); kvHeads != 0 {
		return llm.NumHead() / kvHeads
	}

	return 0
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
320
func (llm ggufModel) readU8(r io.Reader) uint8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
321
	var u8 uint8
Michael Yang's avatar
ggufv3  
Michael Yang committed
322
	binary.Read(r, llm.bo, &u8)
Bruce MacDonald's avatar
Bruce MacDonald committed
323
324
325
	return u8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
326
func (llm ggufModel) readI8(r io.Reader) int8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
327
	var i8 int8
Michael Yang's avatar
ggufv3  
Michael Yang committed
328
	binary.Read(r, llm.bo, &i8)
Bruce MacDonald's avatar
Bruce MacDonald committed
329
330
331
	return i8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
332
func (llm ggufModel) readU16(r io.Reader) uint16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
333
	var u16 uint16
Michael Yang's avatar
ggufv3  
Michael Yang committed
334
	binary.Read(r, llm.bo, &u16)
Bruce MacDonald's avatar
Bruce MacDonald committed
335
336
337
	return u16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
338
func (llm ggufModel) readI16(r io.Reader) int16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
339
	var i16 int16
Michael Yang's avatar
ggufv3  
Michael Yang committed
340
	binary.Read(r, llm.bo, &i16)
Bruce MacDonald's avatar
Bruce MacDonald committed
341
342
343
	return i16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
344
func (llm ggufModel) readU32(r io.Reader) uint32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
345
	var u32 uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
346
	binary.Read(r, llm.bo, &u32)
Bruce MacDonald's avatar
Bruce MacDonald committed
347
348
349
	return u32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
350
func (llm ggufModel) readI32(r io.Reader) int32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
351
	var i32 int32
Michael Yang's avatar
ggufv3  
Michael Yang committed
352
	binary.Read(r, llm.bo, &i32)
Bruce MacDonald's avatar
Bruce MacDonald committed
353
354
355
	return i32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
356
func (llm ggufModel) readU64(r io.Reader) uint64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
357
	var u64 uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
358
	binary.Read(r, llm.bo, &u64)
Bruce MacDonald's avatar
Bruce MacDonald committed
359
360
361
	return u64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
362
func (llm ggufModel) readI64(r io.Reader) int64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
363
	var i64 int64
Michael Yang's avatar
ggufv3  
Michael Yang committed
364
	binary.Read(r, llm.bo, &i64)
Bruce MacDonald's avatar
Bruce MacDonald committed
365
366
367
	return i64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
368
func (llm ggufModel) readF32(r io.Reader) float32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
369
	var f32 float32
Michael Yang's avatar
ggufv3  
Michael Yang committed
370
	binary.Read(r, llm.bo, &f32)
Bruce MacDonald's avatar
Bruce MacDonald committed
371
372
373
	return f32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
374
func (llm ggufModel) readF64(r io.Reader) float64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
375
	var f64 float64
Michael Yang's avatar
ggufv3  
Michael Yang committed
376
	binary.Read(r, llm.bo, &f64)
Bruce MacDonald's avatar
Bruce MacDonald committed
377
378
379
	return f64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
380
func (llm ggufModel) readBool(r io.Reader) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
381
	var b bool
Michael Yang's avatar
ggufv3  
Michael Yang committed
382
	binary.Read(r, llm.bo, &b)
Bruce MacDonald's avatar
Bruce MacDonald committed
383
384
385
	return b
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
386
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
Bruce MacDonald's avatar
Bruce MacDonald committed
387
	var nameLength uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
388
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
389
390
391
392
393
394
395
396
397
398
399
400
401

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

func (llm ggufModel) readString(r io.Reader) (string, error) {
402
403
404
405
	if llm.Version == 1 {
		return llm.readStringV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
406
	var nameLength uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
407
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	return b.String(), nil
}

func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
	atype := llm.readU32(r)
	n := llm.readU32(r)

	for i := 0; uint32(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
426
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readStringV1(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}

func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
455
456
457
458
	if llm.Version == 1 {
		return llm.readArrayV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
459
460
461
462
463
464
465
466
	atype := llm.readU32(r)
	n := llm.readU64(r)

	for i := 0; uint64(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
467
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeUint64:
			arr = append(arr, llm.readU64(r))
		case ggufTypeInt64:
			arr = append(arr, llm.readI64(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeFloat64:
			arr = append(arr, llm.readF64(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readString(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}