gguf.go 13.9 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
5
6
7
package llm

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
8
	"strings"
9

10
	"github.com/ollama/ollama/format"
Bruce MacDonald's avatar
Bruce MacDonald committed
11
12
)

13
// containerGGUF holds the header of a GGUF file: the byte order used to decode
// it, the format version, and the tensor / key-value counts.
//
// The counts are stored per version because their width differs: Decode reads
// them into V1 for version 1, V2 for version 2, and V3 for any other version.
type containerGGUF struct {
	// ByteOrder is used for every binary read and write on this container.
	ByteOrder binary.ByteOrder

	// Version is the format version read from the file header.
	Version uint32

	// V1 holds the 32-bit counts of a version 1 header.
	V1 struct {
		NumTensor uint32
		NumKV     uint32
	}

	// V2 holds the 64-bit counts of a version 2 header.
	V2 struct {
		NumTensor uint64
		NumKV     uint64
	}

	// V3 holds the 64-bit counts of a version 3 header.
	V3 struct {
		NumTensor uint64
		NumKV     uint64
	}
}

34
func (c *containerGGUF) Name() string {
Bruce MacDonald's avatar
Bruce MacDonald committed
35
36
37
	return "gguf"
}

38
39
40
41
func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
	if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
		return nil, err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
42

43
	var err error
Bruce MacDonald's avatar
Bruce MacDonald committed
44
45
	switch c.Version {
	case 1:
46
47
48
		err = binary.Read(rs, c.ByteOrder, &c.V1)
	case 2:
		err = binary.Read(rs, c.ByteOrder, &c.V2)
Bruce MacDonald's avatar
Bruce MacDonald committed
49
	default:
50
51
52
53
		err = binary.Read(rs, c.ByteOrder, &c.V3)
	}
	if err != nil {
		return nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
54
55
	}

56
	model := newGGUF(c)
Michael Yang's avatar
Michael Yang committed
57
	if err := model.Decode(rs); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
58
59
60
61
62
63
64
		return nil, err
	}

	return model, nil
}

// GGUF token type identifiers. Numbering starts at 1; the zero value is
// deliberately skipped.
const (
	_ uint32 = iota
	GGUFTokenNormal
	GGUFTokenUnknown
	GGUFTokenControl
	GGUFTokenUserDefined
	GGUFTokenUnused
	GGUFTokenByte
)

74
// Type tags that precede each value in GGUF key-value metadata. The numeric
// values are part of the file format, so the order of this list must not
// change: the 64-bit types come after ggufTypeArray.
const (
	ggufTypeUint8 uint32 = iota
	ggufTypeInt8
	ggufTypeUint16
	ggufTypeInt16
	ggufTypeUint32
	ggufTypeInt32
	ggufTypeFloat32
	ggufTypeBool
	ggufTypeString
	ggufTypeArray
	ggufTypeUint64
	ggufTypeInt64
	ggufTypeFloat64
)
Bruce MacDonald's avatar
Bruce MacDonald committed
89

90
91
type gguf struct {
	*containerGGUF
92
93
94

	KV
	Tensors []Tensor
95
96

	parameters uint64
Bruce MacDonald's avatar
Bruce MacDonald committed
97
98
}

99
100
101
func newGGUF(container *containerGGUF) *gguf {
	return &gguf{
		containerGGUF: container,
102
		KV:            make(KV),
Bruce MacDonald's avatar
Bruce MacDonald committed
103
104
105
	}
}

106
107
108
109
110
111
112
// NewGGUFV3 returns an empty gguf whose container is set to version 3 and
// uses bo as its byte order.
func NewGGUFV3(bo binary.ByteOrder) *gguf {
	container := containerGGUF{Version: 3, ByteOrder: bo}
	return newGGUF(&container)
}

// numTensor returns the tensor count from the header fields matching
// llm.Version. Versions other than 1 and 2 use the V3 fields.
func (llm *gguf) numTensor() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumTensor)
	case 2:
		return llm.V2.NumTensor
	default:
		return llm.V3.NumTensor
	}
}

121
122
123
func (llm *gguf) numKV() uint64 {
	switch llm.Version {
	case 1:
Bruce MacDonald's avatar
Bruce MacDonald committed
124
		return uint64(llm.V1.NumKV)
125
126
127
128
	case 2:
		return llm.V2.NumKV
	default:
		return llm.V3.NumKV
Bruce MacDonald's avatar
Bruce MacDonald committed
129
130
131
	}
}

132
func (llm *gguf) ModelFamily() string {
133
	if t, ok := llm.KV["general.architecture"].(string); ok {
Michael Yang's avatar
Michael Yang committed
134
		return t
Bruce MacDonald's avatar
Bruce MacDonald committed
135
136
	}

Michael Yang's avatar
Michael Yang committed
137
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
138
139
}

140
func (llm *gguf) ModelType() string {
141
142
143
144
	if llm.parameters > 0 {
		return format.HumanNumber(llm.parameters)
	}

Michael Yang's avatar
Michael Yang committed
145
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
146
147
}

148
func (llm *gguf) FileType() string {
149
	if t, ok := llm.KV["general.file_type"].(uint32); ok {
Michael Yang's avatar
Michael Yang committed
150
		return fileType(t)
Bruce MacDonald's avatar
Bruce MacDonald committed
151
152
	}

Michael Yang's avatar
Michael Yang committed
153
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
154
155
}

156
157
158
159
160
func (llm *gguf) Decode(rs io.ReadSeeker) error {
	// decode key-values
	for i := 0; uint64(i) < llm.numKV(); i++ {
		k, err := readGGUFString(llm, rs)
		if err != nil {
161
162
163
			return err
		}

164
165
		t, err := readGGUF[uint32](llm, rs)
		if err != nil {
166
167
168
			return err
		}

169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
		var v any
		switch t {
		case ggufTypeUint8:
			v, err = readGGUF[uint8](llm, rs)
		case ggufTypeInt8:
			v, err = readGGUF[int8](llm, rs)
		case ggufTypeUint16:
			v, err = readGGUF[uint16](llm, rs)
		case ggufTypeInt16:
			v, err = readGGUF[int16](llm, rs)
		case ggufTypeUint32:
			v, err = readGGUF[uint32](llm, rs)
		case ggufTypeInt32:
			v, err = readGGUF[int32](llm, rs)
		case ggufTypeUint64:
			v, err = readGGUF[uint64](llm, rs)
		case ggufTypeInt64:
			v, err = readGGUF[int64](llm, rs)
		case ggufTypeFloat32:
			v, err = readGGUF[float32](llm, rs)
		case ggufTypeFloat64:
			v, err = readGGUF[float64](llm, rs)
		case ggufTypeBool:
			v, err = readGGUF[bool](llm, rs)
		case ggufTypeString:
			v, err = readGGUFString(llm, rs)
		case ggufTypeArray:
			v, err = readGGUFArray(llm, rs)
		default:
			return fmt.Errorf("invalid type: %d", t)
199
200
		}

201
		if err != nil {
202
203
204
			return err
		}

205
		llm.KV[k] = v
206
207
	}

208
209
210
	// decode tensors
	for i := 0; uint64(i) < llm.numTensor(); i++ {
		name, err := readGGUFString(llm, rs)
211
212
213
214
		if err != nil {
			return err
		}

215
216
		// dims is the number of dimensions in the tensor
		dims, err := readGGUF[uint32](llm, rs)
Bruce MacDonald's avatar
Bruce MacDonald committed
217
218
219
220
		if err != nil {
			return err
		}

221
222
223
		shape := [4]uint64{1, 1, 1, 1}
		for i := 0; uint32(i) < dims; i++ {
			shape[i], err = readGGUF[uint64](llm, rs)
Bruce MacDonald's avatar
Bruce MacDonald committed
224
225
226
227
228
			if err != nil {
				return err
			}
		}

229
		kind, err := readGGUF[uint32](llm, rs)
230
		if err != nil {
231
232
233
			return err
		}

234
235
236
		offset, err := readGGUF[uint64](llm, rs)
		if err != nil {
			return err
237
238
		}

239
240
		tensor := Tensor{
			Name:   name,
241
242
			Kind:   kind,
			Offset: offset,
Michael Yang's avatar
Michael Yang committed
243
			Shape:  shape[:],
Michael Yang's avatar
Michael Yang committed
244
		}
245

246
		llm.Tensors = append(llm.Tensors, tensor)
247
		llm.parameters += tensor.parameters()
248
249
	}

250
	alignment, ok := llm.KV["general.alignment"].(uint32)
251
252
253
	if !ok {
		alignment = 32
	}
254

Michael Yang's avatar
Michael Yang committed
255
256
257
258
259
	offset, err := rs.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

260
261
	padding := llm.padding(offset, int64(alignment))
	if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
Michael Yang's avatar
Michael Yang committed
262
263
264
		return err
	}

265
	for _, tensor := range llm.Tensors {
266
		padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
Michael Yang's avatar
Michael Yang committed
267
268
269
		if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
			return err
		}
270
271
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
272
273
274
	return nil
}

275
// NumLayers returns the "<family>.block_count" metadata value, where <family>
// is llm.ModelFamily(). It returns 0 when the key is missing or does not hold
// a uint32, rather than panicking on an unexpected type.
func (llm *gguf) NumLayers() uint32 {
	n, _ := llm.KV[llm.ModelFamily()+".block_count"].(uint32)
	return n
}

284
// NumHead returns the "<family>.attention.head_count" metadata value, where
// <family> is llm.ModelFamily(). It returns 0 when the key is missing or does
// not hold a uint32, rather than panicking on an unexpected type.
func (llm *gguf) NumHead() uint32 {
	n, _ := llm.KV[llm.ModelFamily()+".attention.head_count"].(uint32)
	return n
}

293
// NumEmbed returns the "<family>.embedding_length" metadata value, where
// <family> is llm.ModelFamily(). It returns 0 when the key is missing or does
// not hold a uint32, rather than panicking on an unexpected type.
func (llm *gguf) NumEmbed() uint32 {
	n, _ := llm.KV[llm.ModelFamily()+".embedding_length"].(uint32)
	return n
}

302
// NumHeadKv returns the "<family>.attention.head_count_kv" metadata value,
// where <family> is llm.ModelFamily(). It returns 0 when the key is missing or
// does not hold a uint32, rather than panicking on an unexpected type.
func (llm *gguf) NumHeadKv() uint32 {
	n, _ := llm.KV[llm.ModelFamily()+".attention.head_count_kv"].(uint32)
	return n
}

311
func (llm *gguf) NumCtx() uint32 {
312
	value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
Michael Yang's avatar
Michael Yang committed
313
314
315
316
317
318
319
	if !exists {
		return 0
	}

	return value.(uint32)
}

320
// NumGQA returns NumHead divided by NumHeadKv using integer division. It
// returns 0 when NumHeadKv is 0, which also avoids dividing by zero.
func (llm *gguf) NumGQA() uint32 {
	numHeadKv := llm.NumHeadKv()
	if numHeadKv == 0 {
		return 0
	}

	return llm.NumHead() / numHeadKv
}

329
330
331
332
func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
	var t T
	err := binary.Read(r, llm.ByteOrder, &t)
	return t, err
Bruce MacDonald's avatar
Bruce MacDonald committed
333
334
}

335
336
337
338
func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
	if err := binary.Write(w, llm.ByteOrder, t); err != nil {
		return err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
339

340
	return binary.Write(w, llm.ByteOrder, v)
Bruce MacDonald's avatar
Bruce MacDonald committed
341
342
}

343
344
345
346
347
func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
	var length uint64
	if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
		return "", err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
348
349

	var b bytes.Buffer
350
	if _, err := io.CopyN(&b, r, int64(length)); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
351
352
353
354
355
356
357
358
359
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

360
func readGGUFString(llm *gguf, r io.Reader) (string, error) {
361
	if llm.Version == 1 {
362
		return readGGUFV1String(llm, r)
363
364
	}

365
366
367
368
	var length uint64
	if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
		return "", err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
369
370

	var b bytes.Buffer
371
	if _, err := io.CopyN(&b, r, int64(length)); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
372
373
374
375
376
377
		return "", err
	}

	return b.String(), nil
}

378
379
380
381
func writeGGUFString(llm *gguf, w io.Writer, s string) error {
	if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
		return err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
382

383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
	if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
		return err
	}

	_, err := io.Copy(w, strings.NewReader(s))
	return err
}

func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
	t, err := readGGUF[uint32](llm, r)
	if err != nil {
		return nil, err
	}

	n, err := readGGUF[uint32](llm, r)
	if err != nil {
		return nil, err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
401

402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
	for i := 0; uint32(i) < n; i++ {
		var e any
		switch t {
		case ggufTypeUint8:
			e, err = readGGUF[uint8](llm, r)
		case ggufTypeInt8:
			e, err = readGGUF[int8](llm, r)
		case ggufTypeUint16:
			e, err = readGGUF[uint16](llm, r)
		case ggufTypeInt16:
			e, err = readGGUF[int16](llm, r)
		case ggufTypeUint32:
			e, err = readGGUF[uint32](llm, r)
		case ggufTypeInt32:
			e, err = readGGUF[int32](llm, r)
		case ggufTypeUint64:
			e, err = readGGUF[uint64](llm, r)
		case ggufTypeInt64:
			e, err = readGGUF[int64](llm, r)
		case ggufTypeFloat32:
			e, err = readGGUF[float32](llm, r)
		case ggufTypeFloat64:
			e, err = readGGUF[float64](llm, r)
		case ggufTypeBool:
			e, err = readGGUF[bool](llm, r)
		case ggufTypeString:
			e, err = readGGUFV1String(llm, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
429
		default:
430
			return nil, fmt.Errorf("invalid array type: %d", t)
Bruce MacDonald's avatar
Bruce MacDonald committed
431
		}
432
433
434
435
436
		if err != nil {
			return nil, err
		}

		a = append(a, e)
Bruce MacDonald's avatar
Bruce MacDonald committed
437
438
439
440
441
	}

	return
}

442
func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
443
	if llm.Version == 1 {
444
		return readGGUFV1Array(llm, r)
445
446
	}

447
448
449
450
	t, err := readGGUF[uint32](llm, r)
	if err != nil {
		return nil, err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
451

452
453
454
455
	n, err := readGGUF[uint64](llm, r)
	if err != nil {
		return nil, err
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
456

457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
	for i := 0; uint64(i) < n; i++ {
		var e any
		switch t {
		case ggufTypeUint8:
			e, err = readGGUF[uint8](llm, r)
		case ggufTypeInt8:
			e, err = readGGUF[int8](llm, r)
		case ggufTypeUint16:
			e, err = readGGUF[uint16](llm, r)
		case ggufTypeInt16:
			e, err = readGGUF[int16](llm, r)
		case ggufTypeUint32:
			e, err = readGGUF[uint32](llm, r)
		case ggufTypeInt32:
			e, err = readGGUF[int32](llm, r)
		case ggufTypeUint64:
			e, err = readGGUF[uint64](llm, r)
		case ggufTypeInt64:
			e, err = readGGUF[int64](llm, r)
		case ggufTypeFloat32:
			e, err = readGGUF[float32](llm, r)
		case ggufTypeFloat64:
			e, err = readGGUF[float64](llm, r)
		case ggufTypeBool:
			e, err = readGGUF[bool](llm, r)
		case ggufTypeString:
			e, err = readGGUFString(llm, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
484
		default:
485
			return nil, fmt.Errorf("invalid array type: %d", t)
Bruce MacDonald's avatar
Bruce MacDonald committed
486
		}
487
488
489
490
491
		if err != nil {
			return nil, err
		}

		a = append(a, e)
Bruce MacDonald's avatar
Bruce MacDonald committed
492
493
494
495
	}

	return
}
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703

// writeGGUFArray writes s to w as a tagged array value. The header is the
// ggufTypeArray tag, the element type tag t, and the element count as a
// uint64; the elements follow one by one. Everything is encoded with
// llm.ByteOrder, so E must be a type encoding/binary can encode.
func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
	header := []any{ggufTypeArray, t, uint64(len(s))}
	for _, field := range header {
		if err := binary.Write(w, llm.ByteOrder, field); err != nil {
			return err
		}
	}

	for i := range s {
		if err := binary.Write(w, llm.ByteOrder, s[i]); err != nil {
			return err
		}
	}

	return nil
}

// ggufKVOrder lists metadata keys in the order they are written to a GGUF
// file. Encode walks the "llama" entry and writes only the keys that are
// present in the KV it is given, so a key missing from this list is never
// written. The "llama" entry also carries the gemma.* keys.
var ggufKVOrder = map[string][]string{
	"llama": {
		"general.architecture",
		"general.name",
		"llama.context_length",
		"llama.embedding_length",
		"llama.block_count",
		"llama.feed_forward_length",
		"llama.rope.dimension_count",
		"llama.attention.head_count",
		"llama.attention.head_count_kv",
		"llama.attention.layer_norm_rms_epsilon",
		"llama.rope.freq_base",
		"gemma.context_length",
		"gemma.embedding_length",
		"gemma.block_count",
		"gemma.feed_forward_length",
		"gemma.attention.head_count",
		"gemma.attention.head_count_kv",
		"gemma.attention.layer_norm_rms_epsilon",
		"gemma.attention.key_length",
		"gemma.attention.value_length",
		"general.file_type",
		"tokenizer.ggml.model",
		"tokenizer.ggml.tokens",
		"tokenizer.ggml.scores",
		"tokenizer.ggml.token_type",
		"tokenizer.ggml.bos_token_id",
		"tokenizer.ggml.eos_token_id",
		"tokenizer.ggml.unknown_token_id",
		"tokenizer.ggml.padding_token_id",
		"tokenizer.ggml.add_bos_token",
		"tokenizer.ggml.add_eos_token",
		"tokenizer.chat_template",
	},
}

// Encode writes a GGUF v3 file to ws: the "GGUF" magic, the version, the
// tensor and key-value counts, the key-value metadata, the tensor descriptors,
// zero padding up to a 32-byte boundary, and then each tensor's data followed
// by its own padding to a 32-byte boundary.
//
// Only keys listed in ggufKVOrder["llama"] are written, in that order; other
// keys in kv are ignored. The key-value count in the header reflects the keys
// actually written so that it always matches the file contents.
//
// It returns an error when llm.Version is not 3, when a written key holds a
// value type that has no encoding here, when a tensor has an empty Shape, or
// when any write or seek fails.
func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
	if llm.Version != 3 {
		return fmt.Errorf("not implemented: ggufv%d", llm.Version)
	}

	// Collect the keys that will really be written. Using len(kv) for the
	// header count would be wrong whenever kv holds a key outside the list.
	var keys []string
	for _, k := range ggufKVOrder["llama"] {
		if _, ok := kv[k]; ok {
			keys = append(keys, k)
		}
	}

	llm.V3.NumTensor = uint64(len(tensors))
	llm.V3.NumKV = uint64(len(keys))

	if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil {
		return err
	}

	if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil {
		return err
	}

	if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil {
		return err
	}

	if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil {
		return err
	}

	for _, k := range keys {
		// The key is a bare length-prefixed string with no type tag.
		if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
			return err
		}

		if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil {
			return err
		}

		var err error
		switch v := kv[k].(type) {
		case uint32:
			err = writeGGUF(llm, ws, ggufTypeUint32, v)
		case float32:
			err = writeGGUF(llm, ws, ggufTypeFloat32, v)
		case bool:
			err = writeGGUF(llm, ws, ggufTypeBool, v)
		case string:
			err = writeGGUFString(llm, ws, v)
		case []int32:
			err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
		case []uint32:
			err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
		case []float32:
			err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
		case []string:
			// String arrays are written by hand because each element needs
			// its own length prefix.
			if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
				return err
			}

			if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil {
				return err
			}

			if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil {
				return err
			}

			for _, e := range v {
				if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil {
					return err
				}

				if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil {
					return err
				}
			}
		default:
			// The key is already written; continuing without a value would
			// silently produce a corrupt file.
			return fmt.Errorf("unsupported value type %T for key %q", v, k)
		}
		if err != nil {
			return err
		}
	}

	for _, tensor := range tensors {
		if len(tensor.Shape) == 0 {
			return fmt.Errorf("tensor %q has no shape", tensor.Name)
		}

		if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
			return err
		}

		if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil {
			return err
		}

		// Tensors are written as 1-D unless a non-zero second dimension exists.
		dims := 1
		if len(tensor.Shape) > 1 && tensor.Shape[1] > 0 {
			dims = 2
		}

		if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil {
			return err
		}

		// Dimensions are written in reverse order of tensor.Shape.
		for i := 0; i < dims; i++ {
			if err := binary.Write(ws, llm.ByteOrder, uint64(tensor.Shape[dims-1-i])); err != nil {
				return err
			}
		}

		if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil {
			return err
		}

		if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil {
			return err
		}
	}

	offset, err := ws.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	// padding returns the aligned absolute offset; write zeros for the gap.
	padding := llm.padding(offset, 32)
	if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
		return err
	}

	for _, tensor := range tensors {
		if _, err := tensor.WriteTo(ws); err != nil {
			return err
		}

		offset, err := ws.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}

		padding := llm.padding(offset, 32)
		if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
			return err
		}
	}

	return nil
}

// padding rounds offset up to the next multiple of align and returns that
// aligned absolute offset, not the size of the gap; callers subtract offset to
// get the number of padding bytes. align must be non-zero.
func (gguf) padding(offset, align int64) int64 {
	blocks := (offset + align - 1) / align
	return blocks * align
}