gguf.go 9.5 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
5
6
7
package llm

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
8
9

	"github.com/jmorganca/ollama/format"
Bruce MacDonald's avatar
Bruce MacDonald committed
10
11
12
)

// containerGGUF holds the header state of a GGUF file: the byte order used to
// decode it, the format version, and the version-specific tensor and
// key-value counts.
type containerGGUF struct {
	// bo is the byte order passed to every binary.Read call made while
	// decoding the file.
	bo binary.ByteOrder

	// Version is the format version read by Decode; it selects which of V1
	// or V2 is populated.
	Version uint32

	// V1 is the count header of version 1 files, which use 32-bit counts.
	// The field order is the order in which binary.Read decodes them.
	V1 struct {
		NumTensor uint32
		NumKV     uint32
	}

	// V2 is the count header of every other version, which use 64-bit
	// counts. The field order is the order in which binary.Read decodes them.
	V2 struct {
		NumTensor uint64
		NumKV     uint64
	}
}

// Name reports the identifier of this container format, "gguf".
func (*containerGGUF) Name() string {
	const name = "gguf"
	return name
}

Michael Yang's avatar
Michael Yang committed
32
33
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
	binary.Read(rso, c.bo, &c.Version)
Bruce MacDonald's avatar
Bruce MacDonald committed
34
35
36

	switch c.Version {
	case 1:
Michael Yang's avatar
Michael Yang committed
37
		binary.Read(rso, c.bo, &c.V1)
Bruce MacDonald's avatar
Bruce MacDonald committed
38
	default:
Michael Yang's avatar
Michael Yang committed
39
		binary.Read(rso, c.bo, &c.V2)
Bruce MacDonald's avatar
Bruce MacDonald committed
40
41
42
	}

	model := newGGUFModel(c)
Michael Yang's avatar
Michael Yang committed
43
	if err := model.Decode(rso); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
		return nil, err
	}

	return model, nil
}

// Type tags that precede each metadata value (and each array's elements) in a
// GGUF file. The numeric values are spelled out so that inserting or
// reordering a line cannot silently renumber the tags.
const (
	ggufTypeUint8   uint32 = 0
	ggufTypeInt8    uint32 = 1
	ggufTypeUint16  uint32 = 2
	ggufTypeInt16   uint32 = 3
	ggufTypeUint32  uint32 = 4
	ggufTypeInt32   uint32 = 5
	ggufTypeFloat32 uint32 = 6
	ggufTypeBool    uint32 = 7
	ggufTypeString  uint32 = 8
	ggufTypeArray   uint32 = 9
	ggufTypeUint64  uint32 = 10
	ggufTypeInt64   uint32 = 11
	ggufTypeFloat64 uint32 = 12
)

// kv holds decoded metadata keyed by metadata key name. Values are stored as
// any; their concrete Go type is whatever the decoder produced for the
// value's type tag.
type kv map[string]any

68
69
70
71
72
73
74
75
76
77
// tensor describes one tensor entry decoded from the file.
type tensor struct {
	// name is the tensor's name as read from the file.
	name string
	// kind is the numeric tensor type tag read from the file.
	kind uint32
	// offset is read from the file; size is computed by the decoder from the
	// element count and the tensor kind.
	offset, size uint64

	// shape is the number of elements in each dimension
	shape [4]uint64
}

Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
type ggufModel struct {
	*containerGGUF
80

Bruce MacDonald's avatar
Bruce MacDonald committed
81
	kv
82
83
84
	tensors []tensor

	parameters uint64
Bruce MacDonald's avatar
Bruce MacDonald committed
85
86
87
88
89
90
91
92
93
}

// newGGUFModel returns a ggufModel bound to container with an empty,
// writable metadata map.
func newGGUFModel(container *containerGGUF) *ggufModel {
	m := new(ggufModel)
	m.containerGGUF = container
	m.kv = kv{}
	return m
}

94
95
96
97
98
99
100
101
// NumTensor returns the tensor count from the container header, widening the
// 32-bit version 1 count to uint64.
func (llm *ggufModel) NumTensor() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumTensor)
	default:
		return llm.V2.NumTensor
	}
}

Bruce MacDonald's avatar
Bruce MacDonald committed
102
103
104
105
106
107
108
109
// NumKV returns the key-value pair count from the container header, widening
// the 32-bit version 1 count to uint64.
func (llm *ggufModel) NumKV() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumKV)
	default:
		return llm.V2.NumKV
	}
}

Michael Yang's avatar
Michael Yang committed
110
func (llm *ggufModel) ModelFamily() string {
Michael Yang's avatar
Michael Yang committed
111
	if t, ok := llm.kv["general.architecture"].(string); ok {
Michael Yang's avatar
Michael Yang committed
112
		return t
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
	}

Michael Yang's avatar
Michael Yang committed
115
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
116
117
}

Michael Yang's avatar
Michael Yang committed
118
func (llm *ggufModel) ModelType() string {
119
120
121
122
	if llm.parameters > 0 {
		return format.HumanNumber(llm.parameters)
	}

Michael Yang's avatar
Michael Yang committed
123
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
124
125
}

Michael Yang's avatar
Michael Yang committed
126
func (llm *ggufModel) FileType() string {
Michael Yang's avatar
Michael Yang committed
127
	if t, ok := llm.kv["general.file_type"].(uint32); ok {
Michael Yang's avatar
Michael Yang committed
128
		return fileType(t)
Bruce MacDonald's avatar
Bruce MacDonald committed
129
130
	}

Michael Yang's avatar
Michael Yang committed
131
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
132
133
}

Michael Yang's avatar
Michael Yang committed
134
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
135
	// decode key-values
Bruce MacDonald's avatar
Bruce MacDonald committed
136
	for i := 0; uint64(i) < llm.NumKV(); i++ {
Michael Yang's avatar
Michael Yang committed
137
		k, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
138
139
140
141
		if err != nil {
			return err
		}

Michael Yang's avatar
Michael Yang committed
142
		vtype := llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
143
144
145
146

		var v any
		switch vtype {
		case ggufTypeUint8:
Michael Yang's avatar
Michael Yang committed
147
			v = llm.readU8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
148
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
149
			v = llm.readI8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
150
		case ggufTypeUint16:
Michael Yang's avatar
Michael Yang committed
151
			v = llm.readU16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
152
		case ggufTypeInt16:
Michael Yang's avatar
Michael Yang committed
153
			v = llm.readI16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
154
		case ggufTypeUint32:
Michael Yang's avatar
Michael Yang committed
155
			v = llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
156
		case ggufTypeInt32:
Michael Yang's avatar
Michael Yang committed
157
			v = llm.readI32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
158
		case ggufTypeUint64:
Michael Yang's avatar
Michael Yang committed
159
			v = llm.readU64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
160
		case ggufTypeInt64:
Michael Yang's avatar
Michael Yang committed
161
			v = llm.readI64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
162
		case ggufTypeFloat32:
Michael Yang's avatar
Michael Yang committed
163
			v = llm.readF32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
164
		case ggufTypeFloat64:
Michael Yang's avatar
Michael Yang committed
165
			v = llm.readF64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
166
		case ggufTypeBool:
Michael Yang's avatar
Michael Yang committed
167
			v = llm.readBool(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
168
		case ggufTypeString:
Michael Yang's avatar
Michael Yang committed
169
			s, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
170
171
172
173
174
175
			if err != nil {
				return err
			}

			v = s
		case ggufTypeArray:
Michael Yang's avatar
Michael Yang committed
176
			a, err := llm.readArray(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
177
178
179
180
181
182
183
184
185
186
187
188
			if err != nil {
				return err
			}

			v = a
		default:
			return fmt.Errorf("invalid type: %d", vtype)
		}

		llm.kv[k] = v
	}

189
190
	// decode tensors
	for i := 0; uint64(i) < llm.NumTensor(); i++ {
Michael Yang's avatar
Michael Yang committed
191
		name, err := llm.readString(rso)
192
		if err != nil {
193
194
195
			return err
		}

Michael Yang's avatar
Michael Yang committed
196
		// dims is the number of dimensions in the tensor
Michael Yang's avatar
Michael Yang committed
197
		dims := llm.readU32(rso)
198
199
200

		shape := [4]uint64{1, 1, 1, 1}
		for i := 0; uint32(i) < dims; i++ {
Michael Yang's avatar
Michael Yang committed
201
			shape[i] = llm.readU64(rso)
202
203
		}

Michael Yang's avatar
Michael Yang committed
204
205
		kind := llm.readU32(rso)
		offset := llm.readU64(rso)
206
207
208
209
210
211
212
213
214
215

		var blockSize uint64
		switch {
		case kind < 2:
			blockSize = 1
		case kind < 10:
			blockSize = 32
		default:
			blockSize = 256
		}
216

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
		var typeSize uint64
		switch kind {
		case 0: // FP32
			typeSize = 4
		case 1: // FP16
			typeSize = 2
		case 2: // Q4_0
			typeSize = 2 + blockSize/2
		case 3: // Q4_1
			typeSize = 2 + 2 + blockSize/2
		case 6: // Q5_0
			typeSize = 2 + 4 + blockSize/2
		case 7: // Q5_1
			typeSize = 2 + 2 + 4 + blockSize/2
		case 8: // Q8_0
			typeSize = 2 + blockSize
		case 9: // Q8_1
			typeSize = 4 + 4 + blockSize
		case 10: // Q2_K
			typeSize = blockSize/16 + blockSize/4 + 2 + 2
		case 11: // Q3_K
			typeSize = blockSize/8 + blockSize/4 + 12 + 2
		case 12: // Q4_K
			typeSize = 2 + 2 + 12 + blockSize/2
		case 13: // Q5_K
			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
		case 14: // Q6_K
			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
245
246
		}

247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
		parameters := shape[0] * shape[1] * shape[2] * shape[3]
		size := parameters * typeSize / blockSize

		llm.tensors = append(llm.tensors, tensor{
			name:   name,
			kind:   kind,
			offset: offset,
			size:   size,
			shape:  shape,
		})

		llm.parameters += parameters
	}

	alignment, ok := llm.kv["general.alignment"].(uint32)
	if !ok {
		alignment = 32
	}
265

Michael Yang's avatar
Michael Yang committed
266
	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
267
268
	for _, tensor := range llm.tensors {
		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
Michael Yang's avatar
Michael Yang committed
269
		rso.Seek(padded, io.SeekCurrent)
270
271
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
272
273
274
	return nil
}

275
// NumLayers returns the "<architecture>.block_count" metadata value, where
// <architecture> is the result of ModelFamily. It returns 0 when the key is
// missing or its value is not a uint32, rather than panicking on an
// unexpected value type.
func (llm *ggufModel) NumLayers() uint32 {
	key := fmt.Sprintf("%s.block_count", llm.ModelFamily())
	if value, ok := llm.kv[key].(uint32); ok {
		return value
	}

	return 0
}

// NumHead returns the "<architecture>.attention.head_count" metadata value,
// where <architecture> is the result of ModelFamily. It returns 0 when the
// key is missing or its value is not a uint32, rather than panicking on an
// unexpected value type.
func (llm *ggufModel) NumHead() uint32 {
	key := fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())
	if value, ok := llm.kv[key].(uint32); ok {
		return value
	}

	return 0
}

// NumEmbed returns the "<architecture>.embedding_length" metadata value,
// where <architecture> is the result of ModelFamily. It returns 0 when the
// key is missing or its value is not a uint32, rather than panicking on an
// unexpected value type.
func (llm *ggufModel) NumEmbed() uint32 {
	key := fmt.Sprintf("%s.embedding_length", llm.ModelFamily())
	if value, ok := llm.kv[key].(uint32); ok {
		return value
	}

	return 0
}

// NumHeadKv returns the "<architecture>.attention.head_count_kv" metadata
// value, where <architecture> is the result of ModelFamily. It returns 0 when
// the key is missing or its value is not a uint32, rather than panicking on
// an unexpected value type.
func (llm *ggufModel) NumHeadKv() uint32 {
	key := fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())
	if value, ok := llm.kv[key].(uint32); ok {
		return value
	}

	return 0
}

Michael Yang's avatar
Michael Yang committed
311
312
313
314
315
316
317
318
319
// NumCtx returns the "<architecture>.context_length" metadata value, where
// <architecture> is the result of ModelFamily. It returns 0 when the key is
// missing or its value is not a uint32, rather than panicking on an
// unexpected value type.
func (llm *ggufModel) NumCtx() uint32 {
	key := fmt.Sprintf("%s.context_length", llm.ModelFamily())
	if value, ok := llm.kv[key].(uint32); ok {
		return value
	}

	return 0
}

320
321
322
323
324
325
326
// NumGQA returns NumHead divided by NumHeadKv using integer division. It
// returns 0 when NumHeadKv is 0, which also avoids dividing by zero.
func (llm *ggufModel) NumGQA() uint32 {
	numHeadKv := llm.NumHeadKv()
	if numHeadKv == 0 {
		return 0
	}

	return llm.NumHead() / numHeadKv
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
329
func (llm ggufModel) readU8(r io.Reader) uint8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
330
	var u8 uint8
Michael Yang's avatar
ggufv3  
Michael Yang committed
331
	binary.Read(r, llm.bo, &u8)
Bruce MacDonald's avatar
Bruce MacDonald committed
332
333
334
	return u8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
335
func (llm ggufModel) readI8(r io.Reader) int8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
336
	var i8 int8
Michael Yang's avatar
ggufv3  
Michael Yang committed
337
	binary.Read(r, llm.bo, &i8)
Bruce MacDonald's avatar
Bruce MacDonald committed
338
339
340
	return i8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
341
func (llm ggufModel) readU16(r io.Reader) uint16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
342
	var u16 uint16
Michael Yang's avatar
ggufv3  
Michael Yang committed
343
	binary.Read(r, llm.bo, &u16)
Bruce MacDonald's avatar
Bruce MacDonald committed
344
345
346
	return u16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
347
func (llm ggufModel) readI16(r io.Reader) int16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
348
	var i16 int16
Michael Yang's avatar
ggufv3  
Michael Yang committed
349
	binary.Read(r, llm.bo, &i16)
Bruce MacDonald's avatar
Bruce MacDonald committed
350
351
352
	return i16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
353
func (llm ggufModel) readU32(r io.Reader) uint32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
354
	var u32 uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
355
	binary.Read(r, llm.bo, &u32)
Bruce MacDonald's avatar
Bruce MacDonald committed
356
357
358
	return u32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
359
func (llm ggufModel) readI32(r io.Reader) int32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
360
	var i32 int32
Michael Yang's avatar
ggufv3  
Michael Yang committed
361
	binary.Read(r, llm.bo, &i32)
Bruce MacDonald's avatar
Bruce MacDonald committed
362
363
364
	return i32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
365
func (llm ggufModel) readU64(r io.Reader) uint64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
366
	var u64 uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
367
	binary.Read(r, llm.bo, &u64)
Bruce MacDonald's avatar
Bruce MacDonald committed
368
369
370
	return u64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
371
func (llm ggufModel) readI64(r io.Reader) int64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
372
	var i64 int64
Michael Yang's avatar
ggufv3  
Michael Yang committed
373
	binary.Read(r, llm.bo, &i64)
Bruce MacDonald's avatar
Bruce MacDonald committed
374
375
376
	return i64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
377
func (llm ggufModel) readF32(r io.Reader) float32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
378
	var f32 float32
Michael Yang's avatar
ggufv3  
Michael Yang committed
379
	binary.Read(r, llm.bo, &f32)
Bruce MacDonald's avatar
Bruce MacDonald committed
380
381
382
	return f32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
383
func (llm ggufModel) readF64(r io.Reader) float64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
384
	var f64 float64
Michael Yang's avatar
ggufv3  
Michael Yang committed
385
	binary.Read(r, llm.bo, &f64)
Bruce MacDonald's avatar
Bruce MacDonald committed
386
387
388
	return f64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
389
func (llm ggufModel) readBool(r io.Reader) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
390
	var b bool
Michael Yang's avatar
ggufv3  
Michael Yang committed
391
	binary.Read(r, llm.bo, &b)
Bruce MacDonald's avatar
Bruce MacDonald committed
392
393
394
	return b
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
395
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
Bruce MacDonald's avatar
Bruce MacDonald committed
396
	var nameLength uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
397
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
398
399
400
401
402
403
404
405
406
407
408
409
410

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

func (llm ggufModel) readString(r io.Reader) (string, error) {
411
412
413
414
	if llm.Version == 1 {
		return llm.readStringV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
415
	var nameLength uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
416
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	return b.String(), nil
}

func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
	atype := llm.readU32(r)
	n := llm.readU32(r)

	for i := 0; uint32(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
435
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readStringV1(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}

func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
464
465
466
467
	if llm.Version == 1 {
		return llm.readArrayV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
468
469
470
471
472
473
474
475
	atype := llm.readU32(r)
	n := llm.readU64(r)

	for i := 0; uint64(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
476
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeUint64:
			arr = append(arr, llm.readU64(r))
		case ggufTypeInt64:
			arr = append(arr, llm.readI64(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeFloat64:
			arr = append(arr, llm.readF64(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readString(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}