gguf.go 9.2 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
5
6
7
package llm

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
8
9

	"github.com/jmorganca/ollama/format"
Bruce MacDonald's avatar
Bruce MacDonald committed
10
11
12
)

// containerGGUF holds the fixed-size header of a GGUF file: the format
// version followed by the tensor and key-value counts.
type containerGGUF struct {
	// bo is the byte order used for every binary read of the file.
	bo binary.ByteOrder

	// Version is the GGUF format version read from the header.
	Version uint32

	// V1 is the count layout read when Version is 1 (32-bit counts).
	V1 struct {
		NumTensor uint32
		NumKV     uint32
	}

	// V2 is the count layout read for every other version (64-bit counts).
	V2 struct {
		NumTensor uint64
		NumKV     uint64
	}
}

// Name reports the container format identifier, which is always "gguf".
func (c *containerGGUF) Name() string {
	const name = "gguf"
	return name
}

Michael Yang's avatar
Michael Yang committed
32
33
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
	binary.Read(rso, c.bo, &c.Version)
Bruce MacDonald's avatar
Bruce MacDonald committed
34
35
36

	switch c.Version {
	case 1:
Michael Yang's avatar
Michael Yang committed
37
		binary.Read(rso, c.bo, &c.V1)
Bruce MacDonald's avatar
Bruce MacDonald committed
38
	default:
Michael Yang's avatar
Michael Yang committed
39
		binary.Read(rso, c.bo, &c.V2)
Bruce MacDonald's avatar
Bruce MacDonald committed
40
41
42
	}

	model := newGGUFModel(c)
Michael Yang's avatar
Michael Yang committed
43
	if err := model.Decode(rso); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
		return nil, err
	}

	return model, nil
}

// Value type tags used in the key-value section. Each tag is read as a
// uint32 and selects how the value (or array element) that follows is decoded.
// The numbers are part of the file format, so they are spelled out explicitly.
const (
	ggufTypeUint8   uint32 = 0
	ggufTypeInt8    uint32 = 1
	ggufTypeUint16  uint32 = 2
	ggufTypeInt16   uint32 = 3
	ggufTypeUint32  uint32 = 4
	ggufTypeInt32   uint32 = 5
	ggufTypeFloat32 uint32 = 6
	ggufTypeBool    uint32 = 7
	ggufTypeString  uint32 = 8
	ggufTypeArray   uint32 = 9
	ggufTypeUint64  uint32 = 10
	ggufTypeInt64   uint32 = 11
	ggufTypeFloat64 uint32 = 12
)

// kv maps metadata key names to their decoded values. The dynamic type of
// each value depends on the type tag stored with it in the file.
type kv map[string]any

68
69
70
71
72
73
74
75
76
77
// tensor describes one tensor entry decoded from a GGUF file.
//
// name, kind and offset are stored as read from the file; size is the byte
// size computed during decoding from the element count and the tensor kind.
type tensor struct {
	name   string
	kind   uint32
	offset uint64
	size   uint64

	// shape is the number of elements in each dimension
	shape [4]uint64
}

Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
type ggufModel struct {
	*containerGGUF
80

Bruce MacDonald's avatar
Bruce MacDonald committed
81
	kv
82
83
84
	tensors []tensor

	parameters uint64
Bruce MacDonald's avatar
Bruce MacDonald committed
85
86
87
88
89
90
91
92
93
}

// newGGUFModel returns a model bound to container with an empty, ready-to-use
// metadata map and no tensors.
func newGGUFModel(container *containerGGUF) *ggufModel {
	m := new(ggufModel)
	m.containerGGUF = container
	m.kv = kv{}
	return m
}

94
95
96
97
98
99
100
101
// NumTensor returns the tensor count from the header, widening the 32-bit
// version 1 field so callers always get a uint64.
func (llm *ggufModel) NumTensor() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumTensor)
	default:
		return llm.V2.NumTensor
	}
}

Bruce MacDonald's avatar
Bruce MacDonald committed
102
103
104
105
106
107
108
109
// NumKV returns the key-value pair count from the header, widening the 32-bit
// version 1 field so callers always get a uint64.
func (llm *ggufModel) NumKV() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumKV)
	default:
		return llm.V2.NumKV
	}
}

Michael Yang's avatar
Michael Yang committed
110
func (llm *ggufModel) ModelFamily() string {
Bruce MacDonald's avatar
Bruce MacDonald committed
111
112
	t, ok := llm.kv["general.architecture"].(string)
	if ok {
Michael Yang's avatar
Michael Yang committed
113
		return t
Bruce MacDonald's avatar
Bruce MacDonald committed
114
115
	}

Michael Yang's avatar
Michael Yang committed
116
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
117
118
}

Michael Yang's avatar
Michael Yang committed
119
func (llm *ggufModel) ModelType() string {
120
121
122
123
	if llm.parameters > 0 {
		return format.HumanNumber(llm.parameters)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
124
	switch llm.ModelFamily() {
Michael Yang's avatar
Michael Yang committed
125
126
	case "llama":
		if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
Michael Yang's avatar
Michael Yang committed
127
128
129
130
131
132
			heads, headsOK := llm.kv["llama.head_count"].(uint32)
			headKVs, headsKVsOK := llm.kv["llama.head_count_kv"].(uint32)
			if headsOK && headsKVsOK && heads/headKVs == 8 {
				return "70B"
			}

Michael Yang's avatar
Michael Yang committed
133
134
135
136
137
			return llamaModelType(blocks)
		}
	case "falcon":
		if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
			return falconModelType(blocks)
Bruce MacDonald's avatar
Bruce MacDonald committed
138
		}
Michael Yang's avatar
Michael Yang committed
139
140
141
142
	case "starcoder":
		if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok {
			return starCoderModelType(blocks)
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
143
144
	}

Michael Yang's avatar
Michael Yang committed
145
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
146
147
}

Michael Yang's avatar
Michael Yang committed
148
149
150
151
func (llm *ggufModel) FileType() string {
	t, ok := llm.kv["general.file_type"].(uint32)
	if ok {
		return fileType(t)
Bruce MacDonald's avatar
Bruce MacDonald committed
152
153
	}

Michael Yang's avatar
Michael Yang committed
154
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
155
156
}

Michael Yang's avatar
Michael Yang committed
157
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
158
	// decode key-values
Bruce MacDonald's avatar
Bruce MacDonald committed
159
	for i := 0; uint64(i) < llm.NumKV(); i++ {
Michael Yang's avatar
Michael Yang committed
160
		k, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
161
162
163
164
		if err != nil {
			return err
		}

Michael Yang's avatar
Michael Yang committed
165
		vtype := llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
166
167
168
169

		var v any
		switch vtype {
		case ggufTypeUint8:
Michael Yang's avatar
Michael Yang committed
170
			v = llm.readU8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
171
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
172
			v = llm.readI8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
173
		case ggufTypeUint16:
Michael Yang's avatar
Michael Yang committed
174
			v = llm.readU16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
175
		case ggufTypeInt16:
Michael Yang's avatar
Michael Yang committed
176
			v = llm.readI16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
177
		case ggufTypeUint32:
Michael Yang's avatar
Michael Yang committed
178
			v = llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
179
		case ggufTypeInt32:
Michael Yang's avatar
Michael Yang committed
180
			v = llm.readI32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
181
		case ggufTypeUint64:
Michael Yang's avatar
Michael Yang committed
182
			v = llm.readU64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
183
		case ggufTypeInt64:
Michael Yang's avatar
Michael Yang committed
184
			v = llm.readI64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
185
		case ggufTypeFloat32:
Michael Yang's avatar
Michael Yang committed
186
			v = llm.readF32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
187
		case ggufTypeFloat64:
Michael Yang's avatar
Michael Yang committed
188
			v = llm.readF64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
189
		case ggufTypeBool:
Michael Yang's avatar
Michael Yang committed
190
			v = llm.readBool(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
191
		case ggufTypeString:
Michael Yang's avatar
Michael Yang committed
192
			s, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
193
194
195
196
197
198
			if err != nil {
				return err
			}

			v = s
		case ggufTypeArray:
Michael Yang's avatar
Michael Yang committed
199
			a, err := llm.readArray(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
200
201
202
203
204
205
206
207
208
209
210
211
			if err != nil {
				return err
			}

			v = a
		default:
			return fmt.Errorf("invalid type: %d", vtype)
		}

		llm.kv[k] = v
	}

212
213
	// decode tensors
	for i := 0; uint64(i) < llm.NumTensor(); i++ {
Michael Yang's avatar
Michael Yang committed
214
		name, err := llm.readString(rso)
215
		if err != nil {
216
217
218
			return err
		}

Michael Yang's avatar
Michael Yang committed
219
		dims := llm.readU32(rso)
220
221
222

		shape := [4]uint64{1, 1, 1, 1}
		for i := 0; uint32(i) < dims; i++ {
Michael Yang's avatar
Michael Yang committed
223
			shape[i] = llm.readU64(rso)
224
225
		}

Michael Yang's avatar
Michael Yang committed
226
227
		kind := llm.readU32(rso)
		offset := llm.readU64(rso)
228
229
230
231
232
233
234
235
236
237

		var blockSize uint64
		switch {
		case kind < 2:
			blockSize = 1
		case kind < 10:
			blockSize = 32
		default:
			blockSize = 256
		}
238

239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
		var typeSize uint64
		switch kind {
		case 0: // FP32
			typeSize = 4
		case 1: // FP16
			typeSize = 2
		case 2: // Q4_0
			typeSize = 2 + blockSize/2
		case 3: // Q4_1
			typeSize = 2 + 2 + blockSize/2
		case 6: // Q5_0
			typeSize = 2 + 4 + blockSize/2
		case 7: // Q5_1
			typeSize = 2 + 2 + 4 + blockSize/2
		case 8: // Q8_0
			typeSize = 2 + blockSize
		case 9: // Q8_1
			typeSize = 4 + 4 + blockSize
		case 10: // Q2_K
			typeSize = blockSize/16 + blockSize/4 + 2 + 2
		case 11: // Q3_K
			typeSize = blockSize/8 + blockSize/4 + 12 + 2
		case 12: // Q4_K
			typeSize = 2 + 2 + 12 + blockSize/2
		case 13: // Q5_K
			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
		case 14: // Q6_K
			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
267
268
		}

269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
		parameters := shape[0] * shape[1] * shape[2] * shape[3]
		size := parameters * typeSize / blockSize

		llm.tensors = append(llm.tensors, tensor{
			name:   name,
			kind:   kind,
			offset: offset,
			size:   size,
			shape:  shape,
		})

		llm.parameters += parameters
	}

	alignment, ok := llm.kv["general.alignment"].(uint32)
	if !ok {
		alignment = 32
	}
287

Michael Yang's avatar
Michael Yang committed
288
	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
289
290
	for _, tensor := range llm.tensors {
		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
Michael Yang's avatar
Michael Yang committed
291
		rso.Seek(padded, io.SeekCurrent)
292
293
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
294
295
296
	return nil
}

297
298
299
300
301
302
303
304
305
306
// NumLayers returns the "<family>.block_count" metadata value for the
// model's architecture. It returns 0 when the key is missing or does not
// hold a uint32, rather than panicking on an unexpected value type.
func (llm *ggufModel) NumLayers() int64 {
	key := llm.ModelFamily() + ".block_count"
	if blocks, ok := llm.kv[key].(uint32); ok {
		return int64(blocks)
	}

	return 0
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
307
func (llm ggufModel) readU8(r io.Reader) uint8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
308
	var u8 uint8
Michael Yang's avatar
ggufv3  
Michael Yang committed
309
	binary.Read(r, llm.bo, &u8)
Bruce MacDonald's avatar
Bruce MacDonald committed
310
311
312
	return u8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
313
func (llm ggufModel) readI8(r io.Reader) int8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
314
	var i8 int8
Michael Yang's avatar
ggufv3  
Michael Yang committed
315
	binary.Read(r, llm.bo, &i8)
Bruce MacDonald's avatar
Bruce MacDonald committed
316
317
318
	return i8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
319
func (llm ggufModel) readU16(r io.Reader) uint16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
320
	var u16 uint16
Michael Yang's avatar
ggufv3  
Michael Yang committed
321
	binary.Read(r, llm.bo, &u16)
Bruce MacDonald's avatar
Bruce MacDonald committed
322
323
324
	return u16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
325
func (llm ggufModel) readI16(r io.Reader) int16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
326
	var i16 int16
Michael Yang's avatar
ggufv3  
Michael Yang committed
327
	binary.Read(r, llm.bo, &i16)
Bruce MacDonald's avatar
Bruce MacDonald committed
328
329
330
	return i16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
331
func (llm ggufModel) readU32(r io.Reader) uint32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
332
	var u32 uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
333
	binary.Read(r, llm.bo, &u32)
Bruce MacDonald's avatar
Bruce MacDonald committed
334
335
336
	return u32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
337
func (llm ggufModel) readI32(r io.Reader) int32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
338
	var i32 int32
Michael Yang's avatar
ggufv3  
Michael Yang committed
339
	binary.Read(r, llm.bo, &i32)
Bruce MacDonald's avatar
Bruce MacDonald committed
340
341
342
	return i32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
343
func (llm ggufModel) readU64(r io.Reader) uint64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
344
	var u64 uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
345
	binary.Read(r, llm.bo, &u64)
Bruce MacDonald's avatar
Bruce MacDonald committed
346
347
348
	return u64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
349
func (llm ggufModel) readI64(r io.Reader) int64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
350
	var i64 int64
Michael Yang's avatar
ggufv3  
Michael Yang committed
351
	binary.Read(r, llm.bo, &i64)
Bruce MacDonald's avatar
Bruce MacDonald committed
352
353
354
	return i64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
355
func (llm ggufModel) readF32(r io.Reader) float32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
356
	var f32 float32
Michael Yang's avatar
ggufv3  
Michael Yang committed
357
	binary.Read(r, llm.bo, &f32)
Bruce MacDonald's avatar
Bruce MacDonald committed
358
359
360
	return f32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
361
func (llm ggufModel) readF64(r io.Reader) float64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
362
	var f64 float64
Michael Yang's avatar
ggufv3  
Michael Yang committed
363
	binary.Read(r, llm.bo, &f64)
Bruce MacDonald's avatar
Bruce MacDonald committed
364
365
366
	return f64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
367
func (llm ggufModel) readBool(r io.Reader) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
368
	var b bool
Michael Yang's avatar
ggufv3  
Michael Yang committed
369
	binary.Read(r, llm.bo, &b)
Bruce MacDonald's avatar
Bruce MacDonald committed
370
371
372
	return b
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
373
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
Bruce MacDonald's avatar
Bruce MacDonald committed
374
	var nameLength uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
375
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
376
377
378
379
380
381
382
383
384
385
386
387
388

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

func (llm ggufModel) readString(r io.Reader) (string, error) {
389
390
391
392
	if llm.Version == 1 {
		return llm.readStringV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
393
	var nameLength uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
394
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	return b.String(), nil
}

func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
	atype := llm.readU32(r)
	n := llm.readU32(r)

	for i := 0; uint32(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
413
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readStringV1(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}

func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
442
443
444
445
	if llm.Version == 1 {
		return llm.readArrayV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
446
447
448
449
450
451
452
453
	atype := llm.readU32(r)
	n := llm.readU64(r)

	for i := 0; uint64(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
454
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeUint64:
			arr = append(arr, llm.readU64(r))
		case ggufTypeInt64:
			arr = append(arr, llm.readI64(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeFloat64:
			arr = append(arr, llm.readF64(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readString(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}