gguf.go 9.25 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
5
6
7
package llm

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
8
9

	"github.com/jmorganca/ollama/format"
Bruce MacDonald's avatar
Bruce MacDonald committed
10
11
12
)

// containerGGUF holds the GGUF header fields read by Decode: the format
// version and the version-specific tensor / key-value counts.
type containerGGUF struct {
	// bo is the byte order used for every binary read of this file.
	bo binary.ByteOrder

	// Version is the GGUF format version read from the header.
	Version uint32

	// V1 is the count header read for version 1 files (32-bit counts).
	V1 struct {
		NumTensor uint32
		NumKV     uint32
	}

	// V2 is the count header read for every other version (64-bit counts).
	V2 struct {
		NumTensor uint64
		NumKV     uint64
	}
}

// Name reports the identifier of this container format.
func (c *containerGGUF) Name() string {
	const name = "gguf"
	return name
}

Michael Yang's avatar
Michael Yang committed
32
33
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
	binary.Read(rso, c.bo, &c.Version)
Bruce MacDonald's avatar
Bruce MacDonald committed
34
35
36

	switch c.Version {
	case 1:
Michael Yang's avatar
Michael Yang committed
37
		binary.Read(rso, c.bo, &c.V1)
Bruce MacDonald's avatar
Bruce MacDonald committed
38
	default:
Michael Yang's avatar
Michael Yang committed
39
		binary.Read(rso, c.bo, &c.V2)
Bruce MacDonald's avatar
Bruce MacDonald committed
40
41
42
	}

	model := newGGUFModel(c)
Michael Yang's avatar
Michael Yang committed
43
	if err := model.Decode(rso); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
		return nil, err
	}

	return model, nil
}

// Value type tags for GGUF metadata. Each key-value entry stores one of these
// as a uint32 ahead of its payload, and arrays store one for their elements.
const (
	ggufTypeUint8   uint32 = 0
	ggufTypeInt8    uint32 = 1
	ggufTypeUint16  uint32 = 2
	ggufTypeInt16   uint32 = 3
	ggufTypeUint32  uint32 = 4
	ggufTypeInt32   uint32 = 5
	ggufTypeFloat32 uint32 = 6
	ggufTypeBool    uint32 = 7
	ggufTypeString  uint32 = 8
	ggufTypeArray   uint32 = 9
	ggufTypeUint64  uint32 = 10
	ggufTypeInt64   uint32 = 11
	ggufTypeFloat64 uint32 = 12
)

// kv holds decoded GGUF metadata values keyed by their metadata name.
type kv map[string]any

68
69
70
71
72
73
74
75
76
77
// tensor describes one entry of the GGUF tensor table.
type tensor struct {
	// name is the tensor's identifier as stored in the file.
	name string
	// kind is the numeric tensor data type (for example 0 is FP32, 1 is FP16).
	kind uint32
	// offset is the data offset read from the file; size is the computed
	// length of the tensor data in bytes.
	offset, size uint64

	// shape holds the element count of each dimension; unused dimensions are 1.
	shape [4]uint64
}

Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
type ggufModel struct {
	*containerGGUF
80

Bruce MacDonald's avatar
Bruce MacDonald committed
81
	kv
82
83
84
	tensors []tensor

	parameters uint64
Bruce MacDonald's avatar
Bruce MacDonald committed
85
86
87
88
89
90
91
92
93
}

// newGGUFModel returns a ggufModel bound to container with an empty, non-nil
// metadata map ready to be filled by Decode.
func newGGUFModel(container *containerGGUF) *ggufModel {
	m := new(ggufModel)
	m.containerGGUF = container
	m.kv = kv{}
	return m
}

94
95
96
97
98
99
100
101
// NumTensor returns the tensor count from the header, widening the 32-bit
// count of version 1 files to uint64.
func (llm *ggufModel) NumTensor() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumTensor)
	default:
		return llm.V2.NumTensor
	}
}

Bruce MacDonald's avatar
Bruce MacDonald committed
102
103
104
105
106
107
108
109
// NumKV returns the metadata key-value count from the header, widening the
// 32-bit count of version 1 files to uint64.
func (llm *ggufModel) NumKV() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumKV)
	default:
		return llm.V2.NumKV
	}
}

Michael Yang's avatar
Michael Yang committed
110
func (llm *ggufModel) ModelFamily() string {
Michael Yang's avatar
Michael Yang committed
111
	if t, ok := llm.kv["general.architecture"].(string); ok {
Michael Yang's avatar
Michael Yang committed
112
		return t
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
	}

Michael Yang's avatar
Michael Yang committed
115
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
116
117
}

Michael Yang's avatar
Michael Yang committed
118
func (llm *ggufModel) ModelType() string {
119
120
121
122
	if llm.parameters > 0 {
		return format.HumanNumber(llm.parameters)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
123
	switch llm.ModelFamily() {
Michael Yang's avatar
Michael Yang committed
124
125
	case "llama":
		if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
Michael Yang's avatar
Michael Yang committed
126
127
128
129
130
131
			heads, headsOK := llm.kv["llama.head_count"].(uint32)
			headKVs, headsKVsOK := llm.kv["llama.head_count_kv"].(uint32)
			if headsOK && headsKVsOK && heads/headKVs == 8 {
				return "70B"
			}

Michael Yang's avatar
Michael Yang committed
132
133
134
135
136
			return llamaModelType(blocks)
		}
	case "falcon":
		if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
			return falconModelType(blocks)
Bruce MacDonald's avatar
Bruce MacDonald committed
137
		}
Michael Yang's avatar
Michael Yang committed
138
139
140
141
	case "starcoder":
		if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok {
			return starCoderModelType(blocks)
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
142
143
	}

Michael Yang's avatar
Michael Yang committed
144
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
145
146
}

Michael Yang's avatar
Michael Yang committed
147
func (llm *ggufModel) FileType() string {
Michael Yang's avatar
Michael Yang committed
148
	if t, ok := llm.kv["general.file_type"].(uint32); ok {
Michael Yang's avatar
Michael Yang committed
149
		return fileType(t)
Bruce MacDonald's avatar
Bruce MacDonald committed
150
151
	}

Michael Yang's avatar
Michael Yang committed
152
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
153
154
}

Michael Yang's avatar
Michael Yang committed
155
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
156
	// decode key-values
Bruce MacDonald's avatar
Bruce MacDonald committed
157
	for i := 0; uint64(i) < llm.NumKV(); i++ {
Michael Yang's avatar
Michael Yang committed
158
		k, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
159
160
161
162
		if err != nil {
			return err
		}

Michael Yang's avatar
Michael Yang committed
163
		vtype := llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
164
165
166
167

		var v any
		switch vtype {
		case ggufTypeUint8:
Michael Yang's avatar
Michael Yang committed
168
			v = llm.readU8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
169
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
170
			v = llm.readI8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
171
		case ggufTypeUint16:
Michael Yang's avatar
Michael Yang committed
172
			v = llm.readU16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
173
		case ggufTypeInt16:
Michael Yang's avatar
Michael Yang committed
174
			v = llm.readI16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
175
		case ggufTypeUint32:
Michael Yang's avatar
Michael Yang committed
176
			v = llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
177
		case ggufTypeInt32:
Michael Yang's avatar
Michael Yang committed
178
			v = llm.readI32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
179
		case ggufTypeUint64:
Michael Yang's avatar
Michael Yang committed
180
			v = llm.readU64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
181
		case ggufTypeInt64:
Michael Yang's avatar
Michael Yang committed
182
			v = llm.readI64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
183
		case ggufTypeFloat32:
Michael Yang's avatar
Michael Yang committed
184
			v = llm.readF32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
185
		case ggufTypeFloat64:
Michael Yang's avatar
Michael Yang committed
186
			v = llm.readF64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
187
		case ggufTypeBool:
Michael Yang's avatar
Michael Yang committed
188
			v = llm.readBool(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
189
		case ggufTypeString:
Michael Yang's avatar
Michael Yang committed
190
			s, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
191
192
193
194
195
196
			if err != nil {
				return err
			}

			v = s
		case ggufTypeArray:
Michael Yang's avatar
Michael Yang committed
197
			a, err := llm.readArray(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
198
199
200
201
202
203
204
205
206
207
208
209
			if err != nil {
				return err
			}

			v = a
		default:
			return fmt.Errorf("invalid type: %d", vtype)
		}

		llm.kv[k] = v
	}

210
211
	// decode tensors
	for i := 0; uint64(i) < llm.NumTensor(); i++ {
Michael Yang's avatar
Michael Yang committed
212
		name, err := llm.readString(rso)
213
		if err != nil {
214
215
216
			return err
		}

Michael Yang's avatar
Michael Yang committed
217
		// dims is the number of dimensions in the tensor
Michael Yang's avatar
Michael Yang committed
218
		dims := llm.readU32(rso)
219
220
221

		shape := [4]uint64{1, 1, 1, 1}
		for i := 0; uint32(i) < dims; i++ {
Michael Yang's avatar
Michael Yang committed
222
			shape[i] = llm.readU64(rso)
223
224
		}

Michael Yang's avatar
Michael Yang committed
225
226
		kind := llm.readU32(rso)
		offset := llm.readU64(rso)
227
228
229
230
231
232
233
234
235
236

		var blockSize uint64
		switch {
		case kind < 2:
			blockSize = 1
		case kind < 10:
			blockSize = 32
		default:
			blockSize = 256
		}
237

238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
		var typeSize uint64
		switch kind {
		case 0: // FP32
			typeSize = 4
		case 1: // FP16
			typeSize = 2
		case 2: // Q4_0
			typeSize = 2 + blockSize/2
		case 3: // Q4_1
			typeSize = 2 + 2 + blockSize/2
		case 6: // Q5_0
			typeSize = 2 + 4 + blockSize/2
		case 7: // Q5_1
			typeSize = 2 + 2 + 4 + blockSize/2
		case 8: // Q8_0
			typeSize = 2 + blockSize
		case 9: // Q8_1
			typeSize = 4 + 4 + blockSize
		case 10: // Q2_K
			typeSize = blockSize/16 + blockSize/4 + 2 + 2
		case 11: // Q3_K
			typeSize = blockSize/8 + blockSize/4 + 12 + 2
		case 12: // Q4_K
			typeSize = 2 + 2 + 12 + blockSize/2
		case 13: // Q5_K
			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
		case 14: // Q6_K
			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
266
267
		}

268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
		parameters := shape[0] * shape[1] * shape[2] * shape[3]
		size := parameters * typeSize / blockSize

		llm.tensors = append(llm.tensors, tensor{
			name:   name,
			kind:   kind,
			offset: offset,
			size:   size,
			shape:  shape,
		})

		llm.parameters += parameters
	}

	alignment, ok := llm.kv["general.alignment"].(uint32)
	if !ok {
		alignment = 32
	}
286

Michael Yang's avatar
Michael Yang committed
287
	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
288
289
	for _, tensor := range llm.tensors {
		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
Michael Yang's avatar
Michael Yang committed
290
		rso.Seek(padded, io.SeekCurrent)
291
292
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
293
294
295
	return nil
}

296
297
298
299
300
301
302
303
304
305
// NumLayers returns the "<architecture>.block_count" metadata value for the
// model's architecture. It returns 0 when the key is missing or does not hold
// a uint32, rather than panicking on an unexpected value type.
func (llm *ggufModel) NumLayers() int64 {
	blocks, ok := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())].(uint32)
	if !ok {
		return 0
	}

	return int64(blocks)
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
306
func (llm ggufModel) readU8(r io.Reader) uint8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
307
	var u8 uint8
Michael Yang's avatar
ggufv3  
Michael Yang committed
308
	binary.Read(r, llm.bo, &u8)
Bruce MacDonald's avatar
Bruce MacDonald committed
309
310
311
	return u8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
312
func (llm ggufModel) readI8(r io.Reader) int8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
313
	var i8 int8
Michael Yang's avatar
ggufv3  
Michael Yang committed
314
	binary.Read(r, llm.bo, &i8)
Bruce MacDonald's avatar
Bruce MacDonald committed
315
316
317
	return i8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
318
func (llm ggufModel) readU16(r io.Reader) uint16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
319
	var u16 uint16
Michael Yang's avatar
ggufv3  
Michael Yang committed
320
	binary.Read(r, llm.bo, &u16)
Bruce MacDonald's avatar
Bruce MacDonald committed
321
322
323
	return u16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
324
func (llm ggufModel) readI16(r io.Reader) int16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
325
	var i16 int16
Michael Yang's avatar
ggufv3  
Michael Yang committed
326
	binary.Read(r, llm.bo, &i16)
Bruce MacDonald's avatar
Bruce MacDonald committed
327
328
329
	return i16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
330
func (llm ggufModel) readU32(r io.Reader) uint32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
331
	var u32 uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
332
	binary.Read(r, llm.bo, &u32)
Bruce MacDonald's avatar
Bruce MacDonald committed
333
334
335
	return u32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
336
func (llm ggufModel) readI32(r io.Reader) int32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
337
	var i32 int32
Michael Yang's avatar
ggufv3  
Michael Yang committed
338
	binary.Read(r, llm.bo, &i32)
Bruce MacDonald's avatar
Bruce MacDonald committed
339
340
341
	return i32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
342
func (llm ggufModel) readU64(r io.Reader) uint64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
343
	var u64 uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
344
	binary.Read(r, llm.bo, &u64)
Bruce MacDonald's avatar
Bruce MacDonald committed
345
346
347
	return u64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
348
func (llm ggufModel) readI64(r io.Reader) int64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
349
	var i64 int64
Michael Yang's avatar
ggufv3  
Michael Yang committed
350
	binary.Read(r, llm.bo, &i64)
Bruce MacDonald's avatar
Bruce MacDonald committed
351
352
353
	return i64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
354
func (llm ggufModel) readF32(r io.Reader) float32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
355
	var f32 float32
Michael Yang's avatar
ggufv3  
Michael Yang committed
356
	binary.Read(r, llm.bo, &f32)
Bruce MacDonald's avatar
Bruce MacDonald committed
357
358
359
	return f32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
360
func (llm ggufModel) readF64(r io.Reader) float64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
361
	var f64 float64
Michael Yang's avatar
ggufv3  
Michael Yang committed
362
	binary.Read(r, llm.bo, &f64)
Bruce MacDonald's avatar
Bruce MacDonald committed
363
364
365
	return f64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
366
func (llm ggufModel) readBool(r io.Reader) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
367
	var b bool
Michael Yang's avatar
ggufv3  
Michael Yang committed
368
	binary.Read(r, llm.bo, &b)
Bruce MacDonald's avatar
Bruce MacDonald committed
369
370
371
	return b
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
372
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
Bruce MacDonald's avatar
Bruce MacDonald committed
373
	var nameLength uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
374
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
375
376
377
378
379
380
381
382
383
384
385
386
387

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

func (llm ggufModel) readString(r io.Reader) (string, error) {
388
389
390
391
	if llm.Version == 1 {
		return llm.readStringV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
392
	var nameLength uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
393
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	return b.String(), nil
}

func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
	atype := llm.readU32(r)
	n := llm.readU32(r)

	for i := 0; uint32(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
412
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readStringV1(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}

func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
441
442
443
444
	if llm.Version == 1 {
		return llm.readArrayV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
445
446
447
448
449
450
451
452
	atype := llm.readU32(r)
	n := llm.readU64(r)

	for i := 0; uint64(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
453
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeUint64:
			arr = append(arr, llm.readU64(r))
		case ggufTypeInt64:
			arr = append(arr, llm.readI64(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeFloat64:
			arr = append(arr, llm.readF64(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readString(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}