gguf.go 8.68 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
5
6
7
package llm

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
8
9

	"github.com/jmorganca/ollama/format"
Bruce MacDonald's avatar
Bruce MacDonald committed
10
11
12
)

// containerGGUF holds the byte order and the header fields that Decode reads
// from a GGUF file: the format version and the tensor / key-value counts.
type containerGGUF struct {
	// bo is the byte order passed to every binary read of this file.
	bo binary.ByteOrder

	// Version is the GGUF format version read from the header.
	Version uint32

	// V1 is the header layout read when Version is 1 (32-bit counts).
	V1 struct {
		NumTensor uint32
		NumKV     uint32
	}

	// V2 is the header layout read for every other version (64-bit counts).
	V2 struct {
		NumTensor uint64
		NumKV     uint64
	}
}

// Name reports the container format identifier, which is always "gguf".
func (*containerGGUF) Name() string {
	const name = "gguf"
	return name
}

Michael Yang's avatar
Michael Yang committed
32
33
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
	binary.Read(rso, c.bo, &c.Version)
Bruce MacDonald's avatar
Bruce MacDonald committed
34
35
36

	switch c.Version {
	case 1:
Michael Yang's avatar
Michael Yang committed
37
		binary.Read(rso, c.bo, &c.V1)
Bruce MacDonald's avatar
Bruce MacDonald committed
38
	default:
Michael Yang's avatar
Michael Yang committed
39
		binary.Read(rso, c.bo, &c.V2)
Bruce MacDonald's avatar
Bruce MacDonald committed
40
41
42
	}

	model := newGGUFModel(c)
Michael Yang's avatar
Michael Yang committed
43
	if err := model.Decode(rso); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
		return nil, err
	}

	return model, nil
}

// Metadata value type identifiers as they appear in a GGUF file. The numeric
// values are part of the file format, so they are spelled out explicitly.
const (
	ggufTypeUint8   uint32 = 0
	ggufTypeInt8    uint32 = 1
	ggufTypeUint16  uint32 = 2
	ggufTypeInt16   uint32 = 3
	ggufTypeUint32  uint32 = 4
	ggufTypeInt32   uint32 = 5
	ggufTypeFloat32 uint32 = 6
	ggufTypeBool    uint32 = 7
	ggufTypeString  uint32 = 8
	ggufTypeArray   uint32 = 9
	ggufTypeUint64  uint32 = 10
	ggufTypeInt64   uint32 = 11
	ggufTypeFloat64 uint32 = 12
)

// kv holds decoded GGUF metadata values keyed by their metadata key string.
type kv map[string]any

68
69
70
71
72
73
74
75
76
77
// tensor describes one tensor entry decoded from a GGUF file.
//
// name, kind and offset are stored as read from the file; kind is the numeric
// tensor data type id. size is the tensor's data size in bytes as computed
// during decoding from the element count and the per-kind block size.
type tensor struct {
	name   string
	kind   uint32
	offset uint64
	size   uint64

	// shape is the number of elements in each dimension
	shape [4]uint64
}

Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
type ggufModel struct {
	*containerGGUF
80

Bruce MacDonald's avatar
Bruce MacDonald committed
81
	kv
82
83
84
	tensors []tensor

	parameters uint64
Bruce MacDonald's avatar
Bruce MacDonald committed
85
86
87
88
89
90
91
92
93
}

// newGGUFModel returns a ggufModel that wraps container and starts with an
// empty, non-nil metadata map.
func newGGUFModel(container *containerGGUF) *ggufModel {
	m := new(ggufModel)
	m.containerGGUF = container
	m.kv = kv{}
	return m
}

94
95
96
97
98
99
100
101
// NumTensor returns the tensor count from the header, widening the 32-bit
// version 1 field to uint64.
func (llm *ggufModel) NumTensor() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumTensor)
	default:
		return llm.V2.NumTensor
	}
}

Bruce MacDonald's avatar
Bruce MacDonald committed
102
103
104
105
106
107
108
109
// NumKV returns the metadata key-value count from the header, widening the
// 32-bit version 1 field to uint64.
func (llm *ggufModel) NumKV() uint64 {
	switch llm.Version {
	case 1:
		return uint64(llm.V1.NumKV)
	default:
		return llm.V2.NumKV
	}
}

Michael Yang's avatar
Michael Yang committed
110
func (llm *ggufModel) ModelFamily() string {
Michael Yang's avatar
Michael Yang committed
111
	if t, ok := llm.kv["general.architecture"].(string); ok {
Michael Yang's avatar
Michael Yang committed
112
		return t
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
	}

Michael Yang's avatar
Michael Yang committed
115
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
116
117
}

Michael Yang's avatar
Michael Yang committed
118
func (llm *ggufModel) ModelType() string {
119
120
121
122
	if llm.parameters > 0 {
		return format.HumanNumber(llm.parameters)
	}

Michael Yang's avatar
Michael Yang committed
123
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
124
125
}

Michael Yang's avatar
Michael Yang committed
126
func (llm *ggufModel) FileType() string {
Michael Yang's avatar
Michael Yang committed
127
	if t, ok := llm.kv["general.file_type"].(uint32); ok {
Michael Yang's avatar
Michael Yang committed
128
		return fileType(t)
Bruce MacDonald's avatar
Bruce MacDonald committed
129
130
	}

Michael Yang's avatar
Michael Yang committed
131
	return "unknown"
Bruce MacDonald's avatar
Bruce MacDonald committed
132
133
}

Michael Yang's avatar
Michael Yang committed
134
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
135
	// decode key-values
Bruce MacDonald's avatar
Bruce MacDonald committed
136
	for i := 0; uint64(i) < llm.NumKV(); i++ {
Michael Yang's avatar
Michael Yang committed
137
		k, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
138
139
140
141
		if err != nil {
			return err
		}

Michael Yang's avatar
Michael Yang committed
142
		vtype := llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
143
144
145
146

		var v any
		switch vtype {
		case ggufTypeUint8:
Michael Yang's avatar
Michael Yang committed
147
			v = llm.readU8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
148
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
149
			v = llm.readI8(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
150
		case ggufTypeUint16:
Michael Yang's avatar
Michael Yang committed
151
			v = llm.readU16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
152
		case ggufTypeInt16:
Michael Yang's avatar
Michael Yang committed
153
			v = llm.readI16(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
154
		case ggufTypeUint32:
Michael Yang's avatar
Michael Yang committed
155
			v = llm.readU32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
156
		case ggufTypeInt32:
Michael Yang's avatar
Michael Yang committed
157
			v = llm.readI32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
158
		case ggufTypeUint64:
Michael Yang's avatar
Michael Yang committed
159
			v = llm.readU64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
160
		case ggufTypeInt64:
Michael Yang's avatar
Michael Yang committed
161
			v = llm.readI64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
162
		case ggufTypeFloat32:
Michael Yang's avatar
Michael Yang committed
163
			v = llm.readF32(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
164
		case ggufTypeFloat64:
Michael Yang's avatar
Michael Yang committed
165
			v = llm.readF64(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
166
		case ggufTypeBool:
Michael Yang's avatar
Michael Yang committed
167
			v = llm.readBool(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
168
		case ggufTypeString:
Michael Yang's avatar
Michael Yang committed
169
			s, err := llm.readString(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
170
171
172
173
174
175
			if err != nil {
				return err
			}

			v = s
		case ggufTypeArray:
Michael Yang's avatar
Michael Yang committed
176
			a, err := llm.readArray(rso)
Bruce MacDonald's avatar
Bruce MacDonald committed
177
178
179
180
181
182
183
184
185
186
187
188
			if err != nil {
				return err
			}

			v = a
		default:
			return fmt.Errorf("invalid type: %d", vtype)
		}

		llm.kv[k] = v
	}

189
190
	// decode tensors
	for i := 0; uint64(i) < llm.NumTensor(); i++ {
Michael Yang's avatar
Michael Yang committed
191
		name, err := llm.readString(rso)
192
		if err != nil {
193
194
195
			return err
		}

Michael Yang's avatar
Michael Yang committed
196
		// dims is the number of dimensions in the tensor
Michael Yang's avatar
Michael Yang committed
197
		dims := llm.readU32(rso)
198
199
200

		shape := [4]uint64{1, 1, 1, 1}
		for i := 0; uint32(i) < dims; i++ {
Michael Yang's avatar
Michael Yang committed
201
			shape[i] = llm.readU64(rso)
202
203
		}

Michael Yang's avatar
Michael Yang committed
204
205
		kind := llm.readU32(rso)
		offset := llm.readU64(rso)
206
207
208
209
210
211
212
213
214
215

		var blockSize uint64
		switch {
		case kind < 2:
			blockSize = 1
		case kind < 10:
			blockSize = 32
		default:
			blockSize = 256
		}
216

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
		var typeSize uint64
		switch kind {
		case 0: // FP32
			typeSize = 4
		case 1: // FP16
			typeSize = 2
		case 2: // Q4_0
			typeSize = 2 + blockSize/2
		case 3: // Q4_1
			typeSize = 2 + 2 + blockSize/2
		case 6: // Q5_0
			typeSize = 2 + 4 + blockSize/2
		case 7: // Q5_1
			typeSize = 2 + 2 + 4 + blockSize/2
		case 8: // Q8_0
			typeSize = 2 + blockSize
		case 9: // Q8_1
			typeSize = 4 + 4 + blockSize
		case 10: // Q2_K
			typeSize = blockSize/16 + blockSize/4 + 2 + 2
		case 11: // Q3_K
			typeSize = blockSize/8 + blockSize/4 + 12 + 2
		case 12: // Q4_K
			typeSize = 2 + 2 + 12 + blockSize/2
		case 13: // Q5_K
			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
		case 14: // Q6_K
			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
245
246
		}

247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
		parameters := shape[0] * shape[1] * shape[2] * shape[3]
		size := parameters * typeSize / blockSize

		llm.tensors = append(llm.tensors, tensor{
			name:   name,
			kind:   kind,
			offset: offset,
			size:   size,
			shape:  shape,
		})

		llm.parameters += parameters
	}

	alignment, ok := llm.kv["general.alignment"].(uint32)
	if !ok {
		alignment = 32
	}
265

Michael Yang's avatar
Michael Yang committed
266
	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
267
268
	for _, tensor := range llm.tensors {
		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
Michael Yang's avatar
Michael Yang committed
269
		rso.Seek(padded, io.SeekCurrent)
270
271
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
272
273
274
	return nil
}

275
276
277
278
279
280
281
282
283
284
// NumLayers returns the "<family>.block_count" metadata value, where family
// is the result of ModelFamily. It returns 0 when the key is missing or when
// its value is not a uint32, instead of panicking on an unexpected type.
func (llm *ggufModel) NumLayers() int64 {
	key := llm.ModelFamily() + ".block_count"

	count, ok := llm.kv[key].(uint32)
	if !ok {
		return 0
	}

	return int64(count)
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
285
func (llm ggufModel) readU8(r io.Reader) uint8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
286
	var u8 uint8
Michael Yang's avatar
ggufv3  
Michael Yang committed
287
	binary.Read(r, llm.bo, &u8)
Bruce MacDonald's avatar
Bruce MacDonald committed
288
289
290
	return u8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
291
func (llm ggufModel) readI8(r io.Reader) int8 {
Bruce MacDonald's avatar
Bruce MacDonald committed
292
	var i8 int8
Michael Yang's avatar
ggufv3  
Michael Yang committed
293
	binary.Read(r, llm.bo, &i8)
Bruce MacDonald's avatar
Bruce MacDonald committed
294
295
296
	return i8
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
297
func (llm ggufModel) readU16(r io.Reader) uint16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
298
	var u16 uint16
Michael Yang's avatar
ggufv3  
Michael Yang committed
299
	binary.Read(r, llm.bo, &u16)
Bruce MacDonald's avatar
Bruce MacDonald committed
300
301
302
	return u16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
303
func (llm ggufModel) readI16(r io.Reader) int16 {
Bruce MacDonald's avatar
Bruce MacDonald committed
304
	var i16 int16
Michael Yang's avatar
ggufv3  
Michael Yang committed
305
	binary.Read(r, llm.bo, &i16)
Bruce MacDonald's avatar
Bruce MacDonald committed
306
307
308
	return i16
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
309
func (llm ggufModel) readU32(r io.Reader) uint32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
310
	var u32 uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
311
	binary.Read(r, llm.bo, &u32)
Bruce MacDonald's avatar
Bruce MacDonald committed
312
313
314
	return u32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
315
func (llm ggufModel) readI32(r io.Reader) int32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
316
	var i32 int32
Michael Yang's avatar
ggufv3  
Michael Yang committed
317
	binary.Read(r, llm.bo, &i32)
Bruce MacDonald's avatar
Bruce MacDonald committed
318
319
320
	return i32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
321
func (llm ggufModel) readU64(r io.Reader) uint64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
322
	var u64 uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
323
	binary.Read(r, llm.bo, &u64)
Bruce MacDonald's avatar
Bruce MacDonald committed
324
325
326
	return u64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
327
func (llm ggufModel) readI64(r io.Reader) int64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
328
	var i64 int64
Michael Yang's avatar
ggufv3  
Michael Yang committed
329
	binary.Read(r, llm.bo, &i64)
Bruce MacDonald's avatar
Bruce MacDonald committed
330
331
332
	return i64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
333
func (llm ggufModel) readF32(r io.Reader) float32 {
Bruce MacDonald's avatar
Bruce MacDonald committed
334
	var f32 float32
Michael Yang's avatar
ggufv3  
Michael Yang committed
335
	binary.Read(r, llm.bo, &f32)
Bruce MacDonald's avatar
Bruce MacDonald committed
336
337
338
	return f32
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
339
func (llm ggufModel) readF64(r io.Reader) float64 {
Bruce MacDonald's avatar
Bruce MacDonald committed
340
	var f64 float64
Michael Yang's avatar
ggufv3  
Michael Yang committed
341
	binary.Read(r, llm.bo, &f64)
Bruce MacDonald's avatar
Bruce MacDonald committed
342
343
344
	return f64
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
345
func (llm ggufModel) readBool(r io.Reader) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
346
	var b bool
Michael Yang's avatar
ggufv3  
Michael Yang committed
347
	binary.Read(r, llm.bo, &b)
Bruce MacDonald's avatar
Bruce MacDonald committed
348
349
350
	return b
}

Michael Yang's avatar
ggufv3  
Michael Yang committed
351
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
Bruce MacDonald's avatar
Bruce MacDonald committed
352
	var nameLength uint32
Michael Yang's avatar
ggufv3  
Michael Yang committed
353
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
354
355
356
357
358
359
360
361
362
363
364
365
366

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

func (llm ggufModel) readString(r io.Reader) (string, error) {
367
368
369
370
	if llm.Version == 1 {
		return llm.readStringV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
371
	var nameLength uint64
Michael Yang's avatar
ggufv3  
Michael Yang committed
372
	binary.Read(r, llm.bo, &nameLength)
Bruce MacDonald's avatar
Bruce MacDonald committed
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	return b.String(), nil
}

func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
	atype := llm.readU32(r)
	n := llm.readU32(r)

	for i := 0; uint32(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
391
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readStringV1(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}

func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
420
421
422
423
	if llm.Version == 1 {
		return llm.readArrayV1(r)
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
424
425
426
427
428
429
430
431
	atype := llm.readU32(r)
	n := llm.readU64(r)

	for i := 0; uint64(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
Michael Yang's avatar
Michael Yang committed
432
			arr = append(arr, llm.readI8(r))
Bruce MacDonald's avatar
Bruce MacDonald committed
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeUint64:
			arr = append(arr, llm.readU64(r))
		case ggufTypeInt64:
			arr = append(arr, llm.readI64(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeFloat64:
			arr = append(arr, llm.readF64(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readString(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}