// tensor.go

package gguf

import (
	"log/slog"
	"strings"
)

// TensorInfo holds the metadata of one tensor.
//
//   - Name: the tensor's name; Valid reports false when it is empty.
//   - Offset: unsigned position of the tensor (presumably a byte offset to its
//     data; TODO confirm against the code that fills it in).
//   - Shape: the size of each dimension; NumValues is the product of all entries.
//   - Type: the element type, which fixes the bytes-per-value used by NumBytes.
type TensorInfo struct {
	Name   string
	Offset uint64
	Shape  []uint64
	Type   TensorType
}

// Valid reports whether the tensor info is usable: it must have a non-empty
// name and describe a strictly positive number of bytes.
func (ti TensorInfo) Valid() bool {
	if ti.Name == "" {
		return false
	}
	return ti.NumBytes() > 0
}

// NumValues returns the total number of values in the tensor, i.e. the
// product of every dimension in Shape. An empty Shape yields 1.
func (ti TensorInfo) NumValues() int64 {
	total := int64(1)
	for i := range ti.Shape {
		total *= int64(ti.Shape[i])
	}
	return total
}

// NumBytes returns the number of bytes in the tensor.
//
// A type stores blockSize values in typeSize bytes, so the result is
// NumValues * typeSize / blockSize, rounded toward zero. The computation is
// done entirely in integers: going through float64 would silently round
// element counts above 2^53 and return a slightly wrong size. Types with an
// unknown size (typeSize == 0) yield 0.
func (ti TensorInfo) NumBytes() int64 {
	typeSize, blockSize := ti.Type.typeSize(), ti.Type.blockSize()
	numValues := ti.NumValues()

	// Whole blocks first, then the pro-rated share of a trailing partial
	// block. Splitting the division this way keeps the intermediate product
	// small while producing exactly floor(numValues*typeSize/blockSize).
	whole := (numValues / blockSize) * typeSize
	partial := ((numValues % blockSize) * typeSize) / blockSize
	return whole + partial
}

// LogValue implements slog.LogValuer. It renders the tensor info as a group
// with the attributes name, offset, shape, num_values, num_bytes and type,
// in that order.
func (ti TensorInfo) LogValue() slog.Value {
	return slog.GroupValue(
		slog.String("name", ti.Name),
		// Offset is unsigned; logging it as uint64 avoids showing a negative
		// number for offsets at or above 2^63.
		slog.Uint64("offset", ti.Offset),
		slog.Any("shape", ti.Shape),
		slog.Int64("num_values", ti.NumValues()),
		slog.Int64("num_bytes", ti.NumBytes()),
		slog.Any("type", ti.Type),
	)
}

// TensorType is the numeric identifier of a tensor's element type.
type TensorType uint32

// Known tensor types. Every value is written out explicitly so that inserting
// or reordering a line cannot silently renumber the rest. The numbering
// presumably mirrors the upstream ggml type enumeration (TODO confirm).
const (
	TensorTypeF32  TensorType = 0
	TensorTypeF16  TensorType = 1
	TensorTypeQ4_0 TensorType = 2
	TensorTypeQ4_1 TensorType = 3

	// unexported: unused in gguf
	tensorTypeQ4_2 TensorType = 4
	tensorTypeQ4_3 TensorType = 5

	TensorTypeQ5_0 TensorType = 6
	TensorTypeQ5_1 TensorType = 7
	TensorTypeQ8_0 TensorType = 8
	TensorTypeQ8_1 TensorType = 9
	TensorTypeQ2_K TensorType = 10
	TensorTypeQ3_K TensorType = 11
	TensorTypeQ4_K TensorType = 12
	TensorTypeQ5_K TensorType = 13
	TensorTypeQ6_K TensorType = 14
	TensorTypeQ8_K TensorType = 15

	// unexported: unquantizable by ollama
	tensorTypeIQ2_XXS TensorType = 16
	tensorTypeIQ2_XS  TensorType = 17
	tensorTypeIQ3_XXS TensorType = 18
	tensorTypeIQ1_S   TensorType = 19
	tensorTypeIQ4_NL  TensorType = 20
	tensorTypeIQ3_S   TensorType = 21
	tensorTypeIQ2_S   TensorType = 22
	tensorTypeIQ4_XS  TensorType = 23

	TensorTypeI8  TensorType = 24
	TensorTypeI16 TensorType = 25
	TensorTypeI32 TensorType = 26
	TensorTypeI64 TensorType = 27
	TensorTypeF64 TensorType = 28

	// unexported: unquantizable by ollama
	tensorTypeIQ1_M TensorType = 29

	TensorTypeBF16 TensorType = 30

	// unexported: unused in gguf
	tensorTypeQ4_0_4_4 TensorType = 31
	tensorTypeQ4_0_4_8 TensorType = 32
	tensorTypeQ4_0_8_8 TensorType = 33

	// unexported: unquantizable by ollama
	tensorTypeTQ1_0 TensorType = 34
	tensorTypeTQ2_0 TensorType = 35

	// unexported: unused in gguf
	tensorTypeIQ4_NL_4_4 TensorType = 36
	tensorTypeIQ4_NL_4_8 TensorType = 37
	tensorTypeIQ4_NL_8_8 TensorType = 38
)

// NumBytes returns the number of bytes a single value of this type occupies,
// computed as typeSize divided by blockSize. The result can be fractional.
func (tt TensorType) NumBytes() float64 {
	size, block := tt.typeSize(), tt.blockSize()
	return float64(size) / float64(block)
}

// typeSize returns the number of bytes taken by one block of this type, where
// a block holds blockSize values. Types without an entry here return 0.
func (tt TensorType) typeSize() int64 {
	bs := tt.blockSize()

	switch tt {
	// Types with a block size of 1: one value per block.
	case TensorTypeI8:
		return 1
	case TensorTypeF16, TensorTypeBF16, TensorTypeI16:
		return 2
	case TensorTypeF32, TensorTypeI32:
		return 4
	case TensorTypeF64, TensorTypeI64:
		return 8

	// Types with a block size of 32.
	case TensorTypeQ4_0, tensorTypeIQ4_NL:
		return 2 + bs/2 // 18
	case TensorTypeQ4_1:
		return 2 + 2 + bs/2 // 20
	case TensorTypeQ5_0:
		return 2 + 4 + bs/2 // 22
	case TensorTypeQ5_1:
		return 2 + 2 + 4 + bs/2 // 24
	case TensorTypeQ8_0:
		return 2 + bs // 34
	case TensorTypeQ8_1:
		return 2 + 2 + bs // 36

	// Types with a block size of 256.
	case TensorTypeQ2_K:
		return bs/16 + bs/4 + 2 + 2 // 84
	case TensorTypeQ3_K:
		return bs/8 + bs/4 + 12 + 2 // 110
	case TensorTypeQ4_K:
		return 2 + 2 + 12 + bs/2 // 144
	case TensorTypeQ5_K:
		return 2 + 2 + 12 + bs/8 + bs/2 // 176
	case TensorTypeQ6_K:
		return bs/2 + bs/4 + bs/16 + 2 // 210
	case TensorTypeQ8_K:
		return 4 + bs + 2*bs/16 // 292
	case tensorTypeIQ2_XXS:
		return 2 + 2*bs/8 // 66
	case tensorTypeIQ2_XS:
		return 2 + 2*bs/8 + bs/32 // 74
	case tensorTypeIQ3_XXS:
		return 2 + bs/4 + bs/8 // 98
	case tensorTypeIQ1_S:
		return 2 + bs/8 + bs/16 // 50
	case tensorTypeIQ3_S:
		return 2 + bs/4 + bs/8 + bs/32 + 4 // 110
	case tensorTypeIQ2_S:
		return 2 + bs/4 + bs/16 // 82
	case tensorTypeIQ4_XS:
		return 2 + 2 + bs/2 + bs/64 // 136
	case tensorTypeIQ1_M:
		return bs/8 + bs/16 + bs/32 // 56
	}

	// No size is defined for any other type.
	return 0
}

// blockSize returns the number of values stored together in one block of
// this type: 32 for the types listed in the first case, 1 for the plain
// float and integer types, and 256 for everything else (including values
// that are not known types).
func (tt TensorType) blockSize() int64 {
	switch tt {
	case TensorTypeQ4_0, TensorTypeQ4_1,
		TensorTypeQ5_0, TensorTypeQ5_1,
		TensorTypeQ8_0, TensorTypeQ8_1,
		tensorTypeIQ4_NL:
		return 32
	case TensorTypeF16, TensorTypeBF16, TensorTypeF32, TensorTypeF64,
		TensorTypeI8, TensorTypeI16, TensorTypeI32, TensorTypeI64:
		return 1
	}
	return 256
}

func (tt TensorType) String() string {
	switch tt {
	case TensorTypeF32:
		return "f32"
	case TensorTypeF16:
		return "f16"
	case TensorTypeQ4_0:
		return "q4_0"
	case TensorTypeQ4_1:
		return "q4_1"
	case tensorTypeQ4_2:
		return "q4_2"
	case tensorTypeQ4_3:
		return "q4_3"
	case TensorTypeQ5_0:
		return "q5_0"
	case TensorTypeQ5_1:
		return "q5_1"
	case TensorTypeQ8_0:
		return "q8_0"
	case TensorTypeQ8_1:
		return "q8_1"
	case TensorTypeQ2_K:
		return "q2_k"
	case TensorTypeQ3_K:
		return "q3_k"
	case TensorTypeQ4_K:
		return "q4_k"
	case TensorTypeQ5_K:
		return "q5_k"
	case TensorTypeQ6_K:
		return "q6_k"
	case TensorTypeQ8_K:
		return "q8_k"
	case tensorTypeIQ2_XXS:
		return "iq2_xxs"
	case tensorTypeIQ2_XS:
		return "iq2_xs"
	case tensorTypeIQ3_XXS:
		return "iq3_xxs"
	case tensorTypeIQ1_S:
		return "iq1_s"
	case tensorTypeIQ4_NL:
		return "iq4_nl"
	case tensorTypeIQ3_S:
		return "iq3_s"
	case tensorTypeIQ2_S:
		return "iq2_s"
	case tensorTypeIQ4_XS:
		return "iq4_xs"
	case TensorTypeI8:
		return "i8"
	case TensorTypeI16:
		return "i16"
	case TensorTypeI32:
		return "i32"
	case TensorTypeI64:
		return "i64"
	case TensorTypeF64:
		return "f64"
	case tensorTypeIQ1_M:
		return "iq1_m"
	case TensorTypeBF16:
		return "bf16"
	case tensorTypeQ4_0_4_4:
		return "q4_0_4_4"
	case tensorTypeQ4_0_4_8:
		return "q4_0_4_8"
	case tensorTypeQ4_0_8_8:
		return "q4_0_8_8"
	case tensorTypeTQ1_0:
		return "tq1_0"
	case tensorTypeTQ2_0:
		return "tq2_0"
	case tensorTypeIQ4_NL_4_4:
		return "iq4_nl_4_4"
	case tensorTypeIQ4_NL_4_8:
		return "iq4_nl_4_8"
	case tensorTypeIQ4_NL_8_8:
		return "iq4_nl_8_8"
	default:
		return "unknown"
	}
}

// LogValue implements slog.LogValuer. It renders the type as a group with
// the attributes value (numeric id), name (upper-cased String), size
// (typeSize), block_size (blockSize) and num_bytes (NumBytes), in that order.
func (tt TensorType) LogValue() slog.Value {
	upperName := strings.ToUpper(tt.String())

	fields := []slog.Attr{
		slog.Uint64("value", uint64(tt)),
		slog.String("name", upperName),
		slog.Int64("size", tt.typeSize()),
		slog.Int64("block_size", tt.blockSize()),
		slog.Float64("num_bytes", tt.NumBytes()),
	}
	return slog.GroupValue(fields...)
}