Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
9e1e295c
Unverified
Commit
9e1e295c
authored
Jan 25, 2024
by
Michael Yang
Committed by
GitHub
Jan 25, 2024
Browse files
Merge pull request #2175 from ollama/mxyng/refactor-tensor-read
refactor tensor read
parents
a643823f
cd22855e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
61 additions
and
54 deletions
+61
-54
llm/gguf.go
llm/gguf.go
+61
-54
No files found.
llm/gguf.go
View file @
9e1e295c
...
@@ -69,12 +69,65 @@ type tensor struct {
...
@@ -69,12 +69,65 @@ type tensor struct {
name
string
name
string
kind
uint32
kind
uint32
offset
uint64
offset
uint64
size
uint64
// shape is the number of elements in each dimension
// shape is the number of elements in each dimension
shape
[
4
]
uint64
shape
[
4
]
uint64
}
}
func
(
t
tensor
)
blockSize
()
uint64
{
switch
{
case
t
.
kind
<
2
:
return
1
case
t
.
kind
<
10
:
return
32
default
:
return
256
}
}
func
(
t
tensor
)
typeSize
()
uint64
{
blockSize
:=
t
.
blockSize
()
switch
t
.
kind
{
case
0
:
// FP32
return
4
case
1
:
// FP16
return
2
case
2
:
// Q4_0
return
2
+
blockSize
/
2
case
3
:
// Q4_1
return
2
+
2
+
blockSize
/
2
case
6
:
// Q5_0
return
2
+
4
+
blockSize
/
2
case
7
:
// Q5_1
return
2
+
2
+
4
+
blockSize
/
2
case
8
:
// Q8_0
return
2
+
blockSize
case
9
:
// Q8_1
return
4
+
4
+
blockSize
case
10
:
// Q2_K
return
blockSize
/
16
+
blockSize
/
4
+
2
+
2
case
11
:
// Q3_K
return
blockSize
/
8
+
blockSize
/
4
+
12
+
2
case
12
:
// Q4_K
return
2
+
2
+
12
+
blockSize
/
2
case
13
:
// Q5_K
return
2
+
2
+
12
+
blockSize
/
8
+
blockSize
/
2
case
14
:
// Q6_K
return
blockSize
/
2
+
blockSize
/
4
+
blockSize
/
16
+
2
default
:
return
0
}
}
func
(
t
tensor
)
parameters
()
uint64
{
return
t
.
shape
[
0
]
*
t
.
shape
[
1
]
*
t
.
shape
[
2
]
*
t
.
shape
[
3
]
}
func
(
t
tensor
)
size
()
uint64
{
return
t
.
parameters
()
*
t
.
typeSize
()
/
t
.
blockSize
()
}
type
ggufModel
struct
{
type
ggufModel
struct
{
*
containerGGUF
*
containerGGUF
...
@@ -201,61 +254,15 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
...
@@ -201,61 +254,15 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
shape
[
i
]
=
llm
.
readU64
(
rso
)
shape
[
i
]
=
llm
.
readU64
(
rso
)
}
}
kind
:=
llm
.
readU32
(
rso
)
tensor
:=
tensor
{
offset
:=
llm
.
readU64
(
rso
)
var
blockSize
uint64
switch
{
case
kind
<
2
:
blockSize
=
1
case
kind
<
10
:
blockSize
=
32
default
:
blockSize
=
256
}
var
typeSize
uint64
switch
kind
{
case
0
:
// FP32
typeSize
=
4
case
1
:
// FP16
typeSize
=
2
case
2
:
// Q4_0
typeSize
=
2
+
blockSize
/
2
case
3
:
// Q4_1
typeSize
=
2
+
2
+
blockSize
/
2
case
6
:
// Q5_0
typeSize
=
2
+
4
+
blockSize
/
2
case
7
:
// Q5_1
typeSize
=
2
+
2
+
4
+
blockSize
/
2
case
8
:
// Q8_0
typeSize
=
2
+
blockSize
case
9
:
// Q8_1
typeSize
=
4
+
4
+
blockSize
case
10
:
// Q2_K
typeSize
=
blockSize
/
16
+
blockSize
/
4
+
2
+
2
case
11
:
// Q3_K
typeSize
=
blockSize
/
8
+
blockSize
/
4
+
12
+
2
case
12
:
// Q4_K
typeSize
=
2
+
2
+
12
+
blockSize
/
2
case
13
:
// Q5_K
typeSize
=
2
+
2
+
12
+
blockSize
/
8
+
blockSize
/
2
case
14
:
// Q6_K
typeSize
=
blockSize
/
2
+
blockSize
/
4
+
blockSize
/
16
+
2
}
parameters
:=
shape
[
0
]
*
shape
[
1
]
*
shape
[
2
]
*
shape
[
3
]
size
:=
parameters
*
typeSize
/
blockSize
llm
.
tensors
=
append
(
llm
.
tensors
,
tensor
{
name
:
name
,
name
:
name
,
kind
:
kind
,
kind
:
llm
.
readU32
(
rso
),
offset
:
offset
,
offset
:
llm
.
readU64
(
rso
),
size
:
size
,
shape
:
shape
,
shape
:
shape
,
}
)
}
llm
.
parameters
+=
parameters
llm
.
tensors
=
append
(
llm
.
tensors
,
tensor
)
llm
.
parameters
+=
tensor
.
parameters
()
}
}
alignment
,
ok
:=
llm
.
kv
[
"general.alignment"
]
.
(
uint32
)
alignment
,
ok
:=
llm
.
kv
[
"general.alignment"
]
.
(
uint32
)
...
@@ -265,7 +272,7 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
...
@@ -265,7 +272,7 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
rso
.
Seek
(
int64
(
alignment
)
-
rso
.
offset
%
int64
(
alignment
),
io
.
SeekCurrent
)
rso
.
Seek
(
int64
(
alignment
)
-
rso
.
offset
%
int64
(
alignment
),
io
.
SeekCurrent
)
for
_
,
tensor
:=
range
llm
.
tensors
{
for
_
,
tensor
:=
range
llm
.
tensors
{
padded
:=
(
int64
(
tensor
.
size
)
+
int64
(
alignment
)
-
1
)
&
^
(
int64
(
alignment
)
-
1
)
padded
:=
(
int64
(
tensor
.
size
()
)
+
int64
(
alignment
)
-
1
)
&
^
(
int64
(
alignment
)
-
1
)
rso
.
Seek
(
padded
,
io
.
SeekCurrent
)
rso
.
Seek
(
padded
,
io
.
SeekCurrent
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment