Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
2cb0fa7d
Commit
2cb0fa7d
authored
Nov 24, 2023
by
Michael Yang
Browse files
split from into one or more models
parent
7232f1fa
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
177 additions
and
69 deletions
+177
-69
llm/ggml.go
llm/ggml.go
+42
-23
llm/gguf.go
llm/gguf.go
+106
-31
server/images.go
server/images.go
+29
-15
No files found.
llm/ggml.go
View file @
2cb0fa7d
...
...
@@ -7,9 +7,10 @@ import (
)
type
GGML
struct
{
magic
uint32
container
model
Size
int64
}
const
(
...
...
@@ -82,7 +83,7 @@ type model interface {
type
container
interface
{
Name
()
string
Decode
(
io
.
Reader
)
(
model
,
error
)
Decode
(
*
readOffset
)
(
model
,
error
)
}
type
containerGGML
struct
{}
...
...
@@ -91,7 +92,7 @@ func (c *containerGGML) Name() string {
return
"ggml"
}
func
(
c
*
containerGGML
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerGGML
)
Decode
(
r
o
*
readOffset
)
(
model
,
error
)
{
return
nil
,
nil
}
...
...
@@ -103,9 +104,9 @@ func (c *containerGGMF) Name() string {
return
"ggmf"
}
func
(
c
*
containerGGMF
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerGGMF
)
Decode
(
r
o
*
readOffset
)
(
model
,
error
)
{
var
version
uint32
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
version
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
version
)
switch
version
{
case
1
:
...
...
@@ -125,9 +126,9 @@ func (c *containerGGJT) Name() string {
return
"ggjt"
}
func
(
c
*
containerGGJT
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerGGJT
)
Decode
(
r
o
*
readOffset
)
(
model
,
error
)
{
var
version
uint32
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
version
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
version
)
switch
version
{
case
1
,
2
,
3
:
...
...
@@ -139,7 +140,7 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) {
// different model types may have different layouts for hyperparameters
var
llama
llamaModel
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
llama
.
hyperparameters
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
llama
.
hyperparameters
)
return
&
llama
,
nil
}
...
...
@@ -151,9 +152,9 @@ func (c *containerLORA) Name() string {
return
"ggla"
}
func
(
c
*
containerLORA
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerLORA
)
Decode
(
r
o
*
readOffset
)
(
model
,
error
)
{
var
version
uint32
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
version
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
version
)
switch
version
{
case
1
:
...
...
@@ -180,33 +181,51 @@ const (
)
func
DecodeGGML
(
r
io
.
Reader
)
(
*
GGML
,
error
)
{
var
ggml
GGML
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
ggml
.
magic
)
ro
:=
readOffset
{
Reader
:
r
}
switch
ggml
.
magic
{
var
magic
uint32
if
err
:=
binary
.
Read
(
&
ro
,
binary
.
LittleEndian
,
&
magic
);
err
!=
nil
{
return
nil
,
err
}
var
c
container
switch
magic
{
case
FILE_MAGIC_GGML
:
ggml
.
container
=
&
containerGGML
{}
c
=
&
containerGGML
{}
case
FILE_MAGIC_GGMF
:
ggml
.
container
=
&
containerGGMF
{}
c
=
&
containerGGMF
{}
case
FILE_MAGIC_GGJT
:
ggml
.
container
=
&
containerGGJT
{}
c
=
&
containerGGJT
{}
case
FILE_MAGIC_GGLA
:
ggml
.
container
=
&
containerLORA
{}
c
=
&
containerLORA
{}
case
FILE_MAGIC_GGUF_LE
:
ggml
.
container
=
&
containerGGUF
{
bo
:
binary
.
LittleEndian
}
c
=
&
containerGGUF
{
bo
:
binary
.
LittleEndian
}
case
FILE_MAGIC_GGUF_BE
:
ggml
.
container
=
&
containerGGUF
{
bo
:
binary
.
BigEndian
}
c
=
&
containerGGUF
{
bo
:
binary
.
BigEndian
}
default
:
return
nil
,
errors
.
New
(
"invalid file magic"
)
}
model
,
err
:=
ggml
.
Decode
(
r
)
model
,
err
:=
c
.
Decode
(
&
ro
)
if
err
!=
nil
{
return
nil
,
err
}
ggml
.
model
=
model
// final model type
return
&
ggml
,
nil
return
&
GGML
{
container
:
c
,
model
:
model
,
Size
:
ro
.
offset
,
},
nil
}
type
readOffset
struct
{
io
.
Reader
offset
int64
}
func
(
r
*
readOffset
)
Read
(
p
[]
byte
)
(
int
,
error
)
{
n
,
err
:=
r
.
Reader
.
Read
(
p
)
r
.
offset
+=
int64
(
n
)
return
n
,
err
}
llm/gguf.go
View file @
2cb0fa7d
...
...
@@ -23,26 +23,24 @@ type containerGGUF struct {
NumTensor
uint64
NumKV
uint64
}
parameters
uint64
}
func
(
c
*
containerGGUF
)
Name
()
string
{
return
"gguf"
}
func
(
c
*
containerGGUF
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
binary
.
Read
(
r
,
c
.
bo
,
&
c
.
Version
)
func
(
c
*
containerGGUF
)
Decode
(
r
o
*
readOffset
)
(
model
,
error
)
{
binary
.
Read
(
r
o
,
c
.
bo
,
&
c
.
Version
)
switch
c
.
Version
{
case
1
:
binary
.
Read
(
r
,
c
.
bo
,
&
c
.
V1
)
binary
.
Read
(
r
o
,
c
.
bo
,
&
c
.
V1
)
default
:
binary
.
Read
(
r
,
c
.
bo
,
&
c
.
V2
)
binary
.
Read
(
r
o
,
c
.
bo
,
&
c
.
V2
)
}
model
:=
newGGUFModel
(
c
)
if
err
:=
model
.
Decode
(
r
);
err
!=
nil
{
if
err
:=
model
.
Decode
(
r
o
);
err
!=
nil
{
return
nil
,
err
}
...
...
@@ -67,9 +65,23 @@ const (
type
kv
map
[
string
]
any
type
tensor
struct
{
name
string
kind
uint32
offset
uint64
size
uint64
// shape is the number of elements in each dimension
shape
[
4
]
uint64
}
type
ggufModel
struct
{
*
containerGGUF
kv
tensors
[]
tensor
parameters
uint64
}
func
newGGUFModel
(
container
*
containerGGUF
)
*
ggufModel
{
...
...
@@ -142,49 +154,49 @@ func (llm *ggufModel) FileType() string {
return
"unknown"
}
func
(
llm
*
ggufModel
)
Decode
(
r
io
.
Reader
)
error
{
func
(
llm
*
ggufModel
)
Decode
(
r
o
*
readOffset
)
error
{
// decode key-values
for
i
:=
0
;
uint64
(
i
)
<
llm
.
NumKV
();
i
++
{
k
,
err
:=
llm
.
readString
(
r
)
k
,
err
:=
llm
.
readString
(
r
o
)
if
err
!=
nil
{
return
err
}
vtype
:=
llm
.
readU32
(
r
)
vtype
:=
llm
.
readU32
(
r
o
)
var
v
any
switch
vtype
{
case
ggufTypeUint8
:
v
=
llm
.
readU8
(
r
)
v
=
llm
.
readU8
(
r
o
)
case
ggufTypeInt8
:
v
=
llm
.
readI8
(
r
)
v
=
llm
.
readI8
(
r
o
)
case
ggufTypeUint16
:
v
=
llm
.
readU16
(
r
)
v
=
llm
.
readU16
(
r
o
)
case
ggufTypeInt16
:
v
=
llm
.
readI16
(
r
)
v
=
llm
.
readI16
(
r
o
)
case
ggufTypeUint32
:
v
=
llm
.
readU32
(
r
)
v
=
llm
.
readU32
(
r
o
)
case
ggufTypeInt32
:
v
=
llm
.
readI32
(
r
)
v
=
llm
.
readI32
(
r
o
)
case
ggufTypeUint64
:
v
=
llm
.
readU64
(
r
)
v
=
llm
.
readU64
(
r
o
)
case
ggufTypeInt64
:
v
=
llm
.
readI64
(
r
)
v
=
llm
.
readI64
(
r
o
)
case
ggufTypeFloat32
:
v
=
llm
.
readF32
(
r
)
v
=
llm
.
readF32
(
r
o
)
case
ggufTypeFloat64
:
v
=
llm
.
readF64
(
r
)
v
=
llm
.
readF64
(
r
o
)
case
ggufTypeBool
:
v
=
llm
.
readBool
(
r
)
v
=
llm
.
readBool
(
r
o
)
case
ggufTypeString
:
s
,
err
:=
llm
.
readString
(
r
)
s
,
err
:=
llm
.
readString
(
r
o
)
if
err
!=
nil
{
return
err
}
v
=
s
case
ggufTypeArray
:
a
,
err
:=
llm
.
readArray
(
r
)
a
,
err
:=
llm
.
readArray
(
r
o
)
if
err
!=
nil
{
return
err
}
...
...
@@ -199,21 +211,84 @@ func (llm *ggufModel) Decode(r io.Reader) error {
// decode tensors
for
i
:=
0
;
uint64
(
i
)
<
llm
.
NumTensor
();
i
++
{
if
_
,
err
:=
llm
.
readString
(
r
);
err
!=
nil
{
name
,
err
:=
llm
.
readString
(
ro
)
if
err
!=
nil
{
return
err
}
dim
ension
s
:=
llm
.
readU32
(
r
)
dims
:=
llm
.
readU32
(
r
o
)
var
elements
uint64
=
1
for
i
:=
0
;
uint32
(
i
)
<
dim
ension
s
;
i
++
{
elements
*
=
llm
.
readU64
(
r
)
shape
:=
[
4
]
uint64
{
1
,
1
,
1
,
1
}
for
i
:=
0
;
uint32
(
i
)
<
dims
;
i
++
{
shape
[
i
]
=
llm
.
readU64
(
r
o
)
}
llm
.
readU32
(
r
)
// type
llm
.
readU64
(
r
)
// offset
kind
:=
llm
.
readU32
(
ro
)
offset
:=
llm
.
readU64
(
ro
)
var
blockSize
uint64
switch
{
case
kind
<
2
:
blockSize
=
1
case
kind
<
10
:
blockSize
=
32
default
:
blockSize
=
256
}
var
typeSize
uint64
switch
kind
{
case
0
:
// FP32
typeSize
=
4
case
1
:
// FP16
typeSize
=
2
case
2
:
// Q4_0
typeSize
=
2
+
blockSize
/
2
case
3
:
// Q4_1
typeSize
=
2
+
2
+
blockSize
/
2
case
6
:
// Q5_0
typeSize
=
2
+
4
+
blockSize
/
2
case
7
:
// Q5_1
typeSize
=
2
+
2
+
4
+
blockSize
/
2
case
8
:
// Q8_0
typeSize
=
2
+
blockSize
case
9
:
// Q8_1
typeSize
=
4
+
4
+
blockSize
case
10
:
// Q2_K
typeSize
=
blockSize
/
16
+
blockSize
/
4
+
2
+
2
case
11
:
// Q3_K
typeSize
=
blockSize
/
8
+
blockSize
/
4
+
12
+
2
case
12
:
// Q4_K
typeSize
=
2
+
2
+
12
+
blockSize
/
2
case
13
:
// Q5_K
typeSize
=
2
+
2
+
12
+
blockSize
/
8
+
blockSize
/
2
case
14
:
// Q6_K
typeSize
=
blockSize
/
2
+
blockSize
/
4
+
blockSize
/
16
+
2
}
parameters
:=
shape
[
0
]
*
shape
[
1
]
*
shape
[
2
]
*
shape
[
3
]
size
:=
parameters
*
typeSize
/
blockSize
llm
.
tensors
=
append
(
llm
.
tensors
,
tensor
{
name
:
name
,
kind
:
kind
,
offset
:
offset
,
size
:
size
,
shape
:
shape
,
})
llm
.
parameters
+=
parameters
}
alignment
,
ok
:=
llm
.
kv
[
"general.alignment"
]
.
(
uint32
)
if
!
ok
{
alignment
=
32
}
llm
.
parameters
+=
elements
io
.
CopyN
(
io
.
Discard
,
ro
,
int64
(
alignment
)
-
ro
.
offset
%
int64
(
alignment
))
for
_
,
tensor
:=
range
llm
.
tensors
{
padded
:=
(
int64
(
tensor
.
size
)
+
int64
(
alignment
)
-
1
)
&
^
(
int64
(
alignment
)
-
1
)
io
.
CopyN
(
io
.
Discard
,
ro
,
padded
)
}
return
nil
...
...
server/images.go
View file @
2cb0fa7d
...
...
@@ -388,24 +388,38 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
}
defer
bin
.
Close
()
fn
(
api
.
ProgressResponse
{
Status
:
"creating model layer"
})
ggml
,
err
:=
llm
.
DecodeGGML
(
bin
)
if
err
!=
nil
{
return
err
}
var
offset
int64
for
{
fn
(
api
.
ProgressResponse
{
Status
:
"creating model layer"
})
bin
.
Seek
(
offset
,
io
.
SeekStart
)
ggml
,
err
:=
llm
.
DecodeGGML
(
bin
)
if
errors
.
Is
(
err
,
io
.
EOF
)
{
break
}
else
if
err
!=
nil
{
return
err
}
config
.
ModelFormat
=
ggml
.
Name
()
config
.
ModelFamily
=
ggml
.
ModelFamily
()
config
.
ModelType
=
ggml
.
ModelType
()
config
.
FileType
=
ggml
.
FileType
()
config
.
ModelFormat
=
ggml
.
Name
()
config
.
ModelFamily
=
ggml
.
ModelFamily
()
config
.
ModelType
=
ggml
.
ModelType
()
config
.
FileType
=
ggml
.
FileType
()
bin
.
Seek
(
0
,
io
.
SeekStart
)
layer
,
err
:=
NewLayer
(
bin
,
mediatype
)
if
err
!=
nil
{
return
err
}
mediatype
:=
mediatype
if
ggml
.
ModelFamily
()
==
"clip"
{
mediatype
=
"application/vnd.ollama.image.projector"
}
layers
.
Add
(
layer
)
sr
:=
io
.
NewSectionReader
(
bin
,
offset
,
ggml
.
Size
)
layer
,
err
:=
NewLayer
(
sr
,
mediatype
)
if
err
!=
nil
{
return
err
}
layers
.
Add
(
layer
)
offset
+=
ggml
.
Size
}
case
"adapter"
:
if
strings
.
HasPrefix
(
c
.
Args
,
"@"
)
{
blobPath
,
err
:=
GetBlobsPath
(
strings
.
TrimPrefix
(
c
.
Args
,
"@"
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment