Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
409bb967
Unverified
Commit
409bb967
authored
Dec 05, 2023
by
Michael Yang
Committed by
GitHub
Dec 05, 2023
Browse files
Merge pull request #1308 from jmorganca/mxyng/split-from
split from into one or more models
parents
d3479c07
5a5dca13
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
233 additions
and
83 deletions
+233
-83
llm/ggml.go
llm/ggml.go
+53
-24
llm/gguf.go
llm/gguf.go
+109
-35
server/images.go
server/images.go
+71
-24
No files found.
llm/ggml.go
View file @
409bb967
...
...
@@ -7,9 +7,10 @@ import (
)
type
GGML
struct
{
magic
uint32
container
model
Size
int64
}
const
(
...
...
@@ -82,7 +83,7 @@ type model interface {
type
container
interface
{
Name
()
string
Decode
(
io
.
Reader
)
(
model
,
error
)
Decode
(
*
readSeekOffset
)
(
model
,
error
)
}
type
containerGGML
struct
{}
...
...
@@ -91,7 +92,7 @@ func (c *containerGGML) Name() string {
return
"ggml"
}
func
(
c
*
containerGGML
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerGGML
)
Decode
(
r
o
*
readSeekOffset
)
(
model
,
error
)
{
return
nil
,
nil
}
...
...
@@ -103,9 +104,9 @@ func (c *containerGGMF) Name() string {
return
"ggmf"
}
func
(
c
*
containerGGMF
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerGGMF
)
Decode
(
r
o
*
readSeekOffset
)
(
model
,
error
)
{
var
version
uint32
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
version
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
version
)
switch
version
{
case
1
:
...
...
@@ -125,9 +126,9 @@ func (c *containerGGJT) Name() string {
return
"ggjt"
}
func
(
c
*
containerGGJT
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerGGJT
)
Decode
(
r
o
*
readSeekOffset
)
(
model
,
error
)
{
var
version
uint32
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
version
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
version
)
switch
version
{
case
1
,
2
,
3
:
...
...
@@ -139,7 +140,7 @@ func (c *containerGGJT) Decode(r io.Reader) (model, error) {
// different model types may have different layouts for hyperparameters
var
llama
llamaModel
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
llama
.
hyperparameters
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
llama
.
hyperparameters
)
return
&
llama
,
nil
}
...
...
@@ -151,9 +152,9 @@ func (c *containerLORA) Name() string {
return
"ggla"
}
func
(
c
*
containerLORA
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
func
(
c
*
containerLORA
)
Decode
(
r
o
*
readSeekOffset
)
(
model
,
error
)
{
var
version
uint32
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
version
)
binary
.
Read
(
r
o
,
binary
.
LittleEndian
,
&
version
)
switch
version
{
case
1
:
...
...
@@ -179,34 +180,62 @@ const (
FILE_MAGIC_GGUF_BE
=
0x47475546
)
func
DecodeGGML
(
r
io
.
Reader
)
(
*
GGML
,
error
)
{
var
ggml
GGML
binary
.
Read
(
r
,
binary
.
LittleEndian
,
&
ggml
.
magic
)
func
DecodeGGML
(
r
io
.
ReadSeeker
)
(
*
GGML
,
error
)
{
ro
:=
readSeekOffset
{
ReadSeeker
:
r
}
var
magic
uint32
if
err
:=
binary
.
Read
(
&
ro
,
binary
.
LittleEndian
,
&
magic
);
err
!=
nil
{
return
nil
,
err
}
switch
ggml
.
magic
{
var
c
container
switch
magic
{
case
FILE_MAGIC_GGML
:
ggml
.
container
=
&
containerGGML
{}
c
=
&
containerGGML
{}
case
FILE_MAGIC_GGMF
:
ggml
.
container
=
&
containerGGMF
{}
c
=
&
containerGGMF
{}
case
FILE_MAGIC_GGJT
:
ggml
.
container
=
&
containerGGJT
{}
c
=
&
containerGGJT
{}
case
FILE_MAGIC_GGLA
:
ggml
.
container
=
&
containerLORA
{}
c
=
&
containerLORA
{}
case
FILE_MAGIC_GGUF_LE
:
ggml
.
container
=
&
containerGGUF
{
bo
:
binary
.
LittleEndian
}
c
=
&
containerGGUF
{
bo
:
binary
.
LittleEndian
}
case
FILE_MAGIC_GGUF_BE
:
ggml
.
container
=
&
containerGGUF
{
bo
:
binary
.
BigEndian
}
c
=
&
containerGGUF
{
bo
:
binary
.
BigEndian
}
default
:
return
nil
,
errors
.
New
(
"invalid file magic"
)
}
model
,
err
:=
ggml
.
Decode
(
r
)
model
,
err
:=
c
.
Decode
(
&
ro
)
if
err
!=
nil
{
return
nil
,
err
}
ggml
.
model
=
model
// final model type
return
&
ggml
,
nil
return
&
GGML
{
container
:
c
,
model
:
model
,
Size
:
ro
.
offset
,
},
nil
}
// readSeekOffset wraps an io.ReadSeeker and tracks how far the wrapper has
// advanced through it.
//
// offset is measured from the position the underlying ReadSeeker was at when
// the wrapper was created (the zero value starts counting at 0), not from the
// start of the underlying stream. This lets a caller position the underlying
// stream at the beginning of an embedded blob, decode through the wrapper and
// use offset as the number of bytes that blob occupies.
//
// All reads and seeks must go through the wrapper for offset to stay accurate.
type readSeekOffset struct {
	io.ReadSeeker
	offset int64
}

// Seek repositions the underlying ReadSeeker and returns the position it
// reports, unchanged.
//
// offset is adjusted by the distance actually moved rather than being
// overwritten with the underlying position; overwriting would mix the
// relative count kept by Read with an absolute stream position whenever the
// underlying stream did not start at 0. On error offset is left untouched and
// 0 is returned alongside the error.
func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) {
	// Ask the underlying stream where it is now so the move can be measured
	// regardless of whence.
	before, err := rso.ReadSeeker.Seek(0, io.SeekCurrent)
	if err != nil {
		return 0, err
	}

	after, err := rso.ReadSeeker.Seek(offset, whence)
	if err != nil {
		return 0, err
	}

	rso.offset += after - before
	return after, nil
}

// Read reads from the underlying ReadSeeker and advances offset by the number
// of bytes actually read, including on a short read that also returns an
// error.
func (rso *readSeekOffset) Read(p []byte) (int, error) {
	n, err := rso.ReadSeeker.Read(p)
	rso.offset += int64(n)
	return n, err
}
llm/gguf.go
View file @
409bb967
...
...
@@ -23,26 +23,24 @@ type containerGGUF struct {
NumTensor
uint64
NumKV
uint64
}
parameters
uint64
}
func
(
c
*
containerGGUF
)
Name
()
string
{
return
"gguf"
}
func
(
c
*
containerGGUF
)
Decode
(
r
io
.
Reader
)
(
model
,
error
)
{
binary
.
Read
(
r
,
c
.
bo
,
&
c
.
Version
)
func
(
c
*
containerGGUF
)
Decode
(
r
so
*
readSeekOffset
)
(
model
,
error
)
{
binary
.
Read
(
r
so
,
c
.
bo
,
&
c
.
Version
)
switch
c
.
Version
{
case
1
:
binary
.
Read
(
r
,
c
.
bo
,
&
c
.
V1
)
binary
.
Read
(
r
so
,
c
.
bo
,
&
c
.
V1
)
default
:
binary
.
Read
(
r
,
c
.
bo
,
&
c
.
V2
)
binary
.
Read
(
r
so
,
c
.
bo
,
&
c
.
V2
)
}
model
:=
newGGUFModel
(
c
)
if
err
:=
model
.
Decode
(
r
);
err
!=
nil
{
if
err
:=
model
.
Decode
(
r
so
);
err
!=
nil
{
return
nil
,
err
}
...
...
@@ -67,9 +65,23 @@ const (
// kv is a set of decoded key-value metadata entries, keyed by name; values
// keep whatever Go type they were decoded as.
type kv map[string]any
// tensor holds the metadata recorded for a single tensor entry.
type tensor struct {
	name string
	kind uint32

	// offset is the value read from the tensor entry; size is the byte size
	// computed for the tensor's data.
	offset, size uint64

	// shape is the number of elements in each dimension
	shape [4]uint64
}
type
ggufModel
struct
{
*
containerGGUF
kv
tensors
[]
tensor
parameters
uint64
}
func
newGGUFModel
(
container
*
containerGGUF
)
*
ggufModel
{
...
...
@@ -96,8 +108,7 @@ func (llm *ggufModel) NumKV() uint64 {
}
func
(
llm
*
ggufModel
)
ModelFamily
()
string
{
t
,
ok
:=
llm
.
kv
[
"general.architecture"
]
.
(
string
)
if
ok
{
if
t
,
ok
:=
llm
.
kv
[
"general.architecture"
]
.
(
string
);
ok
{
return
t
}
...
...
@@ -134,57 +145,56 @@ func (llm *ggufModel) ModelType() string {
}
func
(
llm
*
ggufModel
)
FileType
()
string
{
t
,
ok
:=
llm
.
kv
[
"general.file_type"
]
.
(
uint32
)
if
ok
{
if
t
,
ok
:=
llm
.
kv
[
"general.file_type"
]
.
(
uint32
);
ok
{
return
fileType
(
t
)
}
return
"unknown"
}
func
(
llm
*
ggufModel
)
Decode
(
r
io
.
Reader
)
error
{
func
(
llm
*
ggufModel
)
Decode
(
r
so
*
readSeekOffset
)
error
{
// decode key-values
for
i
:=
0
;
uint64
(
i
)
<
llm
.
NumKV
();
i
++
{
k
,
err
:=
llm
.
readString
(
r
)
k
,
err
:=
llm
.
readString
(
r
so
)
if
err
!=
nil
{
return
err
}
vtype
:=
llm
.
readU32
(
r
)
vtype
:=
llm
.
readU32
(
r
so
)
var
v
any
switch
vtype
{
case
ggufTypeUint8
:
v
=
llm
.
readU8
(
r
)
v
=
llm
.
readU8
(
r
so
)
case
ggufTypeInt8
:
v
=
llm
.
readI8
(
r
)
v
=
llm
.
readI8
(
r
so
)
case
ggufTypeUint16
:
v
=
llm
.
readU16
(
r
)
v
=
llm
.
readU16
(
r
so
)
case
ggufTypeInt16
:
v
=
llm
.
readI16
(
r
)
v
=
llm
.
readI16
(
r
so
)
case
ggufTypeUint32
:
v
=
llm
.
readU32
(
r
)
v
=
llm
.
readU32
(
r
so
)
case
ggufTypeInt32
:
v
=
llm
.
readI32
(
r
)
v
=
llm
.
readI32
(
r
so
)
case
ggufTypeUint64
:
v
=
llm
.
readU64
(
r
)
v
=
llm
.
readU64
(
r
so
)
case
ggufTypeInt64
:
v
=
llm
.
readI64
(
r
)
v
=
llm
.
readI64
(
r
so
)
case
ggufTypeFloat32
:
v
=
llm
.
readF32
(
r
)
v
=
llm
.
readF32
(
r
so
)
case
ggufTypeFloat64
:
v
=
llm
.
readF64
(
r
)
v
=
llm
.
readF64
(
r
so
)
case
ggufTypeBool
:
v
=
llm
.
readBool
(
r
)
v
=
llm
.
readBool
(
r
so
)
case
ggufTypeString
:
s
,
err
:=
llm
.
readString
(
r
)
s
,
err
:=
llm
.
readString
(
r
so
)
if
err
!=
nil
{
return
err
}
v
=
s
case
ggufTypeArray
:
a
,
err
:=
llm
.
readArray
(
r
)
a
,
err
:=
llm
.
readArray
(
r
so
)
if
err
!=
nil
{
return
err
}
...
...
@@ -199,21 +209,85 @@ func (llm *ggufModel) Decode(r io.Reader) error {
// decode tensors
for
i
:=
0
;
uint64
(
i
)
<
llm
.
NumTensor
();
i
++
{
if
_
,
err
:=
llm
.
readString
(
r
);
err
!=
nil
{
name
,
err
:=
llm
.
readString
(
rso
)
if
err
!=
nil
{
return
err
}
dimensions
:=
llm
.
readU32
(
r
)
// dims is the number of dimensions in the tensor
dims
:=
llm
.
readU32
(
rso
)
var
elements
uint64
=
1
for
i
:=
0
;
uint32
(
i
)
<
dim
ension
s
;
i
++
{
elements
*
=
llm
.
readU64
(
r
)
shape
:=
[
4
]
uint64
{
1
,
1
,
1
,
1
}
for
i
:=
0
;
uint32
(
i
)
<
dims
;
i
++
{
shape
[
i
]
=
llm
.
readU64
(
r
so
)
}
llm
.
readU32
(
r
)
// type
llm
.
readU64
(
r
)
// offset
kind
:=
llm
.
readU32
(
rso
)
offset
:=
llm
.
readU64
(
rso
)
var
blockSize
uint64
switch
{
case
kind
<
2
:
blockSize
=
1
case
kind
<
10
:
blockSize
=
32
default
:
blockSize
=
256
}
var
typeSize
uint64
switch
kind
{
case
0
:
// FP32
typeSize
=
4
case
1
:
// FP16
typeSize
=
2
case
2
:
// Q4_0
typeSize
=
2
+
blockSize
/
2
case
3
:
// Q4_1
typeSize
=
2
+
2
+
blockSize
/
2
case
6
:
// Q5_0
typeSize
=
2
+
4
+
blockSize
/
2
case
7
:
// Q5_1
typeSize
=
2
+
2
+
4
+
blockSize
/
2
case
8
:
// Q8_0
typeSize
=
2
+
blockSize
case
9
:
// Q8_1
typeSize
=
4
+
4
+
blockSize
case
10
:
// Q2_K
typeSize
=
blockSize
/
16
+
blockSize
/
4
+
2
+
2
case
11
:
// Q3_K
typeSize
=
blockSize
/
8
+
blockSize
/
4
+
12
+
2
case
12
:
// Q4_K
typeSize
=
2
+
2
+
12
+
blockSize
/
2
case
13
:
// Q5_K
typeSize
=
2
+
2
+
12
+
blockSize
/
8
+
blockSize
/
2
case
14
:
// Q6_K
typeSize
=
blockSize
/
2
+
blockSize
/
4
+
blockSize
/
16
+
2
}
parameters
:=
shape
[
0
]
*
shape
[
1
]
*
shape
[
2
]
*
shape
[
3
]
size
:=
parameters
*
typeSize
/
blockSize
llm
.
tensors
=
append
(
llm
.
tensors
,
tensor
{
name
:
name
,
kind
:
kind
,
offset
:
offset
,
size
:
size
,
shape
:
shape
,
})
llm
.
parameters
+=
parameters
}
alignment
,
ok
:=
llm
.
kv
[
"general.alignment"
]
.
(
uint32
)
if
!
ok
{
alignment
=
32
}
llm
.
parameters
+=
elements
rso
.
Seek
(
int64
(
alignment
)
-
rso
.
offset
%
int64
(
alignment
),
io
.
SeekCurrent
)
for
_
,
tensor
:=
range
llm
.
tensors
{
padded
:=
(
int64
(
tensor
.
size
)
+
int64
(
alignment
)
-
1
)
&
^
(
int64
(
alignment
)
-
1
)
rso
.
Seek
(
padded
,
io
.
SeekCurrent
)
}
return
nil
...
...
server/images.go
View file @
409bb967
...
...
@@ -19,6 +19,8 @@ import (
"strings"
"text/template"
"golang.org/x/exp/slices"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser"
...
...
@@ -134,17 +136,48 @@ type ManifestV2 struct {
}
type
ConfigV2
struct
{
ModelFormat
string
`json:"model_format"`
ModelFamily
string
`json:"model_family"`
ModelType
string
`json:"model_type"`
FileType
string
`json:"file_type"`
RootFS
RootFS
`json:"rootfs"`
ModelFormat
string
`json:"model_format"`
ModelFamily
string
`json:"model_family"`
ModelFamilies
[]
string
`json:"model_families"`
ModelType
string
`json:"model_type"`
FileType
string
`json:"file_type"`
RootFS
RootFS
`json:"rootfs"`
// required by spec
Architecture
string
`json:"architecture"`
OS
string
`json:"os"`
}
func
(
c
*
ConfigV2
)
SetModelFormat
(
format
string
)
{
if
c
.
ModelFormat
==
""
{
c
.
ModelFormat
=
format
}
}
func
(
c
*
ConfigV2
)
SetModelFamily
(
families
...
string
)
{
for
_
,
family
:=
range
families
{
if
c
.
ModelFamily
==
""
{
c
.
ModelFamily
=
family
}
if
!
slices
.
Contains
(
c
.
ModelFamilies
,
family
)
{
c
.
ModelFamilies
=
append
(
c
.
ModelFamilies
,
family
)
}
}
}
func
(
c
*
ConfigV2
)
SetModelType
(
modelType
string
)
{
if
c
.
ModelType
==
""
{
c
.
ModelType
=
modelType
}
}
func
(
c
*
ConfigV2
)
SetFileType
(
fileType
string
)
{
if
c
.
FileType
==
""
{
c
.
FileType
=
fileType
}
}
type
RootFS
struct
{
Type
string
`json:"type"`
DiffIDs
[]
string
`json:"diff_ids"`
...
...
@@ -354,10 +387,10 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
return
err
}
config
.
ModelFormat
=
fromConfig
.
ModelFormat
config
.
ModelFamily
=
fromConfig
.
ModelFamily
config
.
ModelType
=
fromConfig
.
ModelType
config
.
FileType
=
fromConfig
.
FileType
config
.
Set
ModelFormat
(
fromConfig
.
ModelFormat
)
config
.
Set
ModelFamily
(
append
(
fromConfig
.
ModelFamilies
,
fromConfig
.
ModelFamily
)
...
)
config
.
Set
ModelType
(
fromConfig
.
ModelType
)
config
.
Set
FileType
(
fromConfig
.
FileType
)
for
_
,
layer
:=
range
manifest
.
Layers
{
deleteMap
[
layer
.
Digest
]
=
struct
{}{}
...
...
@@ -391,24 +424,38 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
}
defer
bin
.
Close
()
fn
(
api
.
ProgressResponse
{
Status
:
"creating model layer"
})
ggml
,
err
:=
llm
.
DecodeGGML
(
bin
)
if
err
!=
nil
{
return
err
}
var
offset
int64
for
{
fn
(
api
.
ProgressResponse
{
Status
:
"creating model layer"
})
config
.
ModelFormat
=
ggml
.
Name
()
config
.
ModelFamily
=
ggml
.
ModelFamily
()
config
.
ModelType
=
ggml
.
ModelType
()
config
.
FileType
=
ggml
.
FileType
()
bin
.
Seek
(
offset
,
io
.
SeekStart
)
ggml
,
err
:=
llm
.
DecodeGGML
(
bin
)
if
errors
.
Is
(
err
,
io
.
EOF
)
{
break
}
else
if
err
!=
nil
{
return
err
}
bin
.
Seek
(
0
,
io
.
SeekStart
)
layer
,
err
:=
NewLayer
(
bin
,
mediatype
)
if
err
!=
nil
{
return
err
}
config
.
SetModelFormat
(
ggml
.
Name
())
config
.
SetModelFamily
(
ggml
.
ModelFamily
())
config
.
SetModelType
(
ggml
.
ModelType
())
config
.
SetFileType
(
ggml
.
FileType
())
layers
.
Add
(
layer
)
mediatype
:=
mediatype
if
ggml
.
ModelFamily
()
==
"clip"
{
mediatype
=
"application/vnd.ollama.image.projector"
}
sr
:=
io
.
NewSectionReader
(
bin
,
offset
,
ggml
.
Size
)
layer
,
err
:=
NewLayer
(
sr
,
mediatype
)
if
err
!=
nil
{
return
err
}
layers
.
Add
(
layer
)
offset
+=
ggml
.
Size
}
case
"adapter"
:
if
strings
.
HasPrefix
(
c
.
Args
,
"@"
)
{
blobPath
,
err
:=
GetBlobsPath
(
strings
.
TrimPrefix
(
c
.
Args
,
"@"
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment