Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
949553db
"src/array/cuda/array_sort.hip" did not exist on "81831111a5553f7c749d094a159dd748c39e6f28"
Unverified
Commit
949553db
authored
Sep 13, 2023
by
Michael Yang
Committed by
GitHub
Sep 13, 2023
Browse files
Merge pull request #519 from jmorganca/mxyng/decode
Mxyng/decode
parents
f59c4d03
0c5a4543
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
134 additions
and
157 deletions
+134
-157
llm/ggml.go
llm/ggml.go
+61
-38
llm/gguf.go
llm/gguf.go
+23
-19
llm/llama.go
llm/llama.go
+18
-80
llm/llm.go
llm/llm.go
+15
-9
server/images.go
server/images.go
+17
-11
No files found.
llm/ggml.go
View file @
949553db
...
@@ -8,54 +8,77 @@ import (
...
@@ -8,54 +8,77 @@ import (
"sync"
"sync"
)
)
type
ModelFamily
string
type
GGML
struct
{
magic
uint32
con
st
ModelFamilyUnknown
ModelFamily
=
"unknown"
con
tainer
model
type
ModelType
uint32
}
const
(
const
(
ModelType3B
ModelType
=
26
fileTypeF32
uint32
=
iota
ModelType7B
ModelType
=
32
fileTypeF16
ModelType13B
ModelType
=
40
fileTypeQ4_0
ModelType34B
ModelType
=
48
fileTypeQ4_1
ModelType30B
ModelType
=
60
fileTypeQ4_1_F16
ModelType65B
ModelType
=
80
fileTypeQ8_0
uint32
=
iota
+
2
fileTypeQ5_0
fileTypeQ5_1
fileTypeQ2_K
fileTypeQ3_K_S
fileTypeQ3_K_M
fileTypeQ3_K_L
fileTypeQ4_K_S
fileTypeQ4_K_M
fileTypeQ5_K_S
fileTypeQ5_K_M
fileTypeQ6_K
)
)
func
(
mt
ModelType
)
String
()
string
{
func
fileType
(
fileType
uint32
)
string
{
switch
mt
{
switch
fileType
{
case
ModelType3B
:
case
fileTypeF32
:
return
"3B"
return
"F32"
case
ModelType7B
:
case
fileTypeF16
:
return
"7B"
return
"F16"
case
ModelType13B
:
case
fileTypeQ4_0
:
return
"13B"
return
"Q4_0"
case
ModelType34B
:
case
fileTypeQ4_1
:
return
"34B"
return
"Q4_1"
case
ModelType30B
:
case
fileTypeQ4_1_F16
:
return
"30B"
return
"Q4_1_F16"
case
ModelType65B
:
case
fileTypeQ8_0
:
return
"65B"
return
"Q8_0"
case
fileTypeQ5_0
:
return
"Q5_0"
case
fileTypeQ5_1
:
return
"Q5_1"
case
fileTypeQ2_K
:
return
"Q2_K"
case
fileTypeQ3_K_S
:
return
"Q3_K_S"
case
fileTypeQ3_K_M
:
return
"Q3_K_M"
case
fileTypeQ3_K_L
:
return
"Q3_K_L"
case
fileTypeQ4_K_S
:
return
"Q4_K_S"
case
fileTypeQ4_K_M
:
return
"Q4_K_M"
case
fileTypeQ5_K_S
:
return
"Q5_K_S"
case
fileTypeQ5_K_M
:
return
"Q5_K_M"
case
fileTypeQ6_K
:
return
"Q6_K"
default
:
default
:
return
"Unknown"
return
"Unknown"
}
}
}
}
type
FileType
interface
{
String
()
string
}
type
GGML
struct
{
magic
uint32
container
model
}
type
model
interface
{
type
model
interface
{
ModelFamily
()
ModelFamily
ModelFamily
()
string
ModelType
()
ModelType
ModelType
()
string
FileType
()
FileType
FileType
()
string
}
}
type
container
interface
{
type
container
interface
{
...
...
llm/gguf.go
View file @
949553db
...
@@ -6,7 +6,6 @@ import (
...
@@ -6,7 +6,6 @@ import (
"errors"
"errors"
"fmt"
"fmt"
"io"
"io"
"log"
"path"
"path"
"sync"
"sync"
)
)
...
@@ -87,38 +86,43 @@ func (llm *ggufModel) NumKV() uint64 {
...
@@ -87,38 +86,43 @@ func (llm *ggufModel) NumKV() uint64 {
return
llm
.
V2
.
NumKV
return
llm
.
V2
.
NumKV
}
}
func
(
llm
*
ggufModel
)
ModelFamily
()
ModelFamily
{
func
(
llm
*
ggufModel
)
ModelFamily
()
string
{
t
,
ok
:=
llm
.
kv
[
"general.architecture"
]
.
(
string
)
t
,
ok
:=
llm
.
kv
[
"general.architecture"
]
.
(
string
)
if
ok
{
if
ok
{
return
ModelFamily
(
t
)
return
t
}
}
log
.
Printf
(
"unknown model family: %T"
,
t
)
return
"unknown"
return
ModelFamilyUnknown
}
}
func
(
llm
*
ggufModel
)
ModelType
()
ModelType
{
func
(
llm
*
ggufModel
)
ModelType
()
string
{
switch
llm
.
ModelFamily
()
{
switch
llm
.
ModelFamily
()
{
case
ModelFamilyLlama
:
case
"llama"
:
blocks
,
ok
:=
llm
.
kv
[
"llama.block_count"
]
.
(
uint32
)
if
blocks
,
ok
:=
llm
.
kv
[
"llama.block_count"
]
.
(
uint32
);
ok
{
if
ok
{
heads
,
headsOK
:=
llm
.
kv
[
"llama.head_count"
]
.
(
uint32
)
return
ModelType
(
blocks
)
headKVs
,
headsKVsOK
:=
llm
.
kv
[
"llama.head_count_kv"
]
.
(
uint32
)
if
headsOK
&&
headsKVsOK
&&
heads
/
headKVs
==
8
{
return
"70B"
}
return
llamaModelType
(
blocks
)
}
case
"falcon"
:
if
blocks
,
ok
:=
llm
.
kv
[
"falcon.block_count"
]
.
(
uint32
);
ok
{
return
falconModelType
(
blocks
)
}
}
}
}
return
ModelType7B
return
"Unknown"
}
}
func
(
llm
*
ggufModel
)
FileType
()
FileType
{
func
(
llm
*
ggufModel
)
FileType
()
string
{
switch
llm
.
ModelFamily
()
{
t
,
ok
:=
llm
.
kv
[
"general.file_type"
]
.
(
uint32
)
case
ModelFamilyLlama
:
if
ok
{
t
,
ok
:=
llm
.
kv
[
"general.file_type"
]
.
(
uint32
)
return
fileType
(
t
)
if
ok
{
return
llamaFileType
(
t
)
}
}
}
return
llamaFileTypeF16
return
"Unknown"
}
}
func
(
llm
*
ggufModel
)
Decode
(
r
io
.
Reader
)
error
{
func
(
llm
*
ggufModel
)
Decode
(
r
io
.
Reader
)
error
{
...
...
llm/llama.go
View file @
949553db
...
@@ -95,38 +95,39 @@ func chooseRunner(gpuPath, cpuPath string) string {
...
@@ -95,38 +95,39 @@ func chooseRunner(gpuPath, cpuPath string) string {
return
runPath
return
runPath
}
}
const
ModelFamilyLlama
ModelFamily
=
"llama"
type
llamaModel
struct
{
type
llamaModel
struct
{
hyperparameters
llamaHyperparameters
hyperparameters
llamaHyperparameters
}
}
func
(
llm
*
llamaModel
)
ModelFamily
()
ModelFamily
{
func
(
llm
*
llamaModel
)
ModelFamily
()
string
{
return
ModelFamilyL
lama
return
"l
lama
"
}
}
func
(
llm
*
llamaModel
)
ModelType
()
ModelType
{
func
llamaModel
Type
(
numLayer
uint32
)
string
{
switch
llm
.
hyperparameters
.
N
umLayer
{
switch
n
umLayer
{
case
26
:
case
26
:
return
ModelType
3B
return
"
3B
"
case
32
:
case
32
:
return
ModelType
7B
return
"
7B
"
case
40
:
case
40
:
return
ModelType
13B
return
"
13B
"
case
48
:
case
48
:
return
ModelType
34B
return
"
34B
"
case
60
:
case
60
:
return
ModelType
30B
return
"
30B
"
case
80
:
case
80
:
return
ModelType65B
return
"65B"
default
:
return
"Unknown"
}
}
}
// TODO: find a better default
func
(
llm
*
llamaModel
)
ModelType
()
string
{
return
ModelType
7B
return
llama
ModelType
(
llm
.
hyperparameters
.
NumLayer
)
}
}
func
(
llm
*
llamaModel
)
FileType
()
FileType
{
func
(
llm
*
llamaModel
)
FileType
()
string
{
return
llm
.
hyperparameters
.
FileType
return
fileType
(
llm
.
hyperparameters
.
FileType
)
}
}
type
llamaHyperparameters
struct
{
type
llamaHyperparameters
struct
{
...
@@ -143,70 +144,7 @@ type llamaHyperparameters struct {
...
@@ -143,70 +144,7 @@ type llamaHyperparameters struct {
NumRot
uint32
NumRot
uint32
// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
FileType
llamaFileType
FileType
uint32
}
type
llamaFileType
uint32
const
(
llamaFileTypeF32
llamaFileType
=
iota
llamaFileTypeF16
llamaFileTypeQ4_0
llamaFileTypeQ4_1
llamaFileTypeQ4_1_F16
llamaFileTypeQ8_0
llamaFileType
=
iota
+
2
llamaFileTypeQ5_0
llamaFileTypeQ5_1
llamaFileTypeQ2_K
llamaFileTypeQ3_K_S
llamaFileTypeQ3_K_M
llamaFileTypeQ3_K_L
llamaFileTypeQ4_K_S
llamaFileTypeQ4_K_M
llamaFileTypeQ5_K_S
llamaFileTypeQ5_K_M
llamaFileTypeQ6_K
)
func
(
ft
llamaFileType
)
String
()
string
{
switch
ft
{
case
llamaFileTypeF32
:
return
"F32"
case
llamaFileTypeF16
:
return
"F16"
case
llamaFileTypeQ4_0
:
return
"Q4_0"
case
llamaFileTypeQ4_1
:
return
"Q4_1"
case
llamaFileTypeQ4_1_F16
:
return
"Q4_1_F16"
case
llamaFileTypeQ8_0
:
return
"Q8_0"
case
llamaFileTypeQ5_0
:
return
"Q5_0"
case
llamaFileTypeQ5_1
:
return
"Q5_1"
case
llamaFileTypeQ2_K
:
return
"Q2_K"
case
llamaFileTypeQ3_K_S
:
return
"Q3_K_S"
case
llamaFileTypeQ3_K_M
:
return
"Q3_K_M"
case
llamaFileTypeQ3_K_L
:
return
"Q3_K_L"
case
llamaFileTypeQ4_K_S
:
return
"Q4_K_S"
case
llamaFileTypeQ4_K_M
:
return
"Q4_K_M"
case
llamaFileTypeQ5_K_S
:
return
"Q5_K_S"
case
llamaFileTypeQ5_K_M
:
return
"Q5_K_M"
case
llamaFileTypeQ6_K
:
return
"Q6_K"
default
:
return
"Unknown"
}
}
}
type
Running
struct
{
type
Running
struct
{
...
...
llm/llm.go
View file @
949553db
...
@@ -37,7 +37,7 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
...
@@ -37,7 +37,7 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
return
nil
,
err
return
nil
,
err
}
}
switch
ggml
.
FileType
()
.
String
()
{
switch
ggml
.
FileType
()
{
case
"Q8_0"
:
case
"Q8_0"
:
if
ggml
.
Name
()
!=
"gguf"
&&
opts
.
NumGPU
!=
0
{
if
ggml
.
Name
()
!=
"gguf"
&&
opts
.
NumGPU
!=
0
{
// GGML Q8_0 do not support Metal API and will
// GGML Q8_0 do not support Metal API and will
...
@@ -56,30 +56,36 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
...
@@ -56,30 +56,36 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
totalResidentMemory
:=
memory
.
TotalMemory
()
totalResidentMemory
:=
memory
.
TotalMemory
()
switch
ggml
.
ModelType
()
{
switch
ggml
.
ModelType
()
{
case
ModelType
3B
,
ModelType
7B
:
case
"
3B
"
,
"
7B
"
:
if
ggml
.
FileType
()
.
String
()
==
"F16"
&&
totalResidentMemory
<
16
*
1024
*
1024
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
16
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 16GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 16GB of memory"
)
}
else
if
totalResidentMemory
<
8
*
1024
*
1024
{
}
else
if
totalResidentMemory
<
8
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 8GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 8GB of memory"
)
}
}
case
ModelType
13B
:
case
"
13B
"
:
if
ggml
.
FileType
()
.
String
()
==
"F16"
&&
totalResidentMemory
<
32
*
1024
*
1024
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
32
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 32GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 32GB of memory"
)
}
else
if
totalResidentMemory
<
16
*
1024
*
1024
{
}
else
if
totalResidentMemory
<
16
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 16GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 16GB of memory"
)
}
}
case
ModelType
30B
,
ModelType34B
:
case
"
30B
"
,
"34B"
,
"40B"
:
if
ggml
.
FileType
()
.
String
()
==
"F16"
&&
totalResidentMemory
<
64
*
1024
*
1024
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
64
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 64GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 64GB of memory"
)
}
else
if
totalResidentMemory
<
32
*
1024
*
1024
{
}
else
if
totalResidentMemory
<
32
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 32GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 32GB of memory"
)
}
}
case
ModelType65B
:
case
"65B"
,
"70B"
:
if
ggml
.
FileType
()
.
String
()
==
"F16"
&&
totalResidentMemory
<
128
*
1024
*
1024
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
128
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 128GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 128GB of memory"
)
}
else
if
totalResidentMemory
<
64
*
1024
*
1024
{
}
else
if
totalResidentMemory
<
64
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 64GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 64GB of memory"
)
}
}
case
"180B"
:
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
512
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 512GB of memory"
)
}
else
if
totalResidentMemory
<
128
*
1024
*
1024
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 128GB of memory"
)
}
}
}
switch
ggml
.
Name
()
{
switch
ggml
.
Name
()
{
...
...
server/images.go
View file @
949553db
...
@@ -114,11 +114,11 @@ type LayerReader struct {
...
@@ -114,11 +114,11 @@ type LayerReader struct {
}
}
type
ConfigV2
struct
{
type
ConfigV2
struct
{
ModelF
amily
llm
.
ModelFamily
`json:"model_f
amily
"`
ModelF
ormat
string
`json:"model_f
ormat
"`
Model
Type
string
`json:"model_
type
"`
Model
Family
string
`json:"model_
family
"`
Model
Format
string
`json:"model_
format
"`
Model
Type
string
`json:"model_
type
"`
FileType
string
`json:"file_type"`
FileType
string
`json:"file_type"`
RootFS
RootFS
`json:"rootfs"`
RootFS
RootFS
`json:"rootfs"`
// required by spec
// required by spec
Architecture
string
`json:"architecture"`
Architecture
string
`json:"architecture"`
...
@@ -357,10 +357,10 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
...
@@ -357,10 +357,10 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
return
err
return
err
}
}
config
.
ModelFamily
=
ggml
.
ModelFamily
()
config
.
ModelType
=
ggml
.
ModelType
()
.
String
()
config
.
ModelFormat
=
ggml
.
Name
()
config
.
ModelFormat
=
ggml
.
Name
()
config
.
FileType
=
ggml
.
FileType
()
.
String
()
config
.
ModelFamily
=
ggml
.
ModelFamily
()
config
.
ModelType
=
ggml
.
ModelType
()
config
.
FileType
=
ggml
.
FileType
()
// reset the file
// reset the file
file
.
Seek
(
0
,
io
.
SeekStart
)
file
.
Seek
(
0
,
io
.
SeekStart
)
...
@@ -498,6 +498,12 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
...
@@ -498,6 +498,12 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
}
}
}
}
if
config
.
ModelType
==
"65B"
{
if
numGQA
,
ok
:=
formattedParams
[
"num_gqa"
]
.
(
int
);
ok
&&
numGQA
==
8
{
config
.
ModelType
=
"70B"
}
}
bts
,
err
:=
json
.
Marshal
(
formattedParams
)
bts
,
err
:=
json
.
Marshal
(
formattedParams
)
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
...
@@ -815,14 +821,14 @@ func formatParams(params map[string][]string) (map[string]interface{}, error) {
...
@@ -815,14 +821,14 @@ func formatParams(params map[string][]string) (map[string]interface{}, error) {
return
nil
,
fmt
.
Errorf
(
"invalid float value %s"
,
vals
)
return
nil
,
fmt
.
Errorf
(
"invalid float value %s"
,
vals
)
}
}
out
[
key
]
=
floatVal
out
[
key
]
=
float32
(
floatVal
)
case
reflect
.
Int
:
case
reflect
.
Int
:
intVal
,
err
:=
strconv
.
ParseInt
(
vals
[
0
],
10
,
0
)
intVal
,
err
:=
strconv
.
ParseInt
(
vals
[
0
],
10
,
64
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"invalid int value %s"
,
vals
)
return
nil
,
fmt
.
Errorf
(
"invalid int value %s"
,
vals
)
}
}
out
[
key
]
=
intVal
out
[
key
]
=
int
(
intVal
)
case
reflect
.
Bool
:
case
reflect
.
Bool
:
boolVal
,
err
:=
strconv
.
ParseBool
(
vals
[
0
])
boolVal
,
err
:=
strconv
.
ParseBool
(
vals
[
0
])
if
err
!=
nil
{
if
err
!=
nil
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment