OpenDAS / ollama
"vscode:/vscode.git/clone" did not exist on "5f5d02fbf15a4a4e998a6855675e2abb81dc85a5"
Commit 7dee25a0, authored Sep 12, 2023 by Michael Yang

fix falcon decode

get model and file type from bin file

Parent: f2216370
Showing 5 changed files with 119 additions and 154 deletions:
  llm/ggml.go        +61  -38
  llm/gguf.go        +17  -19
  llm/llama.go       +18  -80
  llm/llm.go         +15   -9
  server/images.go    +8   -8
llm/ggml.go

@@ -8,54 +8,77 @@ import (
     "sync"
 )

-type ModelFamily string
-
-const ModelFamilyUnknown ModelFamily = "unknown"
-
-type ModelType uint32
+type GGML struct {
+    magic uint32
+    container
+    model
+}

 const (
-    ModelType3B  ModelType = 26
-    ModelType7B  ModelType = 32
-    ModelType13B ModelType = 40
-    ModelType34B ModelType = 48
-    ModelType30B ModelType = 60
-    ModelType65B ModelType = 80
+    fileTypeF32 uint32 = iota
+    fileTypeF16
+    fileTypeQ4_0
+    fileTypeQ4_1
+    fileTypeQ4_1_F16
+    fileTypeQ8_0 uint32 = iota + 2
+    fileTypeQ5_0
+    fileTypeQ5_1
+    fileTypeQ2_K
+    fileTypeQ3_K_S
+    fileTypeQ3_K_M
+    fileTypeQ3_K_L
+    fileTypeQ4_K_S
+    fileTypeQ4_K_M
+    fileTypeQ5_K_S
+    fileTypeQ5_K_M
+    fileTypeQ6_K
 )

-func (mt ModelType) String() string {
-    switch mt {
-    case ModelType3B:
-        return "3B"
-    case ModelType7B:
-        return "7B"
-    case ModelType13B:
-        return "13B"
-    case ModelType34B:
-        return "34B"
-    case ModelType30B:
-        return "30B"
-    case ModelType65B:
-        return "65B"
+func fileType(fileType uint32) string {
+    switch fileType {
+    case fileTypeF32:
+        return "F32"
+    case fileTypeF16:
+        return "F16"
+    case fileTypeQ4_0:
+        return "Q4_0"
+    case fileTypeQ4_1:
+        return "Q4_1"
+    case fileTypeQ4_1_F16:
+        return "Q4_1_F16"
+    case fileTypeQ8_0:
+        return "Q8_0"
+    case fileTypeQ5_0:
+        return "Q5_0"
+    case fileTypeQ5_1:
+        return "Q5_1"
+    case fileTypeQ2_K:
+        return "Q2_K"
+    case fileTypeQ3_K_S:
+        return "Q3_K_S"
+    case fileTypeQ3_K_M:
+        return "Q3_K_M"
+    case fileTypeQ3_K_L:
+        return "Q3_K_L"
+    case fileTypeQ4_K_S:
+        return "Q4_K_S"
+    case fileTypeQ4_K_M:
+        return "Q4_K_M"
+    case fileTypeQ5_K_S:
+        return "Q5_K_S"
+    case fileTypeQ5_K_M:
+        return "Q5_K_M"
+    case fileTypeQ6_K:
+        return "Q6_K"
     default:
         return "Unknown"
     }
 }

-type FileType interface {
-    String() string
-}
-
-type GGML struct {
-    magic uint32
-    container
-    model
-}
-
 type model interface {
-    ModelFamily() ModelFamily
-    ModelType() ModelType
-    FileType() FileType
+    ModelFamily() string
+    ModelType() string
+    FileType() string
 }

 type container interface {
...
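
The iota + 2 jump in the new const block mirrors llama.cpp's ftype enum, where values 5 and 6 are unused. A minimal standalone sketch (not part of the commit) showing the gap and the lookup:

package main

import "fmt"

// Standalone sketch of the fileType mapping above. In llama.cpp's ftype
// enum, values 5 and 6 are unused, so the const block jumps from 4 to 7
// with iota + 2; every constant after Q8_0 inherits the shifted expression.
const (
    fileTypeF32      uint32 = iota // 0
    fileTypeF16                    // 1
    fileTypeQ4_0                   // 2
    fileTypeQ4_1                   // 3
    fileTypeQ4_1_F16               // 4
    fileTypeQ8_0 uint32 = iota + 2 // 7 (skips 5 and 6)
    fileTypeQ5_0                   // 8
    fileTypeQ5_1                   // 9
)

func fileType(t uint32) string {
    switch t {
    case fileTypeF32:
        return "F32"
    case fileTypeF16:
        return "F16"
    case fileTypeQ8_0:
        return "Q8_0"
    case fileTypeQ5_0:
        return "Q5_0"
    default:
        return "Unknown"
    }
}

func main() {
    fmt.Println(fileTypeQ8_0) // 7
    fmt.Println(fileType(8))  // Q5_0
    fmt.Println(fileType(5))  // Unknown (unused slot)
}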
llm/gguf.go

@@ -6,7 +6,6 @@ import (
     "errors"
     "fmt"
     "io"
-    "log"
     "path"
     "sync"
 )
...
@@ -87,38 +86,37 @@ func (llm *ggufModel) NumKV() uint64 {
     return llm.V2.NumKV
 }

-func (llm *ggufModel) ModelFamily() ModelFamily {
+func (llm *ggufModel) ModelFamily() string {
     t, ok := llm.kv["general.architecture"].(string)
     if ok {
-        return ModelFamily(t)
+        return t
     }

-    log.Printf("unknown model family: %T", t)
-    return ModelFamilyUnknown
+    return "unknown"
 }

-func (llm *ggufModel) ModelType() ModelType {
+func (llm *ggufModel) ModelType() string {
     switch llm.ModelFamily() {
-    case ModelFamilyLlama:
-        blocks, ok := llm.kv["llama.block_count"].(uint32)
-        if ok {
-            return ModelType(blocks)
+    case "llama":
+        if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
+            return llamaModelType(blocks)
+        }
+    case "falcon":
+        if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
+            return falconModelType(blocks)
         }
     }

-    return ModelType7B
+    return "Unknown"
 }

-func (llm *ggufModel) FileType() FileType {
-    switch llm.ModelFamily() {
-    case ModelFamilyLlama:
-        t, ok := llm.kv["general.file_type"].(uint32)
-        if ok {
-            return llamaFileType(t)
-        }
+func (llm *ggufModel) FileType() string {
+    t, ok := llm.kv["general.file_type"].(uint32)
+    if ok {
+        return fileType(t)
     }

-    return llamaFileTypeF16
+    return "Unknown"
 }

 func (llm *ggufModel) Decode(r io.Reader) error {
...
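
The rewritten ModelType is the heart of the falcon fix: instead of assuming the llama keys, it dispatches on the GGUF general.architecture value and reads the matching <arch>.block_count. A hedged sketch of the same dispatch against a mocked key/value map — falconModelType's body is outside the shown hunks, so its values here are assumptions:

package main

import "fmt"

// Sketch of the dispatch in ggufModel.ModelType, run against a mocked
// GGUF key/value map instead of a real ggufModel.
func modelType(kv map[string]any) string {
    arch, _ := kv["general.architecture"].(string)
    switch arch {
    case "llama":
        if blocks, ok := kv["llama.block_count"].(uint32); ok {
            return llamaModelType(blocks)
        }
    case "falcon":
        if blocks, ok := kv["falcon.block_count"].(uint32); ok {
            return falconModelType(blocks)
        }
    }
    return "Unknown"
}

// llamaModelType mirrors (a subset of) the layer-count table in llm/llama.go.
func llamaModelType(numLayer uint32) string {
    switch numLayer {
    case 32:
        return "7B"
    case 80:
        return "65B"
    default:
        return "Unknown"
    }
}

// falconModelType is assumed here; the commit calls it but its body is
// outside the shown hunks. Falcon-7B has 32 decoder blocks, Falcon-40B has 60.
func falconModelType(numLayer uint32) string {
    switch numLayer {
    case 32:
        return "7B"
    case 60:
        return "40B"
    default:
        return "Unknown"
    }
}

func main() {
    kv := map[string]any{
        "general.architecture": "falcon",
        "falcon.block_count":   uint32(32),
    }
    fmt.Println(modelType(kv)) // 7B
}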
llm/llama.go

@@ -95,38 +95,39 @@ func chooseRunner(gpuPath, cpuPath string) string {
     return runPath
 }

-const ModelFamilyLlama ModelFamily = "llama"
-
 type llamaModel struct {
     hyperparameters llamaHyperparameters
 }

-func (llm *llamaModel) ModelFamily() ModelFamily {
-    return ModelFamilyLlama
+func (llm *llamaModel) ModelFamily() string {
+    return "llama"
 }

-func (llm *llamaModel) ModelType() ModelType {
-    switch llm.hyperparameters.NumLayer {
+func llamaModelType(numLayer uint32) string {
+    switch numLayer {
     case 26:
-        return ModelType3B
+        return "3B"
     case 32:
-        return ModelType7B
+        return "7B"
     case 40:
-        return ModelType13B
+        return "13B"
     case 48:
-        return ModelType34B
+        return "34B"
     case 60:
-        return ModelType30B
+        return "30B"
     case 80:
-        return ModelType65B
+        return "65B"
+    default:
+        return "Unknown"
     }
+}

-    // TODO: find a better default
-    return ModelType7B
+func (llm *llamaModel) ModelType() string {
+    return llamaModelType(llm.hyperparameters.NumLayer)
 }

-func (llm *llamaModel) FileType() FileType {
-    return llm.hyperparameters.FileType
+func (llm *llamaModel) FileType() string {
+    return fileType(llm.hyperparameters.FileType)
 }

 type llamaHyperparameters struct {
...
@@ -143,70 +144,7 @@ type llamaHyperparameters struct {
     NumRot uint32

     // FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
-    FileType llamaFileType
-}
-
-type llamaFileType uint32
-
-const (
-    llamaFileTypeF32 llamaFileType = iota
-    llamaFileTypeF16
-    llamaFileTypeQ4_0
-    llamaFileTypeQ4_1
-    llamaFileTypeQ4_1_F16
-    llamaFileTypeQ8_0 llamaFileType = iota + 2
-    llamaFileTypeQ5_0
-    llamaFileTypeQ5_1
-    llamaFileTypeQ2_K
-    llamaFileTypeQ3_K_S
-    llamaFileTypeQ3_K_M
-    llamaFileTypeQ3_K_L
-    llamaFileTypeQ4_K_S
-    llamaFileTypeQ4_K_M
-    llamaFileTypeQ5_K_S
-    llamaFileTypeQ5_K_M
-    llamaFileTypeQ6_K
-)
-
-func (ft llamaFileType) String() string {
-    switch ft {
-    case llamaFileTypeF32:
-        return "F32"
-    case llamaFileTypeF16:
-        return "F16"
-    case llamaFileTypeQ4_0:
-        return "Q4_0"
-    case llamaFileTypeQ4_1:
-        return "Q4_1"
-    case llamaFileTypeQ4_1_F16:
-        return "Q4_1_F16"
-    case llamaFileTypeQ8_0:
-        return "Q8_0"
-    case llamaFileTypeQ5_0:
-        return "Q5_0"
-    case llamaFileTypeQ5_1:
-        return "Q5_1"
-    case llamaFileTypeQ2_K:
-        return "Q2_K"
-    case llamaFileTypeQ3_K_S:
-        return "Q3_K_S"
-    case llamaFileTypeQ3_K_M:
-        return "Q3_K_M"
-    case llamaFileTypeQ3_K_L:
-        return "Q3_K_L"
-    case llamaFileTypeQ4_K_S:
-        return "Q4_K_S"
-    case llamaFileTypeQ4_K_M:
-        return "Q4_K_M"
-    case llamaFileTypeQ5_K_S:
-        return "Q5_K_S"
-    case llamaFileTypeQ5_K_M:
-        return "Q5_K_M"
-    case llamaFileTypeQ6_K:
-        return "Q6_K"
-    default:
-        return "Unknown"
-    }
+    FileType uint32
 }

 type Running struct {
...
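
Extracting llamaModelType into a free function lets the GGUF path above share the layer-count table, and the explicit "Unknown" default replaces the old silent fallback to ModelType7B. A hypothetical table-driven test (not in the commit) that pins the mapping down:

package llm

import "testing"

// Hypothetical test for llamaModelType; it covers every entry in the
// switch plus the new "Unknown" default that replaces the old fallback.
func TestLlamaModelType(t *testing.T) {
    cases := map[uint32]string{
        26: "3B",
        32: "7B",
        40: "13B",
        48: "34B",
        60: "30B",
        80: "65B",
        99: "Unknown", // anything unmapped
    }
    for numLayer, want := range cases {
        if got := llamaModelType(numLayer); got != want {
            t.Errorf("llamaModelType(%d) = %q, want %q", numLayer, got, want)
        }
    }
}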
llm/llm.go

@@ -37,7 +37,7 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
         return nil, err
     }

-    switch ggml.FileType().String() {
+    switch ggml.FileType() {
     case "Q8_0":
         if ggml.Name() != "gguf" && opts.NumGPU != 0 {
             // GGML Q8_0 do not support Metal API and will
...
@@ -56,30 +56,36 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
     totalResidentMemory := memory.TotalMemory()
     switch ggml.ModelType() {
-    case ModelType3B, ModelType7B:
-        if ggml.FileType().String() == "F16" && totalResidentMemory < 16*1024*1024 {
+    case "3B", "7B":
+        if ggml.FileType() == "F16" && totalResidentMemory < 16*1024*1024 {
             return nil, fmt.Errorf("F16 model requires at least 16GB of memory")
         } else if totalResidentMemory < 8*1024*1024 {
             return nil, fmt.Errorf("model requires at least 8GB of memory")
         }
-    case ModelType13B:
-        if ggml.FileType().String() == "F16" && totalResidentMemory < 32*1024*1024 {
+    case "13B":
+        if ggml.FileType() == "F16" && totalResidentMemory < 32*1024*1024 {
             return nil, fmt.Errorf("F16 model requires at least 32GB of memory")
         } else if totalResidentMemory < 16*1024*1024 {
             return nil, fmt.Errorf("model requires at least 16GB of memory")
         }
-    case ModelType30B, ModelType34B:
-        if ggml.FileType().String() == "F16" && totalResidentMemory < 64*1024*1024 {
+    case "30B", "34B", "40B":
+        if ggml.FileType() == "F16" && totalResidentMemory < 64*1024*1024 {
             return nil, fmt.Errorf("F16 model requires at least 64GB of memory")
         } else if totalResidentMemory < 32*1024*1024 {
             return nil, fmt.Errorf("model requires at least 32GB of memory")
         }
-    case ModelType65B:
-        if ggml.FileType().String() == "F16" && totalResidentMemory < 128*1024*1024 {
+    case "65B", "70B":
+        if ggml.FileType() == "F16" && totalResidentMemory < 128*1024*1024 {
             return nil, fmt.Errorf("F16 model requires at least 128GB of memory")
         } else if totalResidentMemory < 64*1024*1024 {
             return nil, fmt.Errorf("model requires at least 64GB of memory")
         }
+    case "180B":
+        if ggml.FileType() == "F16" && totalResidentMemory < 512*1024*1024 {
+            return nil, fmt.Errorf("F16 model requires at least 512GB of memory")
+        } else if totalResidentMemory < 128*1024*1024 {
+            return nil, fmt.Errorf("model requires at least 128GB of memory")
+        }
     }

     switch ggml.Name() {
...
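
With ModelType and FileType now plain strings, the memory gate compares literals directly and gains falcon-sized buckets ("40B", "70B", "180B"). The same check could be a lookup table; a sketch under the assumption that memory.TotalMemory() returns bytes — note the diff compares against N*1024*1024 (MiB) while the messages say GB, so the constants below use GiB instead:

package main

import "fmt"

// Table-driven restructuring of the size gate in llm.New (a sketch, not
// the commit's code). Thresholds are minimum bytes of total memory.
type memoryRequirement struct {
    f16, quantized uint64 // minimums for F16 vs. quantized weights
}

const gib = 1024 * 1024 * 1024

var requirements = map[string]memoryRequirement{
    "3B":   {16 * gib, 8 * gib},
    "7B":   {16 * gib, 8 * gib},
    "13B":  {32 * gib, 16 * gib},
    "30B":  {64 * gib, 32 * gib},
    "34B":  {64 * gib, 32 * gib},
    "40B":  {64 * gib, 32 * gib},
    "65B":  {128 * gib, 64 * gib},
    "70B":  {128 * gib, 64 * gib},
    "180B": {512 * gib, 128 * gib},
}

func checkMemory(modelType, fileType string, total uint64) error {
    req, ok := requirements[modelType]
    if !ok {
        return nil // unknown size: no gate, matching the switch's fallthrough
    }
    if fileType == "F16" && total < req.f16 {
        return fmt.Errorf("F16 model requires at least %dGB of memory", req.f16/gib)
    }
    if total < req.quantized {
        return fmt.Errorf("model requires at least %dGB of memory", req.quantized/gib)
    }
    return nil
}

func main() {
    fmt.Println(checkMemory("70B", "F16", 64*gib)) // F16 model requires at least 128GB of memory
}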
server/images.go

@@ -114,11 +114,11 @@ type LayerReader struct {
 }

 type ConfigV2 struct {
-    ModelFamily llm.ModelFamily `json:"model_family"`
-    ModelType   string          `json:"model_type"`
-    ModelFormat string          `json:"model_format"`
-    FileType    string          `json:"file_type"`
-    RootFS      RootFS          `json:"rootfs"`
+    ModelFormat string `json:"model_format"`
+    ModelFamily string `json:"model_family"`
+    ModelType   string `json:"model_type"`
+    FileType    string `json:"file_type"`
+    RootFS      RootFS `json:"rootfs"`

     // required by spec
     Architecture string `json:"architecture"`
...
@@ -357,10 +357,10 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
         return err
     }

-    config.ModelFamily = ggml.ModelFamily()
-    config.ModelType = ggml.ModelType().String()
     config.ModelFormat = ggml.Name()
-    config.FileType = ggml.FileType().String()
+    config.ModelFamily = ggml.ModelFamily()
+    config.ModelType = ggml.ModelType()
+    config.FileType = ggml.FileType()

     // reset the file
     file.Seek(0, io.SeekStart)
...
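
Since the accessors now return strings, CreateModel assigns them straight into ConfigV2 with no .String() calls. A minimal sketch (fields trimmed to the ones this commit touches, values illustrative) of how that config serializes:

package main

import (
    "encoding/json"
    "fmt"
)

// Sketch of the ConfigV2 fields touched by this commit, showing how the
// plain-string accessors feed the JSON model config directly.
type ConfigV2 struct {
    ModelFormat string `json:"model_format"`
    ModelFamily string `json:"model_family"`
    ModelType   string `json:"model_type"`
    FileType    string `json:"file_type"`
}

func main() {
    // Values as they might come from ggml.Name(), ggml.ModelFamily(),
    // ggml.ModelType(), and ggml.FileType() after this change.
    config := ConfigV2{
        ModelFormat: "gguf",
        ModelFamily: "falcon",
        ModelType:   "7B",
        FileType:    "Q4_0",
    }
    b, _ := json.MarshalIndent(config, "", "  ")
    fmt.Println(string(b))
}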