OpenDAS / ollama · Commits

Commit cabaada9 (unverified)

Merge pull request #372 from jmorganca/mxyng/string-types

model and file type as strings

Authored Aug 17, 2023 by Michael Yang; committed by GitHub on Aug 17, 2023
Parents: 519f4d98, a894cc79

Showing 4 changed files with 133 additions and 48 deletions (+133, -48):
- llm/ggml.go (+28, -25)
- llm/llama.go (+93, -11)
- llm/llm.go (+6, -6)
- server/images.go (+6, -6)
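Taken together, the four files replace `GGML`'s exported `ModelType` and `FileType` integer fields with accessor methods whose results know how to print themselves, so callers work with human-readable names like "30B" or "Q4_0" instead of raw enum values. A minimal standalone sketch of the underlying Go pattern (hypothetical names, not code from this repository):

```go
package main

import "fmt"

// fileType keeps its integer encoding private to the package; callers
// only ever see the String() rendering.
type fileType uint32

const (
	fileTypeF32 fileType = iota
	fileTypeF16
	fileTypeQ4_0
)

func (ft fileType) String() string {
	switch ft {
	case fileTypeF32:
		return "F32"
	case fileTypeF16:
		return "F16"
	case fileTypeQ4_0:
		return "Q4_0"
	default:
		return "Unknown"
	}
}

func main() {
	ft := fileTypeQ4_0
	fmt.Println(ft) // fmt uses the Stringer interface: prints "Q4_0"
}
```

Because `fmt` recognizes the standard `Stringer` interface, print sites need no changes to get readable output; only stored or compared values (as in llm/llm.go and server/images.go below) switch to explicit `.String()` calls.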
llm/ggml.go

```diff
@@ -9,8 +9,6 @@ import (
 type ModelFamily string
 
-const ModelFamilyLlama ModelFamily = "llama"
-
 type ModelType uint32
 
 const (
@@ -21,32 +19,37 @@ const (
 	ModelType65B ModelType = 80
 )
 
+func (mt ModelType) String() string {
+	switch mt {
+	case ModelType3B:
+		return "3B"
+	case ModelType7B:
+		return "7B"
+	case ModelType13B:
+		return "13B"
+	case ModelType30B:
+		return "30B"
+	case ModelType65B:
+		return "65B"
+	default:
+		return "Unknown"
+	}
+}
 
-type FileType uint32
-
-const (
-	FileTypeF32 FileType = iota
-	FileTypeF16
-	FileTypeQ4_0
-	FileTypeQ4_1
-	FileTypeQ4_1_F16
-	FileTypeQ8_0 = iota + 2
-	FileTypeQ5_0
-	FileTypeQ5_1
-	FileTypeQ2_K
-	FileTypeQ3_K
-	FileTypeQ4_K
-	FileTypeQ5_K
-	FileTypeQ6_K
-)
+type FileType interface {
+	String() string
+}
 
 type GGML struct {
-	ModelFamily
-	ModelType
-
 	magic uint32
 	container
-
-	llamaHyperparameters
+	model
+}
+
+type model interface {
+	ModelFamily() ModelFamily
+	ModelType() ModelType
+	FileType() FileType
 }
 
 type container interface {
@@ -166,14 +169,14 @@ func DecodeGGML(r io.ReadSeeker, hint ModelFamily) (*GGML, error) {
 	// different model types may have different layouts for hyperparameters
 	switch hint {
 	case ModelFamilyLlama:
-		binary.Read(r, binary.LittleEndian, &ggml.llamaHyperparameters)
+		var llama llamaModel
+		binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
+		ggml.model = &llama
 		// TODO: sanity check hyperparameters
 	default:
 		return nil, fmt.Errorf("unsupported model type: %s", hint)
 	}
 
 	// final model type
-	ggml.ModelFamily = hint
-	ggml.ModelType = ModelType(ggml.NumLayer)
 
 	return &ggml, nil
 }
```
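With this change, `GGML` no longer embeds llama-specific hyperparameters directly; `DecodeGGML` constructs a family-specific value (`llamaModel`) and stores it behind the `model` interface, so family, size, and quantization are all reached through methods. A hedged sketch of a caller under the new API (hypothetical usage with an assumed import path; error handling abbreviated):

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/jmorganca/ollama/llm" // import path assumed from the PR's fork
)

func main() {
	f, err := os.Open("model.bin") // hypothetical GGML file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	ggml, err := llm.DecodeGGML(f, llm.ModelFamilyLlama)
	if err != nil {
		log.Fatal(err)
	}

	// ModelType() and FileType() are promoted from the embedded model
	// interface; printing them goes through their String() forms.
	fmt.Printf("family=%s type=%s file=%s\n",
		ggml.ModelFamily(), ggml.ModelType(), ggml.FileType())
}
```

Note that in this commit `llamaModel.ModelType()` (see llm/llama.go below) returns `ModelType30B` unconditionally, while the old layer-count derivation `ModelType(ggml.NumLayer)` is removed here; that reads like a placeholder rather than final behavior.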
llm/llama.go

```diff
@@ -106,19 +106,22 @@ import (
 //go:embed ggml-metal.metal
 var fs embed.FS
 
-type llama struct {
-	params *C.struct_llama_context_params
-	model  *C.struct_llama_model
-	ctx    *C.struct_llama_context
+const ModelFamilyLlama ModelFamily = "llama"
 
-	last   []C.llama_token
-	embd   []C.llama_token
-	cursor int
+type llamaModel struct {
+	hyperparameters llamaHyperparameters
+}
 
-	mu sync.Mutex
-	gc bool
+func (llm *llamaModel) ModelFamily() ModelFamily {
+	return ModelFamilyLlama
+}
 
-	api.Options
+func (llm *llamaModel) ModelType() ModelType {
+	return ModelType30B
+}
+
+func (llm *llamaModel) FileType() FileType {
+	return llm.hyperparameters.FileType
 }
 
 type llamaHyperparameters struct {
@@ -133,8 +136,87 @@ type llamaHyperparameters struct {
 	// NumLayer is the number of layers in the model.
 	NumLayer uint32
 	NumRot   uint32
 
 	// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
-	FileType FileType
+	FileType llamaFileType
+}
+
+type llamaFileType uint32
+
+const (
+	llamaFileTypeF32 llamaFileType = iota
+	llamaFileTypeF16
+	llamaFileTypeQ4_0
+	llamaFileTypeQ4_1
+	llamaFileTypeQ4_1_F16
+	llamaFileTypeQ8_0 llamaFileType = iota + 2
+	llamaFileTypeQ5_0
+	llamaFileTypeQ5_1
+	llamaFileTypeQ2_K
+	llamaFileTypeQ3_K_S
+	llamaFileTypeQ3_K_M
+	llamaFileTypeQ3_K_L
+	llamaFileTypeQ4_K_S
+	llamaFileTypeQ4_K_M
+	llamaFileTypeQ5_K_S
+	llamaFileTypeQ5_K_M
+	llamaFileTypeQ6_K
+)
+
+func (ft llamaFileType) String() string {
+	switch ft {
+	case llamaFileTypeF32:
+		return "F32"
+	case llamaFileTypeF16:
+		return "F16"
+	case llamaFileTypeQ4_0:
+		return "Q4_0"
+	case llamaFileTypeQ4_1:
+		return "Q4_1"
+	case llamaFileTypeQ4_1_F16:
+		return "Q4_1_F16"
+	case llamaFileTypeQ8_0:
+		return "Q8_0"
+	case llamaFileTypeQ5_0:
+		return "Q5_0"
+	case llamaFileTypeQ5_1:
+		return "Q5_1"
+	case llamaFileTypeQ2_K:
+		return "Q2_K"
+	case llamaFileTypeQ3_K_S:
+		return "Q3_K_S"
+	case llamaFileTypeQ3_K_M:
+		return "Q3_K_M"
+	case llamaFileTypeQ3_K_L:
+		return "Q3_K_L"
+	case llamaFileTypeQ4_K_S:
+		return "Q4_K_S"
+	case llamaFileTypeQ4_K_M:
+		return "Q4_K_M"
+	case llamaFileTypeQ5_K_S:
+		return "Q5_K_S"
+	case llamaFileTypeQ5_K_M:
+		return "Q5_K_M"
+	case llamaFileTypeQ6_K:
+		return "Q6_K"
+	default:
+		return "Unknown"
+	}
+}
+
+type llama struct {
+	params *C.struct_llama_context_params
+	model  *C.struct_llama_model
+	ctx    *C.struct_llama_context
+
+	last   []C.llama_token
+	embd   []C.llama_token
+	cursor int
+
+	mu sync.Mutex
+	gc bool
+
+	api.Options
 }
 
 func newLlama(model string, adapters []string, opts api.Options) (*llama, error) {
```
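The `= iota + 2` restart in the `llamaFileType` block keeps these constants aligned with llama.cpp's ftype numbering, which skips the retired values 5 and 6 (the removed Q4_2 and Q4_3 formats). A self-contained check of the arithmetic (a sketch, not the package's own code):

```go
package main

import "fmt"

type llamaFileType uint32

const (
	llamaFileTypeF32      llamaFileType = iota // 0
	llamaFileTypeF16                           // 1
	llamaFileTypeQ4_0                          // 2
	llamaFileTypeQ4_1                          // 3
	llamaFileTypeQ4_1_F16                      // 4
	// iota is 5 on the next line; "+ 2" jumps over the retired 5 and 6
	llamaFileTypeQ8_0 llamaFileType = iota + 2 // 7
	llamaFileTypeQ5_0                          // 8
	llamaFileTypeQ5_1                          // 9
)

func main() {
	fmt.Println(llamaFileTypeQ4_1_F16, llamaFileTypeQ8_0, llamaFileTypeQ5_0) // 4 7 8
}
```

So `llamaFileTypeQ8_0` lands on 7, matching the hyperparameter value llama.cpp writes into the GGML file.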
llm/llm.go

```diff
@@ -35,10 +35,10 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 		return nil, err
 	}
 
-	switch ggml.FileType {
-	case FileTypeF32, FileTypeF16, FileTypeQ5_0, FileTypeQ5_1, FileTypeQ8_0:
+	switch ggml.FileType().String() {
+	case "F32", "F16", "Q5_0", "Q5_1", "Q8_0":
 		if opts.NumGPU != 0 {
-			// Q5_0, Q5_1, and Q8_0 do not support Metal API and will
+			// F32, F16, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
 			// cause the runner to segmentation fault so disable GPU
 			log.Printf("WARNING: GPU disabled for F32, F16, Q5_0, Q5_1, and Q8_0")
 			opts.NumGPU = 0
@@ -46,7 +46,7 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	}
 
 	totalResidentMemory := memory.TotalMemory()
-	switch ggml.ModelType {
+	switch ggml.ModelType() {
 	case ModelType3B, ModelType7B:
 		if totalResidentMemory < 8*1024*1024 {
 			return nil, fmt.Errorf("model requires at least 8GB of memory")
@@ -65,10 +65,10 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 		}
 	}
 
-	switch ggml.ModelFamily {
+	switch ggml.ModelFamily() {
 	case ModelFamilyLlama:
 		return newLlama(model, adapters, opts)
 	default:
-		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily)
+		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 	}
 }
```
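The dispatch in `New` now switches on rendered names rather than typed constants, which keeps llm.go from depending on the llama-specific enum; the tradeoff is that a misspelled case string compiles silently, where a mistyped constant would not. A standalone sketch of the pattern (hypothetical helper, not this file's code):

```go
package main

import (
	"fmt"
	"log"
)

// fileTyper mirrors the FileType interface from the diff: anything that
// can render its quantization level as a string.
type fileTyper interface{ String() string }

// q8 is a hypothetical stand-in for a llama file type value.
type q8 struct{}

func (q8) String() string { return "Q8_0" }

// configureGPU shows the new dispatch style: switching on the rendered
// name instead of a typed constant.
func configureGPU(ft fileTyper, numGPU int) int {
	switch ft.String() {
	case "F32", "F16", "Q5_0", "Q5_1", "Q8_0":
		// per the comment in the diff, these quantizations segfault under
		// the Metal backend, so force CPU-only inference
		log.Printf("WARNING: GPU disabled for %s", ft)
		return 0
	}
	return numGPU
}

func main() {
	fmt.Println(configureGPU(q8{}, 1)) // prints 0: GPU disabled for Q8_0
}
```

As an aside, the unchanged context line compares `totalResidentMemory` against `8*1024*1024`; if `memory.TotalMemory()` reports bytes, that threshold is 8 MiB rather than the 8 GB the error message claims, which looks like a pre-existing issue outside this PR's scope.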
server/images.go

```diff
@@ -105,9 +105,9 @@ type LayerReader struct {
 type ConfigV2 struct {
 	ModelFamily llm.ModelFamily `json:"model_family"`
-	ModelType   llm.ModelType   `json:"model_type"`
-	FileType    llm.FileType    `json:"file_type"`
+	ModelType   string          `json:"model_type"`
+	FileType    string          `json:"file_type"`
 	RootFS      RootFS          `json:"rootfs"`
 
 	// required by spec
 	Architecture string `json:"architecture"`
@@ -308,9 +308,9 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 
-	config.ModelFamily = ggml.ModelFamily
-	config.ModelType = ggml.ModelType
-	config.FileType = ggml.FileType
+	config.ModelFamily = ggml.ModelFamily()
+	config.ModelType = ggml.ModelType().String()
+	config.FileType = ggml.FileType().String()
 
 	// reset the file
 	file.Seek(0, io.SeekStart)
```
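Since `ConfigV2` now declares `model_type` and `file_type` as plain strings, the serialized image manifest carries the rendered names directly instead of raw enum integers. A minimal sketch of the resulting JSON (stand-in struct, with values assumed from the accessors above):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// configV2 is a minimal stand-in for server.ConfigV2 after this change:
// model_type and file_type are plain strings in the manifest.
type configV2 struct {
	ModelFamily string `json:"model_family"`
	ModelType   string `json:"model_type"`
	FileType    string `json:"file_type"`
}

func main() {
	b, _ := json.Marshal(configV2{
		ModelFamily: "llama",
		ModelType:   "30B",
		FileType:    "Q4_0",
	})
	fmt.Println(string(b))
	// {"model_family":"llama","model_type":"30B","file_type":"Q4_0"}
}
```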