Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
c77d45d8
Unverified
Commit
c77d45d8
authored
Apr 09, 2024
by
Michael Yang
Committed by
GitHub
Apr 09, 2024
Browse files
Merge pull request #3506 from ollama/mxyng/quantize-redux
cgo quantize
parents
5ec12cec
9502e566
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
145 additions
and
45 deletions
+145
-45
api/client.go
api/client.go
+1
-13
api/types.go
api/types.go
+28
-27
cmd/cmd.go
cmd/cmd.go
+4
-1
llm/llm.go
llm/llm.go
+71
-0
server/images.go
server/images.go
+21
-2
server/routes.go
server/routes.go
+19
-1
server/routes_test.go
server/routes_test.go
+1
-1
No files found.
api/client.go
View file @
c77d45d8
...
@@ -5,7 +5,6 @@ import (
...
@@ -5,7 +5,6 @@ import (
"bytes"
"bytes"
"context"
"context"
"encoding/json"
"encoding/json"
"errors"
"fmt"
"fmt"
"io"
"io"
"net"
"net"
...
@@ -301,18 +300,7 @@ func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*Embedd
...
@@ -301,18 +300,7 @@ func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*Embedd
}
}
func
(
c
*
Client
)
CreateBlob
(
ctx
context
.
Context
,
digest
string
,
r
io
.
Reader
)
error
{
func
(
c
*
Client
)
CreateBlob
(
ctx
context
.
Context
,
digest
string
,
r
io
.
Reader
)
error
{
if
err
:=
c
.
do
(
ctx
,
http
.
MethodHead
,
fmt
.
Sprintf
(
"/api/blobs/%s"
,
digest
),
nil
,
nil
);
err
!=
nil
{
return
c
.
do
(
ctx
,
http
.
MethodPost
,
fmt
.
Sprintf
(
"/api/blobs/%s"
,
digest
),
r
,
nil
)
var
statusError
StatusError
if
!
errors
.
As
(
err
,
&
statusError
)
||
statusError
.
StatusCode
!=
http
.
StatusNotFound
{
return
err
}
if
err
:=
c
.
do
(
ctx
,
http
.
MethodPost
,
fmt
.
Sprintf
(
"/api/blobs/%s"
,
digest
),
r
,
nil
);
err
!=
nil
{
return
err
}
}
return
nil
}
}
func
(
c
*
Client
)
Version
(
ctx
context
.
Context
)
(
string
,
error
)
{
func
(
c
*
Client
)
Version
(
ctx
context
.
Context
)
(
string
,
error
)
{
...
...
api/types.go
View file @
c77d45d8
...
@@ -141,6 +141,7 @@ type CreateRequest struct {
...
@@ -141,6 +141,7 @@ type CreateRequest struct {
Path
string
`json:"path"`
Path
string
`json:"path"`
Modelfile
string
`json:"modelfile"`
Modelfile
string
`json:"modelfile"`
Stream
*
bool
`json:"stream,omitempty"`
Stream
*
bool
`json:"stream,omitempty"`
Quantization
string
`json:"quantization,omitempty"`
// Name is deprecated, see Model
// Name is deprecated, see Model
Name
string
`json:"name"`
Name
string
`json:"name"`
...
...
cmd/cmd.go
View file @
c77d45d8
...
@@ -194,7 +194,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
...
@@ -194,7 +194,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return
nil
return
nil
}
}
request
:=
api
.
CreateRequest
{
Name
:
args
[
0
],
Modelfile
:
string
(
modelfile
)}
quantization
,
_
:=
cmd
.
Flags
()
.
GetString
(
"quantization"
)
request
:=
api
.
CreateRequest
{
Name
:
args
[
0
],
Modelfile
:
string
(
modelfile
),
Quantization
:
quantization
}
if
err
:=
client
.
Create
(
cmd
.
Context
(),
&
request
,
fn
);
err
!=
nil
{
if
err
:=
client
.
Create
(
cmd
.
Context
(),
&
request
,
fn
);
err
!=
nil
{
return
err
return
err
}
}
...
@@ -943,6 +945,7 @@ func NewCLI() *cobra.Command {
...
@@ -943,6 +945,7 @@ func NewCLI() *cobra.Command {
}
}
createCmd
.
Flags
()
.
StringP
(
"file"
,
"f"
,
"Modelfile"
,
"Name of the Modelfile (default
\"
Modelfile
\"
)"
)
createCmd
.
Flags
()
.
StringP
(
"file"
,
"f"
,
"Modelfile"
,
"Name of the Modelfile (default
\"
Modelfile
\"
)"
)
createCmd
.
Flags
()
.
StringP
(
"quantization"
,
"q"
,
""
,
"Quantization level."
)
showCmd
:=
&
cobra
.
Command
{
showCmd
:=
&
cobra
.
Command
{
Use
:
"show MODEL"
,
Use
:
"show MODEL"
,
...
...
llm/llm.go
View file @
c77d45d8
...
@@ -6,10 +6,81 @@ package llm
...
@@ -6,10 +6,81 @@ package llm
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
// #include <stdlib.h>
// #include "llama.h"
// #include "llama.h"
import
"C"
import
"C"
import
(
"fmt"
"unsafe"
)
// SystemInfo is an unused example of calling llama.cpp functions using CGo
// SystemInfo is an unused example of calling llama.cpp functions using CGo
func
SystemInfo
()
string
{
func
SystemInfo
()
string
{
return
C
.
GoString
(
C
.
llama_print_system_info
())
return
C
.
GoString
(
C
.
llama_print_system_info
())
}
}
func
Quantize
(
infile
,
outfile
,
filetype
string
)
error
{
cinfile
:=
C
.
CString
(
infile
)
defer
C
.
free
(
unsafe
.
Pointer
(
cinfile
))
coutfile
:=
C
.
CString
(
outfile
)
defer
C
.
free
(
unsafe
.
Pointer
(
coutfile
))
params
:=
C
.
llama_model_quantize_default_params
()
params
.
nthread
=
-
1
switch
filetype
{
case
"F32"
:
params
.
ftype
=
fileTypeF32
case
"F16"
:
params
.
ftype
=
fileTypeF16
case
"Q4_0"
:
params
.
ftype
=
fileTypeQ4_0
case
"Q4_1"
:
params
.
ftype
=
fileTypeQ4_1
case
"Q4_1_F16"
:
params
.
ftype
=
fileTypeQ4_1_F16
case
"Q8_0"
:
params
.
ftype
=
fileTypeQ8_0
case
"Q5_0"
:
params
.
ftype
=
fileTypeQ5_0
case
"Q5_1"
:
params
.
ftype
=
fileTypeQ5_1
case
"Q2_K"
:
params
.
ftype
=
fileTypeQ2_K
case
"Q3_K_S"
:
params
.
ftype
=
fileTypeQ3_K_S
case
"Q3_K_M"
:
params
.
ftype
=
fileTypeQ3_K_M
case
"Q3_K_L"
:
params
.
ftype
=
fileTypeQ3_K_L
case
"Q4_K_S"
:
params
.
ftype
=
fileTypeQ4_K_S
case
"Q4_K_M"
:
params
.
ftype
=
fileTypeQ4_K_M
case
"Q5_K_S"
:
params
.
ftype
=
fileTypeQ5_K_S
case
"Q5_K_M"
:
params
.
ftype
=
fileTypeQ5_K_M
case
"Q6_K"
:
params
.
ftype
=
fileTypeQ6_K
case
"IQ2_XXS"
:
params
.
ftype
=
fileTypeIQ2_XXS
case
"IQ2_XS"
:
params
.
ftype
=
fileTypeIQ2_XS
case
"Q2_K_S"
:
params
.
ftype
=
fileTypeQ2_K_S
case
"Q3_K_XS"
:
params
.
ftype
=
fileTypeQ3_K_XS
case
"IQ3_XXS"
:
params
.
ftype
=
fileTypeIQ3_XXS
default
:
return
fmt
.
Errorf
(
"unknown filetype: %s"
,
filetype
)
}
if
retval
:=
C
.
llama_model_quantize
(
cinfile
,
coutfile
,
&
params
);
retval
!=
0
{
return
fmt
.
Errorf
(
"llama_model_quantize: %d"
,
retval
)
}
return
nil
}
server/images.go
View file @
c77d45d8
...
@@ -284,7 +284,7 @@ func realpath(mfDir, from string) string {
...
@@ -284,7 +284,7 @@ func realpath(mfDir, from string) string {
return
abspath
return
abspath
}
}
func
CreateModel
(
ctx
context
.
Context
,
name
,
modelFileDir
string
,
commands
[]
parser
.
Command
,
fn
func
(
resp
api
.
ProgressResponse
))
error
{
func
CreateModel
(
ctx
context
.
Context
,
name
,
modelFileDir
,
quantization
string
,
commands
[]
parser
.
Command
,
fn
func
(
resp
api
.
ProgressResponse
))
error
{
deleteMap
:=
make
(
map
[
string
]
struct
{})
deleteMap
:=
make
(
map
[
string
]
struct
{})
if
manifest
,
_
,
err
:=
GetManifest
(
ParseModelPath
(
name
));
err
==
nil
{
if
manifest
,
_
,
err
:=
GetManifest
(
ParseModelPath
(
name
));
err
==
nil
{
for
_
,
layer
:=
range
append
(
manifest
.
Layers
,
manifest
.
Config
)
{
for
_
,
layer
:=
range
append
(
manifest
.
Layers
,
manifest
.
Config
)
{
...
@@ -337,8 +337,27 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
...
@@ -337,8 +337,27 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
if
ggufName
!=
""
{
if
ggufName
!=
""
{
pathName
=
ggufName
pathName
=
ggufName
slog
.
Debug
(
fmt
.
Sprintf
(
"new image layer path: %s"
,
pathName
))
defer
os
.
RemoveAll
(
ggufName
)
defer
os
.
RemoveAll
(
ggufName
)
if
quantization
!=
""
{
quantization
=
strings
.
ToUpper
(
quantization
)
fn
(
api
.
ProgressResponse
{
Status
:
fmt
.
Sprintf
(
"quantizing %s model to %s"
,
"F16"
,
quantization
)})
tempfile
,
err
:=
os
.
CreateTemp
(
filepath
.
Dir
(
ggufName
),
quantization
)
if
err
!=
nil
{
return
err
}
defer
os
.
RemoveAll
(
tempfile
.
Name
())
if
err
:=
llm
.
Quantize
(
ggufName
,
tempfile
.
Name
(),
quantization
);
err
!=
nil
{
return
err
}
if
err
:=
tempfile
.
Close
();
err
!=
nil
{
return
err
}
pathName
=
tempfile
.
Name
()
}
}
}
bin
,
err
:=
os
.
Open
(
pathName
)
bin
,
err
:=
os
.
Open
(
pathName
)
...
...
server/routes.go
View file @
c77d45d8
...
@@ -647,7 +647,7 @@ func CreateModelHandler(c *gin.Context) {
...
@@ -647,7 +647,7 @@ func CreateModelHandler(c *gin.Context) {
ctx
,
cancel
:=
context
.
WithCancel
(
c
.
Request
.
Context
())
ctx
,
cancel
:=
context
.
WithCancel
(
c
.
Request
.
Context
())
defer
cancel
()
defer
cancel
()
if
err
:=
CreateModel
(
ctx
,
model
,
filepath
.
Dir
(
req
.
Path
),
commands
,
fn
);
err
!=
nil
{
if
err
:=
CreateModel
(
ctx
,
model
,
filepath
.
Dir
(
req
.
Path
),
req
.
Quantization
,
commands
,
fn
);
err
!=
nil
{
ch
<-
gin
.
H
{
"error"
:
err
.
Error
()}
ch
<-
gin
.
H
{
"error"
:
err
.
Error
()}
}
}
}()
}()
...
@@ -913,6 +913,24 @@ func HeadBlobHandler(c *gin.Context) {
...
@@ -913,6 +913,24 @@ func HeadBlobHandler(c *gin.Context) {
}
}
func
CreateBlobHandler
(
c
*
gin
.
Context
)
{
func
CreateBlobHandler
(
c
*
gin
.
Context
)
{
path
,
err
:=
GetBlobsPath
(
c
.
Param
(
"digest"
))
if
err
!=
nil
{
c
.
AbortWithStatusJSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
_
,
err
=
os
.
Stat
(
path
)
switch
{
case
errors
.
Is
(
err
,
os
.
ErrNotExist
)
:
// noop
case
err
!=
nil
:
c
.
AbortWithStatusJSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
default
:
c
.
Status
(
http
.
StatusOK
)
return
}
layer
,
err
:=
NewLayer
(
c
.
Request
.
Body
,
""
)
layer
,
err
:=
NewLayer
(
c
.
Request
.
Body
,
""
)
if
err
!=
nil
{
if
err
!=
nil
{
c
.
AbortWithStatusJSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
c
.
AbortWithStatusJSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
...
...
server/routes_test.go
View file @
c77d45d8
...
@@ -61,7 +61,7 @@ func Test_Routes(t *testing.T) {
...
@@ -61,7 +61,7 @@ func Test_Routes(t *testing.T) {
fn
:=
func
(
resp
api
.
ProgressResponse
)
{
fn
:=
func
(
resp
api
.
ProgressResponse
)
{
t
.
Logf
(
"Status: %s"
,
resp
.
Status
)
t
.
Logf
(
"Status: %s"
,
resp
.
Status
)
}
}
err
=
CreateModel
(
context
.
TODO
(),
name
,
""
,
commands
,
fn
)
err
=
CreateModel
(
context
.
TODO
(),
name
,
""
,
""
,
commands
,
fn
)
assert
.
Nil
(
t
,
err
)
assert
.
Nil
(
t
,
err
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment