Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
54dbfa4c
Commit
54dbfa4c
authored
Dec 18, 2023
by
Daniel Hiltgen
Browse files
Carry ggml-metal.metal as payload
parent
5646826a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
65 additions
and
47 deletions
+65
-47
llm/llama.go
llm/llama.go
+44
-0
llm/shim_darwin.go
llm/shim_darwin.go
+16
-2
llm/shim_ext_server.go
llm/shim_ext_server.go
+5
-45
No files found.
llm/llama.go
View file @
54dbfa4c
...
...
@@ -6,8 +6,12 @@ import (
_
"embed"
"errors"
"fmt"
"io"
"io/fs"
"log"
"os"
"os/exec"
"path/filepath"
"sync"
"time"
...
...
@@ -116,6 +120,7 @@ type ImageData struct {
// Sentinel errors for GPU detection and embedded payload extraction.
var (
	// errNvidiaSMI signals that the nvidia-smi command failed, so GPU support
	// may not be enabled.
	errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")

	// errAvailableVRAM signals that there is not enough VRAM available and
	// that execution falls back to CPU only.
	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")

	// payloadMissing is returned by extractLib when no embedded file matches
	// the requested glob, i.e. the payload was not included in this build.
	// It is a fixed sentinel compared by identity, so it is created with
	// errors.New like its siblings rather than fmt.Errorf.
	payloadMissing = errors.New("expected payload not included in this build of ollama")
)
// StatusWriter is a writer that captures error messages from the llama runner process
...
...
@@ -202,3 +207,42 @@ type EmbeddingRequest struct {
// EmbeddingResponse holds an embedding vector. The Embedding field is
// encoded under the JSON key "embedding".
type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}
func
extractLib
(
workDir
,
glob
string
)
error
{
files
,
err
:=
fs
.
Glob
(
libEmbed
,
glob
)
if
err
!=
nil
||
len
(
files
)
==
0
{
return
payloadMissing
}
if
len
(
files
)
!=
1
{
// Shouldn't happen, but just use the first one we find
log
.
Printf
(
"WARNING: multiple payloads detected - using %s"
,
files
[
0
])
}
srcFile
,
err
:=
libEmbed
.
Open
(
files
[
0
])
if
err
!=
nil
{
return
fmt
.
Errorf
(
"read payload %s: %v"
,
files
[
0
],
err
)
}
defer
srcFile
.
Close
()
if
err
:=
os
.
MkdirAll
(
workDir
,
0
o755
);
err
!=
nil
{
return
fmt
.
Errorf
(
"create payload temp dir %s: %v"
,
workDir
,
err
)
}
destFile
:=
filepath
.
Join
(
workDir
,
filepath
.
Base
(
files
[
0
]))
_
,
err
=
os
.
Stat
(
destFile
)
switch
{
case
errors
.
Is
(
err
,
os
.
ErrNotExist
)
:
destFile
,
err
:=
os
.
OpenFile
(
destFile
,
os
.
O_WRONLY
|
os
.
O_CREATE
|
os
.
O_TRUNC
,
0
o755
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"write payload %s: %v"
,
files
[
0
],
err
)
}
defer
destFile
.
Close
()
if
_
,
err
:=
io
.
Copy
(
destFile
,
srcFile
);
err
!=
nil
{
return
fmt
.
Errorf
(
"copy payload %s: %v"
,
files
[
0
],
err
)
}
case
err
!=
nil
:
return
fmt
.
Errorf
(
"stat payload %s: %v"
,
files
[
0
],
err
)
}
return
nil
}
llm/shim_darwin.go
View file @
54dbfa4c
package
llm
import
(
"embed"
"fmt"
"log"
"os"
"github.com/jmorganca/ollama/api"
)
// macOS shims: ROCm is stubbed out, and the Metal shader payload is embedded below
//go:embed llama.cpp/gguf/build/*/bin/ggml-metal.metal
var
libEmbed
embed
.
FS
func
newRocmShimExtServer
(
model
string
,
adapters
,
projectors
[]
string
,
numLayers
int64
,
opts
api
.
Options
)
(
extServer
,
error
)
{
// should never happen...
return
nil
,
fmt
.
Errorf
(
"ROCM GPUs not supported on Mac"
)
}
func
nativeInit
(
workDir
string
)
error
{
func
nativeInit
(
workdir
string
)
error
{
err
:=
extractLib
(
workdir
,
"llama.cpp/gguf/build/*/bin/ggml-metal.metal"
)
if
err
!=
nil
{
if
err
==
payloadMissing
{
// TODO perhaps consider this a hard failure on arm macs?
log
.
Printf
(
"ggml-meta.metal payload missing"
)
return
nil
}
return
err
}
os
.
Setenv
(
"GGML_METAL_PATH_RESOURCES"
,
workdir
)
return
nil
}
llm/shim_ext_server.go
View file @
54dbfa4c
...
...
@@ -14,7 +14,6 @@ import (
"embed"
"errors"
"fmt"
"io"
"io/fs"
"log"
"os"
...
...
@@ -109,13 +108,15 @@ func (llm *shimExtServer) Close() {
}
func
nativeInit
(
workdir
string
)
error
{
err
:=
extractLib
(
workdir
)
err
:=
extractLib
(
workdir
,
"llama.cpp/gguf/build/*/lib/*rocm_server*"
)
if
err
!=
nil
{
if
err
==
RocmShim
Missing
{
log
.
Printf
(
"%s"
,
err
)
if
err
==
payload
Missing
{
log
.
Printf
(
"%s"
,
RocmShimMissing
)
return
nil
}
return
err
}
else
{
ShimPresent
=
true
}
// Verify we have permissions - either running as root, or we have group access to the driver
...
...
@@ -168,44 +169,3 @@ func nativeInit(workdir string) error {
}
return
nil
}
func
extractLib
(
workDir
string
)
error
{
files
,
err
:=
fs
.
Glob
(
libEmbed
,
"llama.cpp/gguf/build/*/lib/*rocm_server*"
)
if
err
!=
nil
||
len
(
files
)
==
0
{
// this is expected, ollama may be compiled without shim library packed in
return
RocmShimMissing
}
if
len
(
files
)
!=
1
{
// Shouldn't happen, but just use the first one we find
log
.
Printf
(
"WARNING: multiple rocm libraries detected - using %s"
,
files
[
0
])
}
srcFile
,
err
:=
libEmbed
.
Open
(
files
[
0
])
if
err
!=
nil
{
return
fmt
.
Errorf
(
"read ROCm shim %s: %v"
,
files
[
0
],
err
)
}
defer
srcFile
.
Close
()
if
err
:=
os
.
MkdirAll
(
workDir
,
0
o755
);
err
!=
nil
{
return
fmt
.
Errorf
(
"create ROCm shim temp dir %s: %v"
,
workDir
,
err
)
}
destFile
:=
filepath
.
Join
(
workDir
,
filepath
.
Base
(
files
[
0
]))
_
,
err
=
os
.
Stat
(
destFile
)
switch
{
case
errors
.
Is
(
err
,
os
.
ErrNotExist
)
:
destFile
,
err
:=
os
.
OpenFile
(
destFile
,
os
.
O_WRONLY
|
os
.
O_CREATE
|
os
.
O_TRUNC
,
0
o755
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"write ROCm shim %s: %v"
,
files
[
0
],
err
)
}
defer
destFile
.
Close
()
if
_
,
err
:=
io
.
Copy
(
destFile
,
srcFile
);
err
!=
nil
{
return
fmt
.
Errorf
(
"copy ROCm shim %s: %v"
,
files
[
0
],
err
)
}
case
err
!=
nil
:
return
fmt
.
Errorf
(
"stat ROCm shim %s: %v"
,
files
[
0
],
err
)
}
ShimPresent
=
true
return
nil
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment