Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
2dd040d0
Commit
2dd040d0
authored
Dec 09, 2023
by
Jeffrey Morgan
Browse files
do not use `--parallel 2` for old runners
parent
bbe41ce4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
9 deletions
+16
-9
llm/llama.go
llm/llama.go
+16
-9
No files found.
llm/llama.go
View file @
2dd040d0
...
...
@@ -59,6 +59,7 @@ ws ::= ([ \t\n] ws)?
var
llamaCppEmbed
embed
.
FS
type
ModelRunner
struct
{
Type
string
// "gguf" or "ggml"
Path
string
// path to the model runner executable
Accelerated
bool
}
...
...
@@ -72,25 +73,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
switch
runtime
.
GOOS
{
case
"darwin"
:
if
runtime
.
GOARCH
==
"arm64"
{
runners
=
[]
ModelRunner
{{
Path
:
path
.
Join
(
buildPath
,
"metal"
,
"bin"
,
"ollama-runner"
)}}
runners
=
[]
ModelRunner
{{
Type
:
runnerType
,
Path
:
path
.
Join
(
buildPath
,
"metal"
,
"bin"
,
"ollama-runner"
)}}
}
else
{
runners
=
[]
ModelRunner
{{
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"ollama-runner"
)}}
runners
=
[]
ModelRunner
{{
Type
:
runnerType
,
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"ollama-runner"
)}}
}
case
"linux"
:
runners
=
[]
ModelRunner
{
{
Path
:
path
.
Join
(
buildPath
,
"cuda"
,
"bin"
,
"ollama-runner"
),
Accelerated
:
true
},
{
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"ollama-runner"
)},
{
Type
:
runnerType
,
Path
:
path
.
Join
(
buildPath
,
"cuda"
,
"bin"
,
"ollama-runner"
),
Accelerated
:
true
},
{
Type
:
runnerType
,
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"ollama-runner"
)},
}
case
"windows"
:
// TODO: select windows GPU runner here when available
runners
=
[]
ModelRunner
{
{
Path
:
path
.
Join
(
buildPath
,
"cuda"
,
"bin"
,
"Release"
,
"ollama-runner.exe"
),
Accelerated
:
true
},
{
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"Release"
,
"ollama-runner.exe"
)},
{
Type
:
runnerType
,
Path
:
path
.
Join
(
buildPath
,
"cuda"
,
"bin"
,
"Release"
,
"ollama-runner.exe"
),
Accelerated
:
true
},
{
Type
:
runnerType
,
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"Release"
,
"ollama-runner.exe"
)},
}
default
:
log
.
Printf
(
"unknown OS, running on CPU: %s"
,
runtime
.
GOOS
)
runners
=
[]
ModelRunner
{
{
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"ollama-runner"
)},
{
Type
:
runnerType
,
Path
:
path
.
Join
(
buildPath
,
"cpu"
,
"bin"
,
"ollama-runner"
)},
}
}
...
...
@@ -148,6 +149,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
for
_
,
r
:=
range
runners
{
// clean the ModelRunner paths so that they match the OS we are running on
localRunnersByPriority
=
append
(
localRunnersByPriority
,
ModelRunner
{
Type
:
r
.
Type
,
Path
:
filepath
.
Clean
(
path
.
Join
(
workDir
,
r
.
Path
)),
Accelerated
:
r
.
Accelerated
,
})
...
...
@@ -341,7 +343,6 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
"--ctx-size"
,
fmt
.
Sprintf
(
"%d"
,
opts
.
NumCtx
),
"--batch-size"
,
fmt
.
Sprintf
(
"%d"
,
opts
.
NumBatch
),
"--n-gpu-layers"
,
fmt
.
Sprintf
(
"%d"
,
numGPU
),
"--parallel"
,
"2"
,
"--embedding"
,
}
...
...
@@ -403,11 +404,17 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
}
port
:=
rand
.
Intn
(
65535
-
49152
)
+
49152
// get a random port in the ephemeral range
params
:=
append
(
params
,
"--port"
,
strconv
.
Itoa
(
port
))
if
runner
.
Type
==
"gguf"
{
params
=
append
(
params
,
"--parallel"
,
"2"
)
}
ctx
,
cancel
:=
context
.
WithCancel
(
context
.
Background
())
cmd
:=
exec
.
CommandContext
(
ctx
,
runner
.
Path
,
append
(
params
,
"--port"
,
strconv
.
Itoa
(
port
))
...
,
params
...
,
)
var
libraryPaths
[]
string
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment