OpenDAS / ollama · Commits

Commit 35afac09, authored Oct 13, 2023 by Michael Yang
Parent: 811c3d19

do not use gpu binary when num_gpu == 0

Showing 1 changed file with 28 additions and 15 deletions:

llm/llama.go (+28, -15)
```diff
@@ -30,42 +30,43 @@ import (
 var llamaCppEmbed embed.FS
 
 type ModelRunner struct {
-	Path string // path to the model runner executable
+	Path        string // path to the model runner executable
+	Accelerated bool
 }
 
 func chooseRunners(workDir, runnerType string) []ModelRunner {
 	buildPath := path.Join("llama.cpp", runnerType, "build")
-	var runners []string
+	var runners []ModelRunner
 
 	// set the runners based on the OS
 	// IMPORTANT: the order of the runners in the array is the priority order
 	switch runtime.GOOS {
 	case "darwin":
-		runners = []string{
-			path.Join(buildPath, "metal", "bin", "ollama-runner"),
-			path.Join(buildPath, "cpu", "bin", "ollama-runner"),
+		runners = []ModelRunner{
+			{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")},
+			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	case "linux":
-		runners = []string{
-			path.Join(buildPath, "cuda", "bin", "ollama-runner"),
-			path.Join(buildPath, "cpu", "bin", "ollama-runner"),
+		runners = []ModelRunner{
+			{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
+			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	case "windows":
 		// TODO: select windows GPU runner here when available
-		runners = []string{
-			path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe"),
+		runners = []ModelRunner{
+			{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
 		}
 	default:
 		log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
-		runners = []string{
-			path.Join(buildPath, "cpu", "bin", "ollama-runner"),
+		runners = []ModelRunner{
+			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	}
 
 	runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
 	for _, r := range runners {
 		// find all the files in the runner's bin directory
-		files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r), "*"))
+		files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r.Path), "*"))
 		if err != nil {
 			// this is expected, ollama may be compiled without all runners packed in
 			log.Printf("%s runner not found: %v", r, err)
```
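The lookup above tolerates missing runners because a given build may not embed every backend. A self-contained sketch of roughly how that fs.Glob probe behaves, using testing/fstest.MapFS as a stand-in for the real llamaCppEmbed and a hypothetical "ggml" runner type:

```go
package main

import (
	"fmt"
	"io/fs"
	"path"
	"testing/fstest"
)

func main() {
	// Stand-in for llamaCppEmbed: only the CPU runner is "packed in",
	// mirroring a build compiled without CUDA support.
	var packed fs.FS = fstest.MapFS{
		"llama.cpp/ggml/build/cpu/bin/ollama-runner": &fstest.MapFile{Data: []byte("binary")},
	}

	runners := []string{
		"llama.cpp/ggml/build/cuda/bin/ollama-runner", // not packed in this build
		"llama.cpp/ggml/build/cpu/bin/ollama-runner",
	}
	for _, r := range runners {
		// probe the runner's bin directory inside the embedded FS
		files, err := fs.Glob(packed, path.Join(path.Dir(r), "*"))
		if err != nil || len(files) == 0 {
			// expected when a backend was not packed into this build
			fmt.Printf("%s runner not found\n", r)
			continue
		}
		fmt.Printf("%s available: %v\n", r, files)
	}
}
```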
```diff
@@ -115,7 +116,10 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	localRunnersByPriority := []ModelRunner{}
 	for _, r := range runners {
 		// clean the ModelRunner paths so that they match the OS we are running on
-		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: filepath.Clean(path.Join(workDir, r))})
+		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
+			Path:        filepath.Clean(path.Join(workDir, r.Path)),
+			Accelerated: r.Accelerated,
+		})
 	}
 
 	return localRunnersByPriority
```
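The cleaning step matters because embedded paths are always slash-separated (the io/fs convention), while the extracted binaries live at OS-native paths. A minimal sketch of the conversion, with a hypothetical Windows work directory (filepath.Clean only rewrites separators when actually run on Windows):

```go
package main

import (
	"fmt"
	"path"
	"path/filepath"
)

func main() {
	// hypothetical extraction directory on Windows
	workDir := `C:\Users\me\AppData\Local\Temp\ollama`

	// embedded paths are slash-separated (io/fs convention) ...
	embedded := path.Join("llama.cpp", "ggml", "build", "cpu", "bin", "ollama-runner.exe")

	// ... filepath.Clean normalizes separators for the host OS, so on
	// Windows this yields a usable all-backslash path.
	fmt.Println(filepath.Clean(path.Join(workDir, embedded)))
}
```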
```diff
@@ -282,16 +286,20 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
 	}
 
+	numGPU := NumGPU(numLayers, fileInfo.Size(), opts)
 	params := []string{
 		"--model", model,
 		"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
 		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
 		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
-		"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(numLayers, fileInfo.Size(), opts)),
 		"--embedding",
 	}
 
+	if numGPU > 0 {
+		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
+	}
+
 	if opts.NumGQA > 0 {
 		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
 	}
```
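This hunk is the heart of the fix: --n-gpu-layers is no longer passed unconditionally, and computing numGPU once up front lets the launch loop below reuse the same value. A hypothetical reduction of the argument assembly (buildParams and the literal values are illustrative, not the real API):

```go
package main

import "fmt"

// buildParams is a hypothetical reduction of newLlama's argument assembly:
// GPU offload flags are emitted only when there are layers to offload.
func buildParams(model string, numCtx, numGPU int) []string {
	params := []string{
		"--model", model,
		"--ctx-size", fmt.Sprintf("%d", numCtx),
		"--embedding",
	}
	if numGPU > 0 {
		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
	}
	return params
}

func main() {
	fmt.Println(buildParams("model.gguf", 2048, 0))  // no GPU flag at all
	fmt.Println(buildParams("model.gguf", 2048, 35)) // offload 35 layers
}
```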
```diff
@@ -322,6 +330,11 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 	// start the llama.cpp server with a retry in case the port is already in use
 	for _, runner := range runners {
+		if runner.Accelerated && numGPU == 0 {
+			log.Printf("skipping accelerated runner because num_gpu=0")
+			continue
+		}
+
 		if _, err := os.Stat(runner.Path); err != nil {
 			log.Printf("llama runner not found: %v", err)
 			continue
```
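With the Accelerated flag threaded through, the launch loop can now fall through to the CPU runner whenever no layers will be offloaded. A self-contained sketch of that selection logic (pickRunner is a hypothetical distillation; the real loop also stats the binary and retries the server start):

```go
package main

import "fmt"

type ModelRunner struct {
	Path        string
	Accelerated bool
}

// pickRunner walks the priority-ordered runners, skipping GPU binaries
// when nothing will be offloaded, and takes the first remaining candidate.
func pickRunner(runners []ModelRunner, numGPU int) (ModelRunner, error) {
	for _, runner := range runners {
		if runner.Accelerated && numGPU == 0 {
			fmt.Println("skipping accelerated runner because num_gpu=0")
			continue
		}
		return runner, nil
	}
	return ModelRunner{}, fmt.Errorf("no suitable runner found")
}

func main() {
	runners := []ModelRunner{
		{Path: "build/cuda/bin/ollama-runner", Accelerated: true},
		{Path: "build/cpu/bin/ollama-runner"},
	}
	r, _ := pickRunner(runners, 0)
	fmt.Println("selected:", r.Path) // falls through to the CPU runner
}
```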