Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
f387e963
"git@developer.sourcefind.cn:modelzoo/lpr.git" did not exist on "9adcf60dc6d3aebb22fcf4a20ed00e1f1bdb702d"
Commit
f387e963
authored
Jan 09, 2024
by
Jeffrey Morgan
Browse files
use runner if cuda alloc won't fit
parent
6566387a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
6 deletions
+3
-6
llm/llm.go
llm/llm.go
+3
-6
No files found.
llm/llm.go
View file @
f387e963
...
@@ -100,10 +100,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
...
@@ -100,10 +100,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
break
break
}
}
// This handles two cases:
// requiredAlloc is always loaded for the CUDA runner, so don't load it if it won't fit
// 1. overhead + tensors are always loaded into scratch memory even with num_gpu 0
if
requiredAlloc
>
available
{
// 2. it seems llama.cpp always tries to allocate the entire kv cache (even if later split into layers) into vram or crashes
if
requiredAlloc
>
available
||
requiredKv
>
available
{
log
.
Printf
(
"not enough vram available, falling back to CPU only"
)
log
.
Printf
(
"not enough vram available, falling back to CPU only"
)
library
=
"cpu"
library
=
"cpu"
opts
.
NumGPU
=
0
opts
.
NumGPU
=
0
...
@@ -127,8 +125,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
...
@@ -127,8 +125,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts
.
NumGQA
=
0
opts
.
NumGQA
=
0
opts
.
RopeFrequencyBase
=
0.0
opts
.
RopeFrequencyBase
=
0.0
opts
.
RopeFrequencyScale
=
0.0
opts
.
RopeFrequencyScale
=
0.0
gpuInfo
:=
gpu
.
GetGPUInfo
()
return
newLlmServer
(
library
,
model
,
adapters
,
projectors
,
opts
)
return
newLlmServer
(
gpuInfo
.
Library
,
model
,
adapters
,
projectors
,
opts
)
}
}
// Give any native cgo implementations an opportunity to initialize
// Give any native cgo implementations an opportunity to initialize
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment