Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
0ee87615
Unverified
Commit
0ee87615
authored
Jul 06, 2024
by
Jeffrey Morgan
Committed by
GitHub
Jul 06, 2024
Browse files
sched: don't error if paging to disk on Windows and macOS (#5523)
parent
f8241bfb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
24 additions
and
13 deletions
+24
-13
server/sched.go
server/sched.go
+24
-13
No files found.
server/sched.go
View file @
0ee87615
...
@@ -197,25 +197,36 @@ func (s *Scheduler) processPending(ctx context.Context) {
...
@@ -197,25 +197,36 @@ func (s *Scheduler) processPending(ctx context.Context) {
break
break
}
}
// Block attempting to load a model larger than system memory + GPU memory
estimate
:=
llm
.
EstimateGPULayers
(
gpus
,
ggml
,
pending
.
model
.
ProjectorPaths
,
pending
.
opts
)
estimate
:=
llm
.
EstimateGPULayers
(
gpus
,
ggml
,
pending
.
model
.
ProjectorPaths
,
pending
.
opts
)
maxSize
:=
systemMem
.
FreeMemory
maxSize
:=
systemMem
.
FreeMemory
for
_
,
gpu
:=
range
gpus
{
if
gpu
.
Library
==
"cpu"
{
// Add available GPU memory to the total pool
continue
// macOS hardware has unified memory so don't double count
}
if
runtime
.
GOOS
!=
"darwin"
{
if
loadedCount
==
0
{
for
_
,
gpu
:=
range
gpus
{
// If no other models are loaded, set the limit based on what's available
if
gpu
.
Library
==
"cpu"
{
maxSize
+=
gpu
.
FreeMemory
continue
}
else
{
}
// Other models could be unloaded, favor total memory for limit
if
loadedCount
==
0
{
maxSize
+=
gpu
.
TotalMemory
// If no other models are loaded, set the limit based on what's available
maxSize
+=
gpu
.
FreeMemory
}
else
{
// Other models could be unloaded, favor total memory for limit
maxSize
+=
gpu
.
TotalMemory
}
}
}
}
}
// Block attempting to load a model larger than system memory + GPU memory
if
estimate
.
TotalSize
>
maxSize
{
if
estimate
.
TotalSize
>
maxSize
{
slog
.
Warn
(
"model request too large for system"
,
"requested"
,
format
.
HumanBytes2
(
estimate
.
TotalSize
),
"system"
,
format
.
HumanBytes2
(
maxSize
))
slog
.
Warn
(
"model request too large for system"
,
"requested"
,
format
.
HumanBytes2
(
estimate
.
TotalSize
),
"system"
,
format
.
HumanBytes2
(
maxSize
))
pending
.
errCh
<-
fmt
.
Errorf
(
"requested model (%s) is too large for this system (%s)"
,
format
.
HumanBytes2
(
estimate
.
TotalSize
),
format
.
HumanBytes2
(
maxSize
))
break
// Linux will crash if over-allocating memory - return an error to the user.
// TODO (jmorganca): add reasonable upper limits for darwin and windows as well
if
runtime
.
GOOS
==
"linux"
{
pending
.
errCh
<-
fmt
.
Errorf
(
"requested model (%s) is too large for this system (%s)"
,
format
.
HumanBytes2
(
estimate
.
TotalSize
),
format
.
HumanBytes2
(
maxSize
))
break
}
}
}
// Evaluate if the model will fit in the available system memory, or if we should unload a model first
// Evaluate if the model will fit in the available system memory, or if we should unload a model first
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment