".github/vscode:/vscode.git/clone" did not exist on "12949c8acd9120a5c9d75a6b53ce667f586a82de"
Unverified commit 0ee87615, authored by Jeffrey Morgan and committed by GitHub
Browse files

sched: don't error if paging to disk on Windows and macOS (#5523)

parent f8241bfb
@@ -197,9 +197,12 @@ func (s *Scheduler) processPending(ctx context.Context) {
break break
} }
// Block attempting to load a model larger than system memory + GPU memory
estimate := llm.EstimateGPULayers(gpus, ggml, pending.model.ProjectorPaths, pending.opts) estimate := llm.EstimateGPULayers(gpus, ggml, pending.model.ProjectorPaths, pending.opts)
maxSize := systemMem.FreeMemory maxSize := systemMem.FreeMemory
// Add available GPU memory to the total pool
// macOS hardware has unified memory so don't double count
if runtime.GOOS != "darwin" {
for _, gpu := range gpus { for _, gpu := range gpus {
if gpu.Library == "cpu" { if gpu.Library == "cpu" {
continue continue
@@ -212,11 +215,19 @@ func (s *Scheduler) processPending(ctx context.Context) {
maxSize += gpu.TotalMemory maxSize += gpu.TotalMemory
} }
} }
}
// Block attempting to load a model larger than system memory + GPU memory
if estimate.TotalSize > maxSize { if estimate.TotalSize > maxSize {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(estimate.TotalSize), "system", format.HumanBytes2(maxSize)) slog.Warn("model request too large for system", "requested", format.HumanBytes2(estimate.TotalSize), "system", format.HumanBytes2(maxSize))
// Linux will crash if over-allocating memory - return an error to the user.
// TODO (jmorganca): add reasonable upper limits for darwin and windows as well
if runtime.GOOS == "linux" {
pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize)) pending.errCh <- fmt.Errorf("requested model (%s) is too large for this system (%s)", format.HumanBytes2(estimate.TotalSize), format.HumanBytes2(maxSize))
break break
} }
}
// Evaluate if the model will fit in the available system memory, or if we should unload a model first // Evaluate if the model will fit in the available system memory, or if we should unload a model first
if len(gpus) == 1 && gpus[0].Library == "cpu" { if len(gpus) == 1 && gpus[0].Library == "cpu" {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment