"vscode:/vscode.git/clone" did not exist on "ca2e6ae0bbf3d25e6355c469635778f0deecb571"
Commit 5445aaa9 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Add back memory escape valve

If we get our predictions wrong, this can be used to
set a lower memory limit as a workaround.  Recent multi-gpu
refactoring accidentally removed it, so this adds it back.
parent 2ac3dd68
...@@ -3,6 +3,8 @@ package llm ...@@ -3,6 +3,8 @@ package llm
import ( import (
"fmt" "fmt"
"log/slog" "log/slog"
"os"
"strconv"
"strings" "strings"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
...@@ -49,6 +51,17 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts ...@@ -49,6 +51,17 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
for _, info := range gpus { for _, info := range gpus {
memoryAvailable += info.FreeMemory memoryAvailable += info.FreeMemory
} }
// OLLAMA_MAX_VRAM is an escape valve: if the layer-fit predictions are
// wrong, the user can set a lower VRAM limit as a workaround (per the
// commit message, this override was accidentally dropped in a multi-GPU
// refactor and is being restored here).
userLimit := os.Getenv("OLLAMA_MAX_VRAM")
if userLimit != "" {
	// The value must be a plain base-10 unsigned byte count; no
	// human-readable suffixes (e.g. "8G") are parsed here.
	avail, err := strconv.ParseUint(userLimit, 10, 64)
	if err != nil {
		// A malformed value is logged and ignored rather than
		// failing the estimate — detection proceeds with the
		// measured free memory.
		slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
	} else {
		// Log both the override and the detected total before
		// clobbering memoryAvailable with the user's limit.
		slog.Info("user override memory limit", "OLLAMA_MAX_VRAM", avail, "actual", memoryAvailable)
		memoryAvailable = avail
	}
}
slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable)) slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable))
// TODO - this is probably wrong, first GPU vs secondaries will have different overheads // TODO - this is probably wrong, first GPU vs secondaries will have different overheads
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.