Unverified commit 63269668, authored by frob, committed by GitHub
Browse files

Prevent underflow when FreeMemory < overhead (#8014)


Co-authored-by: Richard Lyons <frob@cloudstaff.com>
parent 900f64e6
...@@ -182,7 +182,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string, ...@@ -182,7 +182,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
gzo = gpuZeroOverhead gzo = gpuZeroOverhead
} }
// Only include GPUs that can fit the graph, gpu minimum, the layer buffer and at least more layer // Only include GPUs that can fit the graph, gpu minimum, the layer buffer and at least more layer
if (gpus[i].FreeMemory - overhead) < gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize { if gpus[i].FreeMemory < overhead+gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize {
slog.Debug("gpu has too little memory to allocate any layers", slog.Debug("gpu has too little memory to allocate any layers",
"id", gpus[i].ID, "id", gpus[i].ID,
"library", gpus[i].Library, "library", gpus[i].Library,
...@@ -228,7 +228,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string, ...@@ -228,7 +228,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
for j := len(gpusWithSpace); j > 0; j-- { for j := len(gpusWithSpace); j > 0; j-- {
g := gpusWithSpace[i%j] g := gpusWithSpace[i%j]
used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload) used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
if (g.g.FreeMemory - overhead) > used+layerSize { if g.g.FreeMemory > overhead+used+layerSize {
gpuAllocations[g.i] += layerSize gpuAllocations[g.i] += layerSize
layerCounts[g.i]++ layerCounts[g.i]++
layerCount++ layerCount++
...@@ -251,7 +251,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string, ...@@ -251,7 +251,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
for j := len(gpusWithSpace); j > 0; j-- { for j := len(gpusWithSpace); j > 0; j-- {
g := gpusWithSpace[layerCount%j] g := gpusWithSpace[layerCount%j]
used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload) used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
if (g.g.FreeMemory - overhead) > used+memoryLayerOutput { if g.g.FreeMemory > overhead+used+memoryLayerOutput {
gpuAllocations[g.i] += memoryLayerOutput gpuAllocations[g.i] += memoryLayerOutput
layerCounts[g.i]++ layerCounts[g.i]++
layerCount++ layerCount++
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment