Unverified Commit f0c454ab authored by Jeffrey Morgan's avatar Jeffrey Morgan Committed by GitHub
Browse files

gpu: add 512MiB to darwin minimum, metal doesn't have partial offloading overhead (#4068)

parent b9f74ff3
......@@ -10,6 +10,12 @@ package gpu
import "C"
import (
"runtime"
"github.com/ollama/ollama/format"
)
const (
metalMinimumMemory = 512 * format.MebiByte
)
func GetGPUInfo() GpuInfoList {
......@@ -32,7 +38,7 @@ func GetGPUInfo() GpuInfoList {
// TODO is there a way to gather actual allocated video memory? (currentAllocatedSize doesn't work)
info.FreeMemory = info.TotalMemory
info.MinimumMemory = 0
info.MinimumMemory = metalMinimumMemory
return []GpuInfo{info}
}
......
......@@ -88,6 +88,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
graphFullOffload *= uint64(len(gpus))
graphPartialOffload *= uint64(len(gpus))
// on metal there's no partial offload overhead
if gpus[0].Library == "metal" {
graphPartialOffload = graphFullOffload
}
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment