Unverified Commit 879e2caf authored by Daniel Hiltgen's avatar Daniel Hiltgen Committed by GitHub
Browse files

Merge pull request #4329 from dhiltgen/zero_layers

Fall back to CPU runner with zero layers
parents be9efdb9 c4014e73
......@@ -105,6 +105,10 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
// disable partial offloading when model is greater than total system memory as this
// can lead to locking up the system
opts.NumGPU = 0
} else if gpus[0].Library != "metal" && layers == 0 {
// Don't bother loading into the GPU if no layers can fit
cpuRunner = serverForCpu()
gpuCount = 0
} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
opts.NumGPU = layers
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment