Commit 359b15a5 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Handle models with divergent layer sizes

The recent refactoring of the memory prediction assumed all layers
are the same size, but for some models (like deepseek-coder-v2) this
is not the case, so our predictions were significantly off.
parent b55958a5
package llm package llm
import ( import (
"fmt"
"log/slog" "log/slog"
"strconv" "strconv"
"strings" "strings"
...@@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts ...@@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
// For all the layers, find where they can fit on the GPU(s) // For all the layers, find where they can fit on the GPU(s)
for i := range int(ggml.KV().BlockCount()) { for i := range int(ggml.KV().BlockCount()) {
// Some models have inconsistent layer sizes
if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
layerSize = blk.size()
layerSize += kv / ggml.KV().BlockCount()
}
memoryWeights += layerSize memoryWeights += layerSize
if opts.NumGPU >= 0 && layerCount >= opts.NumGPU { if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment