Merge pull request #5117 from dhiltgen/fix_prediction

Handle models with divergent layer sizes

Merge pull request #5117 from dhiltgen/fix_prediction
Handle models with divergent layer sizes
26d0bf92 · Daniel Hiltgen · GitHub · b55958a5 · 359b15a5 · 26d0bf92
Unverified commit 26d0bf92, authored Jun 18, 2024 by Daniel Hiltgen; committed by GitHub on Jun 18, 2024
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 0 deletions

llm/memory.go llm/memory.go +6 -0

No files found.
--- a/llm/memory.go
+++ b/llm/memory.go
 package llm

 import (
+	"fmt"
 	"log/slog"
 	"strconv"
 	"strings"
@@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts

 	// For all the layers, find where they can fit on the GPU(s)
 	for i := range int(ggml.KV().BlockCount()) {
+		// Some models have inconsistent layer sizes
+		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
+			layerSize = blk.size()
+			layerSize += kv / ggml.KV().BlockCount()
+		}
 		memoryWeights += layerSize

 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {