better estimate scratch buffer size

58ce2d82 · Jeffrey Morgan · 18ddf6d5 · 58ce2d82
Commit 58ce2d82 authored Jan 08, 2024 by Jeffrey Morgan
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

llm/llm.go llm/llm.go +2 -2

No files found.
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -62,8 +62,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	// this amount is the overhead + tensors in memory
 	// TODO: get this from the llama.cpp's graph calculations instead of
-	// guessing it's ~1/7th of the kv cache times gqa
+	// estimating it's 1/6 * kv_cache_size * num_gqa
-	requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 7
+	requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6
 	requiredTotal := requiredModel + requiredKv + requiredAlloc