Commit 58ce2d82 authored by Jeffrey Morgan
Browse files

better estimate scratch buffer size

parent 18ddf6d5
......@@ -62,8 +62,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
// this amount is the overhead + tensors in memory
// TODO: get this from the llama.cpp's graph calculations instead of
// guessing it's ~1/7th of the kv cache times gqa
requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 7
// estimating it's 1/6 * kv_cache_size * num_gqa
requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6
requiredTotal := requiredModel + requiredKv + requiredAlloc
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment