Unverified Commit fcec04bf authored by Michael Yang's avatar Michael Yang Committed by GitHub
Browse files

gptoss: fix memory calc (#11700)

parent ee92ca3e
...@@ -676,7 +676,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri ...@@ -676,7 +676,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
} }
} }
fullOffload = 4 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6 fullOffload = 4 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6
partialOffload = 2 * fullOffload partialOffload = fullOffload
} }
return return
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment