Commit 26df6747 authored by Michael Yang
Browse files

scale graph based on gpu count

parent 7c9792a6
...@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) { ...@@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
return memInfo{ return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()), TotalMemory: uint64(C.getPhysicalMemory()),
FreeMemory: 0, FreeMemory: 0,
DeviceCount: 0, DeviceCount: 1,
}, nil }, nil
} }
...@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option ...@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
graphFullOffload = graphPartialOffload graphFullOffload = graphPartialOffload
} }
graphFullOffload *= uint64(info.DeviceCount)
graphPartialOffload *= uint64(info.DeviceCount)
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers) // memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload memoryRequiredTotal := memoryMinimum + graphFullOffload
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment