Commit f457d634 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Implement linux NUMA detection

If the system has multiple numa nodes, enable numa support in llama.cpp
If we detect numactl in the path, use that, else use the basic "distribute" mode.
parent 39f2bc6b
......@@ -231,7 +231,6 @@ type Options struct {
// Runner options which must be set when the model is loaded into memory
type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
......@@ -615,7 +614,6 @@ func DefaultOptions() Options {
F16KV: true,
UseMLock: false,
UseMMap: nil,
UseNUMA: false,
},
}
}
......
package gpu
import (
"os"
"path/filepath"
"runtime"
"strings"
"golang.org/x/sys/cpu"
)
......@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
// else LCD
return CPUCapabilityNone
}
func IsNUMA() bool {
if runtime.GOOS != "linux" {
// numa support in llama.cpp is linux only
return false
}
ids := map[string]interface{}{}
packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
for _, packageId := range packageIds {
id, err := os.ReadFile(packageId)
if err == nil {
ids[strings.TrimSpace(string(id))] = struct{}{}
}
}
return len(ids) > 1
}
......@@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock")
}
if opts.UseNUMA {
params = append(params, "--numa")
if gpu.IsNUMA() {
numaMode := "distribute"
if runtime.GOOS == "linux" {
if _, err := exec.LookPath("numactl"); err == nil {
numaMode = "numactl"
}
}
params = append(params, "--numa", numaMode)
}
params = append(params, "--parallel", strconv.Itoa(numParallel))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment