Unverified commit 50ee8b5f, authored by Daniel Hiltgen, committed by GitHub
Browse files

Merge pull request #6186 from dhiltgen/numa

Implement linux NUMA detection
parents 03bdac05 f457d634
......@@ -231,7 +231,6 @@ type Options struct {
// Runner options which must be set when the model is loaded into memory
type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
......@@ -615,7 +614,6 @@ func DefaultOptions() Options {
F16KV: true,
UseMLock: false,
UseMMap: nil,
UseNUMA: false,
},
}
}
......
package gpu
import (
"os"
"path/filepath"
"runtime"
"strings"
"golang.org/x/sys/cpu"
)
......@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
// else LCD
return CPUCapabilityNone
}
// IsNUMA reports whether the host exposes more than one distinct physical CPU
// package ID, which this package uses as its indicator of a NUMA system.
//
// It always returns false on non-Linux platforms. On Linux it reads each CPU's
// topology/physical_package_id file under /sys/devices/system/cpu and returns
// true as soon as two different IDs have been observed. Files that cannot be
// read are skipped, so an unreadable sysfs tree yields false.
func IsNUMA() bool {
	if runtime.GOOS != "linux" {
		// numa support in llama.cpp is linux only
		return false
	}

	// Glob can only fail with ErrBadPattern; the pattern is a constant and
	// well-formed, so the error is deliberately ignored.
	paths, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")

	// Set of distinct package IDs seen so far.
	seen := make(map[string]struct{})
	for _, path := range paths {
		raw, err := os.ReadFile(path)
		if err != nil {
			// Best effort: a CPU whose topology file is unreadable is ignored.
			continue
		}
		seen[strings.TrimSpace(string(raw))] = struct{}{}
		if len(seen) > 1 {
			// A second package ID settles the answer; no need to read the
			// remaining CPUs.
			return true
		}
	}
	return false
}
......@@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--mlock")
}
if opts.UseNUMA {
params = append(params, "--numa")
if gpu.IsNUMA() {
numaMode := "distribute"
if runtime.GOOS == "linux" {
if _, err := exec.LookPath("numactl"); err == nil {
numaMode = "numactl"
}
}
params = append(params, "--numa", numaMode)
}
params = append(params, "--parallel", strconv.Itoa(numParallel))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment