Implement linux NUMA detection

If the system has multiple NUMA nodes, enable NUMA support in llama.cpp. If we detect numactl in the path, use that; otherwise use the basic "distribute" mode.

Implement linux NUMA detection
If the system has multiple NUMA nodes, enable NUMA support in llama.cpp. If we detect numactl in the path, use that; otherwise use the basic "distribute" mode.
f457d634 · Daniel Hiltgen · 39f2bc6b · f457d634 · f457d634 · f457d634
Commit f457d634 authored Aug 05, 2024 by Daniel Hiltgen
Hide whitespace changes
Inline Side-by-side

Showing with 29 additions and 4 deletions

api/types.go api/types.go +0 -2

gpu/cpu_common.go gpu/cpu_common.go +21 -0

llm/server.go llm/server.go +8 -2

No files found.
--- a/api/types.go
+++ b/api/types.go
@@ -231,7 +231,6 @@ type Options struct {

 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	UseNUMA   bool  `json:"numa,omitempty"`
 	NumCtx    int   `json:"num_ctx,omitempty"`
 	NumBatch  int   `json:"num_batch,omitempty"`
 	NumGPU    int   `json:"num_gpu,omitempty"`
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
 			F16KV:     true,
 			UseMLock:  false,
 			UseMMap:   nil,
-			UseNUMA:   false,
 		},
 	}
 }

--- a/gpu/cpu_common.go
+++ b/gpu/cpu_common.go
 package gpu

 import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
 	"golang.org/x/sys/cpu"
 )

@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
+
+func IsNUMA() bool { // reports true when more than one distinct physical_package_id value is read from sysfs
+	if runtime.GOOS != "linux" {
+		// numa support in llama.cpp is linux only
+		return false
+	}
+	ids := map[string]interface{}{} // used as a set: only the keys matter
+	packageIds, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id") // glob error is discarded; with no matches the set stays empty and the result is false
+	for _, packageId := range packageIds {
+		id, err := os.ReadFile(packageId)
+		if err == nil { // unreadable entries are skipped, not reported
+			ids[strings.TrimSpace(string(id))] = struct{}{} // trim surrounding whitespace so equal ids collapse to one key
+		}
+	}
+	return len(ids) > 1 // NOTE(review): this counts distinct CPU package ids as a stand-in for NUMA nodes — confirm the two agree on target systems
+}
--- a/llm/server.go
+++ b/llm/server.go
@@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}

-	if opts.UseNUMA {
-		params = append(params, "--numa")
+	if gpu.IsNUMA() {
+		numaMode := "distribute"
+		if runtime.GOOS == "linux" {
+			if _, err := exec.LookPath("numactl"); err == nil {
+				numaMode = "numactl"
+			}
+		}
+		params = append(params, "--numa", numaMode)
 	}

 	params = append(params, "--parallel", strconv.Itoa(numParallel))