"llama/llama.cpp/vscode:/vscode.git/clone" did not exist on "544b6739dde2a6b156b1673c72d94949c1940be7"
Commit acef9b4c authored by Jesse Gross, committed by Jesse Gross

ggml: Use assigned layers when reporting loading stats

Reporting params.NumGPULayers can be misleading because it is the
requested number of layers, not the number actually loaded. The two
are often the same, but they can differ, for example when the GPU
backend is missing.
parent 9a43994c
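
The core of the change, as a minimal standalone sketch (hypothetical types and names throughout; the real code below queries device assignments through cgo via C.ggml_backend_dev_type): count the layers that were actually assigned to a GPU device instead of echoing the requested params.NumGPULayers.

package main

import (
	"fmt"
	"log/slog"
)

// devType is a hypothetical stand-in for ggml's device type enum
// (the C.GGML_BACKEND_DEVICE_TYPE_* constants used via cgo in the real code).
type devType int

const (
	devCPU devType = iota
	devGPU
	devAccel
)

type layer struct{ dev devType }

func main() {
	requested := 32 // params.NumGPULayers: what the caller asked for

	// Suppose the GPU backend failed to load, so every layer was
	// assigned to the CPU device despite the request.
	layers := make([]layer, 32) // zero value is devCPU
	output := layer{dev: devCPU}

	// Count what was actually assigned, not what was requested.
	gpuLayers := 0
	for _, l := range layers {
		if l.dev == devGPU {
			gpuLayers++
		}
	}
	if output.dev == devGPU {
		gpuLayers++
	}

	slog.Info(fmt.Sprintf("requested %d GPU layers", requested))
	slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
	// Logs 0/33 offloaded rather than the misleading requested value of 32.
}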
@@ -356,23 +356,25 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 	}
 
 	// Mimic llama runner logs summarizing layers and memory
-	slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1)))
+	gpuLayers := 0
+	for _, layer := range layers {
+		if C.ggml_backend_dev_type(layer.d) == C.GGML_BACKEND_DEVICE_TYPE_GPU {
+			gpuLayers++
+		}
+	}
+	slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", gpuLayers))
 
 	switch C.ggml_backend_dev_type(output.d) {
-	case 0: // CPU
+	case C.GGML_BACKEND_DEVICE_TYPE_CPU:
 		slog.Info("offloading output layer to CPU")
-	case 1: // GPU
+	case C.GGML_BACKEND_DEVICE_TYPE_GPU:
 		slog.Info("offloading output layer to GPU")
-	case 2: // ACCEL
+		gpuLayers++
+	case C.GGML_BACKEND_DEVICE_TYPE_ACCEL:
 		slog.Info("offloading output layer to ACCEL")
 	}
 
-	for _, layer := range layers {
-		if C.ggml_backend_dev_type(layer.d) == 1 {
-			gpuLayers++
-		}
-	}
 	slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
 
 	for bs := range maps.Values(bbs) {
 		slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs))))
 	}
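
In the new version, the first log line counts only the repeating layers assigned to a GPU; the switch then folds the output layer into the same counter when it lands on a GPU device; and the denominator len(layers)+1 in the summary is the repeating layers plus that single output layer. Purely as an illustration (assumed values, not output from a real run), a model with 32 repeating layers fully offloaded would log:

	offloading 32 repeating layers to GPU
	offloading output layer to GPU
	offloaded 33/33 layers to GPU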