Commit acef9b4c authored by Jesse Gross's avatar Jesse Gross Committed by Jesse Gross
Browse files

ggml: Use assigned layers when reporting loading stats

Reporting params.NumGPULayers can be misleading because it is the
requested number of layers, not the actual number that is loaded.
While they are often the same, there are cases where they might mismatch,
such as if the GPU backend is missing.
parent 9a43994c
...@@ -356,23 +356,25 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { ...@@ -356,23 +356,25 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
} }
// Mimic llama runner logs summarizing layers and memory // Mimic llama runner logs summarizing layers and memory
slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", max(0, params.NumGPULayers-1)))
gpuLayers := 0 gpuLayers := 0
for _, layer := range layers {
if C.ggml_backend_dev_type(layer.d) == C.GGML_BACKEND_DEVICE_TYPE_GPU {
gpuLayers++
}
}
slog.Info(fmt.Sprintf("offloading %d repeating layers to GPU", gpuLayers))
switch C.ggml_backend_dev_type(output.d) { switch C.ggml_backend_dev_type(output.d) {
case 0: // CPU case C.GGML_BACKEND_DEVICE_TYPE_CPU:
slog.Info("offloading output layer to CPU") slog.Info("offloading output layer to CPU")
case 1: // GPU case C.GGML_BACKEND_DEVICE_TYPE_GPU:
slog.Info("offloading output layer to GPU") slog.Info("offloading output layer to GPU")
gpuLayers++ gpuLayers++
case 2: // ACCEL case C.GGML_BACKEND_DEVICE_TYPE_ACCEL:
slog.Info("offloading output layer to ACCEL") slog.Info("offloading output layer to ACCEL")
} }
for _, layer := range layers {
if C.ggml_backend_dev_type(layer.d) == 1 {
gpuLayers++
}
}
slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1)) slog.Info(fmt.Sprintf("offloaded %d/%d layers to GPU", gpuLayers, len(layers)+1))
for bs := range maps.Values(bbs) { for bs := range maps.Values(bbs) {
slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs)))) slog.Info("model weights", "buffer", C.GoString(C.ggml_backend_buffer_name(bs)), "size", format.HumanBytes2(uint64(C.ggml_backend_buffer_get_size(bs))))
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment