Unverified Commit 850da848 authored by Daniel Hiltgen, committed by GitHub

logs: fix bogus "0 MiB free" log line (#12590)

On the llama runner, after the recent GGML bump, a new log line reports an
incorrect "0 MiB free" because our patch removes the memory information from the
device props. This adjusts the llama.cpp code to fetch the actual free memory of
the active device instead.
parent 2aba569a
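
For context, the fix switches from reading memory_free out of ggml_backend_dev_props
(which the props patch no longer populates) to querying each device directly. Below is
a minimal sketch against the public ggml-backend API; the standalone loop over
registered devices is an assumption for illustration, not the actual runner code.

    // Sketch: log per-device free memory via a direct query instead of props.
    // Assumes a ggml build that exposes ggml_backend_dev_memory().
    #include <cstdio>
    #include "ggml-backend.h"

    static void log_free_memory() {
        for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            size_t memory_free = 0, memory_total = 0;
            // Queries the backend itself for live figures, so the value is
            // correct even when the device props omit memory information.
            ggml_backend_dev_memory(dev, &memory_free, &memory_total);
            printf("%s: %zu MiB free / %zu MiB total\n",
                   ggml_backend_dev_name(dev),
                   memory_free / 1024 / 1024, memory_total / 1024 / 1024);
        }
    }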
@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl(
     for (auto * dev : model->devices) {
         ggml_backend_dev_props props;
         ggml_backend_dev_get_props(dev, &props);
+        size_t memory_free, memory_total;
+        ggml_backend_dev_memory(dev, &memory_free, &memory_total);
         LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
             ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
             props.device_id ? props.device_id : "unknown id",
-            props.memory_free/1024/1024);
+            memory_free/1024/1024);
     }
     const int status = llama_model_load(path_model, splits, *model, params);
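
The hunk above also shows why the old line logged 0: with the props patch applied,
props.memory_free is never filled in, while a direct query still reaches the backend.
Restating the two paths in isolation (a hedged sketch, with dev standing in for the
ggml_backend_dev_t from the loop above):

    // Before: value comes from props, which the props patch leaves at 0.
    ggml_backend_dev_props props;
    ggml_backend_dev_get_props(dev, &props);
    size_t bogus_mib = props.memory_free / 1024 / 1024;   // logs "0 MiB free"

    // After: ask the device directly; unaffected by the props patch.
    size_t memory_free = 0, memory_total = 0;
    ggml_backend_dev_memory(dev, &memory_free, &memory_total);
    size_t real_mib = memory_free / 1024 / 1024;          // actual free MiB

The rest of the commit folds the same two-line change into the vendored patch file
below, so a regenerated tree keeps the corrected log line.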
@@ -12,10 +12,11 @@ unused then it can be reset to free these data structures.
  ggml/src/ggml-backend.cpp        |  8 ++++++++
  ggml/src/ggml-cuda/ggml-cuda.cu  | 16 +++++++++++++++-
  ggml/src/ggml-cuda/vendors/hip.h |  1 +
-5 files changed, 29 insertions(+), 1 deletion(-)
+ src/llama.cpp                    |  4 +++-
+6 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 1ff53ed0..ba181d09 100644
+index 1ff53ed03..ba181d09d 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -178,6 +178,7 @@ extern "C" {
@@ -27,7 +28,7 @@ index 1ff53ed0..ba181d09 100644
     GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
     GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
-index 3c3f22fc..43c91d9f 100644
+index 3c3f22fc0..43c91d9f2 100644
--- a/ggml/src/ggml-backend-impl.h
+++ b/ggml/src/ggml-backend-impl.h
@@ -195,6 +195,10 @@ extern "C" {
@@ -42,7 +43,7 @@ index 3c3f22fc..43c91d9f 100644
 struct ggml_backend_device {
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
-index 6ef5eeaf..0b757af5 100644
+index 6ef5eeafa..0b757af59 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -526,6 +526,14 @@ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * par
@@ -61,7 +62,7 @@ index 6ef5eeaf..0b757af5 100644
     GGML_ASSERT(device);
     return device->iface.get_buffer_type(device);
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 811462c7..87c6c34a 100644
+index 811462c79..87c6c34a4 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -107,6 +107,11 @@ int ggml_cuda_get_device() {
@@ -109,7 +110,7 @@ index 811462c7..87c6c34a 100644
 // backend reg
diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h
-index 890c1036..1f06be80 100644
+index 890c10364..1f06be80e 100644
--- a/ggml/src/ggml-cuda/vendors/hip.h
+++ b/ggml/src/ggml-cuda/vendors/hip.h
@@ -45,6 +45,7 @@
@@ -120,3 +121,21 @@ index 890c1036..1f06be80 100644
 #define cudaDeviceSynchronize hipDeviceSynchronize
 #define cudaError_t hipError_t
 #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
diff --git a/src/llama.cpp b/src/llama.cpp
index fe5a7a835..d821a96a0 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl(
     for (auto * dev : model->devices) {
         ggml_backend_dev_props props;
         ggml_backend_dev_get_props(dev, &props);
+        size_t memory_free, memory_total;
+        ggml_backend_dev_memory(dev, &memory_free, &memory_total);
         LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
             ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
             props.device_id ? props.device_id : "unknown id",
-            props.memory_free/1024/1024);
+            memory_free/1024/1024);
     }
     const int status = llama_model_load(path_model, splits, *model, params);
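
For reference, on the CUDA backend this direct query ultimately comes from the
driver. A rough sketch of what it boils down to (the real ggml implementation
also handles device bookkeeping; this is an assumption-level illustration):

    // cudaMemGetInfo reports current free/total bytes for the active device.
    // On HIP builds, vendors/hip.h maps the cuda* names to hip* equivalents.
    #include <cuda_runtime.h>
    #include <cstddef>

    static void device_memory(int device, size_t * free_b, size_t * total_b) {
        cudaSetDevice(device);           // select the device being queried
        cudaMemGetInfo(free_b, total_b); // live values from the CUDA driver
    }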