0014-graph-memory-reporting-on-failure.patch 6.08 KB
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@ollama.com>
Date: Fri, 18 Apr 2025 15:58:19 -0700
Subject: [PATCH] graph memory reporting on failure

---
 ggml/include/ggml-alloc.h   |  1 +
 ggml/include/ggml-backend.h |  1 +
 ggml/src/ggml-alloc.c       | 34 +++++++++++++++++++++++++++++++---
 ggml/src/ggml-backend.cpp   |  7 +++++++
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/ggml/include/ggml-alloc.h b/ggml/include/ggml-alloc.h
index 2cb150fd2..7ab3f0192 100644
--- a/ggml/include/ggml-alloc.h
+++ b/ggml/include/ggml-alloc.h
@@ -65,6 +65,7 @@ GGML_API bool ggml_gallocr_reserve_n(
 GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
 
 GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
+GGML_API size_t ggml_gallocr_get_attempted_buffer_size(ggml_gallocr_t galloc, int buffer_id);
 
 // Utils
 // Create a buffer and allocate all the tensors in a ggml_context
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index f1b740785..c54ff98bf 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -318,6 +318,7 @@ extern "C" {
 
     GGML_API ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend);
     GGML_API size_t                     ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
+    GGML_API size_t                     ggml_backend_sched_get_attempted_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
 
     GGML_API void                 ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
     GGML_API ggml_backend_t       ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index c830c0965..363853873 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -486,6 +486,7 @@ struct node_alloc {
 struct ggml_gallocr {
     ggml_backend_buffer_type_t * bufts; // [n_buffers]
     struct vbuffer ** buffers; // [n_buffers]
+    size_t *buffer_sizes; // [n_buffers]
     struct ggml_dyn_tallocr ** buf_tallocs; // [n_buffers]
     int n_buffers;
 
@@ -509,6 +510,9 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
     galloc->buffers = calloc(n_bufs, sizeof(struct vbuffer *));
     GGML_ASSERT(galloc->buffers != NULL);
 
+    galloc->buffer_sizes = calloc(n_bufs, sizeof(size_t));
+    GGML_ASSERT(galloc->buffer_sizes != NULL);
+
     galloc->buf_tallocs = calloc(n_bufs, sizeof(struct ggml_dyn_tallocr *));
     GGML_ASSERT(galloc->buf_tallocs != NULL);
 
@@ -576,6 +580,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
     ggml_hash_set_free(&galloc->hash_set);
     free(galloc->hash_values);
     free(galloc->bufts);
+    free(galloc->buffer_sizes);
     free(galloc->buffers);
     free(galloc->buf_tallocs);
     free(galloc->node_allocs);
@@ -891,6 +896,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
         }
     }
 
+    bool success = true;
+
     // reallocate buffers if needed
     for (int i = 0; i < galloc->n_buffers; i++) {
         // if the buffer type is used multiple times, we reuse the same buffer
@@ -920,14 +927,19 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
 
             ggml_vbuffer_free(galloc->buffers[i]);
             galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
-            if (galloc->buffers[i] == NULL) {
+            if (galloc->buffers[i]) {
+                galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
+            } else {
                 GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
-                return false;
+                galloc->buffer_sizes[i] = new_size;
+                success = false;
             }
+        } else {
+            galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
         }
     }
 
-    return true;
+    return success;
 }
 
 bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
@@ -1082,6 +1094,22 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
     return ggml_vbuffer_size(galloc->buffers[buffer_id]);
 }
 
+size_t ggml_gallocr_get_attempted_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
+    GGML_ASSERT(buffer_id >= 0 && buffer_id < galloc->n_buffers);
+
+    for (int i = 0; i < buffer_id; i++) {
+        if (galloc->buf_tallocs[i] == galloc->buf_tallocs[buffer_id]) {
+            // This buffer is the same as a previous one due to the same buffer type being used multiple times
+            // (See above.) However, we need a different check because multiple buffers might be NULL in our
+            // case and we still want to know the attempted size.
+
+            return 0;
+        }
+    }
+
+    return galloc->buffer_sizes[buffer_id];
+}
+
 // utils
 
 static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 8ba86f824..cb2b99562 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -1809,6 +1809,13 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
     return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
 }
 
+size_t ggml_backend_sched_get_attempted_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
+    int backend_index = ggml_backend_sched_backend_id(sched, backend);
+    GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
+
+    return ggml_gallocr_get_attempted_buffer_size(sched->galloc, backend_index);
+}
+
 void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
     GGML_ASSERT(sched);
     int backend_index = ggml_backend_sched_backend_id(sched, backend);