0001-ggml-backend-malloc-and-free-using-the-same-compiler.patch 9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Thu, 6 Jun 2024 23:55:47 -0700
Subject: [PATCH] ggml-backend: malloc and free using the same compiler

On Windows, the CUDA backend must be compiled with MSVC but generic
portions compiled with CGo use either GCC or Clang. Since
ggml_backend_buffer_t spans these two components, it can be allocated
and freed using different compilers. Specifically, it is malloced by
MSVC and freed by Clang, which can cause problems.

This moves freeing of the buffers into the backends to avoid the
problem.
---
 ggml/src/ggml-backend.cpp              | 9 +++++++--
 ggml/src/ggml-cann/ggml-cann.cpp       | 2 ++
 ggml/src/ggml-cuda/ggml-cuda.cu        | 3 +++
 ggml/src/ggml-kompute/ggml-kompute.cpp | 1 +
 ggml/src/ggml-metal/ggml-metal.m       | 1 +
 ggml/src/ggml-opencl/ggml-opencl.cpp   | 1 +
 ggml/src/ggml-rpc/ggml-rpc.cpp         | 1 +
 ggml/src/ggml-sycl/ggml-sycl.cpp       | 3 +++
 ggml/src/ggml-vulkan/ggml-vulkan.cpp   | 2 ++
 9 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
27
index 273075f4..dd11f304 100644
28
29
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
30
@@ -107,7 +107,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
31
32
33
34
35
36
37
     if (buffer->iface.free_buffer != NULL) {
         buffer->iface.free_buffer(buffer);
     }
-    delete buffer;
 }
 
 size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
38
@@ -544,6 +543,7 @@ static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer)
39
40
41
42
43
44
45
 
     free(ctx->buffers);
     free(ctx);
+    delete buffer;
 }
 
 static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
46
@@ -1867,6 +1867,11 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
47
48
49
50
51
52
53
54
55
56
57
 
 static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_aligned_free(buffer->context, buffer->size);
+    delete buffer;
+}
+
+static void ggml_backend_cpu_ptr_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    delete buffer;
 }
 
 static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
58
@@ -1914,7 +1919,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
59
60
61
62
63
64
65
66
67
 };
 
 static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
-    /* .free_buffer     = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
+    /* .free_buffer     = */ ggml_backend_cpu_ptr_buffer_free_buffer, // ptr is not owned by the buffer but need to free the buffer itself
     /* .get_base        = */ ggml_backend_cpu_buffer_get_base,
     /* .init_tensor     = */ NULL, // no initialization required
     /* .memset_tensor   = */ ggml_backend_cpu_buffer_memset_tensor,
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
68
index e2617b06..242e50a7 100644
69
70
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
71
@@ -800,6 +800,7 @@ static void ggml_backend_cann_buffer_free_buffer(
72
73
74
75
76
77
78
     ggml_backend_cann_buffer_context* ctx =
         (ggml_backend_cann_buffer_context*)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 /**
79
@@ -1472,6 +1473,7 @@ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buf
80
81
82
83
84
85
86
87
  */
 static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
     ACL_CHECK(aclrtFreeHost(buffer->context));
+    delete buffer;
 }
 
 /**
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
88
index 9fb2134f..04ce764e 100644
89
90
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
91
@@ -534,6 +534,7 @@ struct ggml_backend_cuda_buffer_context {
92
93
94
95
96
97
98
 static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) {
99
@@ -789,6 +790,7 @@ struct ggml_backend_cuda_split_buffer_context {
100
101
102
103
104
105
106
 static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buffer) {
107
@@ -1062,6 +1064,7 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
 
 static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     CUDA_CHECK(cudaFreeHost(buffer->context));
+    delete buffer;
 }
 
 static void * ggml_cuda_host_malloc(size_t size) {
diff --git a/ggml/src/ggml-kompute/ggml-kompute.cpp b/ggml/src/ggml-kompute/ggml-kompute.cpp
index 50579227..2799a0a5 100644
--- a/ggml/src/ggml-kompute/ggml-kompute.cpp
+++ b/ggml/src/ggml-kompute/ggml-kompute.cpp
@@ -1911,6 +1911,7 @@ static void ggml_backend_kompute_buffer_free_buffer(ggml_backend_buffer_t buffer
         ggml_vk_free_memory(*memory);
     }
     delete memory;
+    delete buffer;
 }
 
 static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
128
index d92392ed..425524d0 100644
129
130
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
131
@@ -5077,6 +5077,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
132
133
134
135
136
137
138
139
     }
 
     free(ctx);
+    free(buffer);
 }
 
 static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
140
index 05a2f4e6..392cc18d 100644
141
142
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
143
@@ -1940,6 +1940,7 @@ struct ggml_backend_opencl_buffer_context {
144
145
146
147
148
149
150
151
 static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp
152
index 140a775f..e33c4ba0 100644
153
154
--- a/ggml/src/ggml-rpc/ggml-rpc.cpp
+++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
155
@@ -477,6 +477,7 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
156
157
158
159
160
161
162
163
     bool status = send_rpc_cmd(ctx->sock, RPC_CMD_FREE_BUFFER, &request, sizeof(request), nullptr, 0);
     GGML_ASSERT(status);
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
164
index 66b6f2cc..e3e6deae 100644
165
166
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
167
@@ -317,6 +317,7 @@ ggml_backend_sycl_buffer_free_buffer(ggml_backend_buffer_t buffer) try {
168
169
170
171
172
173
174
     ggml_sycl_set_device(ctx->device);
 
     delete ctx;
+    delete buffer;
 }
 catch (sycl::exception const &exc) {
   std::cerr << exc.what() << "Exception caught at file:" << __FILE__
175
@@ -762,6 +763,7 @@ struct ggml_backend_sycl_split_buffer_context {
176
177
178
179
180
181
182
 static void ggml_backend_sycl_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buffer) {
183
@@ -1096,6 +1098,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_
184
185
186
187
188
189
190
191
 
 static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_sycl_host_free(buffer->context);
+    delete buffer;
 }
 
 static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
192
index c0bdb9e1..03d03064 100644
193
194
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
195
@@ -8660,6 +8660,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
196
197
198
199
200
201
202
     ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
     ggml_vk_destroy_buffer(ctx->dev_buffer);
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
203
@@ -8803,6 +8804,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
204
205
206
207
208
209
210
 static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     VK_LOG_MEMORY("ggml_backend_vk_host_buffer_free_buffer()");
     ggml_vk_host_free(vk_instance.devices[0], buffer->context);
+    delete buffer;
 }
 
 static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {