0001-ggml-backend-malloc-and-free-using-the-same-compiler.patch 8.46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Thu, 6 Jun 2024 23:55:47 -0700
Subject: [PATCH] ggml-backend: malloc and free using the same compiler

On Windows, the CUDA backend must be compiled with MSVC but generic
portions compiled with CGo use either GCC or Clang. Since
ggml_backend_buffer_t spans these two components, it can be allocated
and freed using different compilers. Specifically, it is malloced by
MSVC and freed by Clang, which can cause problems.

This moves freeing of the buffers into the backends to avoid the
problem.
---
15
16
17
18
19
20
21
22
23
 ggml/src/ggml-backend.cpp            | 9 +++++++--
 ggml/src/ggml-cann/ggml-cann.cpp     | 2 ++
 ggml/src/ggml-cuda/ggml-cuda.cu      | 3 +++
 ggml/src/ggml-metal/ggml-metal.m     | 1 +
 ggml/src/ggml-opencl/ggml-opencl.cpp | 1 +
 ggml/src/ggml-rpc/ggml-rpc.cpp       | 1 +
 ggml/src/ggml-sycl/ggml-sycl.cpp     | 3 +++
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 ++
 8 files changed, 20 insertions(+), 2 deletions(-)
24
25

diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
26
index 1b9d29e9..97f47abd 100644
27
28
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
29
@@ -107,7 +107,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
30
31
32
33
34
35
36
     if (buffer->iface.free_buffer != NULL) {
         buffer->iface.free_buffer(buffer);
     }
-    delete buffer;
 }
 
 size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
37
@@ -529,6 +528,7 @@ static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer)
38
39
40
41
42
43
44
 
     free(ctx->buffers);
     free(ctx);
+    delete buffer;
 }
 
 static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
45
@@ -1890,6 +1890,11 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
46
47
48
49
50
51
52
53
54
55
56
 
 static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_aligned_free(buffer->context, buffer->size);
+    delete buffer;
+}
+
+static void ggml_backend_cpu_ptr_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    delete buffer;
 }
 
 static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
57
@@ -1937,7 +1942,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
58
59
60
61
62
63
64
65
66
 };
 
 static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
-    /* .free_buffer     = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
+    /* .free_buffer     = */ ggml_backend_cpu_ptr_buffer_free_buffer, // ptr is not owned by the buffer but need to free the buffer itself
     /* .get_base        = */ ggml_backend_cpu_buffer_get_base,
     /* .init_tensor     = */ NULL, // no initialization required
     /* .memset_tensor   = */ ggml_backend_cpu_buffer_memset_tensor,
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
67
index cf575b36..ca1addfa 100755
68
69
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
70
@@ -826,6 +826,7 @@ static void ggml_backend_cann_buffer_free_buffer(
71
72
73
74
75
76
77
     ggml_backend_cann_buffer_context* ctx =
         (ggml_backend_cann_buffer_context*)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 /**
78
@@ -1572,6 +1573,7 @@ static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buf
79
80
81
82
83
84
85
86
  */
 static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
     ACL_CHECK(aclrtFreeHost(buffer->context));
+    delete buffer;
 }
 
 /**
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
87
index d9110491..37ee2a6d 100644
88
89
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
90
@@ -567,6 +567,7 @@ struct ggml_backend_cuda_buffer_context {
91
92
93
94
95
96
97
 static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) {
98
@@ -822,6 +823,7 @@ struct ggml_backend_cuda_split_buffer_context {
99
100
101
102
103
104
105
 static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buffer) {
106
@@ -1103,6 +1105,7 @@ static bool ggml_backend_buft_is_cuda_host(ggml_backend_buffer_type_t buft) {
107
108
109
110
111
112
113
114
 
 static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     CUDA_CHECK(cudaFreeHost(buffer->context));
+    delete buffer;
 }
 
 static void * ggml_cuda_host_malloc(size_t size) {
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
115
index cb8eff4a..7bccc7bf 100644
116
117
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
118
@@ -6032,6 +6032,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
119
120
121
122
123
124
125
126
     }
 
     free(ctx);
+    free(buffer);
 }
 
 static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp
127
index 8ba1e00d..8163e8dc 100644
128
129
--- a/ggml/src/ggml-opencl/ggml-opencl.cpp
+++ b/ggml/src/ggml-opencl/ggml-opencl.cpp
130
@@ -2745,6 +2745,7 @@ struct ggml_backend_opencl_buffer_context {
131
132
133
134
135
136
137
138
 static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp
139
index df6ba540..2e395968 100644
140
141
--- a/ggml/src/ggml-rpc/ggml-rpc.cpp
+++ b/ggml/src/ggml-rpc/ggml-rpc.cpp
142
@@ -486,6 +486,7 @@ static void ggml_backend_rpc_buffer_free_buffer(ggml_backend_buffer_t buffer) {
143
     bool status = send_rpc_cmd(ctx->sock, RPC_CMD_FREE_BUFFER, &request, sizeof(request), nullptr, 0);
144
     RPC_STATUS_ASSERT(status);
145
146
147
148
149
150
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_rpc_buffer_get_base(ggml_backend_buffer_t buffer) {
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
151
index 3992dad0..67503951 100644
152
153
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
154
@@ -331,6 +331,7 @@ ggml_backend_sycl_buffer_free_buffer(ggml_backend_buffer_t buffer) try {
155
156
157
158
159
160
161
     ggml_sycl_set_device(ctx->device);
 
     delete ctx;
+    delete buffer;
 }
 catch (sycl::exception const &exc) {
   std::cerr << exc.what() << "Exception caught at file:" << __FILE__
162
@@ -792,6 +793,7 @@ struct ggml_backend_sycl_split_buffer_context {
163
164
165
166
167
168
169
 static void ggml_backend_sycl_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buffer) {
170
@@ -1134,6 +1136,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_
171
172
173
174
175
176
177
178
 
 static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     ggml_sycl_host_free(buffer->context);
+    delete buffer;
 }
 
 static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
179
index 4070e248..394a2839 100644
180
181
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
182
@@ -10209,6 +10209,7 @@ static void ggml_backend_vk_buffer_free_buffer(ggml_backend_buffer_t buffer) {
183
184
185
186
187
188
189
     ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
     ggml_vk_destroy_buffer(ctx->dev_buffer);
     delete ctx;
+    delete buffer;
 }
 
 static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t buffer) {
190
@@ -10352,6 +10353,7 @@ static const char * ggml_backend_vk_host_buffer_name(ggml_backend_buffer_t buffe
191
192
193
194
195
196
197
 static void ggml_backend_vk_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     VK_LOG_MEMORY("ggml_backend_vk_host_buffer_free_buffer()");
     ggml_vk_host_free(vk_instance.devices[0], buffer->context);
+    delete buffer;
 }
 
 static ggml_backend_buffer_t ggml_backend_vk_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {