Unverified Commit 544b6739 authored by Daniel Hiltgen's avatar Daniel Hiltgen Committed by GitHub
Browse files

ggml update to b6840 (#12791)

parent c4ba257c
...@@ -219,7 +219,7 @@ index 41eef3b5f..c81a2e48a 100644 ...@@ -219,7 +219,7 @@ index 41eef3b5f..c81a2e48a 100644
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) { void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index e0abde542..e98044bd8 100644 index 41ff89c4d..2931c15ca 100644
--- a/ggml/src/ggml-cuda/common.cuh --- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh
@@ -35,6 +35,41 @@ @@ -35,6 +35,41 @@
...@@ -274,7 +274,7 @@ index e0abde542..e98044bd8 100644 ...@@ -274,7 +274,7 @@ index e0abde542..e98044bd8 100644
}; };
template<typename T> template<typename T>
@@ -999,11 +1037,11 @@ struct ggml_backend_cuda_context { @@ -992,11 +1030,11 @@ struct ggml_backend_cuda_context {
// pool // pool
std::unique_ptr<ggml_cuda_pool> pools[GGML_CUDA_MAX_DEVICES]; std::unique_ptr<ggml_cuda_pool> pools[GGML_CUDA_MAX_DEVICES];
...@@ -288,7 +288,7 @@ index e0abde542..e98044bd8 100644 ...@@ -288,7 +288,7 @@ index e0abde542..e98044bd8 100644
} }
return *pools[device]; return *pools[device];
} }
@@ -1011,4 +1049,20 @@ struct ggml_backend_cuda_context { @@ -1004,4 +1042,20 @@ struct ggml_backend_cuda_context {
ggml_cuda_pool & pool() { ggml_cuda_pool & pool() {
return pool(device); return pool(device);
} }
...@@ -310,10 +310,10 @@ index e0abde542..e98044bd8 100644 ...@@ -310,10 +310,10 @@ index e0abde542..e98044bd8 100644
+ } + }
}; };
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index c555cd30f..eb3db0f19 100644 index 02d413467..f79e5d65c 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu --- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -350,6 +350,8 @@ const ggml_cuda_device_info & ggml_cuda_info() { @@ -359,6 +359,8 @@ const ggml_cuda_device_info & ggml_cuda_info() {
// #define DEBUG_CUDA_MALLOC // #define DEBUG_CUDA_MALLOC
...@@ -322,7 +322,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -322,7 +322,7 @@ index c555cd30f..eb3db0f19 100644
// buffer pool for cuda (legacy) // buffer pool for cuda (legacy)
struct ggml_cuda_pool_leg : public ggml_cuda_pool { struct ggml_cuda_pool_leg : public ggml_cuda_pool {
static const int MAX_BUFFERS = 256; static const int MAX_BUFFERS = 256;
@@ -362,9 +364,12 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -371,9 +373,12 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
ggml_cuda_buffer buffer_pool[MAX_BUFFERS] = {}; ggml_cuda_buffer buffer_pool[MAX_BUFFERS] = {};
size_t pool_size = 0; size_t pool_size = 0;
...@@ -337,7 +337,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -337,7 +337,7 @@ index c555cd30f..eb3db0f19 100644
} }
~ggml_cuda_pool_leg() { ~ggml_cuda_pool_leg() {
@@ -372,7 +377,9 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -381,7 +386,9 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
for (int i = 0; i < MAX_BUFFERS; ++i) { for (int i = 0; i < MAX_BUFFERS; ++i) {
ggml_cuda_buffer & b = buffer_pool[i]; ggml_cuda_buffer & b = buffer_pool[i];
if (b.ptr != nullptr) { if (b.ptr != nullptr) {
...@@ -348,7 +348,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -348,7 +348,7 @@ index c555cd30f..eb3db0f19 100644
pool_size -= b.size; pool_size -= b.size;
} }
} }
@@ -420,8 +427,15 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -429,8 +436,15 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
void * ptr; void * ptr;
size_t look_ahead_size = (size_t) (1.05 * size); size_t look_ahead_size = (size_t) (1.05 * size);
look_ahead_size = 256 * ((look_ahead_size + 255)/256); look_ahead_size = 256 * ((look_ahead_size + 255)/256);
...@@ -366,7 +366,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -366,7 +366,7 @@ index c555cd30f..eb3db0f19 100644
*actual_size = look_ahead_size; *actual_size = look_ahead_size;
pool_size += look_ahead_size; pool_size += look_ahead_size;
#ifdef DEBUG_CUDA_MALLOC #ifdef DEBUG_CUDA_MALLOC
@@ -441,10 +455,20 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -450,10 +464,20 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
} }
} }
GGML_LOG_DEBUG(GGML_CUDA_NAME " buffer pool full, increase MAX_CUDA_BUFFERS\n"); GGML_LOG_DEBUG(GGML_CUDA_NAME " buffer pool full, increase MAX_CUDA_BUFFERS\n");
...@@ -389,7 +389,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -389,7 +389,7 @@ index c555cd30f..eb3db0f19 100644
}; };
// pool with virtual memory // pool with virtual memory
@@ -456,18 +480,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool { @@ -465,18 +489,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
CUdeviceptr pool_addr = 0; CUdeviceptr pool_addr = 0;
size_t pool_used = 0; size_t pool_used = 0;
size_t pool_size = 0; size_t pool_size = 0;
...@@ -417,7 +417,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -417,7 +417,7 @@ index c555cd30f..eb3db0f19 100644
#if defined(GGML_USE_HIP) #if defined(GGML_USE_HIP)
// Workaround for https://github.com/ROCm/ROCR-Runtime/issues/285 // Workaround for https://github.com/ROCm/ROCR-Runtime/issues/285
for (std::pair<CUdeviceptr, size_t> & mapping : mappings) { for (std::pair<CUdeviceptr, size_t> & mapping : mappings) {
@@ -494,35 +524,49 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool { @@ -503,35 +533,49 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
GGML_ASSERT(pool_size + reserve_size <= CUDA_POOL_VMM_MAX_SIZE); GGML_ASSERT(pool_size + reserve_size <= CUDA_POOL_VMM_MAX_SIZE);
...@@ -493,7 +493,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -493,7 +493,7 @@ index c555cd30f..eb3db0f19 100644
// add to the pool // add to the pool
pool_size += reserve_size; pool_size += reserve_size;
@@ -555,16 +599,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool { @@ -564,16 +608,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
// all deallocations must be in reverse order of the allocations // all deallocations must be in reverse order of the allocations
GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used)); GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
} }
...@@ -521,7 +521,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -521,7 +521,7 @@ index c555cd30f..eb3db0f19 100644
} }
// destroying a cuBLAS handle while a graph is being captured in a different thread can result in a CUDA error // destroying a cuBLAS handle while a graph is being captured in a different thread can result in a CUDA error
@@ -748,11 +800,20 @@ static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffer(ggml_bac @@ -757,11 +809,20 @@ static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffer(ggml_bac
} }
static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
...@@ -543,7 +543,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -543,7 +543,7 @@ index c555cd30f..eb3db0f19 100644
static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
size_t size = ggml_nbytes(tensor); size_t size = ggml_nbytes(tensor);
int64_t ne0 = tensor->ne[0]; int64_t ne0 = tensor->ne[0];
@@ -776,6 +837,7 @@ static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface @@ -785,6 +846,7 @@ static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface
/* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size, /* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
/* .is_host = */ NULL, /* .is_host = */ NULL,
...@@ -551,7 +551,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -551,7 +551,7 @@ index c555cd30f..eb3db0f19 100644
}; };
ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) { ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) {
@@ -3003,6 +3065,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, @@ -2986,6 +3048,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph, static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph,
bool & graph_evaluated_or_captured, bool & use_cuda_graph, bool & cuda_graph_update_required) { bool & graph_evaluated_or_captured, bool & use_cuda_graph, bool & cuda_graph_update_required) {
...@@ -559,7 +559,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -559,7 +559,7 @@ index c555cd30f..eb3db0f19 100644
// flag used to determine whether it is an integrated_gpu // flag used to determine whether it is an integrated_gpu
const bool integrated = ggml_cuda_info().devices[cuda_ctx->device].integrated; const bool integrated = ggml_cuda_info().devices[cuda_ctx->device].integrated;
@@ -3018,6 +3081,11 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx @@ -3001,6 +3064,11 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
continue; continue;
} }
...@@ -571,7 +571,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -571,7 +571,7 @@ index c555cd30f..eb3db0f19 100644
static bool disable_fusion = (getenv("GGML_CUDA_DISABLE_FUSION") != nullptr); static bool disable_fusion = (getenv("GGML_CUDA_DISABLE_FUSION") != nullptr);
if (!disable_fusion) { if (!disable_fusion) {
@@ -3144,6 +3212,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx @@ -3140,6 +3208,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph, int batch_size) { static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph, int batch_size) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
...@@ -579,7 +579,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -579,7 +579,7 @@ index c555cd30f..eb3db0f19 100644
ggml_cuda_set_device(cuda_ctx->device); ggml_cuda_set_device(cuda_ctx->device);
@@ -3223,6 +3292,71 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, @@ -3215,6 +3284,71 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
return GGML_STATUS_SUCCESS; return GGML_STATUS_SUCCESS;
} }
...@@ -651,7 +651,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -651,7 +651,7 @@ index c555cd30f..eb3db0f19 100644
static void ggml_backend_cuda_event_record(ggml_backend_t backend, ggml_backend_event_t event) { static void ggml_backend_cuda_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
@@ -3263,6 +3397,9 @@ static const ggml_backend_i ggml_backend_cuda_interface = { @@ -3255,6 +3389,9 @@ static const ggml_backend_i ggml_backend_cuda_interface = {
/* .event_record = */ ggml_backend_cuda_event_record, /* .event_record = */ ggml_backend_cuda_event_record,
/* .event_wait = */ ggml_backend_cuda_event_wait, /* .event_wait = */ ggml_backend_cuda_event_wait,
/* .graph_optimize = */ NULL, /* .graph_optimize = */ NULL,
......
...@@ -8,7 +8,7 @@ Subject: [PATCH] decode: disable output_all ...@@ -8,7 +8,7 @@ Subject: [PATCH] decode: disable output_all
1 file changed, 1 insertion(+), 2 deletions(-) 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index e7526e7d..53a5e3a9 100644 index bd348bcad..8b4a89d38 100644
--- a/src/llama-context.cpp --- a/src/llama-context.cpp
+++ b/src/llama-context.cpp +++ b/src/llama-context.cpp
@@ -974,8 +974,7 @@ int llama_context::decode(const llama_batch & batch_inp) { @@ -974,8 +974,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
......
...@@ -16,7 +16,7 @@ unused then it can be reset to free these data structures. ...@@ -16,7 +16,7 @@ unused then it can be reset to free these data structures.
6 files changed, 32 insertions(+), 2 deletions(-) 6 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 1ff53ed03..ba181d09d 100644 index b3b5b356a..69223c488 100644
--- a/ggml/include/ggml-backend.h --- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h
@@ -178,6 +178,7 @@ extern "C" { @@ -178,6 +178,7 @@ extern "C" {
...@@ -28,7 +28,7 @@ index 1ff53ed03..ba181d09d 100644 ...@@ -28,7 +28,7 @@ index 1ff53ed03..ba181d09d 100644
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device); GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size); GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
index 3c3f22fc0..43c91d9f2 100644 index 7bdf9d81f..21b35ac5c 100644
--- a/ggml/src/ggml-backend-impl.h --- a/ggml/src/ggml-backend-impl.h
+++ b/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h
@@ -195,6 +195,10 @@ extern "C" { @@ -195,6 +195,10 @@ extern "C" {
...@@ -43,7 +43,7 @@ index 3c3f22fc0..43c91d9f2 100644 ...@@ -43,7 +43,7 @@ index 3c3f22fc0..43c91d9f2 100644
struct ggml_backend_device { struct ggml_backend_device {
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 6ef5eeafa..0b757af59 100644 index c81a2e48a..9b0a9b91f 100644
--- a/ggml/src/ggml-backend.cpp --- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp
@@ -526,6 +526,14 @@ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * par @@ -526,6 +526,14 @@ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * par
...@@ -62,7 +62,7 @@ index 6ef5eeafa..0b757af59 100644 ...@@ -62,7 +62,7 @@ index 6ef5eeafa..0b757af59 100644
GGML_ASSERT(device); GGML_ASSERT(device);
return device->iface.get_buffer_type(device); return device->iface.get_buffer_type(device);
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 811462c79..87c6c34a4 100644 index f79e5d65c..c9333689f 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu --- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -107,6 +107,11 @@ int ggml_cuda_get_device() { @@ -107,6 +107,11 @@ int ggml_cuda_get_device() {
...@@ -77,7 +77,7 @@ index 811462c79..87c6c34a4 100644 ...@@ -77,7 +77,7 @@ index 811462c79..87c6c34a4 100644
static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) { static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) {
ggml_cuda_set_device(device); ggml_cuda_set_device(device);
cudaError_t err; cudaError_t err;
@@ -3515,7 +3520,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back @@ -3499,7 +3504,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
props->id = ggml_backend_cuda_device_get_id(dev); props->id = ggml_backend_cuda_device_get_id(dev);
props->type = ggml_backend_cuda_device_get_type(dev); props->type = ggml_backend_cuda_device_get_type(dev);
props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str(); props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
...@@ -89,7 +89,7 @@ index 811462c79..87c6c34a4 100644 ...@@ -89,7 +89,7 @@ index 811462c79..87c6c34a4 100644
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr; bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
#ifdef GGML_CUDA_NO_PEER_COPY #ifdef GGML_CUDA_NO_PEER_COPY
@@ -3948,6 +3956,11 @@ static void ggml_backend_cuda_device_event_synchronize(ggml_backend_dev_t dev, g @@ -3936,6 +3944,11 @@ static void ggml_backend_cuda_device_event_synchronize(ggml_backend_dev_t dev, g
CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context)); CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context));
} }
...@@ -101,7 +101,7 @@ index 811462c79..87c6c34a4 100644 ...@@ -101,7 +101,7 @@ index 811462c79..87c6c34a4 100644
static const ggml_backend_device_i ggml_backend_cuda_device_interface = { static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
/* .get_name = */ ggml_backend_cuda_device_get_name, /* .get_name = */ ggml_backend_cuda_device_get_name,
/* .get_description = */ ggml_backend_cuda_device_get_description, /* .get_description = */ ggml_backend_cuda_device_get_description,
@@ -3964,6 +3977,7 @@ static const ggml_backend_device_i ggml_backend_cuda_device_interface = { @@ -3952,6 +3965,7 @@ static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
/* .event_new = */ ggml_backend_cuda_device_event_new, /* .event_new = */ ggml_backend_cuda_device_event_new,
/* .event_free = */ ggml_backend_cuda_device_event_free, /* .event_free = */ ggml_backend_cuda_device_event_free,
/* .event_synchronize = */ ggml_backend_cuda_device_event_synchronize, /* .event_synchronize = */ ggml_backend_cuda_device_event_synchronize,
...@@ -122,10 +122,10 @@ index 890c10364..1f06be80e 100644 ...@@ -122,10 +122,10 @@ index 890c10364..1f06be80e 100644
#define cudaError_t hipError_t #define cudaError_t hipError_t
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
diff --git a/src/llama.cpp b/src/llama.cpp diff --git a/src/llama.cpp b/src/llama.cpp
index fe5a7a835..d821a96a0 100644 index ab2e9868a..74c49e651 100644
--- a/src/llama.cpp --- a/src/llama.cpp
+++ b/src/llama.cpp +++ b/src/llama.cpp
@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl( @@ -270,10 +270,12 @@ static struct llama_model * llama_model_load_from_file_impl(
for (auto * dev : model->devices) { for (auto * dev : model->devices) {
ggml_backend_dev_props props; ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props); ggml_backend_dev_get_props(dev, &props);
......
...@@ -8,7 +8,7 @@ Subject: [PATCH] harden uncaught exception registration ...@@ -8,7 +8,7 @@ Subject: [PATCH] harden uncaught exception registration
1 file changed, 6 insertions(+), 2 deletions(-) 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/ggml/src/ggml.cpp b/ggml/src/ggml.cpp diff --git a/ggml/src/ggml.cpp b/ggml/src/ggml.cpp
index 0d388d45..f5bcb446 100644 index 0d388d455..f5bcb446d 100644
--- a/ggml/src/ggml.cpp --- a/ggml/src/ggml.cpp
+++ b/ggml/src/ggml.cpp +++ b/ggml/src/ggml.cpp
@@ -19,8 +19,12 @@ static bool ggml_uncaught_exception_init = []{ @@ -19,8 +19,12 @@ static bool ggml_uncaught_exception_init = []{
......
...@@ -45,7 +45,7 @@ index 69223c488..6510e0cba 100644 ...@@ -45,7 +45,7 @@ index 69223c488..6510e0cba 100644
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device); GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 0609c6503..aefe43bdd 100644 index f9a6587f1..03f359ae9 100644
--- a/ggml/src/CMakeLists.txt --- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt
@@ -209,6 +209,8 @@ add_library(ggml-base @@ -209,6 +209,8 @@ add_library(ggml-base
...@@ -58,7 +58,7 @@ index 0609c6503..aefe43bdd 100644 ...@@ -58,7 +58,7 @@ index 0609c6503..aefe43bdd 100644
target_include_directories(ggml-base PRIVATE .) target_include_directories(ggml-base PRIVATE .)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 5787e8cd5..d232bf828 100644 index c9333689f..41b00af83 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu --- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() { @@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
...@@ -90,7 +90,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -90,7 +90,7 @@ index 5787e8cd5..d232bf828 100644
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n", GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
ggml_cuda_parse_uuid(prop, id).c_str()); ggml_cuda_parse_uuid(prop, id).c_str());
@@ -3476,6 +3491,11 @@ struct ggml_backend_cuda_device_context { @@ -3468,6 +3483,11 @@ struct ggml_backend_cuda_device_context {
std::string description; std::string description;
std::string pci_bus_id; std::string pci_bus_id;
std::string id; std::string id;
...@@ -102,7 +102,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -102,7 +102,7 @@ index 5787e8cd5..d232bf828 100644
}; };
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -3496,6 +3516,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { @@ -3488,6 +3508,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device); ggml_cuda_set_device(ctx->device);
...@@ -131,7 +131,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -131,7 +131,7 @@ index 5787e8cd5..d232bf828 100644
CUDA_CHECK(cudaMemGetInfo(free, total)); CUDA_CHECK(cudaMemGetInfo(free, total));
} }
@@ -3504,6 +3546,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend @@ -3496,6 +3538,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
return GGML_BACKEND_DEVICE_TYPE_GPU; return GGML_BACKEND_DEVICE_TYPE_GPU;
} }
...@@ -139,7 +139,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -139,7 +139,7 @@ index 5787e8cd5..d232bf828 100644
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
@@ -3517,6 +3560,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back @@ -3509,6 +3552,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
// If you need the memory data, call ggml_backend_dev_memory() explicitly. // If you need the memory data, call ggml_backend_dev_memory() explicitly.
props->memory_total = props->memory_free = 0; props->memory_total = props->memory_free = 0;
...@@ -159,7 +159,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -159,7 +159,7 @@ index 5787e8cd5..d232bf828 100644
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr; bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
#ifdef GGML_CUDA_NO_PEER_COPY #ifdef GGML_CUDA_NO_PEER_COPY
bool events = false; bool events = false;
@@ -4079,6 +4135,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { @@ -4075,6 +4131,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
std::lock_guard<std::mutex> lock(mutex); std::lock_guard<std::mutex> lock(mutex);
if (!initialized) { if (!initialized) {
ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context; ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
...@@ -167,7 +167,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -167,7 +167,7 @@ index 5787e8cd5..d232bf828 100644
for (int i = 0; i < ggml_cuda_info().device_count; i++) { for (int i = 0; i < ggml_cuda_info().device_count; i++) {
ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context; ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
@@ -4094,6 +4151,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { @@ -4090,6 +4147,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID); snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
dev_ctx->pci_bus_id = pci_bus_id; dev_ctx->pci_bus_id = pci_bus_id;
...@@ -204,11 +204,11 @@ index 1f06be80e..2f9ef2dc0 100644 ...@@ -204,11 +204,11 @@ index 1f06be80e..2f9ef2dc0 100644
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index d0fb3bcca..b63edd0c1 100644 index e9201cdc6..44ae76d66 100644
--- a/ggml/src/ggml-impl.h --- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h
@@ -638,6 +638,14 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx @@ -677,6 +677,14 @@ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops); return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
} }
+// Management libraries for fetching more accurate free VRAM data +// Management libraries for fetching more accurate free VRAM data
...@@ -243,10 +243,10 @@ index 05ff6a5a6..032dee76d 100644 ...@@ -243,10 +243,10 @@ index 05ff6a5a6..032dee76d 100644
/* .async = */ true, /* .async = */ true,
/* .host_buffer = */ false, /* .host_buffer = */ false,
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index bd3ece516..7cfb14a54 100644 index 3a6bbe564..d2c278a35 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -231,6 +231,7 @@ class vk_memory_logger; @@ -229,6 +229,7 @@ class vk_memory_logger;
#endif #endif
class vk_perf_logger; class vk_perf_logger;
static void ggml_vk_destroy_buffer(vk_buffer& buf); static void ggml_vk_destroy_buffer(vk_buffer& buf);
...@@ -254,7 +254,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -254,7 +254,7 @@ index bd3ece516..7cfb14a54 100644
static constexpr uint32_t mul_mat_vec_max_cols = 8; static constexpr uint32_t mul_mat_vec_max_cols = 8;
static constexpr uint32_t p021_max_gqa_ratio = 8; static constexpr uint32_t p021_max_gqa_ratio = 8;
@@ -11585,6 +11586,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_ @@ -11813,6 +11814,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
snprintf(description, description_size, "%s", props.deviceName.data()); snprintf(description, description_size, "%s", props.deviceName.data());
} }
...@@ -284,7 +284,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -284,7 +284,7 @@ index bd3ece516..7cfb14a54 100644
// backend interface // backend interface
#define UNUSED GGML_UNUSED #define UNUSED GGML_UNUSED
@@ -12392,31 +12416,102 @@ void ggml_backend_vk_get_device_description(int device, char * description, size @@ -12761,31 +12785,102 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
ggml_vk_get_device_description(dev_idx, description, description_size); ggml_vk_get_device_description(dev_idx, description, description_size);
} }
...@@ -404,7 +404,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -404,7 +404,7 @@ index bd3ece516..7cfb14a54 100644
break; break;
} }
} }
@@ -12449,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { @@ -12818,8 +12913,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
} }
} }
...@@ -419,7 +419,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -419,7 +419,7 @@ index bd3ece516..7cfb14a54 100644
} }
vk::PhysicalDeviceProperties2 props = {}; vk::PhysicalDeviceProperties2 props = {};
@@ -12467,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { @@ -12836,19 +12936,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
char pci_bus_id[16] = {}; char pci_bus_id[16] = {};
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function); snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
...@@ -453,7 +453,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -453,7 +453,7 @@ index bd3ece516..7cfb14a54 100644
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) { static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
@@ -12491,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de @@ -12860,9 +12965,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
return ctx->description.c_str(); return ctx->description.c_str();
} }
...@@ -469,7 +469,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -469,7 +469,7 @@ index bd3ece516..7cfb14a54 100644
} }
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) { static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
@@ -12517,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml @@ -12886,8 +12996,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
props->name = ggml_backend_vk_device_get_name(dev); props->name = ggml_backend_vk_device_get_name(dev);
props->description = ggml_backend_vk_device_get_description(dev); props->description = ggml_backend_vk_device_get_description(dev);
...@@ -480,7 +480,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -480,7 +480,7 @@ index bd3ece516..7cfb14a54 100644
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total); ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = { props->caps = {
/* .async = */ false, /* .async = */ false,
@@ -12526,6 +12637,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml @@ -12895,6 +13006,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
/* .buffer_from_host_ptr = */ false, /* .buffer_from_host_ptr = */ false,
/* .events = */ false, /* .events = */ false,
}; };
...@@ -494,7 +494,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -494,7 +494,7 @@ index bd3ece516..7cfb14a54 100644
} }
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) { static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
@@ -12954,6 +13072,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13365,6 +13483,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
static std::mutex mutex; static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex); std::lock_guard<std::mutex> lock(mutex);
if (!initialized) { if (!initialized) {
...@@ -503,7 +503,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -503,7 +503,7 @@ index bd3ece516..7cfb14a54 100644
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) { for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context; ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
char desc[256]; char desc[256];
@@ -12962,12 +13082,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13373,12 +13493,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
ctx->name = GGML_VK_NAME + std::to_string(i); ctx->name = GGML_VK_NAME + std::to_string(i);
ctx->description = desc; ctx->description = desc;
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu; ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
......
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Julius Tischbein <ju.tischbein@gmail.com>
Date: Wed, 15 Oct 2025 13:54:15 +0200
Subject: [PATCH] CUDA: Changing the CUDA scheduling strategy to spin (#16585)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* CUDA set scheduling strategy to spinning for cc121
* Using prop.major and prop.minor, include HIP and MUSA
* Exclude HIP and MUSA
* Remove trailing whitespace
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
* Remove empty line
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
---------
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
---
ggml/src/ggml-cuda/ggml-cuda.cu | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index b075a18be..d62f412d6 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
} else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") {
turing_devices_without_mma.push_back({ id, device_name });
}
+
+ // Temporary performance fix:
+ // Setting device scheduling strategy for iGPUs with cc121 to "spinning" to avoid delays in cuda synchronize calls.
+ // TODO: Check for future drivers the default scheduling strategy and
+ // remove this call again when cudaDeviceScheduleSpin is default.
+ if (prop.major == 12 && prop.minor == 1) {
+ CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
+ }
+
#endif // defined(GGML_USE_HIP)
}
...@@ -8,10 +8,10 @@ Subject: [PATCH] report LoadLibrary failures ...@@ -8,10 +8,10 @@ Subject: [PATCH] report LoadLibrary failures
1 file changed, 12 insertions(+) 1 file changed, 12 insertions(+)
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index f794d9cfa..3a855ab2e 100644 index a55d9b280..ec6f7f1e9 100644
--- a/ggml/src/ggml-backend-reg.cpp --- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp
@@ -118,6 +118,18 @@ static dl_handle * dl_load_library(const fs::path & path) { @@ -122,6 +122,18 @@ static dl_handle * dl_load_library(const fs::path & path) {
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
HMODULE handle = LoadLibraryW(path.wstring().c_str()); HMODULE handle = LoadLibraryW(path.wstring().c_str());
......
...@@ -13,7 +13,7 @@ interleaved version used for qwen3vl ...@@ -13,7 +13,7 @@ interleaved version used for qwen3vl
4 files changed, 11 insertions(+), 30 deletions(-) 4 files changed, 11 insertions(+), 30 deletions(-)
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index 31478dd8e..4d1ed207e 100644 index 902fdad69..70955347d 100644
--- a/ggml/src/ggml-cpu/ops.cpp --- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp
@@ -5509,15 +5509,12 @@ static void ggml_mrope_cache_init( @@ -5509,15 +5509,12 @@ static void ggml_mrope_cache_init(
...@@ -62,10 +62,10 @@ index d058504cd..287fe9d2c 100644 ...@@ -62,10 +62,10 @@ index d058504cd..287fe9d2c 100644
const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f; const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f;
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index 375a0c7fd..9866c96b4 100644 index 50b8071de..65a3183c8 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal --- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -3858,15 +3858,11 @@ kernel void kernel_rope_multi( @@ -3888,15 +3888,11 @@ kernel void kernel_rope_multi(
const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2 const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2
const int sector = ic % sect_dims; const int sector = ic % sect_dims;
......
...@@ -12,7 +12,7 @@ Subject: [PATCH] Add memory detection using DXGI + PDH ...@@ -12,7 +12,7 @@ Subject: [PATCH] Add memory detection using DXGI + PDH
create mode 100644 ggml/src/mem_dxgi_pdh.cpp create mode 100644 ggml/src/mem_dxgi_pdh.cpp
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index aefe43bdd..21fe4640c 100644 index 03f359ae9..4b3e5efb5 100644
--- a/ggml/src/CMakeLists.txt --- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt
@@ -211,6 +211,7 @@ add_library(ggml-base @@ -211,6 +211,7 @@ add_library(ggml-base
...@@ -24,10 +24,10 @@ index aefe43bdd..21fe4640c 100644 ...@@ -24,10 +24,10 @@ index aefe43bdd..21fe4640c 100644
target_include_directories(ggml-base PRIVATE .) target_include_directories(ggml-base PRIVATE .)
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index b63edd0c1..81cad8cf3 100644 index 44ae76d66..639d551a2 100644
--- a/ggml/src/ggml-impl.h --- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h
@@ -645,6 +645,9 @@ GGML_API void ggml_nvml_release(); @@ -684,6 +684,9 @@ GGML_API void ggml_nvml_release();
GGML_API int ggml_hip_mgmt_init(); GGML_API int ggml_hip_mgmt_init();
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total); GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
GGML_API void ggml_hip_mgmt_release(); GGML_API void ggml_hip_mgmt_release();
...@@ -38,7 +38,7 @@ index b63edd0c1..81cad8cf3 100644 ...@@ -38,7 +38,7 @@ index b63edd0c1..81cad8cf3 100644
#ifdef __cplusplus #ifdef __cplusplus
} }
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 7cfb14a54..a1c46d0b3 100644 index d2c278a35..221e29509 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher(); @@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();
...@@ -49,7 +49,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -49,7 +49,7 @@ index 7cfb14a54..a1c46d0b3 100644
typedef struct VkPhysicalDeviceShaderBfloat16FeaturesKHR { typedef struct VkPhysicalDeviceShaderBfloat16FeaturesKHR {
VkStructureType sType; VkStructureType sType;
@@ -12433,6 +12434,7 @@ struct ggml_backend_vk_device_context { @@ -12802,6 +12803,7 @@ struct ggml_backend_vk_device_context {
std::string pci_id; std::string pci_id;
std::string id; std::string id;
std::string uuid; std::string uuid;
...@@ -57,7 +57,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -57,7 +57,7 @@ index 7cfb14a54..a1c46d0b3 100644
int major; int major;
int minor; int minor;
int driver_major; int driver_major;
@@ -12448,8 +12450,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size @@ -12817,8 +12819,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties(); vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
vk::PhysicalDeviceProperties2 props2; vk::PhysicalDeviceProperties2 props2;
vkdev.getProperties2(&props2); vkdev.getProperties2(&props2);
...@@ -81,7 +81,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -81,7 +81,7 @@ index 7cfb14a54..a1c46d0b3 100644
{ {
// Use vendor specific management libraries for best VRAM reporting if available // Use vendor specific management libraries for best VRAM reporting if available
switch (props2.properties.vendorID) { switch (props2.properties.vendorID) {
@@ -12477,8 +12493,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size @@ -12846,8 +12862,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
break; break;
} }
} }
...@@ -91,7 +91,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -91,7 +91,7 @@ index 7cfb14a54..a1c46d0b3 100644
*total = 0; *total = 0;
*free = 0; *free = 0;
vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props; vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
@@ -13089,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13500,7 +13516,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
/* .reg = */ reg, /* .reg = */ reg,
/* .context = */ ctx, /* .context = */ ctx,
}); });
...@@ -99,7 +99,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -99,7 +99,7 @@ index 7cfb14a54..a1c46d0b3 100644
// Gather additional information about the device // Gather additional information about the device
int dev_idx = vk_instance.device_indices[i]; int dev_idx = vk_instance.device_indices[i];
vk::PhysicalDeviceProperties props1; vk::PhysicalDeviceProperties props1;
@@ -13112,6 +13127,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13523,6 +13538,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
} }
} }
ctx->uuid = oss.str(); ctx->uuid = oss.str();
......
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// backend API
// Public entry points of the Hexagon backend, exported with GGML_BACKEND_API.

// Takes no arguments and returns a ggml_backend_t handle.
// NOTE(review): presumably creates/initializes the Hexagon backend; the failure
// behaviour (e.g. a NULL return) is not visible from this header - confirm.
GGML_BACKEND_API ggml_backend_t ggml_backend_hexagon_init(void);
// Predicate on a backend handle; presumably true when `backend` is a Hexagon backend - confirm.
GGML_BACKEND_API bool ggml_backend_is_hexagon(ggml_backend_t backend);
// Returns the backend registration object (ggml_backend_reg_t) for Hexagon.
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
#ifdef __cplusplus
}
#endif
...@@ -21,8 +21,7 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c ...@@ -21,8 +21,7 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total); GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total);
GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir, GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir,
size_t n_threads, size_t n_devices, size_t n_threads, size_t n_devices, ggml_backend_dev_t * devices);
ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint);
......
...@@ -577,6 +577,10 @@ extern "C" { ...@@ -577,6 +577,10 @@ extern "C" {
GGML_UNARY_OP_EXP, GGML_UNARY_OP_EXP,
GGML_UNARY_OP_GELU_ERF, GGML_UNARY_OP_GELU_ERF,
GGML_UNARY_OP_XIELU, GGML_UNARY_OP_XIELU,
GGML_UNARY_OP_FLOOR,
GGML_UNARY_OP_CEIL,
GGML_UNARY_OP_ROUND,
GGML_UNARY_OP_TRUNC,
GGML_UNARY_OP_COUNT, GGML_UNARY_OP_COUNT,
}; };
...@@ -1151,6 +1155,46 @@ extern "C" { ...@@ -1151,6 +1155,46 @@ extern "C" {
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a); struct ggml_tensor * a);
// Floor of each element of `a` (rounds towards negative infinity).
// For example: floor(3.7) = 3.0, floor(-2.1) = -3.0
// The CPU backend computes this with floorf.
GGML_API struct ggml_tensor * ggml_floor(
struct ggml_context * ctx,
struct ggml_tensor * a);
// In-place variant of ggml_floor.
// NOTE(review): presumably stores the result in `a`'s data - confirm against the implementation.
GGML_API struct ggml_tensor * ggml_floor_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
// Ceiling of each element of `a` (rounds towards positive infinity).
// For example: ceil(3.2) = 4.0, ceil(-2.9) = -2.0
// The CPU backend computes this with ceilf.
GGML_API struct ggml_tensor * ggml_ceil(
struct ggml_context * ctx,
struct ggml_tensor * a);
// In-place variant of ggml_ceil.
// NOTE(review): presumably stores the result in `a`'s data - confirm against the implementation.
GGML_API struct ggml_tensor * ggml_ceil_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
// Rounds each element of `a` to the nearest integral value.
// The CPU backend computes this with roundf, so halfway cases round away from
// zero: round(2.5) = 3.0, round(-2.5) = -3.0
// NOTE(review): other backends are not visible here - confirm they use the same tie-breaking.
GGML_API struct ggml_tensor * ggml_round(
struct ggml_context * ctx,
struct ggml_tensor * a);
// In-place variant of ggml_round.
// NOTE(review): presumably stores the result in `a`'s data - confirm against the implementation.
GGML_API struct ggml_tensor * ggml_round_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
/**
* Truncates the fractional part of each element in the tensor (towards zero).
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
* Similar to std::trunc in C/C++.
*/
GGML_API struct ggml_tensor * ggml_trunc(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_trunc_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
// xIELU activation function // xIELU activation function
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0) // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
......
...@@ -310,6 +310,10 @@ function(ggml_add_cpu_backend_variant tag_name) ...@@ -310,6 +310,10 @@ function(ggml_add_cpu_backend_variant tag_name)
foreach (feat ${ARGN}) foreach (feat ${ARGN})
set(GGML_INTERNAL_${feat} ON) set(GGML_INTERNAL_${feat} ON)
endforeach() endforeach()
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
foreach (feat ${ARGN})
set(GGML_INTERNAL_${feat} ON)
endforeach()
endif() endif()
ggml_add_cpu_backend_variant_impl(${tag_name}) ggml_add_cpu_backend_variant_impl(${tag_name})
...@@ -372,6 +376,14 @@ if (GGML_CPU_ALL_VARIANTS) ...@@ -372,6 +376,14 @@ if (GGML_CPU_ALL_VARIANTS)
else() else()
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}") message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
endif() endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
ggml_add_cpu_backend_variant(s390x_z15 Z15 VXE)
# ggml_add_cpu_backend_variant(s390x_z16 Z16 VXE)
# ggml_add_cpu_backend_variant(s390x_z17 Z17 VXE)
else()
message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
endif()
else() else()
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}") message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
endif() endif()
...@@ -391,6 +403,7 @@ ggml_add_backend(Vulkan) ...@@ -391,6 +403,7 @@ ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU) ggml_add_backend(WebGPU)
ggml_add_backend(zDNN) ggml_add_backend(zDNN)
ggml_add_backend(OpenCL) ggml_add_backend(OpenCL)
ggml_add_backend(Hexagon)
foreach (target ggml-base ggml) foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>) target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
......
...@@ -603,6 +603,26 @@ static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor ...@@ -603,6 +603,26 @@ static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor
return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated; return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
} }
// Release the unused tail of a parent allocation that `node` is taking over.
// When the node needs fewer bytes than the parent was given, the range that starts
// `node_size` bytes into the parent's address is handed back to the parent's allocator.
// The node must not need more bytes than the parent (asserted).
static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_tensor * node, struct ggml_tensor * parent) {
    struct hash_node * node_hn   = ggml_gallocr_hash_get(galloc, node);
    struct hash_node * parent_hn = ggml_gallocr_hash_get(galloc, parent);

    // allocation sizes as reported by each tensor's own buffer type
    size_t parent_size = ggml_backend_buft_get_alloc_size(galloc->bufts[parent_hn->buffer_id], parent);
    size_t node_size   = ggml_backend_buft_get_alloc_size(galloc->bufts[node_hn->buffer_id], node);

    GGML_ASSERT(parent_size >= node_size);

    if (parent_size == node_size) {
        // exact fit: there is no tail to give back
        return;
    }

    // the surplus begins right after the bytes the node occupies
    struct buffer_address tail_addr = parent_hn->addr;
    tail_addr.offset += node_size;
    size_t tail_size = parent_size - node_size;

    AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", tail_size, parent->name, node->name);
    ggml_dyn_tallocr_free_tensor(galloc->buf_tallocs[parent_hn->buffer_id], tail_addr, tail_size, parent);
}
static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) { static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
GGML_ASSERT(buffer_id >= 0); GGML_ASSERT(buffer_id >= 0);
struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
...@@ -648,6 +668,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor ...@@ -648,6 +668,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
hn->addr = p_hn->addr; hn->addr = p_hn->addr;
p_hn->allocated = false; // avoid freeing the parent p_hn->allocated = false; // avoid freeing the parent
view_src_hn->allocated = false; view_src_hn->allocated = false;
ggml_gallocr_free_extra_space(galloc, node, view_src);
return; return;
} }
} else { } else {
...@@ -655,6 +676,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor ...@@ -655,6 +676,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
hn->buffer_id = p_hn->buffer_id; hn->buffer_id = p_hn->buffer_id;
hn->addr = p_hn->addr; hn->addr = p_hn->addr;
p_hn->allocated = false; // avoid freeing the parent p_hn->allocated = false; // avoid freeing the parent
ggml_gallocr_free_extra_space(galloc, node, parent);
return; return;
} }
} }
......
...@@ -57,6 +57,10 @@ ...@@ -57,6 +57,10 @@
#include "ggml-opencl.h" #include "ggml-opencl.h"
#endif #endif
#ifdef GGML_USE_HEXAGON
#include "ggml-hexagon.h"
#endif
#ifdef GGML_USE_BLAS #ifdef GGML_USE_BLAS
#include "ggml-blas.h" #include "ggml-blas.h"
#endif #endif
...@@ -211,6 +215,9 @@ struct ggml_backend_registry { ...@@ -211,6 +215,9 @@ struct ggml_backend_registry {
#ifdef GGML_USE_OPENCL #ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg()); register_backend(ggml_backend_opencl_reg());
#endif #endif
#ifdef GGML_USE_HEXAGON
register_backend(ggml_backend_hexagon_reg());
#endif
#ifdef GGML_USE_CANN #ifdef GGML_USE_CANN
register_backend(ggml_backend_cann_reg()); register_backend(ggml_backend_cann_reg());
#endif #endif
...@@ -615,6 +622,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ...@@ -615,6 +622,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("sycl", silent, dir_path); ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path); ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path); ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path); ggml_backend_load_best("musa", silent, dir_path);
ggml_backend_load_best("cpu", silent, dir_path); ggml_backend_load_best("cpu", silent, dir_path);
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
......
...@@ -466,7 +466,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name) ...@@ -466,7 +466,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d) list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x") elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
message(STATUS "s390x detected") message(STATUS "s390x detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c) list(APPEND GGML_CPU_SOURCES
ggml-cpu/arch/s390/quants.c)
# for native compilation
if (GGML_NATIVE)
# check machine level to determine target
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS) file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS}) string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
...@@ -487,8 +492,19 @@ function(ggml_add_cpu_backend_variant_impl tag_name) ...@@ -487,8 +492,19 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.") message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
list(APPEND ARCH_FLAGS -march=native -mtune=native) list(APPEND ARCH_FLAGS -march=native -mtune=native)
endif() endif()
# for cross-compilation
elseif(GGML_CPU_ALL_VARIANTS)
# range through IBM z15 to z17
# NOTE: update when a new hardware level is released
foreach (ZHW RANGE 15 17)
if(DEFINED GGML_INTERNAL_Z${ZHW})
message(STATUS "z${ZHW} cross-compile target")
list(APPEND ARCH_FLAGS -march=z${ZHW})
endif()
endforeach()
endif()
if (GGML_VXE) if (GGML_VXE OR GGML_INTERNAL_VXE)
message(STATUS "VX/VXE/VXE2 enabled") message(STATUS "VX/VXE/VXE2 enabled")
list(APPEND ARCH_FLAGS -mvx -mzvector) list(APPEND ARCH_FLAGS -mvx -mzvector)
list(APPEND ARCH_DEFINITIONS GGML_VXE) list(APPEND ARCH_DEFINITIONS GGML_VXE)
......
...@@ -2186,6 +2186,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { ...@@ -2186,6 +2186,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_UNARY_OP_HARDSWISH: case GGML_UNARY_OP_HARDSWISH:
case GGML_UNARY_OP_HARDSIGMOID: case GGML_UNARY_OP_HARDSIGMOID:
case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_EXP:
case GGML_UNARY_OP_FLOOR:
case GGML_UNARY_OP_CEIL:
case GGML_UNARY_OP_ROUND:
case GGML_UNARY_OP_TRUNC:
{ {
n_tasks = 1; n_tasks = 1;
} break; } break;
...@@ -3569,13 +3573,17 @@ void ggml_cpu_init(void) { ...@@ -3569,13 +3573,17 @@ void ggml_cpu_init(void) {
#ifdef GGML_USE_OPENMP #ifdef GGML_USE_OPENMP
//if (!getenv("OMP_WAIT_POLICY")) { //if (!getenv("OMP_WAIT_POLICY")) {
// // set the wait policy to active, so that OpenMP threads don't sleep // // set the wait policy to active, so that OpenMP threads don't sleep
// putenv("OMP_WAIT_POLICY=active"); // setenv("OMP_WAIT_POLICY", "active", 0)
//} //}
if (!getenv("KMP_BLOCKTIME")) { if (!getenv("KMP_BLOCKTIME")) {
// set the time to wait before sleeping a thread // set the time to wait before sleeping a thread
// this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases // this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases
putenv("KMP_BLOCKTIME=200"); // 200ms #ifdef _WIN32
_putenv_s("KMP_BLOCKTIME", "200"); // 200ms
#else
setenv("KMP_BLOCKTIME", "200", 0); // 200ms
#endif
} }
#endif #endif
} }
......
...@@ -9033,6 +9033,22 @@ void ggml_compute_forward_unary( ...@@ -9033,6 +9033,22 @@ void ggml_compute_forward_unary(
{ {
ggml_compute_forward_exp(params, dst); ggml_compute_forward_exp(params, dst);
} break; } break;
case GGML_UNARY_OP_FLOOR:
{
ggml_compute_forward_floor(params, dst);
} break;
case GGML_UNARY_OP_CEIL:
{
ggml_compute_forward_ceil(params, dst);
} break;
case GGML_UNARY_OP_ROUND:
{
ggml_compute_forward_round(params, dst);
} break;
case GGML_UNARY_OP_TRUNC:
{
ggml_compute_forward_trunc(params, dst);
} break;
case GGML_UNARY_OP_XIELU: case GGML_UNARY_OP_XIELU:
{ {
ggml_compute_forward_xielu(params, dst); ggml_compute_forward_xielu(params, dst);
......
...@@ -73,6 +73,22 @@ static inline float op_log(float x) { ...@@ -73,6 +73,22 @@ static inline float op_log(float x) {
return logf(x); return logf(x);
} }
// Scalar floor: largest integral value that is not greater than x.
static inline float op_floor(float x) {
    const float rounded_down = floorf(x);
    return rounded_down;
}
// Scalar ceiling: smallest integral value that is not less than x.
static inline float op_ceil(float x) {
    const float rounded_up = ceilf(x);
    return rounded_up;
}
// Scalar round-to-nearest; roundf sends halfway cases away from zero.
static inline float op_round(float x) {
    const float nearest = roundf(x);
    return nearest;
}
// Scalar truncation: drops the fractional part, rounding towards zero.
static inline float op_trunc(float x) {
    const float whole_part = truncf(x);
    return whole_part;
}
template <float (*op)(float), typename src0_t, typename dst_t> template <float (*op)(float), typename src0_t, typename dst_t>
static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) { static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32; constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
...@@ -274,6 +290,22 @@ void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor * ...@@ -274,6 +290,22 @@ void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor *
unary_op<op_log>(params, dst); unary_op<op_log>(params, dst);
} }
// Forward pass for the floor unary op: forwards `params` and `dst` to the generic
// unary_op helper instantiated with op_floor (floorf).
// NOTE(review): unary_op is defined outside this view; presumably it applies the
// scalar op to every element of dst's source tensor - confirm.
void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_floor>(params, dst);
}
// Forward pass for the ceil unary op: forwards `params` and `dst` to the generic
// unary_op helper instantiated with op_ceil (ceilf).
// NOTE(review): unary_op is defined outside this view; presumably it applies the
// scalar op to every element of dst's source tensor - confirm.
void ggml_compute_forward_ceil(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_ceil>(params, dst);
}
// Forward pass for the round unary op: forwards `params` and `dst` to the generic
// unary_op helper instantiated with op_round (roundf, halfway cases away from zero).
// NOTE(review): unary_op is defined outside this view; presumably it applies the
// scalar op to every element of dst's source tensor - confirm.
void ggml_compute_forward_round(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_round>(params, dst);
}
// Forward pass for the trunc unary op: forwards `params` and `dst` to the generic
// unary_op helper instantiated with op_trunc (truncf, rounds towards zero).
// NOTE(review): unary_op is defined outside this view; presumably it applies the
// scalar op to every element of dst's source tensor - confirm.
void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_trunc>(params, dst);
}
void ggml_compute_forward_xielu(const ggml_compute_params * params, ggml_tensor * dst) { void ggml_compute_forward_xielu(const ggml_compute_params * params, ggml_tensor * dst) {
const float alpha_n = ggml_get_op_params_f32(dst, 1); const float alpha_n = ggml_get_op_params_f32(dst, 1);
const float alpha_p = ggml_get_op_params_f32(dst, 2); const float alpha_p = ggml_get_op_params_f32(dst, 2);
......
...@@ -22,6 +22,10 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct ...@@ -22,6 +22,10 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct
void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_floor(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_xielu(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_xielu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
#ifdef __cplusplus #ifdef __cplusplus
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment