Unverified Commit 544b6739 authored by Daniel Hiltgen's avatar Daniel Hiltgen Committed by GitHub
Browse files

ggml update to b6840 (#12791)

parent c4ba257c
...@@ -219,7 +219,7 @@ index 41eef3b5f..c81a2e48a 100644 ...@@ -219,7 +219,7 @@ index 41eef3b5f..c81a2e48a 100644
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) { void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index e0abde542..e98044bd8 100644 index 41ff89c4d..2931c15ca 100644
--- a/ggml/src/ggml-cuda/common.cuh --- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh
@@ -35,6 +35,41 @@ @@ -35,6 +35,41 @@
...@@ -274,7 +274,7 @@ index e0abde542..e98044bd8 100644 ...@@ -274,7 +274,7 @@ index e0abde542..e98044bd8 100644
}; };
template<typename T> template<typename T>
@@ -999,11 +1037,11 @@ struct ggml_backend_cuda_context { @@ -992,11 +1030,11 @@ struct ggml_backend_cuda_context {
// pool // pool
std::unique_ptr<ggml_cuda_pool> pools[GGML_CUDA_MAX_DEVICES]; std::unique_ptr<ggml_cuda_pool> pools[GGML_CUDA_MAX_DEVICES];
...@@ -288,7 +288,7 @@ index e0abde542..e98044bd8 100644 ...@@ -288,7 +288,7 @@ index e0abde542..e98044bd8 100644
} }
return *pools[device]; return *pools[device];
} }
@@ -1011,4 +1049,20 @@ struct ggml_backend_cuda_context { @@ -1004,4 +1042,20 @@ struct ggml_backend_cuda_context {
ggml_cuda_pool & pool() { ggml_cuda_pool & pool() {
return pool(device); return pool(device);
} }
...@@ -310,10 +310,10 @@ index e0abde542..e98044bd8 100644 ...@@ -310,10 +310,10 @@ index e0abde542..e98044bd8 100644
+ } + }
}; };
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index c555cd30f..eb3db0f19 100644 index 02d413467..f79e5d65c 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu --- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -350,6 +350,8 @@ const ggml_cuda_device_info & ggml_cuda_info() { @@ -359,6 +359,8 @@ const ggml_cuda_device_info & ggml_cuda_info() {
// #define DEBUG_CUDA_MALLOC // #define DEBUG_CUDA_MALLOC
...@@ -322,7 +322,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -322,7 +322,7 @@ index c555cd30f..eb3db0f19 100644
// buffer pool for cuda (legacy) // buffer pool for cuda (legacy)
struct ggml_cuda_pool_leg : public ggml_cuda_pool { struct ggml_cuda_pool_leg : public ggml_cuda_pool {
static const int MAX_BUFFERS = 256; static const int MAX_BUFFERS = 256;
@@ -362,9 +364,12 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -371,9 +373,12 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
ggml_cuda_buffer buffer_pool[MAX_BUFFERS] = {}; ggml_cuda_buffer buffer_pool[MAX_BUFFERS] = {};
size_t pool_size = 0; size_t pool_size = 0;
...@@ -337,7 +337,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -337,7 +337,7 @@ index c555cd30f..eb3db0f19 100644
} }
~ggml_cuda_pool_leg() { ~ggml_cuda_pool_leg() {
@@ -372,7 +377,9 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -381,7 +386,9 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
for (int i = 0; i < MAX_BUFFERS; ++i) { for (int i = 0; i < MAX_BUFFERS; ++i) {
ggml_cuda_buffer & b = buffer_pool[i]; ggml_cuda_buffer & b = buffer_pool[i];
if (b.ptr != nullptr) { if (b.ptr != nullptr) {
...@@ -348,7 +348,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -348,7 +348,7 @@ index c555cd30f..eb3db0f19 100644
pool_size -= b.size; pool_size -= b.size;
} }
} }
@@ -420,8 +427,15 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -429,8 +436,15 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
void * ptr; void * ptr;
size_t look_ahead_size = (size_t) (1.05 * size); size_t look_ahead_size = (size_t) (1.05 * size);
look_ahead_size = 256 * ((look_ahead_size + 255)/256); look_ahead_size = 256 * ((look_ahead_size + 255)/256);
...@@ -366,7 +366,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -366,7 +366,7 @@ index c555cd30f..eb3db0f19 100644
*actual_size = look_ahead_size; *actual_size = look_ahead_size;
pool_size += look_ahead_size; pool_size += look_ahead_size;
#ifdef DEBUG_CUDA_MALLOC #ifdef DEBUG_CUDA_MALLOC
@@ -441,10 +455,20 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool { @@ -450,10 +464,20 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
} }
} }
GGML_LOG_DEBUG(GGML_CUDA_NAME " buffer pool full, increase MAX_CUDA_BUFFERS\n"); GGML_LOG_DEBUG(GGML_CUDA_NAME " buffer pool full, increase MAX_CUDA_BUFFERS\n");
...@@ -389,7 +389,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -389,7 +389,7 @@ index c555cd30f..eb3db0f19 100644
}; };
// pool with virtual memory // pool with virtual memory
@@ -456,18 +480,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool { @@ -465,18 +489,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
CUdeviceptr pool_addr = 0; CUdeviceptr pool_addr = 0;
size_t pool_used = 0; size_t pool_used = 0;
size_t pool_size = 0; size_t pool_size = 0;
...@@ -417,7 +417,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -417,7 +417,7 @@ index c555cd30f..eb3db0f19 100644
#if defined(GGML_USE_HIP) #if defined(GGML_USE_HIP)
// Workaround for https://github.com/ROCm/ROCR-Runtime/issues/285 // Workaround for https://github.com/ROCm/ROCR-Runtime/issues/285
for (std::pair<CUdeviceptr, size_t> & mapping : mappings) { for (std::pair<CUdeviceptr, size_t> & mapping : mappings) {
@@ -494,35 +524,49 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool { @@ -503,35 +533,49 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
GGML_ASSERT(pool_size + reserve_size <= CUDA_POOL_VMM_MAX_SIZE); GGML_ASSERT(pool_size + reserve_size <= CUDA_POOL_VMM_MAX_SIZE);
...@@ -493,7 +493,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -493,7 +493,7 @@ index c555cd30f..eb3db0f19 100644
// add to the pool // add to the pool
pool_size += reserve_size; pool_size += reserve_size;
@@ -555,16 +599,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool { @@ -564,16 +608,24 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
// all deallocations must be in reverse order of the allocations // all deallocations must be in reverse order of the allocations
GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used)); GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used));
} }
...@@ -521,7 +521,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -521,7 +521,7 @@ index c555cd30f..eb3db0f19 100644
} }
// destroying a cuBLAS handle while a graph is being captured in a different thread can result in a CUDA error // destroying a cuBLAS handle while a graph is being captured in a different thread can result in a CUDA error
@@ -748,11 +800,20 @@ static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffer(ggml_bac @@ -757,11 +809,20 @@ static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffer(ggml_bac
} }
static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
...@@ -543,7 +543,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -543,7 +543,7 @@ index c555cd30f..eb3db0f19 100644
static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
size_t size = ggml_nbytes(tensor); size_t size = ggml_nbytes(tensor);
int64_t ne0 = tensor->ne[0]; int64_t ne0 = tensor->ne[0];
@@ -776,6 +837,7 @@ static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface @@ -785,6 +846,7 @@ static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface
/* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size, /* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
/* .is_host = */ NULL, /* .is_host = */ NULL,
...@@ -551,7 +551,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -551,7 +551,7 @@ index c555cd30f..eb3db0f19 100644
}; };
ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) { ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) {
@@ -3003,6 +3065,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, @@ -2986,6 +3048,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph, static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph,
bool & graph_evaluated_or_captured, bool & use_cuda_graph, bool & cuda_graph_update_required) { bool & graph_evaluated_or_captured, bool & use_cuda_graph, bool & cuda_graph_update_required) {
...@@ -559,7 +559,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -559,7 +559,7 @@ index c555cd30f..eb3db0f19 100644
// flag used to determine whether it is an integrated_gpu // flag used to determine whether it is an integrated_gpu
const bool integrated = ggml_cuda_info().devices[cuda_ctx->device].integrated; const bool integrated = ggml_cuda_info().devices[cuda_ctx->device].integrated;
@@ -3018,6 +3081,11 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx @@ -3001,6 +3064,11 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
continue; continue;
} }
...@@ -571,7 +571,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -571,7 +571,7 @@ index c555cd30f..eb3db0f19 100644
static bool disable_fusion = (getenv("GGML_CUDA_DISABLE_FUSION") != nullptr); static bool disable_fusion = (getenv("GGML_CUDA_DISABLE_FUSION") != nullptr);
if (!disable_fusion) { if (!disable_fusion) {
@@ -3144,6 +3212,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx @@ -3140,6 +3208,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph, int batch_size) { static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph, int batch_size) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
...@@ -579,7 +579,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -579,7 +579,7 @@ index c555cd30f..eb3db0f19 100644
ggml_cuda_set_device(cuda_ctx->device); ggml_cuda_set_device(cuda_ctx->device);
@@ -3223,6 +3292,71 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, @@ -3215,6 +3284,71 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
return GGML_STATUS_SUCCESS; return GGML_STATUS_SUCCESS;
} }
...@@ -651,7 +651,7 @@ index c555cd30f..eb3db0f19 100644 ...@@ -651,7 +651,7 @@ index c555cd30f..eb3db0f19 100644
static void ggml_backend_cuda_event_record(ggml_backend_t backend, ggml_backend_event_t event) { static void ggml_backend_cuda_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
@@ -3263,6 +3397,9 @@ static const ggml_backend_i ggml_backend_cuda_interface = { @@ -3255,6 +3389,9 @@ static const ggml_backend_i ggml_backend_cuda_interface = {
/* .event_record = */ ggml_backend_cuda_event_record, /* .event_record = */ ggml_backend_cuda_event_record,
/* .event_wait = */ ggml_backend_cuda_event_wait, /* .event_wait = */ ggml_backend_cuda_event_wait,
/* .graph_optimize = */ NULL, /* .graph_optimize = */ NULL,
......
...@@ -8,7 +8,7 @@ Subject: [PATCH] decode: disable output_all ...@@ -8,7 +8,7 @@ Subject: [PATCH] decode: disable output_all
1 file changed, 1 insertion(+), 2 deletions(-) 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index e7526e7d..53a5e3a9 100644 index bd348bcad..8b4a89d38 100644
--- a/src/llama-context.cpp --- a/src/llama-context.cpp
+++ b/src/llama-context.cpp +++ b/src/llama-context.cpp
@@ -974,8 +974,7 @@ int llama_context::decode(const llama_batch & batch_inp) { @@ -974,8 +974,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
......
...@@ -16,7 +16,7 @@ unused then it can be reset to free these data structures. ...@@ -16,7 +16,7 @@ unused then it can be reset to free these data structures.
6 files changed, 32 insertions(+), 2 deletions(-) 6 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 1ff53ed03..ba181d09d 100644 index b3b5b356a..69223c488 100644
--- a/ggml/include/ggml-backend.h --- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h
@@ -178,6 +178,7 @@ extern "C" { @@ -178,6 +178,7 @@ extern "C" {
...@@ -28,7 +28,7 @@ index 1ff53ed03..ba181d09d 100644 ...@@ -28,7 +28,7 @@ index 1ff53ed03..ba181d09d 100644
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device); GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size); GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
index 3c3f22fc0..43c91d9f2 100644 index 7bdf9d81f..21b35ac5c 100644
--- a/ggml/src/ggml-backend-impl.h --- a/ggml/src/ggml-backend-impl.h
+++ b/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h
@@ -195,6 +195,10 @@ extern "C" { @@ -195,6 +195,10 @@ extern "C" {
...@@ -43,7 +43,7 @@ index 3c3f22fc0..43c91d9f2 100644 ...@@ -43,7 +43,7 @@ index 3c3f22fc0..43c91d9f2 100644
struct ggml_backend_device { struct ggml_backend_device {
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 6ef5eeafa..0b757af59 100644 index c81a2e48a..9b0a9b91f 100644
--- a/ggml/src/ggml-backend.cpp --- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp
@@ -526,6 +526,14 @@ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * par @@ -526,6 +526,14 @@ ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * par
...@@ -62,7 +62,7 @@ index 6ef5eeafa..0b757af59 100644 ...@@ -62,7 +62,7 @@ index 6ef5eeafa..0b757af59 100644
GGML_ASSERT(device); GGML_ASSERT(device);
return device->iface.get_buffer_type(device); return device->iface.get_buffer_type(device);
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 811462c79..87c6c34a4 100644 index f79e5d65c..c9333689f 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu --- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -107,6 +107,11 @@ int ggml_cuda_get_device() { @@ -107,6 +107,11 @@ int ggml_cuda_get_device() {
...@@ -77,7 +77,7 @@ index 811462c79..87c6c34a4 100644 ...@@ -77,7 +77,7 @@ index 811462c79..87c6c34a4 100644
static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) { static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) {
ggml_cuda_set_device(device); ggml_cuda_set_device(device);
cudaError_t err; cudaError_t err;
@@ -3515,7 +3520,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back @@ -3499,7 +3504,10 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
props->id = ggml_backend_cuda_device_get_id(dev); props->id = ggml_backend_cuda_device_get_id(dev);
props->type = ggml_backend_cuda_device_get_type(dev); props->type = ggml_backend_cuda_device_get_type(dev);
props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str(); props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
...@@ -89,7 +89,7 @@ index 811462c79..87c6c34a4 100644 ...@@ -89,7 +89,7 @@ index 811462c79..87c6c34a4 100644
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr; bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
#ifdef GGML_CUDA_NO_PEER_COPY #ifdef GGML_CUDA_NO_PEER_COPY
@@ -3948,6 +3956,11 @@ static void ggml_backend_cuda_device_event_synchronize(ggml_backend_dev_t dev, g @@ -3936,6 +3944,11 @@ static void ggml_backend_cuda_device_event_synchronize(ggml_backend_dev_t dev, g
CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context)); CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context));
} }
...@@ -101,7 +101,7 @@ index 811462c79..87c6c34a4 100644 ...@@ -101,7 +101,7 @@ index 811462c79..87c6c34a4 100644
static const ggml_backend_device_i ggml_backend_cuda_device_interface = { static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
/* .get_name = */ ggml_backend_cuda_device_get_name, /* .get_name = */ ggml_backend_cuda_device_get_name,
/* .get_description = */ ggml_backend_cuda_device_get_description, /* .get_description = */ ggml_backend_cuda_device_get_description,
@@ -3964,6 +3977,7 @@ static const ggml_backend_device_i ggml_backend_cuda_device_interface = { @@ -3952,6 +3965,7 @@ static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
/* .event_new = */ ggml_backend_cuda_device_event_new, /* .event_new = */ ggml_backend_cuda_device_event_new,
/* .event_free = */ ggml_backend_cuda_device_event_free, /* .event_free = */ ggml_backend_cuda_device_event_free,
/* .event_synchronize = */ ggml_backend_cuda_device_event_synchronize, /* .event_synchronize = */ ggml_backend_cuda_device_event_synchronize,
...@@ -122,10 +122,10 @@ index 890c10364..1f06be80e 100644 ...@@ -122,10 +122,10 @@ index 890c10364..1f06be80e 100644
#define cudaError_t hipError_t #define cudaError_t hipError_t
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
diff --git a/src/llama.cpp b/src/llama.cpp diff --git a/src/llama.cpp b/src/llama.cpp
index fe5a7a835..d821a96a0 100644 index ab2e9868a..74c49e651 100644
--- a/src/llama.cpp --- a/src/llama.cpp
+++ b/src/llama.cpp +++ b/src/llama.cpp
@@ -267,10 +267,12 @@ static struct llama_model * llama_model_load_from_file_impl( @@ -270,10 +270,12 @@ static struct llama_model * llama_model_load_from_file_impl(
for (auto * dev : model->devices) { for (auto * dev : model->devices) {
ggml_backend_dev_props props; ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props); ggml_backend_dev_get_props(dev, &props);
......
...@@ -8,7 +8,7 @@ Subject: [PATCH] harden uncaught exception registration ...@@ -8,7 +8,7 @@ Subject: [PATCH] harden uncaught exception registration
1 file changed, 6 insertions(+), 2 deletions(-) 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/ggml/src/ggml.cpp b/ggml/src/ggml.cpp diff --git a/ggml/src/ggml.cpp b/ggml/src/ggml.cpp
index 0d388d45..f5bcb446 100644 index 0d388d455..f5bcb446d 100644
--- a/ggml/src/ggml.cpp --- a/ggml/src/ggml.cpp
+++ b/ggml/src/ggml.cpp +++ b/ggml/src/ggml.cpp
@@ -19,8 +19,12 @@ static bool ggml_uncaught_exception_init = []{ @@ -19,8 +19,12 @@ static bool ggml_uncaught_exception_init = []{
......
...@@ -45,7 +45,7 @@ index 69223c488..6510e0cba 100644 ...@@ -45,7 +45,7 @@ index 69223c488..6510e0cba 100644
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device); GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 0609c6503..aefe43bdd 100644 index f9a6587f1..03f359ae9 100644
--- a/ggml/src/CMakeLists.txt --- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt
@@ -209,6 +209,8 @@ add_library(ggml-base @@ -209,6 +209,8 @@ add_library(ggml-base
...@@ -58,7 +58,7 @@ index 0609c6503..aefe43bdd 100644 ...@@ -58,7 +58,7 @@ index 0609c6503..aefe43bdd 100644
target_include_directories(ggml-base PRIVATE .) target_include_directories(ggml-base PRIVATE .)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 5787e8cd5..d232bf828 100644 index c9333689f..41b00af83 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu --- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() { @@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
...@@ -90,7 +90,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -90,7 +90,7 @@ index 5787e8cd5..d232bf828 100644
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n", GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
ggml_cuda_parse_uuid(prop, id).c_str()); ggml_cuda_parse_uuid(prop, id).c_str());
@@ -3476,6 +3491,11 @@ struct ggml_backend_cuda_device_context { @@ -3468,6 +3483,11 @@ struct ggml_backend_cuda_device_context {
std::string description; std::string description;
std::string pci_bus_id; std::string pci_bus_id;
std::string id; std::string id;
...@@ -102,7 +102,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -102,7 +102,7 @@ index 5787e8cd5..d232bf828 100644
}; };
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -3496,6 +3516,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { @@ -3488,6 +3508,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
ggml_cuda_set_device(ctx->device); ggml_cuda_set_device(ctx->device);
...@@ -131,7 +131,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -131,7 +131,7 @@ index 5787e8cd5..d232bf828 100644
CUDA_CHECK(cudaMemGetInfo(free, total)); CUDA_CHECK(cudaMemGetInfo(free, total));
} }
@@ -3504,6 +3546,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend @@ -3496,6 +3538,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
return GGML_BACKEND_DEVICE_TYPE_GPU; return GGML_BACKEND_DEVICE_TYPE_GPU;
} }
...@@ -139,7 +139,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -139,7 +139,7 @@ index 5787e8cd5..d232bf828 100644
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
@@ -3517,6 +3560,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back @@ -3509,6 +3552,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
// If you need the memory data, call ggml_backend_dev_memory() explicitly. // If you need the memory data, call ggml_backend_dev_memory() explicitly.
props->memory_total = props->memory_free = 0; props->memory_total = props->memory_free = 0;
...@@ -159,7 +159,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -159,7 +159,7 @@ index 5787e8cd5..d232bf828 100644
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr; bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
#ifdef GGML_CUDA_NO_PEER_COPY #ifdef GGML_CUDA_NO_PEER_COPY
bool events = false; bool events = false;
@@ -4079,6 +4135,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { @@ -4075,6 +4131,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
std::lock_guard<std::mutex> lock(mutex); std::lock_guard<std::mutex> lock(mutex);
if (!initialized) { if (!initialized) {
ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context; ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
...@@ -167,7 +167,7 @@ index 5787e8cd5..d232bf828 100644 ...@@ -167,7 +167,7 @@ index 5787e8cd5..d232bf828 100644
for (int i = 0; i < ggml_cuda_info().device_count; i++) { for (int i = 0; i < ggml_cuda_info().device_count; i++) {
ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context; ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
@@ -4094,6 +4151,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { @@ -4090,6 +4147,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID); snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
dev_ctx->pci_bus_id = pci_bus_id; dev_ctx->pci_bus_id = pci_bus_id;
...@@ -204,11 +204,11 @@ index 1f06be80e..2f9ef2dc0 100644 ...@@ -204,11 +204,11 @@ index 1f06be80e..2f9ef2dc0 100644
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index d0fb3bcca..b63edd0c1 100644 index e9201cdc6..44ae76d66 100644
--- a/ggml/src/ggml-impl.h --- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h
@@ -638,6 +638,14 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx @@ -677,6 +677,14 @@ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops); return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
} }
+// Management libraries for fetching more accurate free VRAM data +// Management libraries for fetching more accurate free VRAM data
...@@ -243,10 +243,10 @@ index 05ff6a5a6..032dee76d 100644 ...@@ -243,10 +243,10 @@ index 05ff6a5a6..032dee76d 100644
/* .async = */ true, /* .async = */ true,
/* .host_buffer = */ false, /* .host_buffer = */ false,
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index bd3ece516..7cfb14a54 100644 index 3a6bbe564..d2c278a35 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -231,6 +231,7 @@ class vk_memory_logger; @@ -229,6 +229,7 @@ class vk_memory_logger;
#endif #endif
class vk_perf_logger; class vk_perf_logger;
static void ggml_vk_destroy_buffer(vk_buffer& buf); static void ggml_vk_destroy_buffer(vk_buffer& buf);
...@@ -254,7 +254,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -254,7 +254,7 @@ index bd3ece516..7cfb14a54 100644
static constexpr uint32_t mul_mat_vec_max_cols = 8; static constexpr uint32_t mul_mat_vec_max_cols = 8;
static constexpr uint32_t p021_max_gqa_ratio = 8; static constexpr uint32_t p021_max_gqa_ratio = 8;
@@ -11585,6 +11586,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_ @@ -11813,6 +11814,29 @@ static void ggml_vk_get_device_description(int device, char * description, size_
snprintf(description, description_size, "%s", props.deviceName.data()); snprintf(description, description_size, "%s", props.deviceName.data());
} }
...@@ -284,7 +284,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -284,7 +284,7 @@ index bd3ece516..7cfb14a54 100644
// backend interface // backend interface
#define UNUSED GGML_UNUSED #define UNUSED GGML_UNUSED
@@ -12392,31 +12416,102 @@ void ggml_backend_vk_get_device_description(int device, char * description, size @@ -12761,31 +12785,102 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
ggml_vk_get_device_description(dev_idx, description, description_size); ggml_vk_get_device_description(dev_idx, description, description_size);
} }
...@@ -404,7 +404,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -404,7 +404,7 @@ index bd3ece516..7cfb14a54 100644
break; break;
} }
} }
@@ -12449,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { @@ -12818,8 +12913,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
} }
} }
...@@ -419,7 +419,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -419,7 +419,7 @@ index bd3ece516..7cfb14a54 100644
} }
vk::PhysicalDeviceProperties2 props = {}; vk::PhysicalDeviceProperties2 props = {};
@@ -12467,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { @@ -12836,19 +12936,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
char pci_bus_id[16] = {}; char pci_bus_id[16] = {};
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function); snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
...@@ -453,7 +453,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -453,7 +453,7 @@ index bd3ece516..7cfb14a54 100644
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) { static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
@@ -12491,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de @@ -12860,9 +12965,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
return ctx->description.c_str(); return ctx->description.c_str();
} }
...@@ -469,7 +469,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -469,7 +469,7 @@ index bd3ece516..7cfb14a54 100644
} }
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) { static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
@@ -12517,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml @@ -12886,8 +12996,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
props->name = ggml_backend_vk_device_get_name(dev); props->name = ggml_backend_vk_device_get_name(dev);
props->description = ggml_backend_vk_device_get_description(dev); props->description = ggml_backend_vk_device_get_description(dev);
...@@ -480,7 +480,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -480,7 +480,7 @@ index bd3ece516..7cfb14a54 100644
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total); ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = { props->caps = {
/* .async = */ false, /* .async = */ false,
@@ -12526,6 +12637,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml @@ -12895,6 +13006,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
/* .buffer_from_host_ptr = */ false, /* .buffer_from_host_ptr = */ false,
/* .events = */ false, /* .events = */ false,
}; };
...@@ -494,7 +494,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -494,7 +494,7 @@ index bd3ece516..7cfb14a54 100644
} }
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) { static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
@@ -12954,6 +13072,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13365,6 +13483,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
static std::mutex mutex; static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex); std::lock_guard<std::mutex> lock(mutex);
if (!initialized) { if (!initialized) {
...@@ -503,7 +503,7 @@ index bd3ece516..7cfb14a54 100644 ...@@ -503,7 +503,7 @@ index bd3ece516..7cfb14a54 100644
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) { for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context; ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
char desc[256]; char desc[256];
@@ -12962,12 +13082,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13373,12 +13493,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
ctx->name = GGML_VK_NAME + std::to_string(i); ctx->name = GGML_VK_NAME + std::to_string(i);
ctx->description = desc; ctx->description = desc;
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu; ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
......
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Julius Tischbein <ju.tischbein@gmail.com>
Date: Wed, 15 Oct 2025 13:54:15 +0200
Subject: [PATCH] CUDA: Changing the CUDA scheduling strategy to spin (#16585)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* CUDA set scheduling strategy to spinning for cc121
* Using prop.major and prop.minor, include HIP and MUSA
* Exclude HIP and MUSA
* Remove trailing whitespace
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
* Remove empty line
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
---------
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
---
ggml/src/ggml-cuda/ggml-cuda.cu | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index b075a18be..d62f412d6 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
} else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") {
turing_devices_without_mma.push_back({ id, device_name });
}
+
+ // Temporary performance fix:
+ // Setting device scheduling strategy for iGPUs with cc121 to "spinning" to avoid delays in cuda synchronize calls.
+ // TODO: Check for future drivers the default scheduling strategy and
+ // remove this call again when cudaDeviceScheduleSpin is default.
+ if (prop.major == 12 && prop.minor == 1) {
+ CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
+ }
+
#endif // defined(GGML_USE_HIP)
}
...@@ -8,10 +8,10 @@ Subject: [PATCH] report LoadLibrary failures ...@@ -8,10 +8,10 @@ Subject: [PATCH] report LoadLibrary failures
1 file changed, 12 insertions(+) 1 file changed, 12 insertions(+)
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index f794d9cfa..3a855ab2e 100644 index a55d9b280..ec6f7f1e9 100644
--- a/ggml/src/ggml-backend-reg.cpp --- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp
@@ -118,6 +118,18 @@ static dl_handle * dl_load_library(const fs::path & path) { @@ -122,6 +122,18 @@ static dl_handle * dl_load_library(const fs::path & path) {
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
HMODULE handle = LoadLibraryW(path.wstring().c_str()); HMODULE handle = LoadLibraryW(path.wstring().c_str());
......
...@@ -13,7 +13,7 @@ interleaved version used for qwen3vl ...@@ -13,7 +13,7 @@ interleaved version used for qwen3vl
4 files changed, 11 insertions(+), 30 deletions(-) 4 files changed, 11 insertions(+), 30 deletions(-)
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index 31478dd8e..4d1ed207e 100644 index 902fdad69..70955347d 100644
--- a/ggml/src/ggml-cpu/ops.cpp --- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp
@@ -5509,15 +5509,12 @@ static void ggml_mrope_cache_init( @@ -5509,15 +5509,12 @@ static void ggml_mrope_cache_init(
...@@ -62,10 +62,10 @@ index d058504cd..287fe9d2c 100644 ...@@ -62,10 +62,10 @@ index d058504cd..287fe9d2c 100644
const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f; const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f;
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index 375a0c7fd..9866c96b4 100644 index 50b8071de..65a3183c8 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal --- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -3858,15 +3858,11 @@ kernel void kernel_rope_multi( @@ -3888,15 +3888,11 @@ kernel void kernel_rope_multi(
const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2 const int sec_w012 = args.sect_0 + args.sect_1 + args.sect_2; // end of section 2
const int sector = ic % sect_dims; const int sector = ic % sect_dims;
......
...@@ -12,7 +12,7 @@ Subject: [PATCH] Add memory detection using DXGI + PDH ...@@ -12,7 +12,7 @@ Subject: [PATCH] Add memory detection using DXGI + PDH
create mode 100644 ggml/src/mem_dxgi_pdh.cpp create mode 100644 ggml/src/mem_dxgi_pdh.cpp
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index aefe43bdd..21fe4640c 100644 index 03f359ae9..4b3e5efb5 100644
--- a/ggml/src/CMakeLists.txt --- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt
@@ -211,6 +211,7 @@ add_library(ggml-base @@ -211,6 +211,7 @@ add_library(ggml-base
...@@ -24,10 +24,10 @@ index aefe43bdd..21fe4640c 100644 ...@@ -24,10 +24,10 @@ index aefe43bdd..21fe4640c 100644
target_include_directories(ggml-base PRIVATE .) target_include_directories(ggml-base PRIVATE .)
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index b63edd0c1..81cad8cf3 100644 index 44ae76d66..639d551a2 100644
--- a/ggml/src/ggml-impl.h --- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h
@@ -645,6 +645,9 @@ GGML_API void ggml_nvml_release(); @@ -684,6 +684,9 @@ GGML_API void ggml_nvml_release();
GGML_API int ggml_hip_mgmt_init(); GGML_API int ggml_hip_mgmt_init();
GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total); GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
GGML_API void ggml_hip_mgmt_release(); GGML_API void ggml_hip_mgmt_release();
...@@ -38,7 +38,7 @@ index b63edd0c1..81cad8cf3 100644 ...@@ -38,7 +38,7 @@ index b63edd0c1..81cad8cf3 100644
#ifdef __cplusplus #ifdef __cplusplus
} }
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 7cfb14a54..a1c46d0b3 100644 index d2c278a35..221e29509 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher(); @@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();
...@@ -49,7 +49,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -49,7 +49,7 @@ index 7cfb14a54..a1c46d0b3 100644
typedef struct VkPhysicalDeviceShaderBfloat16FeaturesKHR { typedef struct VkPhysicalDeviceShaderBfloat16FeaturesKHR {
VkStructureType sType; VkStructureType sType;
@@ -12433,6 +12434,7 @@ struct ggml_backend_vk_device_context { @@ -12802,6 +12803,7 @@ struct ggml_backend_vk_device_context {
std::string pci_id; std::string pci_id;
std::string id; std::string id;
std::string uuid; std::string uuid;
...@@ -57,7 +57,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -57,7 +57,7 @@ index 7cfb14a54..a1c46d0b3 100644
int major; int major;
int minor; int minor;
int driver_major; int driver_major;
@@ -12448,8 +12450,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size @@ -12817,8 +12819,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties(); vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
vk::PhysicalDeviceProperties2 props2; vk::PhysicalDeviceProperties2 props2;
vkdev.getProperties2(&props2); vkdev.getProperties2(&props2);
...@@ -81,7 +81,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -81,7 +81,7 @@ index 7cfb14a54..a1c46d0b3 100644
{ {
// Use vendor specific management libraries for best VRAM reporting if available // Use vendor specific management libraries for best VRAM reporting if available
switch (props2.properties.vendorID) { switch (props2.properties.vendorID) {
@@ -12477,8 +12493,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size @@ -12846,8 +12862,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
break; break;
} }
} }
...@@ -91,7 +91,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -91,7 +91,7 @@ index 7cfb14a54..a1c46d0b3 100644
*total = 0; *total = 0;
*free = 0; *free = 0;
vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props; vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
@@ -13089,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13500,7 +13516,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
/* .reg = */ reg, /* .reg = */ reg,
/* .context = */ ctx, /* .context = */ ctx,
}); });
...@@ -99,7 +99,7 @@ index 7cfb14a54..a1c46d0b3 100644 ...@@ -99,7 +99,7 @@ index 7cfb14a54..a1c46d0b3 100644
// Gather additional information about the device // Gather additional information about the device
int dev_idx = vk_instance.device_indices[i]; int dev_idx = vk_instance.device_indices[i];
vk::PhysicalDeviceProperties props1; vk::PhysicalDeviceProperties props1;
@@ -13112,6 +13127,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, @@ -13523,6 +13538,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
} }
} }
ctx->uuid = oss.str(); ctx->uuid = oss.str();
......
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// backend API
// Public entry points of the Hexagon backend, exported with C linkage.

// NOTE(review): presumably creates and returns a new Hexagon backend instance; takes no arguments.
// Failure behavior (e.g. a NULL return) is not visible from this header - confirm in the implementation.
GGML_BACKEND_API ggml_backend_t ggml_backend_hexagon_init(void);
// NOTE(review): presumably reports whether `backend` is a Hexagon backend - confirm in the implementation.
GGML_BACKEND_API bool ggml_backend_is_hexagon(ggml_backend_t backend);
// Returns the backend registration object for Hexagon. The backend registry passes this
// to register_backend() when GGML_USE_HEXAGON is defined.
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
#ifdef __cplusplus
}
#endif
...@@ -21,8 +21,7 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c ...@@ -21,8 +21,7 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const c
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total); GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total);
GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir, GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir,
size_t n_threads, size_t n_devices, size_t n_threads, size_t n_devices, ggml_backend_dev_t * devices);
ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint);
......
...@@ -577,6 +577,10 @@ extern "C" { ...@@ -577,6 +577,10 @@ extern "C" {
GGML_UNARY_OP_EXP, GGML_UNARY_OP_EXP,
GGML_UNARY_OP_GELU_ERF, GGML_UNARY_OP_GELU_ERF,
GGML_UNARY_OP_XIELU, GGML_UNARY_OP_XIELU,
GGML_UNARY_OP_FLOOR,
GGML_UNARY_OP_CEIL,
GGML_UNARY_OP_ROUND,
GGML_UNARY_OP_TRUNC,
GGML_UNARY_OP_COUNT, GGML_UNARY_OP_COUNT,
}; };
...@@ -1151,6 +1155,46 @@ extern "C" { ...@@ -1151,6 +1155,46 @@ extern "C" {
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a); struct ggml_tensor * a);
// Floor of tensor `a`.
// The CPU backend's GGML_UNARY_OP_FLOOR path evaluates floorf() (round towards negative infinity),
// e.g. floor(3.7) = 3.0, floor(-2.1) = -3.0.
// NOTE(review): presumably element-wise and returning a new tensor - confirm in the implementation.
GGML_API struct ggml_tensor * ggml_floor(
struct ggml_context * ctx,
struct ggml_tensor * a);
// NOTE(review): by its name, the in-place counterpart of ggml_floor - confirm that the result reuses `a`.
GGML_API struct ggml_tensor * ggml_floor_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
// Ceiling of tensor `a`.
// The CPU backend's GGML_UNARY_OP_CEIL path evaluates ceilf() (round towards positive infinity),
// e.g. ceil(3.2) = 4.0, ceil(-2.9) = -2.0.
// NOTE(review): presumably element-wise and returning a new tensor - confirm in the implementation.
GGML_API struct ggml_tensor * ggml_ceil(
struct ggml_context * ctx,
struct ggml_tensor * a);
// NOTE(review): by its name, the in-place counterpart of ggml_ceil - confirm that the result reuses `a`.
GGML_API struct ggml_tensor * ggml_ceil_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
// Rounding of tensor `a` to the nearest integer value.
// The CPU backend's GGML_UNARY_OP_ROUND path evaluates roundf(), which rounds halfway cases
// away from zero, e.g. round(2.5) = 3.0, round(-2.5) = -3.0.
// NOTE(review): presumably element-wise and returning a new tensor; other backends may need
// checking for the same tie-breaking rule.
GGML_API struct ggml_tensor * ggml_round(
struct ggml_context * ctx,
struct ggml_tensor * a);
// NOTE(review): by its name, the in-place counterpart of ggml_round - confirm that the result reuses `a`.
GGML_API struct ggml_tensor * ggml_round_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
/**
* Truncates the fractional part of each element in the tensor (towards zero).
* For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
* Similar to std::trunc in C/C++.
*/
GGML_API struct ggml_tensor * ggml_trunc(
struct ggml_context * ctx,
struct ggml_tensor * a);
// NOTE(review): by its name, the in-place counterpart of ggml_trunc - confirm that the result reuses `a`.
GGML_API struct ggml_tensor * ggml_trunc_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
// xIELU activation function // xIELU activation function
// x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0) // x = x * (c_a(alpha_n) + c_b(alpha_p, beta) * sigmoid(beta * x)) + eps * (x > 0)
// where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions // where c_a = softplus and c_b(a, b) = softplus(a) + b are constraining functions
......
...@@ -310,6 +310,10 @@ function(ggml_add_cpu_backend_variant tag_name) ...@@ -310,6 +310,10 @@ function(ggml_add_cpu_backend_variant tag_name)
foreach (feat ${ARGN}) foreach (feat ${ARGN})
set(GGML_INTERNAL_${feat} ON) set(GGML_INTERNAL_${feat} ON)
endforeach() endforeach()
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
foreach (feat ${ARGN})
set(GGML_INTERNAL_${feat} ON)
endforeach()
endif() endif()
ggml_add_cpu_backend_variant_impl(${tag_name}) ggml_add_cpu_backend_variant_impl(${tag_name})
...@@ -372,6 +376,14 @@ if (GGML_CPU_ALL_VARIANTS) ...@@ -372,6 +376,14 @@ if (GGML_CPU_ALL_VARIANTS)
else() else()
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}") message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
endif() endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
ggml_add_cpu_backend_variant(s390x_z15 Z15 VXE)
# ggml_add_cpu_backend_variant(s390x_z16 Z16 VXE)
# ggml_add_cpu_backend_variant(s390x_z17 Z17 VXE)
else()
message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
endif()
else() else()
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}") message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
endif() endif()
...@@ -391,6 +403,7 @@ ggml_add_backend(Vulkan) ...@@ -391,6 +403,7 @@ ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU) ggml_add_backend(WebGPU)
ggml_add_backend(zDNN) ggml_add_backend(zDNN)
ggml_add_backend(OpenCL) ggml_add_backend(OpenCL)
ggml_add_backend(Hexagon)
foreach (target ggml-base ggml) foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>) target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
......
...@@ -603,6 +603,26 @@ static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor ...@@ -603,6 +603,26 @@ static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor
return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated; return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
} }
// Give back the unused tail of a parent's allocation when the tensor that
// takes over that allocation needs fewer bytes than the parent did.
//   galloc - graph allocator holding the hash nodes, buffer types and dynamic allocators
//   node   - tensor that now occupies the start of the parent's region
//   parent - tensor whose region is being reused
static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_tensor * node, struct ggml_tensor * parent) {
    struct hash_node * node_hn   = ggml_gallocr_hash_get(galloc, node);
    struct hash_node * parent_hn = ggml_gallocr_hash_get(galloc, parent);

    const size_t parent_bytes = ggml_backend_buft_get_alloc_size(galloc->bufts[parent_hn->buffer_id], parent);
    const size_t node_bytes   = ggml_backend_buft_get_alloc_size(galloc->bufts[node_hn->buffer_id], node);

    // the node has to fit inside the region it inherits
    GGML_ASSERT(parent_bytes >= node_bytes);

    if (parent_bytes <= node_bytes) {
        // no surplus to release
        return;
    }

    // the surplus begins right after the bytes the node occupies
    struct buffer_address tail_addr = parent_hn->addr;
    tail_addr.offset += node_bytes;

    const size_t tail_bytes = parent_bytes - node_bytes;
    AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", tail_bytes, parent->name, node->name);
    ggml_dyn_tallocr_free_tensor(galloc->buf_tallocs[parent_hn->buffer_id], tail_addr, tail_bytes, parent);
}
static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) { static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
GGML_ASSERT(buffer_id >= 0); GGML_ASSERT(buffer_id >= 0);
struct hash_node * hn = ggml_gallocr_hash_get(galloc, node); struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
...@@ -648,6 +668,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor ...@@ -648,6 +668,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
hn->addr = p_hn->addr; hn->addr = p_hn->addr;
p_hn->allocated = false; // avoid freeing the parent p_hn->allocated = false; // avoid freeing the parent
view_src_hn->allocated = false; view_src_hn->allocated = false;
ggml_gallocr_free_extra_space(galloc, node, view_src);
return; return;
} }
} else { } else {
...@@ -655,6 +676,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor ...@@ -655,6 +676,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
hn->buffer_id = p_hn->buffer_id; hn->buffer_id = p_hn->buffer_id;
hn->addr = p_hn->addr; hn->addr = p_hn->addr;
p_hn->allocated = false; // avoid freeing the parent p_hn->allocated = false; // avoid freeing the parent
ggml_gallocr_free_extra_space(galloc, node, parent);
return; return;
} }
} }
......
...@@ -57,6 +57,10 @@ ...@@ -57,6 +57,10 @@
#include "ggml-opencl.h" #include "ggml-opencl.h"
#endif #endif
#ifdef GGML_USE_HEXAGON
#include "ggml-hexagon.h"
#endif
#ifdef GGML_USE_BLAS #ifdef GGML_USE_BLAS
#include "ggml-blas.h" #include "ggml-blas.h"
#endif #endif
...@@ -211,6 +215,9 @@ struct ggml_backend_registry { ...@@ -211,6 +215,9 @@ struct ggml_backend_registry {
#ifdef GGML_USE_OPENCL #ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg()); register_backend(ggml_backend_opencl_reg());
#endif #endif
#ifdef GGML_USE_HEXAGON
register_backend(ggml_backend_hexagon_reg());
#endif
#ifdef GGML_USE_CANN #ifdef GGML_USE_CANN
register_backend(ggml_backend_cann_reg()); register_backend(ggml_backend_cann_reg());
#endif #endif
...@@ -615,6 +622,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ...@@ -615,6 +622,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("sycl", silent, dir_path); ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path); ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path); ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path); ggml_backend_load_best("musa", silent, dir_path);
ggml_backend_load_best("cpu", silent, dir_path); ggml_backend_load_best("cpu", silent, dir_path);
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
......
...@@ -466,29 +466,45 @@ function(ggml_add_cpu_backend_variant_impl tag_name) ...@@ -466,29 +466,45 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d) list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x") elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
message(STATUS "s390x detected") message(STATUS "s390x detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c) list(APPEND GGML_CPU_SOURCES
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS) ggml-cpu/arch/s390/quants.c)
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
# for native compilation
# TODO: Separation to determine activation of VX/VXE/VXE2 if (GGML_NATIVE)
if (${S390X_M} MATCHES "8561|8562") # check machine level to determine target
message(STATUS "z15 target") file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
list(APPEND ARCH_FLAGS -march=z15) string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
elseif (${S390X_M} MATCHES "3931")
message(STATUS "z16 target") # TODO: Separation to determine activation of VX/VXE/VXE2
list(APPEND ARCH_FLAGS -march=z16) if (${S390X_M} MATCHES "8561|8562")
elseif (${S390X_M} MATCHES "9175|9176") message(STATUS "z15 target")
# NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version. list(APPEND ARCH_FLAGS -march=z15)
# binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15. elseif (${S390X_M} MATCHES "3931")
message(STATUS "z17 target") message(STATUS "z16 target")
list(APPEND ARCH_FLAGS -march=arch15) list(APPEND ARCH_FLAGS -march=z16)
else() elseif (${S390X_M} MATCHES "9175|9176")
message(STATUS "Unknown target") # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.") # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
list(APPEND ARCH_FLAGS -march=native -mtune=native) message(STATUS "z17 target")
list(APPEND ARCH_FLAGS -march=arch15)
else()
message(STATUS "Unknown target")
message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
list(APPEND ARCH_FLAGS -march=native -mtune=native)
endif()
# for cross-compilation
elseif(GGML_CPU_ALL_VARIANTS)
# range through IBM z15 to z17
# NOTE: update when a new hardware level is released
foreach (ZHW RANGE 15 17)
if(DEFINED GGML_INTERNAL_Z${ZHW})
message(STATUS "z${ZHW} cross-compile target")
list(APPEND ARCH_FLAGS -march=z${ZHW})
endif()
endforeach()
endif() endif()
if (GGML_VXE) if (GGML_VXE OR GGML_INTERNAL_VXE)
message(STATUS "VX/VXE/VXE2 enabled") message(STATUS "VX/VXE/VXE2 enabled")
list(APPEND ARCH_FLAGS -mvx -mzvector) list(APPEND ARCH_FLAGS -mvx -mzvector)
list(APPEND ARCH_DEFINITIONS GGML_VXE) list(APPEND ARCH_DEFINITIONS GGML_VXE)
......
...@@ -2186,6 +2186,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { ...@@ -2186,6 +2186,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_UNARY_OP_HARDSWISH: case GGML_UNARY_OP_HARDSWISH:
case GGML_UNARY_OP_HARDSIGMOID: case GGML_UNARY_OP_HARDSIGMOID:
case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_EXP:
case GGML_UNARY_OP_FLOOR:
case GGML_UNARY_OP_CEIL:
case GGML_UNARY_OP_ROUND:
case GGML_UNARY_OP_TRUNC:
{ {
n_tasks = 1; n_tasks = 1;
} break; } break;
...@@ -3569,13 +3573,17 @@ void ggml_cpu_init(void) { ...@@ -3569,13 +3573,17 @@ void ggml_cpu_init(void) {
#ifdef GGML_USE_OPENMP #ifdef GGML_USE_OPENMP
//if (!getenv("OMP_WAIT_POLICY")) { //if (!getenv("OMP_WAIT_POLICY")) {
// // set the wait policy to active, so that OpenMP threads don't sleep // // set the wait policy to active, so that OpenMP threads don't sleep
// putenv("OMP_WAIT_POLICY=active"); // setenv("OMP_WAIT_POLICY", "active", 0)
//} //}
if (!getenv("KMP_BLOCKTIME")) { if (!getenv("KMP_BLOCKTIME")) {
// set the time to wait before sleeping a thread // set the time to wait before sleeping a thread
// this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases // this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases
putenv("KMP_BLOCKTIME=200"); // 200ms #ifdef _WIN32
_putenv_s("KMP_BLOCKTIME", "200"); // 200ms
#else
setenv("KMP_BLOCKTIME", "200", 0); // 200ms
#endif
} }
#endif #endif
} }
......
...@@ -9033,6 +9033,22 @@ void ggml_compute_forward_unary( ...@@ -9033,6 +9033,22 @@ void ggml_compute_forward_unary(
{ {
ggml_compute_forward_exp(params, dst); ggml_compute_forward_exp(params, dst);
} break; } break;
case GGML_UNARY_OP_FLOOR:
{
ggml_compute_forward_floor(params, dst);
} break;
case GGML_UNARY_OP_CEIL:
{
ggml_compute_forward_ceil(params, dst);
} break;
case GGML_UNARY_OP_ROUND:
{
ggml_compute_forward_round(params, dst);
} break;
case GGML_UNARY_OP_TRUNC:
{
ggml_compute_forward_trunc(params, dst);
} break;
case GGML_UNARY_OP_XIELU: case GGML_UNARY_OP_XIELU:
{ {
ggml_compute_forward_xielu(params, dst); ggml_compute_forward_xielu(params, dst);
......
...@@ -73,6 +73,22 @@ static inline float op_log(float x) { ...@@ -73,6 +73,22 @@ static inline float op_log(float x) {
return logf(x); return logf(x);
} }
// Largest integral value not greater than x; wraps floorf so it can be passed
// as a float(*)(float) template argument.
static inline float op_floor(float x) { return floorf(x); }
// Smallest integral value not less than x; wraps ceilf so it can be passed
// as a float(*)(float) template argument.
static inline float op_ceil(float x) { return ceilf(x); }
// Nearest integral value to x, with halfway cases rounded away from zero
// (roundf semantics); wrapped so it can be passed as a float(*)(float) template argument.
static inline float op_round(float x) { return roundf(x); }
// Drops the fractional part of x (rounds toward zero); wraps truncf so it can
// be passed as a float(*)(float) template argument.
static inline float op_trunc(float x) { return truncf(x); }
template <float (*op)(float), typename src0_t, typename dst_t> template <float (*op)(float), typename src0_t, typename dst_t>
static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) { static inline void vec_unary_op(int64_t n, dst_t * y, const src0_t * x) {
constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32; constexpr auto src0_to_f32 = type_conversion_table<src0_t>::to_f32;
...@@ -274,6 +290,22 @@ void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor * ...@@ -274,6 +290,22 @@ void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor *
unary_op<op_log>(params, dst); unary_op<op_log>(params, dst);
} }
// Forward pass for the floor unary op: forwards to the shared unary_op helper
// instantiated with op_floor (floorf).
// NOTE(review): presumably applied element-wise to dst's source tensor - confirm against unary_op.
void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_floor>(params, dst);
}
// Forward pass for the ceil unary op: forwards to the shared unary_op helper
// instantiated with op_ceil (ceilf).
// NOTE(review): presumably applied element-wise to dst's source tensor - confirm against unary_op.
void ggml_compute_forward_ceil(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_ceil>(params, dst);
}
// Forward pass for the round unary op: forwards to the shared unary_op helper
// instantiated with op_round (roundf, halfway cases away from zero).
// NOTE(review): presumably applied element-wise to dst's source tensor - confirm against unary_op.
void ggml_compute_forward_round(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_round>(params, dst);
}
// Forward pass for the trunc unary op: forwards to the shared unary_op helper
// instantiated with op_trunc (truncf, rounds toward zero).
// NOTE(review): presumably applied element-wise to dst's source tensor - confirm against unary_op.
void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) {
unary_op<op_trunc>(params, dst);
}
void ggml_compute_forward_xielu(const ggml_compute_params * params, ggml_tensor * dst) { void ggml_compute_forward_xielu(const ggml_compute_params * params, ggml_tensor * dst) {
const float alpha_n = ggml_get_op_params_f32(dst, 1); const float alpha_n = ggml_get_op_params_f32(dst, 1);
const float alpha_p = ggml_get_op_params_f32(dst, 2); const float alpha_p = ggml_get_op_params_f32(dst, 2);
......
...@@ -22,6 +22,10 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct ...@@ -22,6 +22,10 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct
void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_floor(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_xielu(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_xielu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
#ifdef __cplusplus #ifdef __cplusplus
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment