Unverified commit e9e5f61c authored by Jeffrey Morgan, committed by GitHub

llama: update to commit 2016f07b (#10352)

parent 11dde418
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Wed, 9 Oct 2024 17:26:23 -0700
Subject: [PATCH] conditional-fattn
---
ggml/src/ggml-cuda/ggml-cuda.cu | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 59a49560..b70c6a32 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2338,9 +2338,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_ARGSORT:
ggml_cuda_op_argsort(ctx, dst);
break;
+#if !defined(GGML_DISABLE_FLASH_ATTN)
case GGML_OP_FLASH_ATTN_EXT:
ggml_cuda_flash_attn_ext(ctx, dst);
break;
+#endif
case GGML_OP_CROSS_ENTROPY_LOSS:
ggml_cuda_cross_entropy_loss(ctx, dst);
break;
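The guard above is an ordinary compile-time switch: when GGML_DISABLE_FLASH_ATTN is defined at build time (e.g. via -DGGML_DISABLE_FLASH_ATTN in the CUDA compile flags), the flash-attention case drops out of the dispatch switch and the kernels behind it are never referenced. A minimal sketch of the same pattern in isolation, using a hypothetical op table rather than the real dispatcher:

    #include <stdio.h>

    /* Hypothetical stand-in for the switch in ggml_cuda_compute_forward. */
    static int dispatch_op(int op) {
        switch (op) {
    #if !defined(GGML_DISABLE_FLASH_ATTN)
        case 1: /* stands in for GGML_OP_FLASH_ATTN_EXT */
            printf("flash attention path\n");
            return 1;
    #endif
        default:
            return 0; /* op not handled in this build */
        }
    }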
...
@@ -147,10 +147,10 @@ index 410a3720..3eca1cf8 100644
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index b70c6a32..67208cba 100644
+index 31750b6f..0fef9522 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
-@@ -2245,6 +2245,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
+@@ -2246,6 +2246,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_PAD:
ggml_cuda_op_pad(ctx, dst);
break;
...
@@ -160,7 +160,7 @@ index b70c6a32..67208cba 100644
case GGML_OP_ARANGE:
ggml_cuda_op_arange(ctx, dst);
break;
-@@ -3223,6 +3226,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
+@@ -3222,6 +3225,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_UPSCALE:
return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
case GGML_OP_PAD:
...
@@ -233,7 +233,7 @@ index 8fd386b0..e2ededc3 100644
void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 310afe8a..b121ab9e 100644
+index 12886cd3..b2e95a66 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -341,6 +341,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
...
@@ -244,7 +244,7 @@ index 310afe8a..b121ab9e 100644
GGML_METAL_KERNEL_TYPE_ARANGE_F32,
GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32,
GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC,
-@@ -998,6 +999,7 @@ @implementation GGMLMetalClass
+@@ -1020,6 +1021,7 @@ @implementation GGMLMetalClass
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_UPSCALE_F32, upscale_f32, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_F32, pad_f32, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_REFLECT_1D_F32, pad_reflect_1d_f32, true);
...
@@ -252,7 +252,7 @@ index 310afe8a..b121ab9e 100644
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32, timestep_embedding_f32, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARANGE_F32, arange_f32, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC, argsort_f32_i32_asc, true);
-@@ -1339,6 +1341,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
+@@ -1384,6 +1386,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
case GGML_OP_POOL_2D:
case GGML_OP_PAD:
case GGML_OP_PAD_REFLECT_1D:
...
@@ -260,7 +260,7 @@ index 310afe8a..b121ab9e 100644
case GGML_OP_TIMESTEP_EMBEDDING:
case GGML_OP_ARGSORT:
case GGML_OP_LEAKY_RELU:
-@@ -3669,6 +3672,36 @@ static void ggml_metal_encode_node(
+@@ -3731,6 +3734,36 @@ static void ggml_metal_encode_node(
const int nth = MIN(1024, ne0);
...
@@ -298,10 +298,10 @@ index 310afe8a..b121ab9e 100644
} break;
case GGML_OP_ARANGE:
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
-index b08666e2..e3185e5b 100644
+index 8d6e99e6..71f0f97f 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
-@@ -2968,6 +2968,51 @@ kernel void kernel_pad_reflect_1d_f32(
+@@ -2975,6 +2975,51 @@ kernel void kernel_pad_reflect_1d_f32(
}
}
...
@@ -12,7 +12,7 @@ regex
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index 0125ee53..d74919d2 100644
+index a35b498c..032019c9 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -296,7 +296,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
...
@@ -22,10 +22,10 @@ multiple batches of processing until everything is complete.
4 files changed, 51 insertions(+), 106 deletions(-)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index afe6f552..d6e7b3af 100644
+index 0343ba8a..4b3e6a83 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
-@@ -590,13 +590,12 @@ llm_graph_result_ptr llama_context::build_kv_self_shift(
+@@ -594,13 +594,12 @@ llm_graph_result_ptr llama_context::build_kv_self_shift(
llm_graph_result_ptr llama_context::build_kv_self_defrag(
ggml_context * ctx0,
...
@@ -41,7 +41,7 @@ index afe6f552..d6e7b3af 100644
#if 0
// CPU defrag
//
-@@ -668,32 +667,20 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
+@@ -672,32 +671,20 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
ggml_backend_tensor_set(v_l[il], buf_v.data(), 0, buf_v.size());
}
#else
...
@@ -79,7 +79,7 @@ index afe6f552..d6e7b3af 100644
ggml_tensor * view_v_src;
ggml_tensor * view_v_dst;
-@@ -701,34 +688,30 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
+@@ -705,34 +692,30 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
if (cparams.flash_attn) {
// NOTE: the V cache is not transposed when using flash attention
view_v_src = ggml_view_2d(ctx0, kv_self->v_l[il],
...
@@ -122,7 +122,7 @@ index afe6f552..d6e7b3af 100644
#endif
return res;
-@@ -737,8 +720,6 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
+@@ -741,8 +724,6 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
void llama_context::kv_self_update() {
auto & kv = kv_self;
...
@@ -131,7 +131,7 @@ index afe6f552..d6e7b3af 100644
if (kv->has_shift) {
if (!kv->get_can_shift()) {
GGML_ABORT("The current context does not support K-shift");
-@@ -759,8 +740,6 @@ void llama_context::kv_self_update() {
+@@ -763,8 +744,6 @@ void llama_context::kv_self_update() {
res->set_inputs(nullptr);
graph_compute(gf, false);
...
@@ -140,7 +140,7 @@ index afe6f552..d6e7b3af 100644
}
{
-@@ -775,49 +754,28 @@ void llama_context::kv_self_update() {
+@@ -779,49 +758,28 @@ void llama_context::kv_self_update() {
// defragment the KV cache if needed
if (kv->do_defrag) {
LLAMA_LOG_DEBUG("%s: defragmenting KV cache\n", __func__);
...
@@ -202,7 +202,7 @@ index afe6f552..d6e7b3af 100644
}
enum llama_pooling_type llama_context::pooling_type() const {
-@@ -1301,9 +1259,12 @@ int llama_context::decode(llama_batch & inp_batch) {
+@@ -1305,9 +1263,12 @@ int llama_context::decode(llama_batch & inp_batch) {
// find KV slot
{
if (!kv_self->find_slot(ubatch)) {
...
@@ -241,7 +241,7 @@ index baa03276..a59ff8fd 100644
// TODO: read/write lora adapters and cvec
size_t state_write_data(llama_io_write_i & io);
diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp
-index 9310f262..5c941e7c 100644
+index 69f8d35a..35a750d3 100644
--- a/src/llama-kv-cache.cpp
+++ b/src/llama-kv-cache.cpp
@@ -781,17 +781,7 @@ bool llama_kv_cache_unified::defrag_prepare(int32_t n_max_nodes) {
...
@@ -53,7 +53,7 @@ index 381a9c7d..e45b453d 100644
}
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
-index d74919d2..c90f636c 100644
+index 032019c9..ba37df35 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1459,7 +1459,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
...
@@ -13,7 +13,7 @@ models not supported in llama.cpp
4 files changed, 24 insertions(+)
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
-index c1f78618..bdf3d898 100644
+index 0568565f..dd01df60 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -73,6 +73,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
...
@@ -24,7 +24,7 @@ index c1f78618..bdf3d898 100644
{ LLM_ARCH_UNKNOWN, "(unknown)" },
};
-@@ -1582,6 +1583,22 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
+@@ -1586,6 +1587,22 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
},
},
...
@@ -48,7 +48,7 @@ index c1f78618..bdf3d898 100644
LLM_ARCH_UNKNOWN,
{
diff --git a/src/llama-arch.h b/src/llama-arch.h
-index f987844d..ee081fbf 100644
+index 6a989034..b6227eeb 100644
--- a/src/llama-arch.h
+++ b/src/llama-arch.h
@@ -75,6 +75,7 @@ enum llm_arch {
...
@@ -60,10 +60,10 @@ index f987844d..ee081fbf 100644
LLM_ARCH_BAILINGMOE,
LLM_ARCH_UNKNOWN,
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
-index d5ad466e..cd1d239c 100644
+index d051696c..c8374159 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
-@@ -1423,6 +1423,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
+@@ -1425,6 +1425,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
default: type = LLM_TYPE_UNKNOWN;
}
} break;
...
@@ -71,7 +71,7 @@ index d5ad466e..cd1d239c 100644
default: throw std::runtime_error("unsupported model architecture");
}
-@@ -13652,6 +13653,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
+@@ -13704,6 +13705,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
case LLM_ARCH_CHAMELEON:
case LLM_ARCH_SOLAR:
case LLM_ARCH_BAILINGMOE:
...
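Taken together, the hunks above follow llama.cpp's standard recipe for registering an architecture: a new llm_arch enum value, an entry in the LLM_ARCH_NAMES map, a tensor-name table in LLM_TENSOR_NAMES, and hparams plus rope-type cases in llama-model.cpp. A condensed sketch of that shape (tables cut to one entry each; identifiers and the "solar" name string are assumptions for illustration, not copied from the diff):

    /* Condensed registration pattern; illustrative only. */
    enum llm_arch_sketch { LLM_ARCH_SOLAR_SKETCH, LLM_ARCH_UNKNOWN_SKETCH };

    static const struct { enum llm_arch_sketch arch; const char * name; } arch_names[] = {
        { LLM_ARCH_SOLAR_SKETCH,   "solar"     },  /* assumed name string */
        { LLM_ARCH_UNKNOWN_SKETCH, "(unknown)" },
    };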
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Tue, 8 Apr 2025 20:41:24 -0700
Subject: [PATCH] add op_neg
adds the neg operator to ggml
---
ggml/src/ggml-metal/ggml-metal.m | 15 +++++++++++++++
ggml/src/ggml-metal/ggml-metal.metal | 7 +++++++
2 files changed, 22 insertions(+)
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
index b121ab9e..fea50521 100644
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -461,6 +461,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
GGML_METAL_KERNEL_TYPE_SQRT,
GGML_METAL_KERNEL_TYPE_SIN,
GGML_METAL_KERNEL_TYPE_COS,
+ GGML_METAL_KERNEL_TYPE_NEG,
GGML_METAL_KERNEL_TYPE_SUM_ROWS,
GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32,
GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32,
@@ -1119,6 +1120,7 @@ @implementation GGMLMetalClass
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SQRT, sqrt, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SIN, sin, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_COS, cos, true);
+ GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NEG, neg, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGMAX, argmax, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32, pool_2d_avg_f32, true);
@@ -1280,6 +1282,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
case GGML_UNARY_OP_GELU_QUICK:
case GGML_UNARY_OP_SILU:
case GGML_UNARY_OP_ELU:
+ case GGML_UNARY_OP_NEG:
return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32;
default:
return false;
@@ -1966,6 +1969,18 @@ static void ggml_metal_encode_node(
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
} break;
+ case GGML_UNARY_OP_NEG:
+ {
+ id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_NEG].pipeline;
+
+ [encoder setComputePipelineState:pipeline];
+ [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
+ [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
+
+ const int64_t n = ggml_nelements(dst);
+
+ [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
+ } break;
default:
{
GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal
index e3185e5b..ede9d1e6 100644
--- a/ggml/src/ggml-metal/ggml-metal.metal
+++ b/ggml/src/ggml-metal/ggml-metal.metal
@@ -949,6 +949,13 @@ kernel void kernel_cos(
dst[tpig] = cos(src0[tpig]);
}
+kernel void kernel_neg(
+ device const float * src0,
+ device float * dst,
+ uint tpig[[thread_position_in_grid]]) {
+ dst[tpig] = -src0[tpig];
+}
+
kernel void kernel_sum_rows(
device const float * src0,
device float * dst,
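Once the kernel and dispatch entries above are in place, GGML_UNARY_OP_NEG is reachable through ggml's existing unary-op API. A rough usage sketch follows (CPU-side graph construction only; the exact compute entry points vary across ggml versions, so treat this as an assumption rather than the project's test code):

    #include "ggml.h"

    /* Build a tiny graph that negates a tensor; ggml_neg() lowers to
       GGML_UNARY_OP_NEG, which the patch above wires up for Metal. */
    int main(void) {
        struct ggml_init_params params = {
            /* .mem_size   = */ 16 * 1024 * 1024,
            /* .mem_buffer = */ NULL,
            /* .no_alloc   = */ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * y = ggml_neg(ctx, x);  /* y = -x */

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, y);
        /* ...schedule gf on a backend (Metal, CUDA, CPU) and read back y... */

        ggml_free(ctx);
        return 0;
    }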
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Tue, 8 Apr 2025 20:49:50 -0700
Subject: [PATCH] fix compiler error in clip.h
fixes an error that occurs in clip.h when compiling
using CGo
---
examples/llava/clip.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/examples/llava/clip.h b/examples/llava/clip.h
index cc133a58..5fc45d3e 100644
--- a/examples/llava/clip.h
+++ b/examples/llava/clip.h
@@ -30,12 +30,13 @@ struct clip_image_size {
int height;
};
+struct clip_image_f32;
struct clip_image_u8_batch;
struct clip_image_f32_batch;
struct clip_context_params {
bool use_gpu;
- ggml_log_level verbosity;
+ enum ggml_log_level verbosity;
};
// deprecated, use clip_init
@@ -84,7 +85,7 @@ CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
CLIP_API size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch); // equivalent to batch->size()
CLIP_API size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->nx
CLIP_API size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny
-CLIP_API clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
+CLIP_API struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
/**
* Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
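The root cause is a C/C++ difference: CGo compiles this header as C, where enum and struct type names are not usable without their keyword tag (and clip_image_f32 needs a forward declaration before appearing in a prototype). A small illustration of the rule, using a hypothetical subset of the real enum:

    #include <stdbool.h>

    /* Hypothetical subset of ggml_log_level, for illustration. */
    enum ggml_log_level { GGML_LOG_LEVEL_INFO = 2 };

    struct clip_context_params {
        bool use_gpu;
        /* In C++ "ggml_log_level verbosity;" compiles as-is; in C the
           "enum" keyword is mandatory, hence the fix above. */
        enum ggml_log_level verbosity;
    };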
...
@@ -7,6 +7,9 @@
extern "C" {
#endif
+#define RPC_PROTO_MAJOR_VERSION 1
+#define RPC_PROTO_MINOR_VERSION 0
+#define RPC_PROTO_PATCH_VERSION 0
#define GGML_RPC_MAX_SERVERS 16
// backend API
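The three macros give the RPC wire format an explicit version. As a sketch of how a peer might use them in a compatibility check (rpc_proto_compatible is a hypothetical helper, not part of the header):

    #include <stdbool.h>

    #define RPC_PROTO_MAJOR_VERSION 1
    #define RPC_PROTO_MINOR_VERSION 0

    /* Hypothetical handshake check: the same major version is required;
       an equal-or-older minor on the remote side is assumed acceptable. */
    static bool rpc_proto_compatible(int remote_major, int remote_minor) {
        return remote_major == RPC_PROTO_MAJOR_VERSION &&
               remote_minor <= RPC_PROTO_MINOR_VERSION;
    }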
...
@@ -425,6 +425,8 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
}
case GGML_OP_IM2COL_BACK:
return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
+case GGML_OP_GET_ROWS_BACK:
+return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
case GGML_OP_OUT_PROD:
return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
...
@@ -729,7 +729,13 @@ struct ggml_cuda_graph {
bool disable_due_to_failed_graph_capture = false;
int number_consecutive_updates = 0;
std::vector<ggml_graph_node_properties> ggml_graph_properties;
-std::vector<char **> updated_kernel_arg;
+bool use_cpy_indirection = false;
+std::vector<char *> cpy_dest_ptrs;
+char ** dest_ptrs_d;
+int dest_ptrs_size = 0;
+// Index to allow each cpy kernel to be aware of it's position within the graph
+// relative to other cpy nodes.
+int graph_cpynode_index = -1;
#endif
};
...
@@ -2,8 +2,10 @@
#define CUDA_CPY_BLOCK_SIZE 64
-void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1);
+void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1, bool disable_indirection = false);
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1);
+void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream);
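The new entry points support copy-destination indirection for CUDA graph reuse: rather than baking destination addresses into captured kernels, each cpy node reads its destination from a device-side pointer table that can be refreshed between replays. A simplified sketch of what ggml_cuda_cpy_dest_ptrs_copy plausibly does (an assumed shape inferred from the struct fields above, not the actual implementation):

    #include <cuda_runtime.h>

    /* Upload the per-copy destination pointers so a previously captured
       CUDA graph can be replayed with new destinations (no re-capture). */
    static void upload_dest_ptrs(char ** dest_ptrs_d, char ** host_dest_ptrs,
                                 int n, cudaStream_t stream) {
        cudaMemcpyAsync(dest_ptrs_d, host_dest_ptrs, n * sizeof(char *),
                        cudaMemcpyHostToDevice, stream);
    }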