Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
0796d79d
Commit
0796d79d
authored
Nov 18, 2025
by
Michael Yang
Committed by
Michael Yang
Nov 18, 2025
Browse files
cuda: skip large batches
CUDA panics on batches larger than 1024, so skip those and fall back to the CPU
parent
92981ae3
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
35 additions
and
7 deletions
+35
-7
llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
+1
-1
llama/patches/0029-vulkan-Call-ggml_vk_buffer_write_2d-from-ggml_vk_buf.patch
...ulkan-Call-ggml_vk_buffer_write_2d-from-ggml_vk_buf.patch
+1
-1
llama/patches/0030-Vulkan-MMQ-Integer-Dot-Refactor-and-K-Quant-support-.patch
...ulkan-MMQ-Integer-Dot-Refactor-and-K-Quant-support-.patch
+1
-1
llama/patches/0031-vulkan-Update-topk_moe-fusion-to-handle-gpt-s-late-s.patch
...ulkan-Update-topk_moe-fusion-to-handle-gpt-s-late-s.patch
+1
-1
llama/patches/0032-vulkan-Fuse-rope-set_rows-16769.patch
llama/patches/0032-vulkan-Fuse-rope-set_rows-16769.patch
+1
-1
llama/patches/0033-vulkan-Handle-argsort-with-a-large-number-of-rows-16.patch
...ulkan-Handle-argsort-with-a-large-number-of-rows-16.patch
+1
-1
llama/patches/0035-vulkan-Fix-crash-when-FP16-mul_mat-accumulation-is-n.patch
...ulkan-Fix-crash-when-FP16-mul_mat-accumulation-is-n.patch
+1
-1
llama/patches/0036-ggml-cuda-skip-large-batches.patch
llama/patches/0036-ggml-cuda-skip-large-batches.patch
+25
-0
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+3
-0
No files found.
llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
View file @
0796d79d
...
@@ -38,7 +38,7 @@ index 44ae76d66..639d551a2 100644
...
@@ -38,7 +38,7 @@ index 44ae76d66..639d551a2 100644
#ifdef __cplusplus
#ifdef __cplusplus
}
}
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index
d2c278a35..221e29509
100644
index
ca02ea079..c12b069e5
100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -73,6 +73,7 @@
DispatchLoaderDynamic & ggml_vk_default_dispatcher();
@@ -73,6 +73,7 @@
DispatchLoaderDynamic & ggml_vk_default_dispatcher();
...
...
llama/patches/0029-vulkan-Call-ggml_vk_buffer_write_2d-from-ggml_vk_buf.patch
View file @
0796d79d
...
@@ -11,7 +11,7 @@ vidmem optimization.
...
@@ -11,7 +11,7 @@ vidmem optimization.
1 file changed, 1 insertion(+), 4 deletions(-)
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index
221e29509..18b7cbccf
100644
index
c12b069e5..76c78c2ea
100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -5654,14 +5654,11 @@
static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& sr
@@ -5654,14 +5654,11 @@
static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& sr
...
...
llama/patches/0030-Vulkan-MMQ-Integer-Dot-Refactor-and-K-Quant-support-.patch
View file @
0796d79d
...
@@ -50,7 +50,7 @@ Subject: [PATCH] Vulkan MMQ Integer Dot Refactor and K-Quant support (#16536)
...
@@ -50,7 +50,7 @@ Subject: [PATCH] Vulkan MMQ Integer Dot Refactor and K-Quant support (#16536)
create mode 100644 ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl
create mode 100644 ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index
18b7cbccf..53b57c179
100644
index
76c78c2ea..7669ed206
100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -488,6 +488,7 @@
struct vk_device_struct {
@@ -488,6 +488,7 @@
struct vk_device_struct {
...
...
llama/patches/0031-vulkan-Update-topk_moe-fusion-to-handle-gpt-s-late-s.patch
View file @
0796d79d
...
@@ -58,7 +58,7 @@ index 639d551a2..e5c446d1d 100644
...
@@ -58,7 +58,7 @@ index 639d551a2..e5c446d1d 100644
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index
53b57c179..b2855b078
100644
index
7669ed206..63a762ec2
100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -387,12 +387,76 @@
static constexpr uint32_t num_argsort_pipelines = 11;
@@ -387,12 +387,76 @@
static constexpr uint32_t num_argsort_pipelines = 11;
...
...
llama/patches/0032-vulkan-Fuse-rope-set_rows-16769.patch
View file @
0796d79d
...
@@ -31,7 +31,7 @@ Add new backend tests.
...
@@ -31,7 +31,7 @@ Add new backend tests.
6 files changed, 371 insertions(+), 117 deletions(-)
6 files changed, 371 insertions(+), 117 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index
b2855b078..aaf4334b5
100644
index
63a762ec2..db92a7901
100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -458,6 +458,11 @@
static topk_moe_mode ggml_vk_num_additional_ops_to_topk_moe_mode(uint32_t num) {
@@ -458,6 +458,11 @@
static topk_moe_mode ggml_vk_num_additional_ops_to_topk_moe_mode(uint32_t num) {
...
...
llama/patches/0033-vulkan-Handle-argsort-with-a-large-number-of-rows-16.patch
View file @
0796d79d
...
@@ -9,7 +9,7 @@ Subject: [PATCH] vulkan: Handle argsort with a large number of rows (#16851)
...
@@ -9,7 +9,7 @@ Subject: [PATCH] vulkan: Handle argsort with a large number of rows (#16851)
2 files changed, 16 insertions(+), 4 deletions(-)
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index
aaf4334b5..3604ceb04
100644
index
db92a7901..e959674d1
100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1084,6 +1084,7 @@
struct vk_op_soft_max_push_constants {
@@ -1084,6 +1084,7 @@
struct vk_op_soft_max_push_constants {
...
...
llama/patches/0035-vulkan-Fix-crash-when-FP16-mul_mat-accumulation-is-n.patch
View file @
0796d79d
...
@@ -20,7 +20,7 @@ Subject: [PATCH] vulkan: Fix crash when FP16 mul_mat accumulation is not
...
@@ -20,7 +20,7 @@ Subject: [PATCH] vulkan: Fix crash when FP16 mul_mat accumulation is not
1 file changed, 13 insertions(+), 7 deletions(-)
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index
3604ceb04..80185d9f0
100644
index
e959674d1..903050b0b
100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -146,8 +146,13 @@
static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline);
@@ -146,8 +146,13 @@
static void ggml_vk_destroy_pipeline(vk::Device& device, vk_pipeline& pipeline);
...
...
llama/patches/0036-ggml-cuda-skip-large-batches.patch
0 → 100644
View file @
0796d79d
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Michael Yang <git@mxy.ng>
Date: Tue, 18 Nov 2025 11:13:04 -0800
Subject: [PATCH] ggml-cuda: skip large batches
CUDA panics on batches larger than 1024, so mark the operation as unsupported
in order to fall back to the CPU
---
ggml/src/ggml-cuda/ggml-cuda.cu | 3 +++
1 file changed, 3 insertions(+)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index f1a20e7fe..1a71e07c9 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3677,6 +3677,9 @@
static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) {
return false;
}
+ if (op->op == GGML_OP_MUL_MAT && b->ne[2] * b->ne[3] > 1024) {
+ return false;
+ }
#ifdef GGML_USE_MUSA
const int cc = ggml_cuda_info().devices[dev_ctx->device].cc;
if (b->ne[2]*b->ne[3] > 1 && !ggml_is_transposed(a) && !ggml_is_transposed(b)) {
ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
View file @
0796d79d
...
@@ -3677,6 +3677,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
...
@@ -3677,6 +3677,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
if
(
b
->
type
==
GGML_TYPE_F16
&&
a
->
type
!=
GGML_TYPE_F16
)
{
if
(
b
->
type
==
GGML_TYPE_F16
&&
a
->
type
!=
GGML_TYPE_F16
)
{
return
false
;
return
false
;
}
}
if
(
op
->
op
==
GGML_OP_MUL_MAT
&&
b
->
ne
[
2
]
*
b
->
ne
[
3
]
>
1024
)
{
return
false
;
}
#ifdef GGML_USE_MUSA
#ifdef GGML_USE_MUSA
const
int
cc
=
ggml_cuda_info
().
devices
[
dev_ctx
->
device
].
cc
;
const
int
cc
=
ggml_cuda_info
().
devices
[
dev_ctx
->
device
].
cc
;
if
(
b
->
ne
[
2
]
*
b
->
ne
[
3
]
>
1
&&
!
ggml_is_transposed
(
a
)
&&
!
ggml_is_transposed
(
b
))
{
if
(
b
->
ne
[
2
]
*
b
->
ne
[
3
]
>
1
&&
!
ggml_is_transposed
(
a
)
&&
!
ggml_is_transposed
(
b
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment