OpenDAS / ollama · Commit cd3fbf1c

Unverified commit cd3fbf1c, authored Feb 05, 2025 by Jeffrey Morgan, committed by GitHub on Feb 05, 2025
llama: use dynamic backend loading for mllama and clip (#8835)
parent c852b8e0

Showing 3 changed files with 36 additions and 82 deletions (+36, -82):

    llama/llama.cpp/examples/llava/clip.cpp    +8 -28
    llama/mllama.cpp    +8 -23
    llama/patches/0013-use-dynamic-backend-loading-for-clip.patch    +20 -31
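The change is easiest to read as a pattern swap: both loaders previously chose a backend through a chain of compile-time #ifdef GGML_USE_* blocks, and now ask ggml's backend registry at runtime. Below is a minimal, hypothetical sketch of that runtime pattern, not code from this commit; it assumes ggml-backend.h is on the include path and a ggml build recent enough to provide the registry API.

// Hedged sketch (not from this commit): runtime backend selection via the
// ggml registry, replacing compile-time #ifdef chains.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    // Pick the best backend ggml can initialize at runtime (a GPU backend
    // if one is available, otherwise CPU).
    ggml_backend_t backend = ggml_backend_init_best();
    if (backend == nullptr) {
        fprintf(stderr, "failed to initialize backend\n");
        return 1;
    }
    printf("using %s backend\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}

This mirrors the error handling in the diffs below: on failure the loaders now free their context and return nullptr rather than silently falling back.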
llama/llama.cpp/examples/llava/clip.cpp

@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }

-#ifdef GGML_USE_CUDA
-    new_clip->backend = ggml_backend_cuda_init(0);
-    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_METAL
-    new_clip->backend = ggml_backend_metal_init();
-    LOG_INF("%s: CLIP using Metal backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_CANN
-    new_clip->backend = ggml_backend_cann_init(0);
-    LOG_INF("%s: CLIP using CANN backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_clip->backend = ggml_backend_vk_init(0);
-    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
-#endif
-
-#ifdef GGML_USE_SYCL
-    new_clip->backend = ggml_backend_sycl_init(0);
-    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
-#endif
-
-    if (!new_clip->backend) {
-        new_clip->backend = ggml_backend_cpu_init();
-        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG_ERR("%s: failed to initialize backend\n", __func__);
+        clip_free(new_clip);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_clip->backend = backend;

     // model size and capabilities
     {
llama/mllama.cpp

@@ -558,30 +558,15 @@ struct mllama_ctx *mllama_model_load(const char *fname, const int verbosity = 1)
     mllama_ctx *new_mllama = new mllama_ctx{};

-#ifdef GGML_USE_CUDA
-    new_mllama->backend = ggml_backend_cuda_init(0);
-    LOG("vision using CUDA backend");
-#endif
-
-#ifdef GGML_USE_METAL
-    new_mllama->backend = ggml_backend_metal_init();
-    LOG("vision using Metal backend");
-#endif
-
-#ifdef GGML_USE_CANN
-    new_mllama->backend = ggml_backend_cann_init(0);
-    LOG("vision using CANN backend");
-#endif
-
-#ifdef GGML_USE_VULKAN
-    new_mllama->backend = ggml_backend_vk_init(0);
-    LOG("vision using Vulkan backend");
-#endif
-
-    if (!new_mllama->backend) {
-        new_mllama->backend = ggml_backend_cpu_init();
-        LOG("vision using CPU backend");
+    ggml_backend_t backend = ggml_backend_init_best();
+    if (backend == nullptr) {
+        LOG("%s: failed to initialize backend\n", __func__);
+        mllama_free(new_mllama);
+        gguf_free(ctx);
+        return nullptr;
     }
+    LOG("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+    new_mllama->backend = backend;

     // load tensors
     {
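"Dynamic backend loading" in the commit title refers to ggml builds where each backend (CUDA, Metal, Vulkan, CANN, SYCL, CPU) is a separate shared library discovered at runtime, so clip.cpp and mllama.cpp no longer need per-backend compile flags. A small hypothetical sketch of what the registry exposes once backends are loaded, again not code from this commit and assuming a ggml build with dynamic loading enabled:

// Hedged sketch: list the devices the ggml backend registry discovered.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    // Scan for and load available backend shared libraries; statically
    // linked backends are already registered and are simply listed.
    ggml_backend_load_all();

    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        printf("device %zu: %s - %s\n", i,
               ggml_backend_dev_name(dev),
               ggml_backend_dev_description(dev));
    }
    return 0;
}

ggml_backend_init_best() then picks the highest-priority device from this list, which is why the same binary can end up on CUDA on one machine and CPU on another.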
llama/patches/0013-re-enable-gpu-for-clip.patch → llama/patches/0013-use-dynamic-backend-loading-for-clip.patch
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: jmorganca <jmorganca@gmail.com>
 Date: Sat, 4 Jan 2025 22:52:48 -0800
-Subject: [PATCH] re-enable gpu for clip
+Subject: [PATCH] use dynamic backend loading for clip
 ---
- examples/llava/clip.cpp | 86 ++++++++++++++++++++++---------------------
- 1 file changed, 43 insertions(+), 43 deletions(-)
+ examples/llava/clip.cpp | 74 ++++++++++++++---------------------------
+ 1 file changed, 27 insertions(+), 47 deletions(-)

 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index b3c1829f..718052e1 100644
+index b3c1829f..86b91d5c 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
 @@ -8,25 +8,25 @@
...
@@ -56,7 +56,7 @@ index b3c1829f..718052e1 100644
  #define STB_IMAGE_IMPLEMENTATION
  #include "stb_image.h"
-@@ -1235,30 +1235,30 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
+@@ -1235,35 +1235,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
          }
      }
...
@@ -84,30 +84,19 @@ index b3c1829f..718052e1 100644
  -//    new_clip->backend = ggml_backend_sycl_init(0);
  -//    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
  -//#endif
- +#ifdef GGML_USE_CUDA
- +    new_clip->backend = ggml_backend_cuda_init(0);
- +    LOG_INF("%s: CLIP using CUDA backend\n", __func__);
- +#endif
- +
- +#ifdef GGML_USE_METAL
- +    new_clip->backend = ggml_backend_metal_init();
- +    LOG_INF("%s: CLIP using Metal backend\n", __func__);
- +#endif
- +
- +#ifdef GGML_USE_CANN
- +    new_clip->backend = ggml_backend_cann_init(0);
- +    LOG_INF("%s: CLIP using CANN backend\n", __func__);
- +#endif
- +
- +#ifdef GGML_USE_VULKAN
- +    new_clip->backend = ggml_backend_vk_init(0);
- +    LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
- +#endif
- +
- +#ifdef GGML_USE_SYCL
- +    new_clip->backend = ggml_backend_sycl_init(0);
- +    LOG_INF("%s: CLIP using SYCL backend\n", __func__);
- +#endif
+ -
+ -    if (!new_clip->backend) {
+ -        new_clip->backend = ggml_backend_cpu_init();
+ -        LOG_INF("%s: CLIP using CPU backend\n", __func__);
+ +    ggml_backend_t backend = ggml_backend_init_best();
+ +    if (backend == nullptr) {
+ +        LOG_ERR("%s: failed to initialize backend\n", __func__);
+ +        clip_free(new_clip);
+ +        gguf_free(ctx);
+ +        return nullptr;
       }
+ +    LOG_INF("%s: using %s backend\n", __func__, ggml_backend_name(backend));
+ +    new_clip->backend = backend;
-      if (!new_clip->backend) {
-          new_clip->backend = ggml_backend_cpu_init();
       // model size and capabilities
       {