Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
21347e1e
Unverified
Commit
21347e1e
authored
Mar 01, 2024
by
Jeffrey Morgan
Committed by
GitHub
Mar 01, 2024
Browse files
update llama.cpp submodule to `c29af7e` (#2868)
parent
3b4bab3d
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
24 additions
and
25 deletions
+24
-25
llm/ext_server/ext_server.cpp
llm/ext_server/ext_server.cpp
+3
-3
llm/llama.cpp
llm/llama.cpp
+1
-1
llm/patches/01-cache.diff
llm/patches/01-cache.diff
+3
-3
llm/patches/02-cudaleaks.diff
llm/patches/02-cudaleaks.diff
+17
-18
No files found.
llm/ext_server/ext_server.cpp
View file @
21347e1e
...
@@ -146,9 +146,9 @@ void llama_server_start() {
...
@@ -146,9 +146,9 @@ void llama_server_start() {
llama
->
queue_tasks
.
on_new_task
(
std
::
bind
(
llama
->
queue_tasks
.
on_new_task
(
std
::
bind
(
&
llama_server_context
::
process_single_task
,
llama
,
std
::
placeholders
::
_1
));
&
llama_server_context
::
process_single_task
,
llama
,
std
::
placeholders
::
_1
));
llama
->
queue_tasks
.
on_finish_multitask
(
std
::
bind
(
llama
->
queue_tasks
.
on_finish_multitask
(
std
::
bind
(
&
llama_server_context
::
on_finish_multitask
,
llama
,
std
::
placeholders
::
_1
));
&
llama_server_context
::
on_finish_multitask
,
llama
,
std
::
placeholders
::
_1
));
llama
->
queue_tasks
.
on_
all_tasks_finished
(
std
::
bind
(
llama
->
queue_tasks
.
on_
run_slots
(
std
::
bind
(
&
llama_server_context
::
run_on_all_tasks_finished
,
llama
));
&
llama_server_context
::
update_slots
,
llama
));
llama
->
queue_results
.
on_multitask_update
(
std
::
bind
(
llama
->
queue_results
.
on_multitask_update
(
std
::
bind
(
&
llama_server_queue
::
update_multitask
,
&
llama_server_queue
::
update_multitask
,
&
llama
->
queue_tasks
,
&
llama
->
queue_tasks
,
...
...
llama.cpp
@
c29af7e2
Compare
87c91c07
...
c29af7e2
Subproject commit
87c91c07663b707e831c59ec373b5e665ff9d64a
Subproject commit
c29af7e2252d288f2ea58a7d437c1cb7c0abf160
llm/patches/01-cache.diff
View file @
21347e1e
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index
d86d7e04..2694e92e
100644
index
2b2f4a0f..afac49af
100644
--- a/examples/server/server.cpp
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -9
01
,13 +9
01
,15 @@
struct llama_server_context
@@ -9
97
,13 +9
97
,15 @@
struct llama_server_context
slot.sent_
coun
t += result.text_to_send.size();
slot.
n_
sent_
tex
t += result.text_to_send.size();
// add the token to slot queue and cache
// add the token to slot queue and cache
}
}
- slot.add_token_string(result);
- slot.add_token_string(result);
...
...
llm/patches/02-cudaleaks.diff
View file @
21347e1e
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index
7800c6e7..be30db23
100644
index
2b2f4a0f..25857bdd
100644
--- a/examples/server/server.cpp
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3
0
,6 +3
0
,10 @@
@@ -3
1
,6 +3
1
,10 @@
#include <atomic>
#include <atomic>
#include <signal.h>
#include <signal.h>
...
@@ -12,8 +12,8 @@ index 7800c6e7..be30db23 100644
...
@@ -12,8 +12,8 @@ index 7800c6e7..be30db23 100644
+
+
using json = nlohmann::json;
using json = nlohmann::json;
struct server_params
struct server_params
{
@@ -3
5
3,6 +3
5
7,9 @@
struct llama_server_context
@@ -3
6
3,6 +3
6
7,9 @@
struct llama_server_context
llama_free_model(model);
llama_free_model(model);
model = nullptr;
model = nullptr;
}
}
...
@@ -23,7 +23,7 @@ index 7800c6e7..be30db23 100644
...
@@ -23,7 +23,7 @@ index 7800c6e7..be30db23 100644
}
}
bool load_model(const gpt_params &params_)
bool load_model(const gpt_params &params_)
@@ -3
143
,6 +3
1
50,7 @@
int main(int argc, char **argv)
@@ -3
494
,6 +350
1
,7 @@
int main(int argc, char **argv)
sigemptyset (&sigint_action.sa_mask);
sigemptyset (&sigint_action.sa_mask);
sigint_action.sa_flags = 0;
sigint_action.sa_flags = 0;
sigaction(SIGINT, &sigint_action, NULL);
sigaction(SIGINT, &sigint_action, NULL);
...
@@ -32,10 +32,10 @@ index 7800c6e7..be30db23 100644
...
@@ -32,10 +32,10 @@ index 7800c6e7..be30db23 100644
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index
933ebbc4..88a4f664
100644
index
0c6501e9..75c12723
100644
--- a/ggml-cuda.cu
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -3
9
,6 +3
9
,7 @@
@@ -
4
3,6 +
4
3,7 @@
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
#define cublasCreate hipblasCreate
#define cublasCreate hipblasCreate
...
@@ -43,7 +43,7 @@ index 933ebbc4..88a4f664 100644
...
@@ -43,7 +43,7 @@ index 933ebbc4..88a4f664 100644
#define cublasGemmEx hipblasGemmEx
#define cublasGemmEx hipblasGemmEx
#define cublasGemmBatchedEx hipblasGemmBatchedEx
#define cublasGemmBatchedEx hipblasGemmBatchedEx
#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
@@ -
7991
,10 +
7992
,10 @@
GGML_CALL bool ggml_cublas_loaded(void) {
@@ -
8694
,10 +
8695
,10 @@
GGML_CALL bool ggml_cublas_loaded(void) {
return g_cublas_loaded;
return g_cublas_loaded;
}
}
...
@@ -57,7 +57,7 @@ index 933ebbc4..88a4f664 100644
...
@@ -57,7 +57,7 @@ index 933ebbc4..88a4f664 100644
#ifdef __HIP_PLATFORM_AMD__
#ifdef __HIP_PLATFORM_AMD__
// Workaround for a rocBLAS bug when using multiple graphics cards:
// Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -8
004
,7 +8
005
,7 @@
GGML_CALL void ggml_init_cublas() {
@@ -8
707
,7 +8
708
,7 @@
GGML_CALL void ggml_init_cublas() {
#endif
#endif
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
...
@@ -66,7 +66,7 @@ index 933ebbc4..88a4f664 100644
...
@@ -66,7 +66,7 @@ index 933ebbc4..88a4f664 100644
g_cublas_loaded = false;
g_cublas_loaded = false;
fprintf(stderr, "%s: no " GGML_CUDA_NAME " devices found, " GGML_CUDA_NAME " will be disabled\n", __func__);
fprintf(stderr, "%s: no " GGML_CUDA_NAME " devices found, " GGML_CUDA_NAME " will be disabled\n", __func__);
return;
return;
@@ -8
075
,7 +8
076
,7 @@
GGML_CALL void ggml_init_cublas() {
@@ -8
778
,7 +8
779
,7 @@
GGML_CALL void ggml_init_cublas() {
// configure logging to stdout
// configure logging to stdout
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
...
@@ -75,12 +75,11 @@ index 933ebbc4..88a4f664 100644
...
@@ -75,12 +75,11 @@ index 933ebbc4..88a4f664 100644
g_cublas_loaded = true;
g_cublas_loaded = true;
}
}
}
}
@@ -1
1604
,3 +1
1605
,2
3
@@
GGML_CALL int ggml_backend_cuda_reg_devices() {
@@ -1
2345
,3 +1
2346
,2
2
@@
GGML_CALL int ggml_backend_cuda_reg_devices() {
}
}
return device_count;
return device_count;
}
}
+
+
+
+extern "C" GGML_CALL void ggml_free_cublas(void);
+extern "C" GGML_CALL void ggml_free_cublas(void);
+GGML_CALL void ggml_free_cublas(void) {
+GGML_CALL void ggml_free_cublas(void) {
+ for (int id = 0; id < g_device_count; ++id) {
+ for (int id = 0; id < g_device_count; ++id) {
...
@@ -100,16 +99,16 @@ index 933ebbc4..88a4f664 100644
...
@@ -100,16 +99,16 @@ index 933ebbc4..88a4f664 100644
+ g_cublas_initialized = false;
+ g_cublas_initialized = false;
+}
+}
diff --git a/ggml-cuda.h b/ggml-cuda.h
diff --git a/ggml-cuda.h b/ggml-cuda.h
index b1ebd61d..
b4c80c2c
100644
index b1ebd61d..
6dd58ddf
100644
--- a/ggml-cuda.h
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -2
0
,6 +2
0
,9 @@
extern "C" {
@@ -2
3
,6 +2
3
,9 @@
GGML_API GGML_CALL void ggml_init_cublas(void);
//
Always success. To check if CUDA is actually loaded, use `ggml_cublas_loaded
`.
//
Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false
`.
GGML_API GGML_CALL
void
ggml_
init_
cublas(void);
GGML_API GGML_CALL
bool
ggml_cublas
_loaded
(void);
+// Release CUDA resources
+// Release CUDA resources
+GGML_API GGML_CALL void ggml_free_cublas(void);
+GGML_API GGML_CALL void ggml_free_cublas(void);
+
+
// Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
GGML_API GGML_CALL void * ggml_cuda_host_malloc(size_t size);
GGML_API GGML_CALL
bool
ggml_cu
blas_loaded(void
);
GGML_API GGML_CALL
void
ggml_cu
da_host_free(void * ptr
);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment