OpenDAS / ollama

Commit c9dfa6e5 (unverified)
Authored Feb 07, 2024 by Daniel Hiltgen; committed by GitHub on Feb 07, 2024

Merge pull request #2377 from dhiltgen/bump_llamacpp

    Bump llama.cpp to b2081
Parents: 3dcbcd36, de76b95d

Showing 3 changed files with 17 additions and 22 deletions (+17 -22)
llm/llama.cpp                 +1  -1
llm/patches/01-cache.diff     +5  -5
llm/patches/02-shutdown.diff  +11 -16
llm/llama.cpp @ f57fadc0 (compare d2f650cb...f57fadc0)

-Subproject commit d2f650cb5b04ee2726663e79b47da5efe196ce00
+Subproject commit f57fadc009cbff741a1961cb7896c47d73978d2c
llm/patches/01-cache.diff
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index a48582ad..9fffffd8 100644
+index d86d7e04..7d71c766 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -1564,12 +1564,6 @@ struct llama_server_context
+@@ -1598,12 +1598,6 @@ struct llama_server_context
          LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed);
      }
...
@@ -15,7 +15,7 @@ index a48582ad..9fffffd8 100644
      if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
      {
          // we have to evaluate at least 1 token to generate logits.
-@@ -1581,6 +1575,12 @@ struct llama_server_context
+@@ -1615,6 +1609,12 @@ struct llama_server_context
      }
  }
...
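Both edits to this patch are pure rebasing: after the bump, examples/server/server.cpp has new blob hashes and the patched region sits 34 lines lower (1564 to 1598, 1581 to 1615), so only the index line and the inner hunk offsets change; the patch body is untouched. The surviving context does show what the patch guards: when the whole prompt is already in the KV cache there is nothing left to decode, yet sampling needs logits from at least one evaluated token. A minimal sketch of that edge case, where the condition and comment are verbatim from the context above but the rollback line is an assumption about the surrounding code, not quoted from it:

    // Sketch (assumption): if every prompt token is already cached, step
    // n_past back by one so the final prompt token is re-evaluated and
    // produces fresh logits for the sampler.
    if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
    {
        // we have to evaluate at least 1 token to generate logits.
        slot.n_past -= 1;  // assumed rollback; decoding resumes from here
    }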
llm/patches/02-shutdown.diff
...
@@ -37,26 +37,18 @@ index 11dd82c3..311495a8 100644
  llama_backend_free();
 diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
-index 70cce072..2acb1eab 100644
+index 70cce072..9124869a 100644
 --- a/examples/server/utils.hpp
 +++ b/examples/server/utils.hpp
-@@ -6,6 +6,7 @@
- #include <mutex>
- #include <condition_variable>
- #include <unordered_map>
-+#include <atomic>
- #include "json.hpp"
-@@ -190,6 +191,7 @@ inline std::string format_chatml(std::vector<json> messages)
+@@ -190,6 +190,7 @@ inline std::string format_chatml(std::vector<json> messages)
  struct llama_server_queue {
      int id = 0;
      std::mutex mutex_tasks;
-+    std::atomic<bool> running;
++    bool running;
      // queues
      std::vector<task_server> queue_tasks;
      std::vector<task_server> queue_tasks_deferred;
-@@ -248,9 +250,15 @@ struct llama_server_queue {
+@@ -248,9 +249,18 @@ struct llama_server_queue {
          queue_tasks_deferred.clear();
      }
...
@@ -64,7 +56,10 @@ index 70cce072..2acb1eab 100644
 -    [[noreturn]]
 +    // end the start_loop routine
 +    void terminate() {
++        {
++            std::unique_lock<std::mutex> lock(mutex_tasks);
 +        running = false;
++        }
 +        condition_tasks.notify_all();
 +    }
 +
...
@@ -74,17 +69,17 @@ index 70cce072..2acb1eab 100644
      while (true) {
          // new task arrived
          LOG_VERBOSE("have new task", {});
-@@ -294,8 +302,12 @@ struct llama_server_queue {
+@@ -294,8 +304,12 @@ struct llama_server_queue {
          {
              std::unique_lock<std::mutex> lock(mutex_tasks);
              if (queue_tasks.empty()) {
-+                if (!running.load()) {
++                if (!running) {
 +                    LOG_VERBOSE("ending start_loop", {});
 +                    return;
 +                }
                  condition_tasks.wait(lock, [&]{
 -                    return !queue_tasks.empty();
-+                    return (!queue_tasks.empty() || !running.load());
++                    return (!queue_tasks.empty() || !running);
                  });
              }
          }
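Taken together, the rebased patch also changes its synchronization strategy: the running flag drops from std::atomic<bool> to a plain bool, the #include <atomic> hunk disappears, the .load() calls go away, and terminate() now writes the flag inside a std::unique_lock on mutex_tasks. That is the textbook fix for a lost wakeup: if the flag is written without holding the mutex the condition variable is paired with (atomic or not), terminate() can set it and call notify_all() in the window after start_loop() has evaluated its wait predicate as false but before it has blocked, and the shutdown notification is silently dropped. Writing the flag under mutex_tasks closes that window. Below is a self-contained sketch of the resulting pattern with hypothetical, simplified types (task_server becomes int); it illustrates the technique and is not the ollama/llama.cpp code itself:

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <thread>
    #include <vector>

    // Hypothetical stand-in for the patched llama_server_queue; the task
    // type is simplified to int so the sketch stays self-contained.
    struct task_queue {
        std::mutex mutex_tasks;
        std::condition_variable condition_tasks;
        std::vector<int> queue_tasks;
        bool running = true;  // plain bool: only touched under mutex_tasks

        void post(int task) {
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                queue_tasks.push_back(task);
            }
            condition_tasks.notify_one();
        }

        // Mirrors the patched terminate(): the store to running happens
        // under the same mutex the waiter uses, so it cannot slip into the
        // window between the predicate check and the waiter blocking.
        void terminate() {
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                running = false;
            }
            condition_tasks.notify_all();
        }

        void start_loop() {
            while (true) {
                int task;
                {
                    std::unique_lock<std::mutex> lock(mutex_tasks);
                    if (queue_tasks.empty()) {
                        if (!running) {
                            std::cout << "ending start_loop\n";
                            return;
                        }
                        condition_tasks.wait(lock, [&] {
                            return (!queue_tasks.empty() || !running);
                        });
                        if (queue_tasks.empty()) {
                            continue;  // woken for shutdown; re-check running
                        }
                    }
                    task = queue_tasks.front();
                    queue_tasks.erase(queue_tasks.begin());
                }
                std::cout << "processing task " << task << "\n";  // work outside the lock
            }
        }
    };

    int main() {
        task_queue q;
        std::thread worker(&task_queue::start_loop, &q);
        q.post(1);
        q.post(2);
        q.terminate();  // queued tasks drain first, then the loop exits
        worker.join();
        return 0;
    }

Note that posted tasks are drained before shutdown, because start_loop() only consults running once the queue is empty; terminate() therefore acts as a graceful "finish what is queued, then exit" signal rather than an abort.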