OpenDAS / ollama

Commit c9dfa6e5 (unverified)
Authored Feb 07, 2024 by Daniel Hiltgen; committed by GitHub on Feb 07, 2024

Merge pull request #2377 from dhiltgen/bump_llamacpp

    Bump llama.cpp to b2081
Parents: 3dcbcd36, de76b95d

Showing 3 changed files with 17 additions and 22 deletions (+17 -22)
llm/llama.cpp                 +1  -1
llm/patches/01-cache.diff     +5  -5
llm/patches/02-shutdown.diff  +11 -16
llm/llama.cpp @ f57fadc0 (compare d2f650cb...f57fadc0)

-Subproject commit d2f650cb5b04ee2726663e79b47da5efe196ce00
+Subproject commit f57fadc009cbff741a1961cb7896c47d73978d2c
llm/patches/01-cache.diff
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index a48582ad..9fffffd8 100644
+index d86d7e04..7d71c766 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -1564,12 +1564,6 @@ struct llama_server_context
+@@ -1598,12 +1598,6 @@ struct llama_server_context
          LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed);
      }
...
@@ -15,7 +15,7 @@ index a48582ad..9fffffd8 100644
      if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
      {
          // we have to evaluate at least 1 token to generate logits.
-@@ -1581,6 +1575,12 @@ struct llama_server_context
+@@ -1615,6 +1609,12 @@ struct llama_server_context
      }
  }
...
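Both edits to this patch are pure rebasing: after the bump, examples/server/server.cpp has new blob hashes and the patched region sits 34 lines lower (1564 to 1598, 1581 to 1615), so only the index line and the inner hunk offsets change; the patch body is untouched. The surviving context does show what the patch guards: when the whole prompt is already in the KV cache there is nothing left to decode, yet sampling needs logits from at least one evaluated token. A minimal sketch of that edge case, where the condition and comment are verbatim from the context above but the rollback line is an assumption about the surrounding code, not quoted from it:

    // Sketch (assumption): if every prompt token is already cached, step
    // n_past back by one so the final prompt token is re-evaluated and
    // produces fresh logits for the sampler.
    if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
    {
        // we have to evaluate at least 1 token to generate logits.
        slot.n_past -= 1;  // assumed rollback; decoding resumes from here
    }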
llm/patches/02-shutdown.diff
...
@@ -37,26 +37,18 @@ index 11dd82c3..311495a8 100644
  llama_backend_free();
 diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
-index 70cce072..2acb1eab 100644
+index 70cce072..9124869a 100644
 --- a/examples/server/utils.hpp
 +++ b/examples/server/utils.hpp
-@@ -6,6 +6,7 @@
- #include <mutex>
- #include <condition_variable>
- #include <unordered_map>
-+#include <atomic>
- #include "json.hpp"
-@@ -190,6 +191,7 @@ inline std::string format_chatml(std::vector<json> messages)
+@@ -190,6 +190,7 @@ inline std::string format_chatml(std::vector<json> messages)
  struct llama_server_queue {
      int id = 0;
      std::mutex mutex_tasks;
-+    std::atomic<bool> running;
++    bool running;
      // queues
      std::vector<task_server> queue_tasks;
      std::vector<task_server> queue_tasks_deferred;
-@@ -248,9 +250,15 @@ struct llama_server_queue {
+@@ -248,9 +249,18 @@ struct llama_server_queue {
          queue_tasks_deferred.clear();
      }
...
@@ -64,7 +56,10 @@ index 70cce072..2acb1eab 100644
 -    [[noreturn]]
 +    // end the start_loop routine
 +    void terminate() {
++        {
++            std::unique_lock<std::mutex> lock(mutex_tasks);
 +        running = false;
++        }
 +        condition_tasks.notify_all();
 +    }
 +
...
@@ -74,17 +69,17 @@ index 70cce072..2acb1eab 100644
      while (true) {
          // new task arrived
          LOG_VERBOSE("have new task", {});
-@@ -294,8 +302,12 @@ struct llama_server_queue {
+@@ -294,8 +304,12 @@ struct llama_server_queue {
          {
              std::unique_lock<std::mutex> lock(mutex_tasks);
              if (queue_tasks.empty()) {
-+                if (!running.load()) {
++                if (!running) {
 +                    LOG_VERBOSE("ending start_loop", {});
 +                    return;
 +                }
                  condition_tasks.wait(lock, [&]{
 -                    return !queue_tasks.empty();
-+                    return (!queue_tasks.empty() || !running.load());
++                    return (!queue_tasks.empty() || !running);
                  });
              }
          }
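Taken together, the rebased patch also changes its synchronization strategy: the running flag drops from std::atomic<bool> to a plain bool, the #include <atomic> hunk disappears, the .load() calls go away, and terminate() now writes the flag inside a std::unique_lock on mutex_tasks. That is the textbook fix for a lost wakeup: if the flag is written without holding the mutex the condition variable is paired with (atomic or not), terminate() can set it and call notify_all() in the window after start_loop() has evaluated its wait predicate as false but before it has blocked, and the shutdown notification is silently dropped. Writing the flag under mutex_tasks closes that window. Below is a self-contained sketch of the resulting pattern with hypothetical, simplified types (task_server becomes int); it illustrates the technique and is not the ollama/llama.cpp code itself:

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <thread>
    #include <vector>

    // Hypothetical stand-in for the patched llama_server_queue; the task
    // type is simplified to int so the sketch stays self-contained.
    struct task_queue {
        std::mutex mutex_tasks;
        std::condition_variable condition_tasks;
        std::vector<int> queue_tasks;
        bool running = true;  // plain bool: only touched under mutex_tasks

        void post(int task) {
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                queue_tasks.push_back(task);
            }
            condition_tasks.notify_one();
        }

        // Mirrors the patched terminate(): the store to running happens
        // under the same mutex the waiter uses, so it cannot slip into the
        // window between the predicate check and the waiter blocking.
        void terminate() {
            {
                std::unique_lock<std::mutex> lock(mutex_tasks);
                running = false;
            }
            condition_tasks.notify_all();
        }

        void start_loop() {
            while (true) {
                int task;
                {
                    std::unique_lock<std::mutex> lock(mutex_tasks);
                    if (queue_tasks.empty()) {
                        if (!running) {
                            std::cout << "ending start_loop\n";
                            return;
                        }
                        condition_tasks.wait(lock, [&] {
                            return (!queue_tasks.empty() || !running);
                        });
                        if (queue_tasks.empty()) {
                            continue;  // woken for shutdown; re-check running
                        }
                    }
                    task = queue_tasks.front();
                    queue_tasks.erase(queue_tasks.begin());
                }
                std::cout << "processing task " << task << "\n";  // work outside the lock
            }
        }
    };

    int main() {
        task_queue q;
        std::thread worker(&task_queue::start_loop, &q);
        q.post(1);
        q.post(2);
        q.terminate();  // queued tasks drain first, then the loop exits
        worker.join();
        return 0;
    }

Note that posted tasks are drained before shutdown, because start_loop() only consults running once the queue is empty; terminate() therefore acts as a graceful "finish what is queued, then exit" signal rather than an abort.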