update llama.cpp submodule to `f364eb6` (#4060)

18d9a7e1 · Jeffrey Morgan · GitHub · 8488388c · 18d9a7e1 · f364eb6f
Unverified commit 18d9a7e1, authored by Jeffrey Morgan on Apr 30, 2024; committed by GitHub on Apr 30, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 6 additions and 3 deletions

llm/ext_server/server.cpp +5 -2

llm/llama.cpp +1 -1

No files found.
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1032,7 +1032,7 @@ struct llama_server_context
            slot.has_next_token = false;
        }

-        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
+        if (llama_token_is_eog(model, result.tok))
        {
            slot.stopped_eos = true;
            slot.has_next_token = false;
@@ -1144,12 +1144,15 @@ struct llama_server_context

        res.result_json = json
        {
-            {"content",    tkn.text_to_send},
            {"stop",       false},
            {"slot_id",    slot.id},
            {"multimodal", multimodal}
        };

+        if (!llama_token_is_eog(model, tkn.tok)) {
+            res.result_json["content"] = tkn.text_to_send;
+        }
+
        if (slot.sparams.n_probs > 0)
        {
            std::vector<completion_token_output> probs_output = {};

--- a/llama.cpp @ f364eb6f
+++ b/llama.cpp @ f364eb6f
-Subproject commit f4ab2a41476600a98067a9474ea8f9e6db41bcfa
+Subproject commit f364eb6fb5d46118a76fa045f487318de4c24961