diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8fe5e0b1..3e82acb9 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -997,13 +997,15 @@ struct llama_server_context
                 slot.n_sent_text += result.text_to_send.size();
                 // add the token to slot queue and cache
             }
-            slot.add_token_string(result);
+
             if (slot.params.stream)
             {
                 send_partial_response(slot, result);
             }
         }
 
+        slot.add_token_string(result);
+
         if (incomplete)
         {
             slot.has_next_token = true;