Commit 0a0e9f3e authored by Daniel Hiltgen
Browse files

Apply 01-cache.diff

parent 58d95cc9
......@@ -1007,13 +1007,15 @@ struct llama_server_context
slot.n_sent_text += result.text_to_send.size();
// add the token to slot queue and cache
}
slot.add_token_string(result);
if (slot.params.stream)
{
send_partial_response(slot, result);
}
}
slot.add_token_string(result);
if (incomplete)
{
slot.has_next_token = true;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment