"git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "5646826a792ce07bbf4989210841d26b86f0de44"
Unverified commit 6579cd7d, authored by Ke Bao, committed by GitHub
Browse files

Fix set kv cache multi-stream (#5975)

parent 97ac42b6
...@@ -374,9 +374,9 @@ class MHATokenToKVPool(KVCache): ...@@ -374,9 +374,9 @@ class MHATokenToKVPool(KVCache):
# Overlap the copy of K and V cache for small batch size # Overlap the copy of K and V cache for small batch size
current_stream = self.device_module.current_stream() current_stream = self.device_module.current_stream()
self.alt_stream.wait_stream(current_stream) self.alt_stream.wait_stream(current_stream)
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
with self.device_module.stream(self.alt_stream): with self.device_module.stream(self.alt_stream):
self.k_buffer[layer_id - self.start_layer][loc] = cache_k self.v_buffer[layer_id - self.start_layer][loc] = cache_v
self.v_buffer[layer_id - self.start_layer][loc] = cache_v
current_stream.wait_stream(self.alt_stream) current_stream.wait_stream(self.alt_stream)
else: else:
self.k_buffer[layer_id - self.start_layer][loc] = cache_k self.k_buffer[layer_id - self.start_layer][loc] = cache_k
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment