Unverified commit 6579cd7d, authored by Ke Bao, committed by GitHub
Browse files

Fix set kv cache multi-stream (#5975)

parent 97ac42b6
......@@ -374,8 +374,8 @@ class MHATokenToKVPool(KVCache):
# Overlap the copy of K and V cache for small batch size
current_stream = self.device_module.current_stream()
self.alt_stream.wait_stream(current_stream)
with self.device_module.stream(self.alt_stream):
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
with self.device_module.stream(self.alt_stream):
self.v_buffer[layer_id - self.start_layer][loc] = cache_v
current_stream.wait_stream(self.alt_stream)
else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment