Unverified commit 299c460b, authored by Casper and committed by GitHub
Browse files

Fix cache util logic (#186)

parent 7c976752
@@ -7,7 +7,7 @@ def prepare_cache(blocks, seqlen: int) -> int:
 will_cache_be_exceeded = start_pos + seqlen > block.attn.max_seq_len
 # Reset and avoid retaining state when processing context
-if seqlen > 1 and (will_cache_be_exceeded or seqlen > 1):
+if seqlen > 1 and (will_cache_be_exceeded or start_pos > 0):
 block.attn.start_pos = block.attn.cache.roll_kv_n_steps(start_pos, n=start_pos)
 # Slowly roll out old tokens without performance hit if exceeded during decoding
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment