Fix cache util logic (#186)

299c460b · Casper · GitHub · 7c976752 · 299c460b
Unverified commit 299c460b, authored Nov 11, 2023 by Casper; committed by GitHub on Nov 11, 2023
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

awq/utils/fused_utils.py awq/utils/fused_utils.py +1 -1

No files found.
--- a/awq/utils/fused_utils.py
+++ b/awq/utils/fused_utils.py
@@ -7,7 +7,7 @@ def prepare_cache(blocks, seqlen: int) -> int:
        will_cache_be_exceeded = start_pos + seqlen > block.attn.max_seq_len
        # Reset and avoid retaining state when processing context
-        if seqlen > 1 and (will_cache_be_exceeded or seqlen > 1):
+        if seqlen > 1 and (will_cache_be_exceeded or start_pos > 0):
            block.attn.start_pos = block.attn.cache.roll_kv_n_steps(start_pos, n=start_pos)
        # Slowly roll out old tokens without performance hit if exceeded during decoding