Fix no-cache mode (#136)

45d6592d · Ying Sheng · GitHub · f6bfe3aa · 45d6592d
Unverified commit 45d6592d, authored Feb 03, 2024 by Ying Sheng; committed by GitHub on Feb 03, 2024.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 5 deletions

python/sglang/srt/managers/router/infer_batch.py python/sglang/srt/managers/router/infer_batch.py +6 -5

No files found.
--- a/python/sglang/srt/managers/router/infer_batch.py
+++ b/python/sglang/srt/managers/router/infer_batch.py
@@ -215,6 +215,7 @@ class Batch:
        extend_num_tokens = seq_lens.sum() - prefix_lens.sum()
        out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
        if out_cache_loc is None:
+            if not self.tree_cache.disable:
                self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free)
                out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
@@ -277,10 +278,10 @@ class Batch:
    def check_decode_mem(self):
        bs = len(self.reqs)
-        avai_size = self.token_to_kv_pool.available_size()
+        if self.token_to_kv_pool.available_size() >= bs:
-        if avai_size >= bs:
            return True
+        if not self.tree_cache.disable:
            self.tree_cache.evict(bs, self.token_to_kv_pool.free)
        if self.token_to_kv_pool.available_size() >= bs:
            return True