Unverified commit 45d6592d, authored by Ying Sheng and committed by GitHub
Browse files

Fix no-cache mode (#136)

parent f6bfe3aa
...@@ -215,6 +215,7 @@ class Batch: ...@@ -215,6 +215,7 @@ class Batch:
extend_num_tokens = seq_lens.sum() - prefix_lens.sum() extend_num_tokens = seq_lens.sum() - prefix_lens.sum()
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens) out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
if out_cache_loc is None: if out_cache_loc is None:
if not self.tree_cache.disable:
self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free) self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free)
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens) out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
...@@ -277,10 +278,10 @@ class Batch: ...@@ -277,10 +278,10 @@ class Batch:
def check_decode_mem(self): def check_decode_mem(self):
bs = len(self.reqs) bs = len(self.reqs)
avai_size = self.token_to_kv_pool.available_size() if self.token_to_kv_pool.available_size() >= bs:
if avai_size >= bs:
return True return True
if not self.tree_cache.disable:
self.tree_cache.evict(bs, self.token_to_kv_pool.free) self.tree_cache.evict(bs, self.token_to_kv_pool.free)
if self.token_to_kv_pool.available_size() >= bs: if self.token_to_kv_pool.available_size() >= bs:
return True return True
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment