Unverified commit 7dd8a7e6, authored by min-xu-et, committed by GitHub
Browse files

Fixed error handling in bench_latency.py (#904)

parent 947402c8
...@@ -380,13 +380,15 @@ class Batch: ...@@ -380,13 +380,15 @@ class Batch:
extend_num_tokens = seq_lens.sum() - prefix_lens.sum() extend_num_tokens = seq_lens.sum() - prefix_lens.sum()
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens) out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
if out_cache_loc is None: if out_cache_loc is None:
self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free) if self.tree_cache is not None:
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens) self.tree_cache.evict(extend_num_tokens, self.token_to_kv_pool.free)
out_cache_loc = self.token_to_kv_pool.alloc(extend_num_tokens)
if out_cache_loc is None: if out_cache_loc is None:
logger.error("Prefill out of memory. This should never happen.") logger.error("Prefill out of memory. Try to lower your batch size.")
self.tree_cache.pretty_print() if self.tree_cache is not None:
exit() self.tree_cache.pretty_print()
exit(1)
pt = 0 pt = 0
for i in range(bs): for i in range(bs):
...@@ -637,9 +639,10 @@ class Batch: ...@@ -637,9 +639,10 @@ class Batch:
self.out_cache_loc = self.token_to_kv_pool.alloc(bs) self.out_cache_loc = self.token_to_kv_pool.alloc(bs)
if self.out_cache_loc is None: if self.out_cache_loc is None:
logger.error("Decode out of memory. This should never happen.") logger.error("Decode out of memory. Try to lower your batch size.")
self.tree_cache.pretty_print() if self.tree_cache is not None:
exit() self.tree_cache.pretty_print()
exit(1)
self.req_to_token_pool.req_to_token[ self.req_to_token_pool.req_to_token[
self.req_pool_indices, self.seq_lens - 1 self.req_pool_indices, self.seq_lens - 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment