Unverified commit a470e60c, authored by Ying Sheng and committed by GitHub
Browse files

clean up step function (#635)

parent 5f90e076
@@ -228,6 +228,18 @@ class ModelTpServer:
# Print stats # Print stats
if self.tp_rank == 0 and self.decode_forward_ct % 40 == 0: if self.tp_rank == 0 and self.decode_forward_ct % 40 == 0:
self.print_stats()
if self.running_batch.is_empty():
self.running_batch = None
break
if self.out_pyobjs and self.running_batch.has_stream():
break
else:
self.check_memory()
def print_stats(self):
num_used = self.max_total_num_tokens - ( num_used = self.max_total_num_tokens - (
self.token_to_kv_pool.available_size() self.token_to_kv_pool.available_size()
+ self.tree_cache.evictable_size() + self.tree_cache.evictable_size()
@@ -246,14 +258,7 @@ class ModelTpServer:
f"#queue-req: {len(self.forward_queue)}" f"#queue-req: {len(self.forward_queue)}"
) )
if self.running_batch.is_empty(): def check_memory(self):
self.running_batch = None
break
if self.out_pyobjs and self.running_batch.has_stream():
break
else:
# Check the available size
available_size = ( available_size = (
self.token_to_kv_pool.available_size() self.token_to_kv_pool.available_size()
+ self.tree_cache.evictable_size() + self.tree_cache.evictable_size()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment