Unverified commit 73fa2d49, authored by Liangsheng Yin, committed by GitHub
Browse files

Make memory-leak warnings crash the process when running in CI (#1009)

parent 61728884
...@@ -17,6 +17,7 @@ limitations under the License. ...@@ -17,6 +17,7 @@ limitations under the License.
import logging import logging
import multiprocessing import multiprocessing
import os
import pickle import pickle
import time import time
import warnings import warnings
...@@ -285,6 +286,7 @@ class ModelTpServer: ...@@ -285,6 +286,7 @@ class ModelTpServer:
) )
def check_memory(self): def check_memory(self):
crash = os.getenv("CI", "false") == "true"
available_size = ( available_size = (
self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size() self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size()
) )
...@@ -294,6 +296,7 @@ class ModelTpServer: ...@@ -294,6 +296,7 @@ class ModelTpServer:
f"available_size={available_size}, max_total_num_tokens={self.max_total_num_tokens}\n" f"available_size={available_size}, max_total_num_tokens={self.max_total_num_tokens}\n"
"KV cache pool leak detected!" "KV cache pool leak detected!"
) )
exit(1) if crash else None
if len(self.req_to_token_pool.free_slots) != self.req_to_token_pool.size: if len(self.req_to_token_pool.free_slots) != self.req_to_token_pool.size:
warnings.warn( warnings.warn(
...@@ -302,6 +305,7 @@ class ModelTpServer: ...@@ -302,6 +305,7 @@ class ModelTpServer:
f"total slots={self.req_to_token_pool.size}\n" f"total slots={self.req_to_token_pool.size}\n"
"Memory pool leak detected!" "Memory pool leak detected!"
) )
exit(1) if crash else None
def handle_generate_request( def handle_generate_request(
self, self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment