Unverified Commit 3f5ac88d authored by Liangsheng Yin's avatar Liangsheng Yin Committed by GitHub
Browse files

Fix out of memory message. (#1771)

parent 0d800090
...@@ -514,7 +514,12 @@ class ScheduleBatch: ...@@ -514,7 +514,12 @@ class ScheduleBatch:
out_cache_loc = self.token_to_kv_pool.alloc(num_tokens) out_cache_loc = self.token_to_kv_pool.alloc(num_tokens)
if out_cache_loc is None: if out_cache_loc is None:
logger.error("Prefill out of memory. Try to lower your batch size.") phase_str = "Prefill" if self.forward_mode.is_extend() else "Decode"
logger.error(
f"{phase_str} out of memory. Try to lower your batch size.\n"
f"Try to allocate {num_tokens} tokens.\n"
f"Avaliable tokens: {self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size()}\n"
)
if self.tree_cache is not None: if self.tree_cache is not None:
self.tree_cache.pretty_print() self.tree_cache.pretty_print()
exit(1) exit(1)
......
...@@ -71,7 +71,6 @@ from sglang.srt.utils import ( ...@@ -71,7 +71,6 @@ from sglang.srt.utils import (
is_generation_model, is_generation_model,
is_multimodal_model, is_multimodal_model,
kill_parent_process, kill_parent_process,
pytorch_profile,
set_random_seed, set_random_seed,
suppress_other_loggers, suppress_other_loggers,
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment