Unverified commit 3520f75f authored by Liangsheng Yin, committed by GitHub

Remove inf value for chunked prefill size (#812)

parent c8e9fed8
@@ -442,8 +442,11 @@ class ModelTpServer:
             else:
                 # Add this request to the running batch
                 if (
-                    new_batch_input_tokens + req.extend_input_len
-                    <= self.chunked_prefill_size
+                    self.chunked_prefill_size is None
+                    or (
+                        new_batch_input_tokens + req.extend_input_len
+                        <= self.chunked_prefill_size
+                    )
                     or (
                         req.return_logprob and req.normalized_prompt_logprob is None
                     )
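The hunk above changes the batch-admission condition so that a chunked_prefill_size of None now means "chunked prefill disabled" and the token-budget comparison is skipped, rather than comparing against a huge sentinel. Below is a minimal sketch of the new check, using hypothetical standalone names that mirror the diff; it is not the repository's actual scheduler code.

    from typing import Optional


    def fits_prefill_budget(
        new_batch_input_tokens: int,
        extend_input_len: int,
        chunked_prefill_size: Optional[int],
    ) -> bool:
        """Mirror of the updated condition: None disables the chunk budget."""
        if chunked_prefill_size is None:
            return True  # chunked prefill disabled: no per-batch token limit
        return new_batch_input_tokens + extend_input_len <= chunked_prefill_size


    # With the old sentinel (1 << 30) the comparison always ran; with None it is skipped.
    assert fits_prefill_budget(4096, 8192, None)       # unlimited when disabled
    assert fits_prefill_budget(0, 8192, 8192)          # fits the budget exactly
    assert not fits_prefill_budget(4096, 8192, 8192)   # exceeds the budget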
@@ -87,8 +87,6 @@ class ServerArgs:
     node_rank: Optional[int] = None
 
     def __post_init__(self):
-        if self.chunked_prefill_size is None:
-            self.chunked_prefill_size = 1 << 30
         if self.tokenizer_path is None:
             self.tokenizer_path = self.model_path
         if self.mem_fraction_static is None:
@@ -414,7 +412,7 @@ class ServerArgs:
         ), "multi-node data parallel is not supported"
         assert not (
-            self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache
+            self.chunked_prefill_size is not None and self.disable_radix_cache
         ), "chunked prefill is not supported with radix cache disabled currently"
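The two ServerArgs hunks drop the 1 << 30 sentinel default and make the radix-cache assertion test for None directly, so "chunked prefill enabled" is simply "chunked_prefill_size is not None". Below is a minimal sketch of that validation, using a hypothetical dataclass that carries only the two relevant fields, not the real ServerArgs.

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class ChunkedPrefillArgs:  # hypothetical stand-in for the relevant ServerArgs fields
        chunked_prefill_size: Optional[int] = None
        disable_radix_cache: bool = False

        def check(self) -> None:
            # Mirrors the updated assertion: reject chunked prefill only when it is
            # actually enabled (size is not None) and the radix cache is disabled.
            assert not (
                self.chunked_prefill_size is not None and self.disable_radix_cache
            ), "chunked prefill is not supported with radix cache disabled currently"


    ChunkedPrefillArgs(chunked_prefill_size=None, disable_radix_cache=True).check()   # now passes
    ChunkedPrefillArgs(chunked_prefill_size=8192, disable_radix_cache=False).check()  # passes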