Unverified commit 3520f75f authored by Liangsheng Yin, committed by GitHub

Remove inf value for chunked prefill size (#812)

parent c8e9fed8
@@ -442,8 +442,11 @@ class ModelTpServer:
             else:
                 # Add this request to the running batch
                 if (
-                    new_batch_input_tokens + req.extend_input_len
-                    <= self.chunked_prefill_size
+                    self.chunked_prefill_size is None
+                    or (
+                        new_batch_input_tokens + req.extend_input_len
+                        <= self.chunked_prefill_size
+                    )
                     or (
                         req.return_logprob and req.normalized_prompt_logprob is None
                     )
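The hunk above changes the batch-admission condition so that a chunked_prefill_size of None now means "chunked prefill disabled" and the token-budget comparison is skipped, rather than comparing against a huge sentinel. Below is a minimal sketch of the new check, using hypothetical standalone names that mirror the diff; it is not the repository's actual scheduler code.

    from typing import Optional


    def fits_prefill_budget(
        new_batch_input_tokens: int,
        extend_input_len: int,
        chunked_prefill_size: Optional[int],
    ) -> bool:
        """Mirror of the updated condition: None disables the chunk budget."""
        if chunked_prefill_size is None:
            return True  # chunked prefill disabled: no per-batch token limit
        return new_batch_input_tokens + extend_input_len <= chunked_prefill_size


    # With the old sentinel (1 << 30) the comparison always ran; with None it is skipped.
    assert fits_prefill_budget(4096, 8192, None)       # unlimited when disabled
    assert fits_prefill_budget(0, 8192, 8192)          # fits the budget exactly
    assert not fits_prefill_budget(4096, 8192, 8192)   # exceeds the budget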
@@ -87,8 +87,6 @@ class ServerArgs:
     node_rank: Optional[int] = None
 
     def __post_init__(self):
-        if self.chunked_prefill_size is None:
-            self.chunked_prefill_size = 1 << 30
         if self.tokenizer_path is None:
             self.tokenizer_path = self.model_path
         if self.mem_fraction_static is None:
@@ -414,7 +412,7 @@ class ServerArgs:
         ), "multi-node data parallel is not supported"
         assert not (
-            self.chunked_prefill_size < (1 << 30) and self.disable_radix_cache
+            self.chunked_prefill_size is not None and self.disable_radix_cache
         ), "chunked prefill is not supported with radix cache disabled currently"
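The two ServerArgs hunks drop the 1 << 30 sentinel default and make the radix-cache assertion test for None directly, so "chunked prefill enabled" is simply "chunked_prefill_size is not None". Below is a minimal sketch of that validation, using a hypothetical dataclass that carries only the two relevant fields, not the real ServerArgs.

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class ChunkedPrefillArgs:  # hypothetical stand-in for the relevant ServerArgs fields
        chunked_prefill_size: Optional[int] = None
        disable_radix_cache: bool = False

        def check(self) -> None:
            # Mirrors the updated assertion: reject chunked prefill only when it is
            # actually enabled (size is not None) and the radix cache is disabled.
            assert not (
                self.chunked_prefill_size is not None and self.disable_radix_cache
            ), "chunked prefill is not supported with radix cache disabled currently"


    ChunkedPrefillArgs(chunked_prefill_size=None, disable_radix_cache=True).check()   # now passes
    ChunkedPrefillArgs(chunked_prefill_size=8192, disable_radix_cache=False).check()  # passes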