Unverified commit 68f8b60d, authored by Ke Bao and committed by GitHub
Browse files

Fix chunked prefill condition (#1594)

parent 6a5b352a
......@@ -249,7 +249,10 @@ class PrefillAdder:
return AddReqResult.NO_TOKEN
tokens_freed += tokens_occupied
if req.extend_input_len <= self.rem_chunk_tokens:
if (
self.rem_chunk_tokens is None
or req.extend_input_len <= self.rem_chunk_tokens
):
self.can_run_list.append(req)
self._prefill_one_req(
0,
......
......@@ -8,6 +8,7 @@ from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
run_bench_serving,
)
......@@ -62,6 +63,16 @@ class TestChunkedPrefill(unittest.TestCase):
disable_radix_cache=False, enable_mixed_chunk=False, chunked_prefill_size=-1
)
def test_no_chunked_prefill_without_radix_cache(self):
    """Run the serving benchmark with the radix cache disabled and
    ``--chunked-prefill-size -1``.

    Ten prompts are submitted at an unbounded request rate; the test
    passes when the benchmark reports all ten as completed.
    """
    server_flags = ["--disable-radix-cache", "--chunked-prefill-size", "-1"]
    expected_completed = 10
    bench_result = run_bench_serving(
        model=DEFAULT_MODEL_NAME_FOR_TEST,
        num_prompts=expected_completed,
        request_rate=float("inf"),
        other_server_args=server_flags,
    )
    # Every submitted prompt must finish.
    assert bench_result["completed"] == expected_completed
# Script entry point: hand control to unittest when this module is run directly.
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment