Unverified commit c35dcfdb, authored by Yusong Gao and committed by GitHub
Browse files

[PD] fix: skip warmup request in disaggregation mode to prevent crash on timeout (#5292)

parent c163bf4f
...@@ -785,6 +785,7 @@ def _wait_and_warmup( ...@@ -785,6 +785,7 @@ def _wait_and_warmup(
json_data["sampling_params"]["max_new_tokens"] = 0 json_data["sampling_params"]["max_new_tokens"] = 0
try: try:
if server_args.disaggregation_mode == "null":
res = requests.post( res = requests.post(
url + request_name, url + request_name,
json=json_data, json=json_data,
...@@ -792,6 +793,9 @@ def _wait_and_warmup( ...@@ -792,6 +793,9 @@ def _wait_and_warmup(
timeout=600, timeout=600,
) )
assert res.status_code == 200, f"{res}" assert res.status_code == 200, f"{res}"
else:
# Warmup request currently hangs in disaggregation mode, so we skip it.
logger.info("Skipping warmup request in disaggregation mode")
except Exception: except Exception:
last_traceback = get_exception_traceback() last_traceback = get_exception_traceback()
if pipe_finish_writer is not None: if pipe_finish_writer is not None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment