Unverified commit eb675ae3, authored by Neelay Shah, committed by GitHub
Browse files

fix(sglang): make prefill warmup failure fatal (#7944)


Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent d5803cbe
......@@ -25,9 +25,12 @@ from dynamo.sglang.request_handlers import DecodeWorkerHandler, PrefillWorkerHan
async def _warmup_prefill_engine(engine: sgl.Engine, server_args) -> None:
"""Perform warmup request for prefill engine to reduce initial TTFT."""
"""Perform warmup request for prefill engine to reduce initial TTFT.
Raises on failure so the caller can prevent the worker from registering
with a broken engine (silent request drops).
"""
logging.info("Start of prefill disaggregation warmup ...")
try:
from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST
sampling_params = {
......@@ -50,10 +53,6 @@ async def _warmup_prefill_engine(engine: sgl.Engine, server_args) -> None:
await asyncio.wait_for(_do_warmup(), timeout=1800)
logging.info("Prefill warmup completed")
except asyncio.TimeoutError:
logging.warning("Prefill warmup timed out after 1800s")
except Exception as e:
logging.warning(f"Prefill warmup failed: {e}")
async def init_decode(
......@@ -184,7 +183,16 @@ async def init_prefill(
await handle_non_leader_node(engine, publisher, metrics_task)
return
try:
await _warmup_prefill_engine(engine, server_args)
except asyncio.TimeoutError as e:
logging.error("Prefill warmup timed out after 1800s — aborting worker startup")
raise RuntimeError(
"Prefill warmup timed out; worker cannot serve requests"
) from e
except Exception as e:
logging.error(f"Prefill warmup failed: {e} — aborting worker startup")
raise RuntimeError(f"Prefill warmup failed: {e}") from e
handler = PrefillWorkerHandler(
engine, config, publisher, generate_endpoint, shutdown_event
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment