Unverified commit 263eab9f, authored by ishandhanani, committed by GitHub
Browse files

fix: dummy health check server not accessible on non-zero rank nodes (#12297)

parent 25257d8e
......@@ -104,15 +104,21 @@ def launch_server_process_and_send_one_request(
if response.status_code == 200:
# The rank-0 node sends a request to sync with the other nodes and then returns.
if server_args.node_rank == 0:
response = requests.post(
f"{base_url}/generate",
json={
payload = {
"input_ids": [0, 1, 2, 3],
"sampling_params": {
"max_new_tokens": 8,
"temperature": 0,
},
},
}
# In PD mode, include fake bootstrap fields so workers don't assert
if server_args.disaggregation_mode != "null":
payload["bootstrap_host"] = FAKE_BOOTSTRAP_HOST
payload["bootstrap_room"] = 0
response = requests.post(
f"{base_url}/generate",
json=payload,
timeout=600,
)
if response.status_code != 200:
......
......@@ -2351,16 +2351,24 @@ def launch_dummy_health_check_server(host, port, enable_metrics):
)
server = uvicorn.Server(config=config)
# Run server in a background daemon thread with its own event loop
# This prevents blocking the main thread while still serving health checks
def run_server():
try:
loop = asyncio.get_running_loop()
asyncio.run(server.serve())
except Exception as e:
logger.error(f"Dummy health check server failed to start: {e}")
raise
finally:
logger.info(f"Dummy health check server stopped at {host}:{port}")
thread = threading.Thread(
target=run_server, daemon=True, name="health-check-server"
)
thread.start()
logger.info(
f"Dummy health check server scheduled on existing loop at {host}:{port}"
f"Dummy health check server started in background thread at {host}:{port}"
)
loop.create_task(server.serve())
except RuntimeError:
logger.info(f"Starting dummy health check server at {host}:{port}")
server.run()
def create_checksum(directory: str):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment