Unverified commit 23486039, authored by Mark McLoughlin, committed by GitHub
Browse files

[Frontend][Core] Revert "Add shutdown timeout" (#34730 and #36270) (#36628)


Signed-off-by: Mark McLoughlin <markmc@redhat.com>
parent c8851008
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Integration tests for shutdown behavior, timeout, and signal handling."""
import asyncio
import signal import signal
import subprocess import subprocess
import sys import sys
import time import time
from dataclasses import dataclass, field
import httpx
import openai import openai
import psutil
import pytest import pytest
from tests.utils import RemoteOpenAIServer
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.network_utils import get_open_port from vllm.utils.network_utils import get_open_port
...@@ -24,101 +18,6 @@ MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM" ...@@ -24,101 +18,6 @@ MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
_IS_ROCM = current_platform.is_rocm() _IS_ROCM = current_platform.is_rocm()
_SERVER_STARTUP_TIMEOUT = 120 _SERVER_STARTUP_TIMEOUT = 120
_PROCESS_EXIT_TIMEOUT = 15 _PROCESS_EXIT_TIMEOUT = 15
_SHUTDOWN_DETECTION_TIMEOUT = 10
_CHILD_CLEANUP_TIMEOUT = 10
def _get_child_pids(parent_pid: int) -> list[int]:
    """Return the PIDs of all descendants of ``parent_pid``.

    Descendants are collected recursively via ``psutil``. If the parent
    process no longer exists, an empty list is returned instead of raising.
    """
    try:
        descendants = psutil.Process(parent_pid).children(recursive=True)
    except psutil.NoSuchProcess:
        return []
    return [child.pid for child in descendants]
async def _assert_children_cleaned_up(
    child_pids: list[int],
    timeout: float = _CHILD_CLEANUP_TIMEOUT,
):
    """Wait for child processes to exit and fail if any remain.

    Polls the given PIDs every 0.5s until none of them is running (zombies
    and already-reaped processes count as exited) or until ``timeout``
    seconds have elapsed, in which case the test is failed via
    ``pytest.fail``. An empty ``child_pids`` list returns immediately.
    """
    if not child_pids:
        return

    def _is_alive(pid: int) -> bool:
        try:
            proc = psutil.Process(pid)
            return proc.is_running() and proc.status() != psutil.STATUS_ZOMBIE
        except psutil.NoSuchProcess:
            return False

    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while True:
        # Always check at least once so ``still_alive`` is bound even when
        # ``timeout`` is zero or negative, and re-check after the final sleep.
        still_alive = [pid for pid in child_pids if _is_alive(pid)]
        if not still_alive:
            return
        if time.monotonic() >= deadline:
            break
        await asyncio.sleep(0.5)

    pytest.fail(
        f"Child processes {still_alive} still alive after {timeout}s. "
        "Process cleanup may not be working correctly."
    )
@dataclass
class ShutdownState:
    """Mutable counters and flags shared by the concurrent request workers."""

    # Set once any request fails with HTTP status 503.
    got_503: bool = False
    # Set once any request fails with HTTP status 500.
    got_500: bool = False
    # Requests that completed successfully after SIGTERM was flagged as sent.
    requests_after_sigterm: int = 0
    # Completed responses containing a choice with finish_reason == "abort".
    aborted_requests: int = 0
    # Requests that failed with a connection / protocol error.
    connection_errors: int = 0
    # Set by the test to make the request workers leave their loop.
    stop_requesting: bool = False
    # Human-readable descriptions of any other errors that were observed.
    errors: list[str] = field(default_factory=list)
async def _concurrent_request_loop(
    client: openai.AsyncOpenAI,
    state: ShutdownState,
    sigterm_sent: asyncio.Event | None = None,
    concurrency: int = 10,
):
    """Run multiple concurrent requests to keep the server busy.

    Spawns ``concurrency`` worker tasks that each issue completion requests
    in a loop until ``state.stop_requesting`` is set, and records the
    outcomes on ``state``. When ``sigterm_sent`` is provided and set,
    successful completions are counted as ``requests_after_sigterm`` and a
    connection error makes the worker stop.
    """

    async def single_request():
        while not state.stop_requesting:
            try:
                response = await client.completions.create(
                    model=MODEL_NAME,
                    prompt="Write a story: ",
                    max_tokens=200,
                )
                if sigterm_sent is not None and sigterm_sent.is_set():
                    state.requests_after_sigterm += 1
                # Check if any choice has finish_reason='abort'
                if any(choice.finish_reason == "abort" for choice in response.choices):
                    state.aborted_requests += 1
            except openai.APIStatusError as e:
                if e.status_code == 503:
                    state.got_503 = True
                elif e.status_code == 500:
                    state.got_500 = True
                else:
                    state.errors.append(f"API error: {e}")
            except (openai.APIConnectionError, httpx.RemoteProtocolError):
                state.connection_errors += 1
                # Once SIGTERM has been sent, a dropped connection means the
                # server is going away; stop this worker.
                if sigterm_sent is not None and sigterm_sent.is_set():
                    break
            except Exception as e:
                state.errors.append(f"Unexpected error: {e}")
                break
            # Brief pause between requests before retrying.
            await asyncio.sleep(0.01)

    tasks = [asyncio.create_task(single_request()) for _ in range(concurrency)]
    try:
        await asyncio.gather(*tasks, return_exceptions=True)
    finally:
        # Make sure no worker outlives this coroutine.
        for t in tasks:
            if not t.done():
                t.cancel()
@pytest.mark.asyncio @pytest.mark.asyncio
...@@ -204,361 +103,3 @@ async def test_shutdown_on_engine_failure(): ...@@ -204,361 +103,3 @@ async def test_shutdown_on_engine_failure():
return_code = proc.wait(timeout=_PROCESS_EXIT_TIMEOUT) return_code = proc.wait(timeout=_PROCESS_EXIT_TIMEOUT)
assert return_code is not None assert return_code is not None
@pytest.mark.asyncio
async def test_wait_timeout_completes_requests():
    """Verify wait timeout: new requests rejected, in-flight requests complete."""
    server_args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "256",
        "--enforce-eager",
        "--gpu-memory-utilization",
        "0.05",
        "--max-num-seqs",
        "4",
        "--shutdown-timeout",
        "30",
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
        client = remote_server.get_async_client()
        proc = remote_server.proc
        # Snapshot the children before shutdown so cleanup can be verified.
        child_pids = _get_child_pids(proc.pid)

        state = ShutdownState()
        sigterm_sent = asyncio.Event()

        request_task = asyncio.create_task(
            _concurrent_request_loop(client, state, sigterm_sent, concurrency=10)
        )

        # Give the workers time to get requests in flight.
        await asyncio.sleep(0.5)

        proc.send_signal(signal.SIGTERM)
        sigterm_sent.set()

        try:
            await asyncio.wait_for(request_task, timeout=_SHUTDOWN_DETECTION_TIMEOUT)
        except asyncio.TimeoutError:
            pass
        finally:
            state.stop_requesting = True
            if not request_task.done():
                request_task.cancel()
            await asyncio.gather(request_task, return_exceptions=True)

        # wait timeout should complete in-flight requests
        assert state.requests_after_sigterm > 0, (
            f"Wait timeout should complete in-flight requests. "
            f"503: {state.got_503}, 500: {state.got_500}, "
            f"conn_errors: {state.connection_errors}, errors: {state.errors}"
        )
        # server must stop accepting new requests (503, 500, or connection close)
        assert state.got_503 or state.got_500 or state.connection_errors > 0, (
            f"Server should stop accepting requests. "
            f"completed: {state.requests_after_sigterm}, errors: {state.errors}"
        )

        await _assert_children_cleaned_up(child_pids)
@pytest.mark.asyncio
@pytest.mark.parametrize("wait_for_engine_idle", [0.0, 2.0])
async def test_abort_timeout_exits_quickly(wait_for_engine_idle: float):
    """Verify the server exits promptly on SIGTERM with ``--shutdown-timeout 0``.

    When ``wait_for_engine_idle`` is positive, a couple of requests are sent
    first and the test then sleeps for that many seconds before signalling.
    """
    server_args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "256",
        "--enforce-eager",
        "--gpu-memory-utilization",
        "0.05",
        "--max-num-seqs",
        "4",
        "--shutdown-timeout",
        "0",
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
        proc = remote_server.proc
        child_pids = _get_child_pids(proc.pid)

        if wait_for_engine_idle > 0:
            client = remote_server.get_async_client()
            # Send requests to ensure engine is fully initialized
            for _ in range(2):
                await client.completions.create(
                    model=MODEL_NAME,
                    prompt="Test request: ",
                    max_tokens=10,
                )
            # Wait for engine to become idle
            await asyncio.sleep(wait_for_engine_idle)

        # Monotonic clock so the measured duration is immune to clock changes.
        start_time = time.monotonic()
        proc.send_signal(signal.SIGTERM)

        # abort timeout (0) should exit promptly
        for _ in range(20):
            if proc.poll() is not None:
                break
            # Yield to the event loop instead of blocking it while polling.
            await asyncio.sleep(0.1)

        if proc.poll() is None:
            proc.kill()
            proc.wait(timeout=5)
            pytest.fail("Process did not exit after SIGTERM with abort timeout")

        exit_time = time.monotonic() - start_time
        assert exit_time < 2, f"Default shutdown took too long: {exit_time:.1f}s"
        assert proc.returncode in (0, -15, None), f"Unexpected: {proc.returncode}"

        await _assert_children_cleaned_up(child_pids)
@pytest.mark.asyncio
async def test_wait_timeout_with_short_duration():
    """Verify server exits cleanly with a short wait timeout."""
    wait_timeout = 3
    server_args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "256",
        "--enforce-eager",
        "--gpu-memory-utilization",
        "0.05",
        "--max-num-seqs",
        "4",
        "--shutdown-timeout",
        str(wait_timeout),
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
        client = remote_server.get_async_client()
        proc = remote_server.proc
        child_pids = _get_child_pids(proc.pid)

        state = ShutdownState()
        request_task = asyncio.create_task(
            _concurrent_request_loop(client, state, concurrency=3)
        )

        await asyncio.sleep(0.5)

        start_time = time.monotonic()
        proc.send_signal(signal.SIGTERM)

        # server should exit within wait_timeout + buffer
        max_wait = wait_timeout + 15
        for _ in range(int(max_wait * 10)):
            if proc.poll() is not None:
                break
            # A blocking sleep here would freeze the event loop and starve
            # request_task, so the server would see no traffic while draining.
            await asyncio.sleep(0.1)

        exit_time = time.monotonic() - start_time

        state.stop_requesting = True
        if not request_task.done():
            request_task.cancel()
        await asyncio.gather(request_task, return_exceptions=True)

        if proc.poll() is None:
            proc.kill()
            proc.wait(timeout=5)
            pytest.fail(f"Process did not exit within {max_wait}s after SIGTERM")

        assert exit_time < wait_timeout + 10, (
            f"Took too long to exit ({exit_time:.1f}s), expected <{wait_timeout + 10}s"
        )
        assert proc.returncode in (0, -15, None), f"Unexpected: {proc.returncode}"

        await _assert_children_cleaned_up(child_pids)
@pytest.mark.asyncio
async def test_abort_timeout_fails_inflight_requests():
    """Verify abort timeout (0) immediately aborts in-flight requests."""
    server_args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "256",
        "--enforce-eager",
        "--gpu-memory-utilization",
        "0.05",
        "--max-num-seqs",
        "4",
        "--shutdown-timeout",
        "0",
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
        client = remote_server.get_async_client()
        proc = remote_server.proc
        child_pids = _get_child_pids(proc.pid)

        state = ShutdownState()
        sigterm_sent = asyncio.Event()

        request_task = asyncio.create_task(
            _concurrent_request_loop(client, state, sigterm_sent, concurrency=10)
        )

        await asyncio.sleep(0.5)

        proc.send_signal(signal.SIGTERM)
        sigterm_sent.set()

        try:
            await asyncio.wait_for(request_task, timeout=5)
        except asyncio.TimeoutError:
            pass
        finally:
            state.stop_requesting = True
            if not request_task.done():
                request_task.cancel()
            await asyncio.gather(request_task, return_exceptions=True)

        # With abort timeout (0), requests should be aborted (finish_reason='abort')
        # or rejected (connection errors or API errors)
        assert (
            state.aborted_requests > 0
            or state.connection_errors > 0
            or state.got_500
            or state.got_503
        ), (
            f"Abort timeout should cause request aborts or failures. "
            f"aborted: {state.aborted_requests}, "
            f"503: {state.got_503}, 500: {state.got_500}, "
            f"conn_errors: {state.connection_errors}, "
            f"completed: {state.requests_after_sigterm}"
        )

        # Verify fast shutdown
        start_time = time.monotonic()
        for _ in range(100):
            if proc.poll() is not None:
                break
            # Non-blocking wait: keep the event loop responsive while polling.
            await asyncio.sleep(0.1)
        exit_time = time.monotonic() - start_time
        assert exit_time < 10, f"Abort timeout shutdown took too long: {exit_time:.1f}s"

        await _assert_children_cleaned_up(child_pids)
@pytest.mark.asyncio
async def test_request_rejection_during_shutdown():
    """Verify new requests are rejected with error during shutdown."""
    server_args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "256",
        "--enforce-eager",
        "--gpu-memory-utilization",
        "0.05",
        "--max-num-seqs",
        "4",
        "--shutdown-timeout",
        "30",
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
        client = remote_server.get_async_client()
        proc = remote_server.proc
        child_pids = _get_child_pids(proc.pid)

        proc.send_signal(signal.SIGTERM)

        # Wait a moment after signalling before probing the server.
        await asyncio.sleep(1.0)

        # Try to send new requests - they should be rejected
        rejected_count = 0
        for _ in range(10):
            try:
                await client.completions.create(
                    model=MODEL_NAME, prompt="Hello", max_tokens=10
                )
            except (
                openai.APIStatusError,
                openai.APIConnectionError,
                httpx.RemoteProtocolError,
            ):
                rejected_count += 1
            await asyncio.sleep(0.1)

        assert rejected_count > 0, (
            f"Expected requests to be rejected during shutdown, "
            f"but {rejected_count} were rejected out of 10"
        )

        await _assert_children_cleaned_up(child_pids)
@pytest.mark.asyncio
async def test_multi_api_server_shutdown():
    """Verify shutdown works with multiple API servers."""
    server_args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "256",
        "--enforce-eager",
        "--gpu-memory-utilization",
        "0.05",
        "--max-num-seqs",
        "4",
        "--shutdown-timeout",
        "30",
        "--api-server-count",
        "2",
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args, auto_port=True) as remote_server:
        client = remote_server.get_async_client()
        proc = remote_server.proc
        child_pids = _get_child_pids(proc.pid)

        assert len(child_pids) >= 2, (
            f"Expected at least 2 child processes, got {len(child_pids)}"
        )

        state = ShutdownState()
        sigterm_sent = asyncio.Event()

        # Start concurrent requests across both API servers
        request_task = asyncio.create_task(
            _concurrent_request_loop(client, state, sigterm_sent, concurrency=8)
        )

        await asyncio.sleep(0.5)

        # Send SIGTERM to parent - should propagate to all children
        proc.send_signal(signal.SIGTERM)
        sigterm_sent.set()

        try:
            await asyncio.wait_for(request_task, timeout=_SHUTDOWN_DETECTION_TIMEOUT)
        except asyncio.TimeoutError:
            pass
        finally:
            state.stop_requesting = True
            if not request_task.done():
                request_task.cancel()
            await asyncio.gather(request_task, return_exceptions=True)

        for _ in range(300):  # up to 30 seconds
            if proc.poll() is not None:
                break
            # Non-blocking wait: do not stall the event loop while polling.
            await asyncio.sleep(0.1)

        if proc.poll() is None:
            proc.kill()
            proc.wait(timeout=5)
            pytest.fail("Process did not exit after SIGTERM")

        await _assert_children_cleaned_up(child_pids)
...@@ -79,7 +79,7 @@ def test_api_server_process_manager_init(api_server_args, with_stats_update): ...@@ -79,7 +79,7 @@ def test_api_server_process_manager_init(api_server_args, with_stats_update):
finally: finally:
# Always clean up the processes # Always clean up the processes
print("Cleaning up processes...") print("Cleaning up processes...")
manager.shutdown() manager.close()
# Give processes time to terminate # Give processes time to terminate
time.sleep(0.2) time.sleep(0.2)
...@@ -111,8 +111,6 @@ def test_wait_for_completion_or_failure(api_server_args): ...@@ -111,8 +111,6 @@ def test_wait_for_completion_or_failure(api_server_args):
wait_for_completion_or_failure(api_server_manager=manager) wait_for_completion_or_failure(api_server_manager=manager)
except Exception as e: except Exception as e:
result["exception"] = e result["exception"] = e
finally:
manager.shutdown()
# Start a thread to run wait_for_completion_or_failure # Start a thread to run wait_for_completion_or_failure
wait_thread = threading.Thread(target=run_with_exception_capture, daemon=True) wait_thread = threading.Thread(target=run_with_exception_capture, daemon=True)
...@@ -145,7 +143,7 @@ def test_wait_for_completion_or_failure(api_server_args): ...@@ -145,7 +143,7 @@ def test_wait_for_completion_or_failure(api_server_args):
assert not proc.is_alive(), f"Process {i} should not be alive" assert not proc.is_alive(), f"Process {i} should not be alive"
finally: finally:
manager.shutdown() manager.close()
time.sleep(0.2) time.sleep(0.2)
...@@ -176,14 +174,11 @@ def test_normal_completion(api_server_args): ...@@ -176,14 +174,11 @@ def test_normal_completion(api_server_args):
# since all processes have already # since all processes have already
# terminated, it should return immediately # terminated, it should return immediately
# with no error # with no error
try: wait_for_completion_or_failure(api_server_manager=manager)
wait_for_completion_or_failure(api_server_manager=manager)
finally:
manager.shutdown()
finally: finally:
# Clean up just in case # Clean up just in case
manager.shutdown() manager.close()
time.sleep(0.2) time.sleep(0.2)
...@@ -206,7 +201,7 @@ def test_external_process_monitoring(api_server_args): ...@@ -206,7 +201,7 @@ def test_external_process_monitoring(api_server_args):
def __init__(self, proc): def __init__(self, proc):
self.proc = proc self.proc = proc
def shutdown(self): def close(self):
if self.proc.is_alive(): if self.proc.is_alive():
self.proc.terminate() self.proc.terminate()
self.proc.join(timeout=0.5) self.proc.join(timeout=0.5)
...@@ -231,9 +226,6 @@ def test_external_process_monitoring(api_server_args): ...@@ -231,9 +226,6 @@ def test_external_process_monitoring(api_server_args):
) )
except Exception as e: except Exception as e:
result["exception"] = e result["exception"] = e
finally:
manager.shutdown()
mock_coordinator.shutdown()
# Start a thread to run wait_for_completion_or_failure # Start a thread to run wait_for_completion_or_failure
wait_thread = threading.Thread(target=run_with_exception_capture, daemon=True) wait_thread = threading.Thread(target=run_with_exception_capture, daemon=True)
...@@ -267,6 +259,6 @@ def test_external_process_monitoring(api_server_args): ...@@ -267,6 +259,6 @@ def test_external_process_monitoring(api_server_args):
finally: finally:
# Clean up # Clean up
manager.shutdown() manager.close()
mock_coordinator.shutdown() mock_coordinator.close()
time.sleep(0.2) time.sleep(0.2)
...@@ -327,12 +327,6 @@ class VllmConfig: ...@@ -327,12 +327,6 @@ class VllmConfig:
weight_transfer_config: WeightTransferConfig | None = None weight_transfer_config: WeightTransferConfig | None = None
"""The configurations for weight transfer during RL training.""" """The configurations for weight transfer during RL training."""
shutdown_timeout: int = Field(default=0, ge=0)
"""Shutdown grace period for in-flight requests. Shutdown will be delayed for
up to this amount of time to allow already-running requests to complete. Any
remaining requests are aborted once the timeout is reached.
"""
def compute_hash(self) -> str: def compute_hash(self) -> str:
""" """
WARNING: Whenever a new field is added to this config, WARNING: Whenever a new field is added to this config,
......
...@@ -606,8 +606,6 @@ class EngineArgs: ...@@ -606,8 +606,6 @@ class EngineArgs:
kv_offloading_backend: KVOffloadingBackend = CacheConfig.kv_offloading_backend kv_offloading_backend: KVOffloadingBackend = CacheConfig.kv_offloading_backend
tokens_only: bool = False tokens_only: bool = False
shutdown_timeout: int = 0
weight_transfer_config: WeightTransferConfig | None = get_field( weight_transfer_config: WeightTransferConfig | None = get_field(
VllmConfig, VllmConfig,
"weight_transfer_config", "weight_transfer_config",
...@@ -1310,14 +1308,6 @@ class EngineArgs: ...@@ -1310,14 +1308,6 @@ class EngineArgs:
default=False, default=False,
action=argparse.BooleanOptionalAction, action=argparse.BooleanOptionalAction,
) )
parser.add_argument(
"--shutdown-timeout",
type=int,
default=0,
help="Shutdown timeout in seconds. 0 = abort, >0 = wait.",
)
return parser return parser
@classmethod @classmethod
...@@ -1926,7 +1916,6 @@ class EngineArgs: ...@@ -1926,7 +1916,6 @@ class EngineArgs:
optimization_level=self.optimization_level, optimization_level=self.optimization_level,
performance_mode=self.performance_mode, performance_mode=self.performance_mode,
weight_transfer_config=self.weight_transfer_config, weight_transfer_config=self.weight_transfer_config,
shutdown_timeout=self.shutdown_timeout,
) )
return config return config
......
...@@ -200,11 +200,6 @@ class EngineClient(ABC): ...@@ -200,11 +200,6 @@ class EngineClient(ABC):
"""Return whether the engine is currently paused.""" """Return whether the engine is currently paused."""
... ...
@abstractmethod
def shutdown(self, timeout: float | None = None) -> None:
"""Shutdown the engine with optional timeout."""
...
async def scale_elastic_ep( async def scale_elastic_ep(
self, new_data_parallel_size: int, drain_timeout: int = 300 self, new_data_parallel_size: int, drain_timeout: int = 300
) -> None: ) -> None:
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
import argparse import argparse
import signal import signal
import time
import uvloop import uvloop
...@@ -225,12 +224,8 @@ def run_headless(args: argparse.Namespace): ...@@ -225,12 +224,8 @@ def run_headless(args: argparse.Namespace):
try: try:
engine_manager.join_first() engine_manager.join_first()
finally: finally:
timeout = None
if shutdown_requested:
timeout = vllm_config.shutdown_timeout
logger.info("Waiting up to %d seconds for processes to exit", timeout)
engine_manager.shutdown(timeout=timeout)
logger.info("Shutting down.") logger.info("Shutting down.")
engine_manager.close()
def run_multi_api_server(args: argparse.Namespace): def run_multi_api_server(args: argparse.Namespace):
...@@ -241,19 +236,6 @@ def run_multi_api_server(args: argparse.Namespace): ...@@ -241,19 +236,6 @@ def run_multi_api_server(args: argparse.Namespace):
if num_api_servers > 1: if num_api_servers > 1:
setup_multiprocess_prometheus() setup_multiprocess_prometheus()
shutdown_requested = False
# Catch SIGTERM and SIGINT to allow graceful shutdown.
def signal_handler(signum, frame):
nonlocal shutdown_requested
logger.debug("Received %d signal.", signum)
if not shutdown_requested:
shutdown_requested = True
raise SystemExit
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
listen_address, sock = setup_server(args) listen_address, sock = setup_server(args)
engine_args = vllm.AsyncEngineArgs.from_cli_args(args) engine_args = vllm.AsyncEngineArgs.from_cli_args(args)
...@@ -315,29 +297,11 @@ def run_multi_api_server(args: argparse.Namespace): ...@@ -315,29 +297,11 @@ def run_multi_api_server(args: argparse.Namespace):
api_server_manager = APIServerProcessManager(**api_server_manager_kwargs) api_server_manager = APIServerProcessManager(**api_server_manager_kwargs)
# Wait for API servers # Wait for API servers
try: wait_for_completion_or_failure(
wait_for_completion_or_failure( api_server_manager=api_server_manager,
api_server_manager=api_server_manager, engine_manager=local_engine_manager,
engine_manager=local_engine_manager, coordinator=coordinator,
coordinator=coordinator, )
)
finally:
timeout = shutdown_by = None
if shutdown_requested:
timeout = vllm_config.shutdown_timeout
shutdown_by = time.monotonic() + timeout
logger.info("Waiting up to %d seconds for processes to exit", timeout)
def to_timeout(deadline: float | None) -> float | None:
return (
deadline if deadline is None else max(deadline - time.monotonic(), 0.0)
)
api_server_manager.shutdown(timeout=timeout)
if local_engine_manager:
local_engine_manager.shutdown(timeout=to_timeout(shutdown_by))
if coordinator:
coordinator.shutdown(timeout=to_timeout(shutdown_by))
def run_api_server_worker_proc( def run_api_server_worker_proc(
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
import asyncio import asyncio
import signal import signal
import socket import socket
from functools import partial
from typing import Any from typing import Any
import uvicorn import uvicorn
...@@ -92,10 +91,12 @@ async def serve_http( ...@@ -92,10 +91,12 @@ async def serve_http(
) )
) )
shutdown_event = asyncio.Event()
def signal_handler() -> None: def signal_handler() -> None:
shutdown_event.set() # prevents the uvicorn signal handler to exit early
server_task.cancel()
watchdog_task.cancel()
if ssl_cert_refresher:
ssl_cert_refresher.stop()
async def dummy_shutdown() -> None: async def dummy_shutdown() -> None:
pass pass
...@@ -103,24 +104,6 @@ async def serve_http( ...@@ -103,24 +104,6 @@ async def serve_http(
loop.add_signal_handler(signal.SIGINT, signal_handler) loop.add_signal_handler(signal.SIGINT, signal_handler)
loop.add_signal_handler(signal.SIGTERM, signal_handler) loop.add_signal_handler(signal.SIGTERM, signal_handler)
async def handle_shutdown() -> None:
await shutdown_event.wait()
engine_client = app.state.engine_client
timeout = engine_client.vllm_config.shutdown_timeout
await loop.run_in_executor(
None, partial(engine_client.shutdown, timeout=timeout)
)
server.should_exit = True
server_task.cancel()
watchdog_task.cancel()
if ssl_cert_refresher:
ssl_cert_refresher.stop()
shutdown_task = loop.create_task(handle_shutdown())
try: try:
await server_task await server_task
return dummy_shutdown() return dummy_shutdown()
...@@ -137,7 +120,6 @@ async def serve_http( ...@@ -137,7 +120,6 @@ async def serve_http(
logger.info("Shutting down FastAPI HTTP server.") logger.info("Shutting down FastAPI HTTP server.")
return server.shutdown() return server.shutdown()
finally: finally:
shutdown_task.cancel()
watchdog_task.cancel() watchdog_task.cancel()
......
...@@ -226,8 +226,6 @@ class EngineCoreRequestType(enum.Enum): ...@@ -226,8 +226,6 @@ class EngineCoreRequestType(enum.Enum):
UTILITY = b"\x03" UTILITY = b"\x03"
# Sentinel used within EngineCoreProc. # Sentinel used within EngineCoreProc.
EXECUTOR_FAILED = b"\x04" EXECUTOR_FAILED = b"\x04"
# Sentinel to wake up input_queue.get() during shutdown.
WAKEUP = b"\x05"
class ReconfigureDistributedRequest(msgspec.Struct): class ReconfigureDistributedRequest(msgspec.Struct):
......
...@@ -264,15 +264,16 @@ class AsyncLLM(EngineClient): ...@@ -264,15 +264,16 @@ class AsyncLLM(EngineClient):
def __del__(self): def __del__(self):
self.shutdown() self.shutdown()
def shutdown(self, timeout: float | None = None) -> None: def shutdown(self):
"""Shutdown, cleaning up the background proc and IPC.""" """Shutdown, cleaning up the background proc and IPC."""
shutdown_prometheus() shutdown_prometheus()
if renderer := getattr(self, "renderer", None): if renderer := getattr(self, "renderer", None):
renderer.shutdown() renderer.shutdown()
if engine_core := getattr(self, "engine_core", None): if engine_core := getattr(self, "engine_core", None):
engine_core.shutdown(timeout=timeout) engine_core.shutdown()
handler = getattr(self, "output_handler", None) handler = getattr(self, "output_handler", None)
if handler is not None: if handler is not None:
......
...@@ -104,10 +104,8 @@ class DPCoordinator: ...@@ -104,10 +104,8 @@ class DPCoordinator:
"""Returns tuple of ZMQ input address, output address.""" """Returns tuple of ZMQ input address, output address."""
return self.coord_in_address, self.coord_out_address return self.coord_in_address, self.coord_out_address
def shutdown(self, timeout: float | None = None) -> None: def close(self):
"""Shutdown coordinator process with configurable timeout.""" self._finalizer()
if self._finalizer.detach() is not None:
shutdown([self.proc], timeout=timeout)
class EngineState: class EngineState:
......
...@@ -9,7 +9,6 @@ from collections import defaultdict, deque ...@@ -9,7 +9,6 @@ from collections import defaultdict, deque
from collections.abc import Callable, Generator from collections.abc import Callable, Generator
from concurrent.futures import Future from concurrent.futures import Future
from contextlib import ExitStack, contextmanager from contextlib import ExitStack, contextmanager
from enum import IntEnum
from functools import partial from functools import partial
from inspect import isclass, signature from inspect import isclass, signature
from logging import DEBUG from logging import DEBUG
...@@ -62,7 +61,6 @@ from vllm.v1.engine import ( ...@@ -62,7 +61,6 @@ from vllm.v1.engine import (
from vllm.v1.engine.utils import ( from vllm.v1.engine.utils import (
EngineHandshakeMetadata, EngineHandshakeMetadata,
EngineZmqAddresses, EngineZmqAddresses,
SignalCallback,
get_device_indices, get_device_indices,
) )
from vllm.v1.executor import Executor from vllm.v1.executor import Executor
...@@ -773,12 +771,6 @@ class EngineCore: ...@@ -773,12 +771,6 @@ class EngineCore:
raise NotImplementedError raise NotImplementedError
class EngineShutdownState(IntEnum):
RUNNING = 0
REQUESTED = 1
SHUTTING_DOWN = 2
class EngineCoreProc(EngineCore): class EngineCoreProc(EngineCore):
"""ZMQ-wrapper for running EngineCore in background process.""" """ZMQ-wrapper for running EngineCore in background process."""
...@@ -806,7 +798,6 @@ class EngineCoreProc(EngineCore): ...@@ -806,7 +798,6 @@ class EngineCoreProc(EngineCore):
self.engine_index = engine_index self.engine_index = engine_index
identity = self.engine_index.to_bytes(length=2, byteorder="little") identity = self.engine_index.to_bytes(length=2, byteorder="little")
self.engines_running = False self.engines_running = False
self.shutdown_state = EngineShutdownState.RUNNING
with self._perform_handshakes( with self._perform_handshakes(
handshake_address, handshake_address,
...@@ -1037,11 +1028,25 @@ class EngineCoreProc(EngineCore): ...@@ -1037,11 +1028,25 @@ class EngineCoreProc(EngineCore):
def run_engine_core(*args, dp_rank: int = 0, local_dp_rank: int = 0, **kwargs): def run_engine_core(*args, dp_rank: int = 0, local_dp_rank: int = 0, **kwargs):
"""Launch EngineCore busy loop in background process.""" """Launch EngineCore busy loop in background process."""
# Signal handler used for graceful termination.
# SystemExit exception is only raised once to allow this and worker
# processes to terminate without error
shutdown_requested = False
# Ensure we can serialize transformer config after spawning # Ensure we can serialize transformer config after spawning
maybe_register_config_serialize_by_value() maybe_register_config_serialize_by_value()
def signal_handler(signum, frame):
nonlocal shutdown_requested
if not shutdown_requested:
shutdown_requested = True
raise SystemExit()
# Either SIGTERM or SIGINT will terminate the engine_core
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
engine_core: EngineCoreProc | None = None engine_core: EngineCoreProc | None = None
signal_callback: SignalCallback | None = None
try: try:
vllm_config: VllmConfig = kwargs["vllm_config"] vllm_config: VllmConfig = kwargs["vllm_config"]
parallel_config: ParallelConfig = vllm_config.parallel_config parallel_config: ParallelConfig = vllm_config.parallel_config
...@@ -1089,22 +1094,6 @@ class EngineCoreProc(EngineCore): ...@@ -1089,22 +1094,6 @@ class EngineCoreProc(EngineCore):
engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs) engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
assert engine_core is not None assert engine_core is not None
def wakeup_engine():
# Wakes up idle engine via input_queue when shutdown is requested
# Not safe in a signal handler - we may interrupt the main thread
# while it is holding the non-reentrant input_queue.mutex
engine_core.input_queue.put_nowait((EngineCoreRequestType.WAKEUP, None))
signal_callback = SignalCallback(wakeup_engine)
def signal_handler(signum, frame):
engine_core.shutdown_state = EngineShutdownState.REQUESTED
signal_callback.trigger()
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
engine_core.run_busy_loop() engine_core.run_busy_loop()
except SystemExit: except SystemExit:
...@@ -1118,10 +1107,6 @@ class EngineCoreProc(EngineCore): ...@@ -1118,10 +1107,6 @@ class EngineCoreProc(EngineCore):
engine_core._send_engine_dead() engine_core._send_engine_dead()
raise e raise e
finally: finally:
signal.signal(signal.SIGTERM, signal.SIG_DFL)
signal.signal(signal.SIGINT, signal.SIG_DFL)
if signal_callback is not None:
signal_callback.stop()
if engine_core is not None: if engine_core is not None:
engine_core.shutdown() engine_core.shutdown()
...@@ -1136,25 +1121,21 @@ class EngineCoreProc(EngineCore): ...@@ -1136,25 +1121,21 @@ class EngineCoreProc(EngineCore):
or bool(self.batch_queue) or bool(self.batch_queue)
) )
def is_running(self) -> bool:
"""Returns true if shutdown has not been requested."""
return self.shutdown_state == EngineShutdownState.RUNNING
def run_busy_loop(self): def run_busy_loop(self):
"""Core busy loop of the EngineCore.""" """Core busy loop of the EngineCore."""
while self._handle_shutdown():
# Loop until process is sent a SIGINT or SIGTERM
while True:
# 1) Poll the input queue until there is work to do. # 1) Poll the input queue until there is work to do.
self._process_input_queue() self._process_input_queue()
# 2) Step the engine core and return the outputs. # 2) Step the engine core and return the outputs.
self._process_engine_step() self._process_engine_step()
raise SystemExit
def _process_input_queue(self): def _process_input_queue(self):
"""Exits when an engine step needs to be performed.""" """Exits when an engine step needs to be performed."""
waited = False waited = False
while not self.has_work() and self.is_running(): while not self.has_work():
# Notify callbacks waiting for engine to become idle. # Notify callbacks waiting for engine to become idle.
self._notify_idle_state_callbacks() self._notify_idle_state_callbacks()
if self.input_queue.empty(): if self.input_queue.empty():
...@@ -1206,60 +1187,18 @@ class EngineCoreProc(EngineCore): ...@@ -1206,60 +1187,18 @@ class EngineCoreProc(EngineCore):
callback = self._idle_state_callbacks.pop() callback = self._idle_state_callbacks.pop()
callback(self) callback(self)
def _handle_shutdown(self) -> bool:
# Check if shutdown was requested and handle it
if self.shutdown_state == EngineShutdownState.RUNNING:
return True
if self.shutdown_state == EngineShutdownState.REQUESTED:
shutdown_timeout = self.vllm_config.shutdown_timeout
logger.info("Shutdown initiated (timeout=%d)", shutdown_timeout)
if shutdown_timeout == 0:
num_requests = self.scheduler.get_num_unfinished_requests()
if num_requests > 0:
logger.info("Aborting %d requests", num_requests)
aborted_reqs = self.scheduler.finish_requests(
None, RequestStatus.FINISHED_ABORTED
)
self._send_abort_outputs(aborted_reqs)
else:
num_requests = self.scheduler.get_num_unfinished_requests()
if num_requests > 0:
logger.info(
"Draining %d in-flight requests (timeout=%ds)",
num_requests,
shutdown_timeout,
)
self.shutdown_state = EngineShutdownState.SHUTTING_DOWN
# Exit when no work remaining
if not self.has_work():
logger.info("Shutdown complete")
return False
return True
def _handle_client_request( def _handle_client_request(
self, request_type: EngineCoreRequestType, request: Any self, request_type: EngineCoreRequestType, request: Any
) -> None: ) -> None:
"""Dispatch request from client.""" """Dispatch request from client."""
if request_type == EngineCoreRequestType.WAKEUP: if request_type == EngineCoreRequestType.ADD:
return
elif request_type == EngineCoreRequestType.ADD:
req, request_wave = request req, request_wave = request
if self._reject_add_in_shutdown(req):
return
self.add_request(req, request_wave) self.add_request(req, request_wave)
elif request_type == EngineCoreRequestType.ABORT: elif request_type == EngineCoreRequestType.ABORT:
self.abort_requests(request) self.abort_requests(request)
elif request_type == EngineCoreRequestType.UTILITY: elif request_type == EngineCoreRequestType.UTILITY:
client_idx, call_id, method_name, args = request client_idx, call_id, method_name, args = request
if self._reject_utility_in_shutdown(client_idx, call_id, method_name):
return
output = UtilityOutput(call_id) output = UtilityOutput(call_id)
# Lazily look-up utility method so that failure will be handled/returned. # Lazily look-up utility method so that failure will be handled/returned.
get_result = lambda: (method := getattr(self, method_name)) and method( get_result = lambda: (method := getattr(self, method_name)) and method(
...@@ -1276,27 +1215,6 @@ class EngineCoreProc(EngineCore): ...@@ -1276,27 +1215,6 @@ class EngineCoreProc(EngineCore):
"Unrecognized input request type encountered: %s", request_type "Unrecognized input request type encountered: %s", request_type
) )
def _reject_add_in_shutdown(self, request: Request) -> bool:
if self.shutdown_state == EngineShutdownState.RUNNING:
return False
logger.info("Rejecting request %s (server shutting down)", request.request_id)
self._send_abort_outputs_to_client([request.request_id], request.client_index)
return True
def _reject_utility_in_shutdown(
self, client_idx: int, call_id: int, method_name: str
) -> bool:
if self.shutdown_state == EngineShutdownState.RUNNING:
return False
logger.warning("Rejecting utility call %s (server shutting down)", method_name)
output = UtilityOutput(call_id, failure_message="Server shutting down")
self.output_queue.put_nowait(
(client_idx, EngineCoreOutputs(utility_output=output))
)
return True
@staticmethod @staticmethod
def _invoke_utility_method( def _invoke_utility_method(
name: str, get_result: Callable, output: UtilityOutput, enqueue_output: Callable name: str, get_result: Callable, output: UtilityOutput, enqueue_output: Callable
...@@ -1510,7 +1428,22 @@ class EngineCoreProc(EngineCore): ...@@ -1510,7 +1428,22 @@ class EngineCoreProc(EngineCore):
logger.exception( logger.exception(
"Unexpected error pre-processing request %s", request.request_id "Unexpected error pre-processing request %s", request.request_id
) )
self._send_error_outputs_to_client([request.request_id], request.client_index) self.output_queue.put_nowait(
(
request.client_index,
EngineCoreOutputs(
engine_index=self.engine_index,
finished_requests={request.request_id},
outputs=[
EngineCoreOutput(
request_id=request.request_id,
new_token_ids=[],
finish_reason=FinishReason.ERROR,
)
],
),
)
)
def pause_scheduler( def pause_scheduler(
self, mode: PauseMode = "abort", clear_cache: bool = True self, mode: PauseMode = "abort", clear_cache: bool = True
...@@ -1553,26 +1486,6 @@ class EngineCoreProc(EngineCore): ...@@ -1553,26 +1486,6 @@ class EngineCoreProc(EngineCore):
self._idle_state_callbacks.append(partial(engine_idle_callback, future=future)) self._idle_state_callbacks.append(partial(engine_idle_callback, future=future))
return future return future
def _send_finish_outputs_to_client(
self, req_ids: list[str], client_index: int, finish_reason: FinishReason
) -> None:
outputs = [
EngineCoreOutput(req_id, [], finish_reason=finish_reason)
for req_id in req_ids
]
eco = EngineCoreOutputs(finished_requests=req_ids, outputs=outputs)
self.output_queue.put_nowait((client_index, eco))
def _send_abort_outputs_to_client(
self, req_ids: list[str], client_index: int
) -> None:
self._send_finish_outputs_to_client(req_ids, client_index, FinishReason.ABORT)
def _send_error_outputs_to_client(
self, req_ids: list[str], client_index: int
) -> None:
self._send_finish_outputs_to_client(req_ids, client_index, FinishReason.ERROR)
def _send_abort_outputs(self, aborted_reqs: list[tuple[str, int]]) -> None: def _send_abort_outputs(self, aborted_reqs: list[tuple[str, int]]) -> None:
# TODO(nick) this will be moved inside the scheduler # TODO(nick) this will be moved inside the scheduler
if aborted_reqs: if aborted_reqs:
...@@ -1581,7 +1494,12 @@ class EngineCoreProc(EngineCore): ...@@ -1581,7 +1494,12 @@ class EngineCoreProc(EngineCore):
for req_id, client_index in aborted_reqs: for req_id, client_index in aborted_reqs:
by_client[client_index].add(req_id) by_client[client_index].add(req_id)
for client_index, req_ids in by_client.items(): for client_index, req_ids in by_client.items():
self._send_abort_outputs_to_client(list(req_ids), client_index) outputs = [
EngineCoreOutput(req_id, [], finish_reason=FinishReason.ABORT)
for req_id in req_ids
]
eco = EngineCoreOutputs(finished_requests=req_ids, outputs=outputs)
self.output_queue.put_nowait((client_index, eco))
class DPEngineCoreProc(EngineCoreProc): class DPEngineCoreProc(EngineCoreProc):
...@@ -1699,7 +1617,7 @@ class DPEngineCoreProc(EngineCoreProc): ...@@ -1699,7 +1617,7 @@ class DPEngineCoreProc(EngineCoreProc):
"""Core busy loop of the EngineCore for data parallel case.""" """Core busy loop of the EngineCore for data parallel case."""
# Loop until process is sent a SIGINT or SIGTERM # Loop until process is sent a SIGINT or SIGTERM
while self._handle_shutdown(): while True:
# 1) Poll the input queue until there is work to do. # 1) Poll the input queue until there is work to do.
self._process_input_queue() self._process_input_queue()
...@@ -1747,8 +1665,6 @@ class DPEngineCoreProc(EngineCoreProc): ...@@ -1747,8 +1665,6 @@ class DPEngineCoreProc(EngineCoreProc):
self.current_wave += 1 self.current_wave += 1
self.step_counter = 0 self.step_counter = 0
raise SystemExit
def _has_global_unfinished_reqs(self, local_unfinished: bool) -> bool: def _has_global_unfinished_reqs(self, local_unfinished: bool) -> bool:
# Optimization - only perform finish-sync all-reduce every 32 steps. # Optimization - only perform finish-sync all-reduce every 32 steps.
self.step_counter += 1 self.step_counter += 1
......
...@@ -128,7 +128,7 @@ class EngineCoreClient(ABC): ...@@ -128,7 +128,7 @@ class EngineCoreClient(ABC):
return AsyncMPClient(*client_args) return AsyncMPClient(*client_args)
@abstractmethod @abstractmethod
def shutdown(self, timeout: float | None = None) -> None: ... def shutdown(self): ...
def get_output(self) -> EngineCoreOutputs: def get_output(self) -> EngineCoreOutputs:
raise NotImplementedError raise NotImplementedError
...@@ -298,7 +298,7 @@ class InprocClient(EngineCoreClient): ...@@ -298,7 +298,7 @@ class InprocClient(EngineCoreClient):
if len(request_ids) > 0: if len(request_ids) > 0:
self.engine_core.abort_requests(request_ids) self.engine_core.abort_requests(request_ids)
def shutdown(self, timeout: float | None = None) -> None: def shutdown(self) -> None:
self.engine_core.shutdown() self.engine_core.shutdown()
def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None: def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
...@@ -390,9 +390,9 @@ class BackgroundResources: ...@@ -390,9 +390,9 @@ class BackgroundResources:
self.engine_dead = True self.engine_dead = True
if self.engine_manager is not None: if self.engine_manager is not None:
self.engine_manager.shutdown() self.engine_manager.close()
if self.coordinator is not None: if self.coordinator is not None:
self.coordinator.shutdown() self.coordinator.close()
if isinstance(self.output_socket, zmq.asyncio.Socket): if isinstance(self.output_socket, zmq.asyncio.Socket):
# Async case. # Async case.
...@@ -568,7 +568,10 @@ class MPClient(EngineCoreClient): ...@@ -568,7 +568,10 @@ class MPClient(EngineCoreClient):
) )
with launch_core_engines( with launch_core_engines(
vllm_config, executor_class, log_stats, addresses vllm_config,
executor_class,
log_stats,
addresses,
) as (engine_manager, coordinator, addresses): ) as (engine_manager, coordinator, addresses):
self.resources.coordinator = coordinator self.resources.coordinator = coordinator
self.resources.engine_manager = engine_manager self.resources.engine_manager = engine_manager
...@@ -634,12 +637,9 @@ class MPClient(EngineCoreClient): ...@@ -634,12 +637,9 @@ class MPClient(EngineCoreClient):
if not success: if not success:
self._finalizer() self._finalizer()
def shutdown(self, timeout: float | None = None) -> None: def shutdown(self):
"""Shutdown engine manager under timeout and clean up resources.""" # Terminate background resources.
if self._finalizer.detach() is not None: self._finalizer()
if self.resources.engine_manager is not None:
self.resources.engine_manager.shutdown(timeout=timeout)
self.resources()
def _format_exception(self, e: Exception) -> Exception: def _format_exception(self, e: Exception) -> Exception:
"""If errored, use EngineDeadError so root cause is clear.""" """If errored, use EngineDeadError so root cause is clear."""
...@@ -683,7 +683,7 @@ class MPClient(EngineCoreClient): ...@@ -683,7 +683,7 @@ class MPClient(EngineCoreClient):
sentinels = [proc.sentinel for proc in engine_processes] sentinels = [proc.sentinel for proc in engine_processes]
died = multiprocessing.connection.wait(sentinels) died = multiprocessing.connection.wait(sentinels)
_self = self_ref() _self = self_ref()
if not _self or not _self._finalizer.alive or _self.resources.engine_dead: if not _self or _self.resources.engine_dead:
return return
_self.resources.engine_dead = True _self.resources.engine_dead = True
proc_name = next( proc_name = next(
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
import contextlib import contextlib
import os import os
import threading
import weakref import weakref
from collections.abc import Callable, Iterator from collections.abc import Callable, Iterator
from dataclasses import dataclass from dataclasses import dataclass
...@@ -152,12 +151,11 @@ class CoreEngineProcManager: ...@@ -152,12 +151,11 @@ class CoreEngineProcManager:
finally: finally:
# Kill other procs if not all are running. # Kill other procs if not all are running.
if self.finished_procs(): if self.finished_procs():
self.shutdown() self.close()
def shutdown(self, timeout: float | None = None) -> None: def close(self):
"""Shutdown engine core processes with configurable timeout.""" """Shutdown all procs."""
if self._finalizer.detach() is not None: self._finalizer()
shutdown(self.processes, timeout=timeout)
def join_first(self): def join_first(self):
"""Wait for any process to exit.""" """Wait for any process to exit."""
...@@ -175,33 +173,6 @@ class CoreEngineProcManager: ...@@ -175,33 +173,6 @@ class CoreEngineProcManager:
} }
class SignalCallback:
"""Safely trigger a callback from signal handler context via a dedicated thread."""
def __init__(self, callback: Callable[[], None]):
self._callback = callback
self._event = threading.Event()
self._stopped = False
self._thread = threading.Thread(
target=self._run,
daemon=True,
name="signal-callback",
)
self._thread.start()
def _run(self):
self._event.wait()
if not self._stopped:
self._callback()
def trigger(self):
self._event.set()
def stop(self):
self._stopped = True
self._event.set()
@contextlib.contextmanager @contextlib.contextmanager
def set_device_control_env_var( def set_device_control_env_var(
vllm_config: VllmConfig, local_dp_rank: int vllm_config: VllmConfig, local_dp_rank: int
...@@ -797,7 +768,7 @@ class CoreEngineActorManager: ...@@ -797,7 +768,7 @@ class CoreEngineActorManager:
def get_run_refs(self): def get_run_refs(self):
return self.run_refs return self.run_refs
def shutdown(self, timeout: float | None = None) -> None: def close(self):
import ray import ray
for actor in self.local_engine_actors + self.remote_engine_actors: for actor in self.local_engine_actors + self.remote_engine_actors:
......
...@@ -220,10 +220,8 @@ class APIServerProcessManager: ...@@ -220,10 +220,8 @@ class APIServerProcessManager:
# The extra processes are managed by their owners # The extra processes are managed by their owners
self._finalizer = weakref.finalize(self, shutdown, self.processes) self._finalizer = weakref.finalize(self, shutdown, self.processes)
def shutdown(self, timeout: float | None = None) -> None: def close(self) -> None:
"""Shutdown API server processes with configurable timeout""" self._finalizer()
if self._finalizer.detach() is not None:
shutdown(self.processes, timeout=timeout)
def wait_for_completion_or_failure( def wait_for_completion_or_failure(
...@@ -290,30 +288,25 @@ def wait_for_completion_or_failure( ...@@ -290,30 +288,25 @@ def wait_for_completion_or_failure(
except Exception as e: except Exception as e:
logger.exception("Exception occurred while running API servers: %s", str(e)) logger.exception("Exception occurred while running API servers: %s", str(e))
raise raise
finally:
logger.info("Terminating remaining processes ...")
api_server_manager.close()
if coordinator:
coordinator.close()
if engine_manager:
engine_manager.close()
# Note(rob): shutdown function cannot be a bound method, # Note(rob): shutdown function cannot be a bound method,
# else the gc cannot collect the object. # else the gc cannot collect the object.
def shutdown(procs: list[BaseProcess], timeout: float | None = None) -> None: def shutdown(procs: list[BaseProcess]):
"""Shutdown processes with timeout.
Args:
procs: List of processes to shutdown
timeout: Maximum time in seconds to wait for graceful shutdown
"""
if timeout is None:
timeout = 0.0
# Allow at least 5 seconds for remaining procs to terminate.
timeout = max(timeout, 5.0)
# Shutdown the process. # Shutdown the process.
for proc in procs: for proc in procs:
if proc.is_alive(): if proc.is_alive():
proc.terminate() proc.terminate()
# Allow time for remaining procs to terminate. # Allow 5 seconds for remaining procs to terminate.
deadline = time.monotonic() + timeout deadline = time.monotonic() + 5
for proc in procs: for proc in procs:
remaining = deadline - time.monotonic() remaining = deadline - time.monotonic()
if remaining <= 0: if remaining <= 0:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment