# test_shutdown.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import asyncio
import signal
import subprocess
import sys
import time

import openai
import pytest

from vllm.utils.network_utils import get_open_port

MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"


def _stop_server(proc: subprocess.Popen, grace_period: float = 5.0) -> None:
    """Ensure the server process has exited, force-killing it if needed.

    Args:
        proc: The server process to stop.
        grace_period: Seconds to wait after ``terminate()`` before ``kill()``.
    """
    if proc.poll() is not None:
        # Already exited; nothing to clean up.
        return
    proc.terminate()
    try:
        proc.wait(timeout=grace_period)
    except subprocess.TimeoutExpired:
        # Graceful shutdown did not finish in time; do not leave it running.
        proc.kill()
        proc.wait()


async def _wait_for_server(
    client: openai.AsyncOpenAI,
    proc: subprocess.Popen,
    timeout: float = 30.0,
    poll_interval: float = 0.5,
) -> None:
    """Poll the completions endpoint until the server answers a request.

    Fails the test (via ``pytest.fail``) if the server process exits while
    starting up or does not answer within ``timeout`` seconds.

    Args:
        client: Client pointed at the server under test.
        proc: The server process, checked for early exit between attempts.
        timeout: Overall startup deadline in seconds.
        poll_interval: Seconds to sleep between failed attempts.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            await client.completions.create(
                model=MODEL_NAME, prompt="Hello", max_tokens=1
            )
        except openai.APIError:
            # Connection, timeout and HTTP status errors are expected while the
            # server is still starting; anything else is a real bug and should
            # propagate instead of being retried.
            if proc.poll() is not None:
                stdout, stderr = proc.communicate(timeout=1)
                pytest.fail(
                    f"Server died during startup. stdout: {stdout}, stderr: {stderr}"
                )
            # Non-blocking sleep so the event loop is not stalled.
            await asyncio.sleep(poll_interval)
        else:
            return
    pytest.fail(f"Server failed to start in {timeout:g} seconds")


@pytest.mark.asyncio
async def test_shutdown_on_engine_failure():
    """Verify that API returns connection error when server process is killed.

    Starts a vLLM server, kills it to simulate a crash, then verifies that
    subsequent API calls fail appropriately. The server process is always
    stopped on exit, whether the test passes or fails.
    """

    port = get_open_port()

    server_cmd = [
        # dtype, max-len etc set so that this can run in CI
        sys.executable,
        "-m",
        "vllm.entrypoints.openai.api_server",
        "--model",
        MODEL_NAME,
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "128",
        "--enforce-eager",
        "--port",
        str(port),
        "--gpu-memory-utilization",
        "0.05",
        "--max-num-seqs",
        "2",
        "--disable-frontend-multiprocessing",
    ]

    # The context manager closes the captured pipes and reaps the process on
    # exit. NOTE: output is only read for the startup-failure diagnostic, so a
    # very chatty server could in principle fill the pipe buffers.
    with subprocess.Popen(
        server_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        # The child ignores SIGINT; it is stopped explicitly via terminate().
        preexec_fn=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN),
    ) as proc:
        try:
            async with openai.AsyncOpenAI(
                base_url=f"http://localhost:{port}/v1",
                api_key="dummy",
                max_retries=0,
                timeout=10,
            ) as client:
                await _wait_for_server(client, proc)

                # Kill server to simulate crash
                proc.terminate()
                await asyncio.sleep(1)

                # Verify API calls now fail
                with pytest.raises(
                    (openai.APIConnectionError, openai.APIStatusError)
                ):
                    await client.completions.create(
                        model=MODEL_NAME, prompt="This should fail", max_tokens=1
                    )

            # wait() raises TimeoutExpired if the server has not exited in time.
            return_code = proc.wait(timeout=5)
            assert return_code is not None
        finally:
            # Never leave a server (and its GPU memory) behind on failure.
            _stop_server(proc)