# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import openai
import pytest

from ...utils import RemoteOpenAIServer

# Model passed to RemoteOpenAIServer and used for the completion request
# in the shutdown test below.
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


@pytest.mark.asyncio
async def test_shutdown_on_engine_failure():
    """Check that the server process exits after a request crashes the engine.

    Starts a ``RemoteOpenAIServer`` for ``MODEL_NAME``, sends a completion
    request that is expected to fail with a connection or internal server
    error, and then waits for the server process to terminate.
    """
    # dtype, max-len etc set so that this can run in CI
    args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "8192",
        "--enforce-eager",
        "--max-num-seqs",
        "128",
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        async with remote_server.get_async_client() as client:
            # Asking for lots of prompt logprobs will currently crash the
            # engine. This may change in the future when that bug is fixed
            prompt = "Hello " * 4000

            # Only the request itself is expected to raise, so keep the
            # pytest.raises body down to that single call.
            with pytest.raises(
                (openai.APIConnectionError, openai.InternalServerError)):
                await client.completions.create(
                    model=MODEL_NAME,
                    prompt=prompt,
                    extra_body={"prompt_logprobs": 10})

            # Now the server should shut down
            # NOTE(review): proc looks like a subprocess.Popen; if so, wait()
            # raises TimeoutExpired when the server is still running after
            # the timeout, and that is what fails the test - confirm.
            return_code = remote_server.proc.wait(timeout=8)
            assert return_code is not None