conftest.py 928 Bytes
Newer Older
1
2
3
4
5
6
7
8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio

from tests.utils import RemoteOpenAIServer

# Use a small reasoning model to test the responses API.
MODEL_NAME = "Qwen/Qwen3-1.7B"
10
11
12
13
14
15
16
17
18
19
20
21
22
23


@pytest.fixture(scope="module")
def default_server_args():
    """Return the command-line flags used to launch the test server.

    The fixture is module-scoped, so the list is built once per test module.
    """
    context_length_flags = ["--max-model-len", "8192"]
    startup_flags = ["--enforce-eager"]  # For faster startup.
    reasoning_flags = ["--reasoning-parser", "deepseek_r1"]
    return [*context_length_flags, *startup_flags, *reasoning_flags]


@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Yield a ``RemoteOpenAIServer`` running with the responses store enabled.

    The server is created for ``MODEL_NAME`` with ``default_server_args`` and
    with ``VLLM_ENABLE_RESPONSES_API_STORE=1`` passed through ``env_dict``.
    Because the fixture is module-scoped, the ``with`` block is entered once
    per test module and exited after that module's tests have finished.
    """
    with RemoteOpenAIServer(
        MODEL_NAME,
        default_server_args,
        env_dict={"VLLM_ENABLE_RESPONSES_API_STORE": "1"},
    ) as remote_server:
        yield remote_server


@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async client obtained from the ``server_with_store`` fixture.

    The client comes from ``server_with_store.get_async_client()`` and is used
    as an async context manager, so it is exited once the requesting test is
    done. No scope is given, so a new client is created for each test.
    """
    async with server_with_store.get_async_client() as async_client:
        yield async_client