# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio

from tests.utils import RemoteOpenAIServer

# Use a small reasoning model to test the responses API.
MODEL_NAME = "Qwen/Qwen3-1.7B"


@pytest.fixture(scope="module")
def default_server_args() -> list[str]:
    """Return the command-line arguments passed to the test server.

    The list is built once per test module (``scope="module"``). Flags and
    their values are adjacent elements, in the order they should appear on
    the command line.
    """
    return [
        "--max-model-len",
        "8192",
        "--enforce-eager",  # For faster startup.
        "--enable-auto-tool-choice",
        "--structured-outputs-config.backend",
        "xgrammar",
        "--tool-call-parser",
        "hermes",
        "--reasoning-parser",
        "qwen3",
    ]


@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Yield a ``RemoteOpenAIServer`` running ``MODEL_NAME`` for the module.

    The server is started with ``default_server_args`` and with
    ``VLLM_ENABLE_RESPONSES_API_STORE=1`` in its environment dict
    (presumably enabling the Responses API store -- confirm against the
    server's handling of that variable). The ``with`` block is exited when
    the module-scoped fixture is torn down.
    """
    with RemoteOpenAIServer(
        MODEL_NAME,
        default_server_args,
        env_dict={"VLLM_ENABLE_RESPONSES_API_STORE": "1"},
    ) as remote_server:
        yield remote_server


@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async client obtained from ``server_with_store``.

    The client comes from ``server_with_store.get_async_client()`` and is
    used as an async context manager, so it is exited when the fixture is
    torn down.
    """
    async with server_with_store.get_async_client() as async_client:
        yield async_client