# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio

from tests.utils import RemoteOpenAIServer

# Use a small reasoning model to test the responses API.
MODEL_NAME = "Qwen/Qwen3-1.7B"


@pytest.fixture(scope="module")
def default_server_args() -> list[str]:
    """Return the command-line arguments used to launch the test server.

    Module-scoped, so the same argument list is shared by every test in a
    module. Flags and their values appear as consecutive list items.
    """
    return [
        "--max-model-len",
        "8192",
        "--enforce-eager",  # For faster startup.
        "--enable-auto-tool-choice",
        "--structured-outputs-config.backend",
        "xgrammar",
        "--tool-call-parser",
        "hermes",
        "--reasoning-parser",
        "qwen3",
    ]


@pytest.fixture(scope="module")
def server_with_store(default_server_args):
    """Start a server for ``MODEL_NAME`` and yield it to the tests.

    The server is launched through ``RemoteOpenAIServer`` with the arguments
    from ``default_server_args`` and with ``VLLM_ENABLE_RESPONSES_API_STORE``
    and ``VLLM_SERVER_DEV_MODE`` both set to ``"1"`` in its environment.

    The fixture is module-scoped: the ``with`` block stays open while the
    module's tests run and is exited once they are done.
    """
    with RemoteOpenAIServer(
        MODEL_NAME,
        default_server_args,
        env_dict={
            "VLLM_ENABLE_RESPONSES_API_STORE": "1",
            "VLLM_SERVER_DEV_MODE": "1",
        },
    ) as remote_server:
        yield remote_server


@pytest_asyncio.fixture
async def client(server_with_store):
    """Yield an async client obtained from the shared test server.

    The client comes from ``server_with_store.get_async_client()`` and is
    used as an async context manager, so it is exited after the requesting
    test finishes.
    """
    async with server_with_store.get_async_client() as async_client:
        yield async_client