test_basic.py 1.29 KB
Newer Older
1
2
3
4
import os
from http import HTTPStatus

import openai
import pytest
5
import pytest_asyncio
6
7
8
9
import requests

from vllm.version import __version__ as VLLM_VERSION

10
from ...utils import RemoteOpenAIServer, models_path_prefix
11

12
# Model handed to RemoteOpenAIServer by the `server` fixture: the shared
# `models_path_prefix` from the test utils joined with the model's
# "<org>/<name>" identifier.
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


@pytest.fixture(scope="module")
def server():
    """Yield a RemoteOpenAIServer for MODEL_NAME, shared across this module.

    The fixture is module-scoped, so the ``with`` block below is entered once
    and stays open until the module's tests are done; cleanup is whatever the
    RemoteOpenAIServer context manager performs on exit.
    """
    launch_flags = [
        # half precision keeps the CI run fast and light on memory
        *("--dtype", "bfloat16"),
        *("--max-model-len", "8192"),
        "--enforce-eager",
        *("--max-num-seqs", "128"),
    ]

    with RemoteOpenAIServer(MODEL_NAME, launch_flags) as running_server:
        yield running_server


32
33
34
35
@pytest_asyncio.fixture
async def client(server):
    """Yield an async client obtained from the ``server`` fixture.

    The client comes from ``server.get_async_client()`` and is used as an
    async context manager, so it is exited after the requesting test ends.
    """
    async with server.get_async_client() as openai_client:
        yield openai_client
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54


@pytest.mark.asyncio
async def test_show_version(client: openai.AsyncOpenAI):
    """Check that ``GET /version`` returns the installed vLLM version.

    The response body must equal ``{"version": VLLM_VERSION}``; any non-2xx
    status fails the test through ``raise_for_status()``.
    """
    # Drop the last three characters of the client's base URL and any
    # surrounding slashes to get the server root.
    # NOTE(review): presumably the dropped suffix is the "v1/" API prefix --
    # confirm against what get_async_client() configures.
    base_url = str(client.base_url)[:-3].strip("/")

    # requests applies no timeout by default; bound the wait so an
    # unresponsive server fails this test instead of hanging the whole run.
    response = requests.get(base_url + "/version", timeout=30)
    response.raise_for_status()

    assert response.json() == {"version": VLLM_VERSION}


@pytest.mark.asyncio
async def test_check_health(client: openai.AsyncOpenAI):
    """Check that ``GET /health`` answers with HTTP 200 (OK)."""
    # Drop the last three characters of the client's base URL and any
    # surrounding slashes to get the server root.
    # NOTE(review): presumably the dropped suffix is the "v1/" API prefix --
    # confirm against what get_async_client() configures.
    base_url = str(client.base_url)[:-3].strip("/")

    # requests applies no timeout by default; bound the wait so an
    # unresponsive server fails this test instead of hanging the whole run.
    response = requests.get(base_url + "/health", timeout=30)

    assert response.status_code == HTTPStatus.OK