# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""E2E tests for render endpoints via `vllm launch` (GPU-less serving)."""

import httpx
import pytest
import pytest_asyncio

9
from tests.utils import RemoteLaunchRenderServer
10
11
12
13
14
15
16

# Model passed to the launched render server and named in every request
# payload sent by the tests in this module.
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"


@pytest.fixture(scope="module")
def server():
    """Launch one render server for the whole module and yield its handle.

    The server is used as a context manager, so it is exited once the
    module-scoped fixture is finalized. Startup is given up to 120 seconds.
    """
    launch_args: list[str] = []
    launcher = RemoteLaunchRenderServer(
        MODEL_NAME, launch_args, max_wait_seconds=120
    )
    with launcher as running_server:
        yield running_server
19
20
21
22
23
24
25
26
27
28


@pytest_asyncio.fixture
async def client(server):
    """Yield an async HTTP client whose base URL is the launched server's root.

    The client is opened with a 30-second timeout and closed when the
    fixture is finalized.
    """
    root_url = server.url_for("")
    async with httpx.AsyncClient(base_url=root_url, timeout=30.0) as session:
        yield session


# -- Chat Completion Render --


32
@pytest.mark.asyncio
async def test_chat_render_basic(client):
    """Render a single-message chat request and validate the token IDs."""
    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
    }
    resp = await client.post("/v1/chat/completions/render", json=payload)
    assert resp.status_code == 200

    rendered = resp.json()

    # Response should be a GenerateRequest dict
    assert isinstance(rendered, dict)
    assert "token_ids" in rendered

    token_ids = rendered["token_ids"]
    assert isinstance(token_ids, list)
    assert len(token_ids) > 0
    assert all(isinstance(tok, int) for tok in token_ids)
51
52
53


@pytest.mark.asyncio
async def test_chat_render_multi_turn(client):
    """Render a user/assistant/user conversation and validate the token IDs.

    Applies the same `token_ids` checks as `test_chat_render_basic`,
    including that every token ID is an integer.
    """
    response = await client.post(
        "/v1/chat/completions/render",
        json={
            "model": MODEL_NAME,
            "messages": [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there!"},
                {"role": "user", "content": "How are you?"},
            ],
        },
    )

    assert response.status_code == 200
    data = response.json()

    assert isinstance(data, dict)
    assert "token_ids" in data
    token_ids = data["token_ids"]
    assert isinstance(token_ids, list)
    assert len(token_ids) > 0
    # A multi-turn render must yield plain integer IDs, as a single turn does.
    assert all(isinstance(t, int) for t in token_ids)
74
75


# -- Completion Render --


@pytest.mark.asyncio
async def test_completion_render_basic(client):
    """Render a single string prompt and validate the first returned entry."""
    resp = await client.post(
        "/v1/completions/render",
        json={"model": MODEL_NAME, "prompt": "Once upon a time"},
    )
    assert resp.status_code == 200

    rendered = resp.json()
    assert isinstance(rendered, list)
    assert len(rendered) > 0

    entry = rendered[0]
    # Every rendered entry must carry these fields.
    for required_key in ("token_ids", "sampling_params", "model", "request_id"):
        assert required_key in entry

    token_ids = entry["token_ids"]
    assert isinstance(token_ids, list)
    assert len(token_ids) > 0
    assert entry["request_id"].startswith("cmpl-")
103
104
105


@pytest.mark.asyncio
async def test_completion_render_multiple_prompts(client):
    """Render a two-prompt batch and validate every returned entry."""
    prompts = ["Hello world", "Goodbye world"]
    response = await client.post(
        "/v1/completions/render",
        json={
            "model": MODEL_NAME,
            "prompt": prompts,
        },
    )

    assert response.status_code == 200
    data = response.json()

    assert isinstance(data, list)
    # One rendered entry is expected per submitted prompt.
    assert len(data) == len(prompts)

    for prompt in data:
        assert "token_ids" in prompt
        assert "sampling_params" in prompt
        assert "model" in prompt
        assert "request_id" in prompt
        # Same type check the single-prompt test applies; without it any
        # non-empty sized value (e.g. a string) would satisfy the length check.
        assert isinstance(prompt["token_ids"], list)
        assert len(prompt["token_ids"]) > 0
        assert prompt["request_id"].startswith("cmpl-")
128
129
130


@pytest.mark.asyncio
async def test_completion_render_invalid_model(client):
    """An unknown model name should produce a 404 with an "error" field."""
    bad_request = {"model": "nonexistent-model", "prompt": "Hello"}
    resp = await client.post("/v1/completions/render", json=bad_request)

    assert resp.status_code == 404
    assert "error" in resp.json()
142
143
144


@pytest.mark.asyncio
async def test_render_is_fast(client):
    """Render should complete quickly since there is no inference.

    Times one `/v1/completions/render` round trip with a repeated prompt and
    requires it to finish in under two seconds of wall-clock time.
    """
    import time

    max_seconds = 2.0

    start = time.perf_counter()
    response = await client.post(
        "/v1/completions/render",
        json={
            "model": MODEL_NAME,
            "prompt": "Tell me a very long story about " * 10,
        },
    )
    elapsed = time.perf_counter() - start

    # Report the body so a server-side failure is visible in the test output.
    assert response.status_code == 200, response.text
    assert elapsed < max_seconds, (
        f"render took {elapsed:.3f}s, expected under {max_seconds:.1f}s"
    )


# -- Health & Models --


@pytest.mark.asyncio
async def test_health_endpoint(client):
    """`GET /health` should answer with HTTP 200."""
    health = await client.get("/health")
    assert health.status_code == 200


@pytest.mark.asyncio
async def test_models_endpoint(client):
    """`GET /v1/models` should list the model the server was launched with."""
    listing = await client.get("/v1/models")
    assert listing.status_code == 200

    body = listing.json()
    assert "data" in body

    served_ids = [entry["id"] for entry in body["data"]]
    assert MODEL_NAME in served_ids