test_online.py 10.7 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
5

import pytest
import requests
6
7
import torch
import torch.nn.functional as F
8

9
from tests.utils import RemoteOpenAIServer
10
11
from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
12
13
14

# Model launched by the ``server`` fixture and named in every request below.
MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
# Value handed to the server through its ``--dtype`` argument.
DTYPE = "float32"  # Use float32 to avoid NaN issue
15
16
# Sample prompt shared by the tests, and the token IDs that ``test_basic``
# expects ``/tokenize`` to return for it.
input_text = "This product was excellent and exceeded my expectations"
input_tokens = [1986, 1985, 572, 9073, 323, 33808, 847, 16665]
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33


@pytest.fixture(scope="module")
def server():
    """Launch one ``RemoteOpenAIServer`` for the module and yield it.

    The server is created inside a ``with`` block, so the context manager is
    exited when pytest finalizes this module-scoped fixture.
    """
    cli_args = ["--enforce-eager", "--max-model-len", "512", "--dtype", DTYPE]

    with RemoteOpenAIServer(MODEL_NAME, cli_args) as running_server:
        yield running_server


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_basic(server: RemoteOpenAIServer, model_name: str):
    """Smoke-test the model listing and tokenizer endpoints.

    Checks that ``/v1/models`` reports ``MODEL_NAME`` as the first served
    model and that ``/tokenize`` maps ``input_text`` to ``input_tokens``.
    """
    # test /v1/models
    models_response = requests.get(server.url_for("/v1/models"))
    # Fail on the HTTP status itself instead of on a missing "data" key.
    models_response.raise_for_status()
    served_model = models_response.json()["data"][0]["id"]
    assert served_model == MODEL_NAME

    # test /tokenize
    tokenize_response = requests.post(
        server.url_for("/tokenize"),
        json={"model": model_name, "prompt": input_text},
    )
    tokenize_response.raise_for_status()
    assert tokenize_response.json()["tokens"] == input_tokens


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_completion_request(server: RemoteOpenAIServer, model_name: str):
    """Classify a single prompt, sent first as text and then as token IDs.

    Both request forms must produce a one-item ``ClassificationResponse``
    whose item exposes ``label`` and ``probs``.
    """
    # test input: str, followed by test input: list[int]
    for single_input in (input_text, input_tokens):
        raw_response = requests.post(
            server.url_for("classify"),
            json={"model": model_name, "input": single_input},
        )
        raw_response.raise_for_status()

        parsed = ClassificationResponse.model_validate(raw_response.json())

        assert parsed.object == "list"
        assert parsed.model == MODEL_NAME
        assert len(parsed.data) == 1
        assert hasattr(parsed.data[0], "label")
        assert hasattr(parsed.data[0], "probs")
79
80


81
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_completion_request_batched(server: RemoteOpenAIServer, model_name: str):
    """Classify a batch of ``N`` identical prompts in a single request.

    The batch is sent once as ``list[str]`` and once as ``list[list[int]]``.
    Each response must contain ``N`` items, indexed in request order, with
    one probability per class and a label of ``"Default"`` or ``"Spoiled"``.
    """
    N = 10

    # test input: list[str], followed by test input: list[list[int]]
    for batch in ([input_text] * N, [input_tokens] * N):
        classification_response = requests.post(
            server.url_for("classify"),
            json={"model": model_name, "input": batch},
        )
        # Report an HTTP failure directly rather than as a schema
        # validation error on the error payload.
        classification_response.raise_for_status()
        output = ClassificationResponse.model_validate(
            classification_response.json()
        )

        assert len(output.data) == N
        for i, item in enumerate(output.data):
            assert item.index == i
            assert hasattr(item, "label")
            assert hasattr(item, "probs")
            assert len(item.probs) == item.num_classes
            assert item.label in ["Default", "Spoiled"]


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_empty_input_error(server: RemoteOpenAIServer, model_name: str):
    """Check how ``classify`` treats an empty string versus an empty list.

    An empty string must be answered with HTTP 400 and an ``error`` entry,
    while an empty list must succeed and yield zero classification items.
    """
    # Empty string: expected to be rejected.
    empty_string_payload = {"model": model_name, "input": ""}
    rejected = requests.post(server.url_for("classify"), json=empty_string_payload)

    error_body = rejected.json()
    assert rejected.status_code == 400
    assert "error" in error_body

    # Empty list: expected to be accepted with an empty result.
    empty_list_payload = {"model": model_name, "input": []}
    accepted = requests.post(server.url_for("classify"), json=empty_list_payload)
    accepted.raise_for_status()
    result = ClassificationResponse.model_validate(accepted.json())

    assert result.object == "list"
    assert isinstance(result.data, list)
    assert len(result.data) == 0

138
139
140
141
142
143
144

@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_truncate_prompt_tokens(server: RemoteOpenAIServer, model_name: str):
    """Exercise ``truncate_prompt_tokens`` with a valid and an invalid value.

    With a limit of 5 the reported prompt and total token usage must both be
    5. With a limit of 513 the request must fail with HTTP 400 and an error
    message that mentions ``truncate_prompt_tokens``.
    """
    oversized_prompt = "hello " * 600
    truncating_payload = {
        "model": model_name,
        "input": oversized_prompt,
        "truncate_prompt_tokens": 5,
    }

    ok_response = requests.post(server.url_for("classify"), json=truncating_payload)
    ok_response.raise_for_status()
    result = ClassificationResponse.model_validate(ok_response.json())

    assert len(result.data) == 1
    first_item = result.data[0]
    assert first_item.index == 0
    assert hasattr(first_item, "probs")
    assert result.usage.prompt_tokens == 5
    assert result.usage.total_tokens == 5

    # invalid_truncate_prompt_tokens
    # 513 is one above the 512 given as --max-model-len in the server
    # fixture; presumably that is why the value is rejected.
    invalid_payload = {
        "model": model_name,
        "input": "test",
        "truncate_prompt_tokens": 513,
    }
    bad_response = requests.post(server.url_for("classify"), json=invalid_payload)

    error_body = bad_response.json()
    assert bad_response.status_code == 400
    assert "truncate_prompt_tokens" in error_body["error"]["message"]
166
167
168


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_add_special_tokens(server: RemoteOpenAIServer, model_name: str):
    """Send ``add_special_tokens`` as False and then True to ``classify``.

    Only checks that both requests succeed and validate as a
    ``ClassificationResponse``; the outputs are not compared.
    """
    # FIXME: The add_special_tokens parameter doesn't seem to be working.
    for flag in (False, True):
        payload = {
            "model": model_name,
            "input": input_text,
            "add_special_tokens": flag,
        }
        reply = requests.post(server.url_for("classify"), json=payload)
        reply.raise_for_status()
        ClassificationResponse.model_validate(reply.json())
184
185
186
187
188
189


@pytest.mark.asyncio
async def test_invocations(server: RemoteOpenAIServer):
    """Check that ``invocations`` answers like ``classify`` for one request.

    The same JSON body is posted to both endpoints. The two responses must
    have the same top-level keys, the same number of ``data`` items, the
    same keys per item, and ``probs`` that agree within 1% relative
    tolerance.
    """
    request_args = {
        "model": MODEL_NAME,
        "input": input_text,
    }

    classification_response = requests.post(
        server.url_for("classify"), json=request_args
    )
    classification_response.raise_for_status()

    invocation_response = requests.post(
        server.url_for("invocations"), json=request_args
    )
    invocation_response.raise_for_status()

    classification_output = classification_response.json()
    invocation_output = invocation_response.json()

    assert classification_output.keys() == invocation_output.keys()

    classification_items = classification_output["data"]
    invocation_items = invocation_output["data"]
    # zip() stops at the shorter sequence, so a differing item count would
    # otherwise go unnoticed by the pairwise comparison below.
    assert len(classification_items) == len(invocation_items)

    for classification_data, invocation_data in zip(
        classification_items, invocation_items
    ):
        assert classification_data.keys() == invocation_data.keys()
        assert classification_data["probs"] == pytest.approx(
            invocation_data["probs"], rel=0.01
        )
214
215
216
217


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
    """Compare ``classify`` outputs across the ``use_activation`` settings.

    Requests are made with ``use_activation`` set to ``None``, ``True`` and
    ``False``. The ``None`` result must match the ``True`` result, the
    ``False`` result must differ from it, and applying softmax over the last
    dimension of the ``False`` result must reproduce the ``True`` result
    (all within an absolute tolerance of 1e-2).
    """

    async def get_outputs(use_activation):
        """Return the ``probs`` of every response item as one tensor."""
        response = requests.post(
            server.url_for("classify"),
            json={
                "model": model_name,
                "input": input_text,
                "use_activation": use_activation,
            },
        )
        # Fail on the HTTP status itself instead of on a missing "data" key.
        response.raise_for_status()
        outputs = response.json()
        return torch.tensor([x["probs"] for x in outputs["data"]])

    default = await get_outputs(use_activation=None)
    w_activation = await get_outputs(use_activation=True)
    wo_activation = await get_outputs(use_activation=False)

    assert torch.allclose(default, w_activation, atol=1e-2), (
        "Default should use activation."
    )
    assert not torch.allclose(w_activation, wo_activation, atol=1e-2), (
        "wo_activation should not use activation."
    )
    assert torch.allclose(F.softmax(wo_activation, dim=-1), w_activation, atol=1e-2), (
        "w_activation should be close to activation(wo_activation)."
    )
244
245
246
247


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_score(server: RemoteOpenAIServer, model_name: str):
    """Expect the ``score`` endpoint to answer with a ``BadRequestError``."""
    # score api is only enabled for num_labels == 1.
    score_payload = {
        "model": model_name,
        "text_1": "ping",
        "text_2": "pong",
    }
    reply = requests.post(server.url_for("score"), json=score_payload)

    error_info = reply.json()["error"]
    assert error_info["type"] == "BadRequestError"


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_rerank(server: RemoteOpenAIServer, model_name: str):
    """Expect the ``rerank`` endpoint to answer with a ``BadRequestError``."""
    # rerank api is only enabled for num_labels == 1.
    rerank_payload = {
        "model": model_name,
        "query": "ping",
        "documents": ["pong"],
    }
    reply = requests.post(server.url_for("rerank"), json=rerank_payload)

    error_info = reply.json()["error"]
    assert error_info["type"] == "BadRequestError"
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
    """Request the ``classify`` task through the ``pooling`` endpoint.

    The response must hold exactly one item whose ``data`` has two entries.
    """
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": "classify",
        },
    )
    # Report an HTTP failure directly rather than as a schema validation
    # error on the error payload.
    response.raise_for_status()
    poolings = PoolingResponse.model_validate(response.json())
    assert len(poolings.data) == 1
    assert len(poolings.data[0].data) == 2


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
    """Request the ``token_classify`` task through the ``pooling`` endpoint.

    The response must hold one item whose ``data`` has one row per entry of
    ``input_tokens``, each row having two values.
    """
    task = "token_classify"
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )
    # Report an HTTP failure directly rather than as a schema validation
    # error on the error payload.
    response.raise_for_status()
    poolings = PoolingResponse.model_validate(response.json())
    assert len(poolings.data) == 1
    # input_tokens holds the 8 IDs that test_basic expects for input_text;
    # presumably the endpoint returns one row per prompt token.
    assert len(poolings.data[0].data) == len(input_tokens)
    assert len(poolings.data[0].data[0]) == 2
310
311
312
313
314
315
316
317
318
319
320
321


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
    server: RemoteOpenAIServer, model_name: str, task: str
):
    """Expect ``pooling`` to reject each parametrized ``task``.

    The error must be a ``BadRequestError`` whose message starts by naming
    the unsupported task.
    """
    pooling_payload = {
        "model": model_name,
        "input": input_text,
        "encoding_format": "float",
        "task": task,
    }
    reply = requests.post(server.url_for("pooling"), json=pooling_payload)

    # Parse the body once and check both error fields on it.
    error_info = reply.json()["error"]
    assert error_info["type"] == "BadRequestError"
    assert error_info["message"].startswith(f"Task {task} is not supported")