"vscode:/vscode.git/clone" did not exist on "f2d9ad0620d9aa71481527dcfafdb8357da00470"
test_tokenization.py 11.5 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
import pytest
5
import os
6
import pytest_asyncio
7
8
import requests

9
from vllm.tokenizers import get_tokenizer
10
11
12

from ...utils import RemoteOpenAIServer

13
from ...utils import models_path_prefix
14
15

# Model served by the tests in this module; its path is joined onto
# ``models_path_prefix``. Any model with a chat template should work here.
MODEL_NAME = os.path.join(models_path_prefix, "HuggingFaceH4/zephyr-7b-beta")
17
18
19


@pytest.fixture(scope="module")
def server():
    """Start a ``RemoteOpenAIServer`` for ``MODEL_NAME`` and yield it.

    The fixture is module-scoped, so a single server instance is shared by
    every test in this file. The server is used as a context manager, which
    is exited once the fixture is finalized.
    """
    # use half precision for speed and memory savings in CI environment
    launch_args = ["--dtype", "bfloat16"]
    # The tokenize tests in this module assert max_model_len == 8192.
    launch_args += ["--max-model-len", "8192"]
    launch_args += ["--enforce-eager"]
    launch_args += ["--max-num-seqs", "128"]
    # Required by the tokenizer_info tests in this module.
    launch_args += ["--enable-tokenizer-info-endpoint"]

    with RemoteOpenAIServer(MODEL_NAME, launch_args) as running_server:
        yield running_server


37
@pytest.fixture(scope="module")
def tokenizer_name(model_name: str):
    """Return ``model_name`` unchanged so it can be used as the tokenizer name."""
    return model_name
40
41


42
43
44
45
@pytest_asyncio.fixture
async def client(server):
    """Yield the async client obtained from the ``server`` fixture."""
    client_context = server.get_async_client()
    async with client_context as async_client:
        yield async_client
46
47
48
49


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_completions(
    server: RemoteOpenAIServer,
    model_name: str,
    tokenizer_name: str,
):
    """Compare the ``tokenize`` endpoint with the local tokenizer on a prompt.

    The same prompt is sent with ``add_special_tokens`` off and on; the
    returned token ids and count must equal what the locally loaded
    tokenizer produces for the same setting.
    """
    local_tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)
    text = "vllm1 This is a test prompt."

    for with_special in (False, True):
        expected_ids = local_tokenizer.encode(
            text, add_special_tokens=with_special
        )

        payload = {
            "add_special_tokens": with_special,
            "model": model_name,
            "prompt": text,
        }
        reply = requests.post(server.url_for("tokenize"), json=payload)
        reply.raise_for_status()

        body = reply.json()
        assert body["tokens"] == expected_ids
        assert body["count"] == len(expected_ids)
        assert body["max_model_len"] == 8192
        # return_token_strs was not requested, so no strings are expected.
        assert body["token_strs"] is None
80
81
82
83


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_chat(
    server: RemoteOpenAIServer,
    model_name: str,
    tokenizer_name: str,
):
    """Compare the ``tokenize`` endpoint with the local tokenizer on chat input.

    Every combination of ``add_generation_prompt``, ``add_special_tokens`` and
    ``continue_final_message`` is exercised, except generation prompt together
    with continuing the final message, which is skipped.
    """
    local_tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

    for add_generation in (False, True):
        for add_special in (False, True):
            # A fresh conversation per combination: the continue_final pass
            # below appends an assistant turn to it.
            messages = [
                {"role": "user", "content": "Hi there!"},
                {"role": "assistant", "content": "Nice to meet you!"},
                {"role": "user", "content": "Can I ask a question? vllm1"},
            ]

            for continue_final in (False, True):
                if add_generation and continue_final:
                    continue
                if continue_final:
                    messages.append({"role": "assistant", "content": "Sure,"})

                rendered = local_tokenizer.apply_chat_template(
                    add_generation_prompt=add_generation,
                    continue_final_message=continue_final,
                    conversation=messages,
                    tokenize=False,
                )
                expected_ids = local_tokenizer.encode(
                    rendered, add_special_tokens=add_special
                )

                payload = {
                    "add_generation_prompt": add_generation,
                    "continue_final_message": continue_final,
                    "add_special_tokens": add_special,
                    "messages": messages,
                    "model": model_name,
                }
                reply = requests.post(server.url_for("tokenize"), json=payload)
                reply.raise_for_status()

                body = reply.json()
                assert body["tokens"] == expected_ids
                assert body["count"] == len(expected_ids)
                assert body["max_model_len"] == 8192
                assert body["token_strs"] is None
133
134


135
136
137
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_chat_with_tools(
    server: RemoteOpenAIServer,
    model_name: str,
    tokenizer_name: str,
):
    """Compare the ``tokenize`` endpoint with the local tokenizer when tools are given.

    The same tool list is passed both to the local chat template and in the
    request; the resulting token ids and count must match for every tested
    flag combination.
    """
    local_tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

    # The tool definition is never mutated, so one instance serves all
    # combinations.
    weather_params = {
        "type": "object",
        "properties": {"location": {"type": "string"}},
    }
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "parameters": weather_params,
            },
        }
    ]

    for add_generation in (False, True):
        for add_special in (False, True):
            # Rebuilt per combination because the continue_final pass below
            # appends an assistant turn to it.
            messages = [
                {
                    "role": "user",
                    "content": "What's the weather like in Paris today?",
                }
            ]

            for continue_final in (False, True):
                if add_generation and continue_final:
                    continue
                if continue_final:
                    messages.append({"role": "assistant", "content": "Sure,"})

                rendered = local_tokenizer.apply_chat_template(
                    add_generation_prompt=add_generation,
                    continue_final_message=continue_final,
                    conversation=messages,
                    tools=tools,
                    tokenize=False,
                )
                expected_ids = local_tokenizer.encode(
                    rendered, add_special_tokens=add_special
                )

                payload = {
                    "add_generation_prompt": add_generation,
                    "continue_final_message": continue_final,
                    "add_special_tokens": add_special,
                    "messages": messages,
                    "model": model_name,
                    "tools": tools,
                }
                reply = requests.post(server.url_for("tokenize"), json=payload)
                reply.raise_for_status()

                body = reply.json()
                assert body["tokens"] == expected_ids
                assert body["count"] == len(expected_ids)
                assert body["max_model_len"] == 8192
                assert body["token_strs"] is None


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name, tokenizer_name",
    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenize_with_return_token_strs(
    server: RemoteOpenAIServer,
    model_name: str,
    tokenizer_name: str,
):
    """Check that ``return_token_strs`` makes ``tokenize`` return token strings.

    The expected strings come from the local tokenizer's
    ``convert_ids_to_tokens`` applied to the ids it produces with special
    tokens enabled.
    """
    local_tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

    text = "This is a token_strs test prompt! vllm1"
    payload = {"prompt": text, "model": model_name, "return_token_strs": True}
    reply = requests.post(server.url_for("tokenize"), json=payload)
    reply.raise_for_status()

    expected_ids = local_tokenizer.encode(text, add_special_tokens=True)
    expected_strs = local_tokenizer.convert_ids_to_tokens(expected_ids)

    body = reply.json()
    assert body["tokens"] == expected_ids
    assert body["count"] == len(expected_ids)
    assert body["max_model_len"] == 8192
    assert body["token_strs"] == expected_strs
233
234
235
236


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_detokenize(
    server: RemoteOpenAIServer,
    model_name: str,
    tokenizer_name: str,
):
    """Check that ``detokenize`` turns locally encoded ids back into the prompt."""
    local_tokenizer = get_tokenizer(tokenizer_name=tokenizer_name)

    text = "This is a test prompt. vllm1"
    # Encoded without special tokens; the response must equal the text exactly.
    token_ids = local_tokenizer.encode(text, add_special_tokens=False)

    payload = {"model": model_name, "tokens": token_ids}
    reply = requests.post(server.url_for("detokenize"), json=payload)
    reply.raise_for_status()

    assert reply.json() == {"prompt": text}
257
258
259
260
261


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME)],
    indirect=["tokenizer_name"],
)
async def test_tokenizer_info_basic(
    server: RemoteOpenAIServer,
    model_name: str,
    tokenizer_name: str,
):
    """Check that ``tokenizer_info`` reports a non-empty string ``tokenizer_class``."""
    reply = requests.get(server.url_for("tokenizer_info"))
    reply.raise_for_status()
    info = reply.json()

    assert "tokenizer_class" in info
    tokenizer_class = info["tokenizer_class"]
    assert isinstance(tokenizer_class, str)
    assert tokenizer_class


@pytest.mark.asyncio
async def test_tokenizer_info_schema(server: RemoteOpenAIServer):
    """Check the types of the known ``tokenizer_info`` fields.

    Fields that are missing from the response or set to ``None`` are skipped;
    only present, non-null values are type-checked.
    """
    reply = requests.get(server.url_for("tokenizer_info"))
    reply.raise_for_status()
    info = reply.json()

    expected_types = {
        "add_bos_token": bool,
        "add_prefix_space": bool,
        "clean_up_tokenization_spaces": bool,
        "split_special_tokens": bool,
        "bos_token": str,
        "eos_token": str,
        "pad_token": str,
        "unk_token": str,
        "chat_template": str,
        "errors": str,
        "model_max_length": int,
        "additional_special_tokens": list,
        "added_tokens_decoder": dict,
    }
    for field, expected_type in expected_types.items():
        value = info.get(field)
        if value is None:
            # Absent or null field: nothing to validate.
            continue
        assert isinstance(value, expected_type), (
            f"{field} should be {expected_type.__name__}"
        )
305
306
307
308


@pytest.mark.asyncio
async def test_tokenizer_info_added_tokens_structure(
    server: RemoteOpenAIServer,
):
    """Validate each ``added_tokens_decoder`` entry when the field is non-empty.

    Every entry must map a string token id to a dict that has a ``content``
    key and a boolean ``special`` key.
    """
    reply = requests.get(server.url_for("tokenizer_info"))
    reply.raise_for_status()
    info = reply.json()

    decoder = info.get("added_tokens_decoder")
    if not decoder:
        # Field missing or empty: nothing to check.
        return

    for token_id, token_info in decoder.items():
        assert isinstance(token_id, str), "Token IDs should be strings"
        assert isinstance(token_info, dict), "Token info should be a dict"
        assert "content" in token_info, "Token info should have content"
        assert "special" in token_info, "Token info should have special flag"
        assert isinstance(token_info["special"], bool), (
            "Special flag should be boolean"
        )
325
326
327
328


@pytest.mark.asyncio
async def test_tokenizer_info_consistency_with_tokenize(
    server: RemoteOpenAIServer,
):
    """Check ``model_max_length`` against ``max_model_len`` from ``tokenize``.

    The comparison only runs when both values are present and truthy.
    """
    info_reply = requests.get(server.url_for("tokenizer_info"))
    info_reply.raise_for_status()
    info = info_reply.json()

    tokenize_payload = {"model": MODEL_NAME, "prompt": "Hello world!"}
    tokenize_reply = requests.post(
        server.url_for("tokenize"), json=tokenize_payload
    )
    tokenize_reply.raise_for_status()
    tokenize_result = tokenize_reply.json()

    info_max_len = info.get("model_max_length")
    tokenize_max_len = tokenize_result.get("max_model_len")
    if not (info_max_len and tokenize_max_len):
        # One of the limits is missing or zero: skip the comparison.
        return

    assert info_max_len >= tokenize_max_len, (
        "Info max length should be >= tokenize max length"
    )
347
348
349
350
351
352
353
354
355
356


@pytest.mark.asyncio
async def test_tokenizer_info_chat_template(server: RemoteOpenAIServer):
    """Check that a reported chat template is a non-blank string.

    A missing or falsy ``chat_template`` is accepted without further checks.
    """
    reply = requests.get(server.url_for("tokenizer_info"))
    reply.raise_for_status()
    info = reply.json()

    chat_template = info.get("chat_template")
    if not chat_template:
        return

    assert isinstance(chat_template, str), "Chat template should be a string"
    assert chat_template.strip(), "Chat template should not be empty"