test_tokenization_vlm.py 1.65 KB
Newer Older
hallerite's avatar
hallerite committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Regression test: ``/tokenize`` must expand image placeholders for VLM models.

Fixed by PR #34560 ("Move InputPreprocessor into Renderer (2/2)").
Before that change, ``/tokenize`` returned ~26 tokens for a message with an
image instead of the expected 1451.  Confirmed broken on 0.15.1 and 0.16.0.
"""

import json

import pytest
import requests

16
from tests.utils import RemoteOpenAIServer
hallerite's avatar
hallerite committed
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

# Model launched by the `server` fixture and named in the `/tokenize` request.
MODEL_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"


@pytest.fixture(scope="module")
def server():
    """Start one server for ``MODEL_NAME`` and share it across the module.

    The server is entered as a context manager, so it is torn down when the
    module's tests have finished. At most one image per prompt is allowed.
    """
    # Serialized once up front; passed as the value of --limit-mm-per-prompt.
    mm_limits = json.dumps({"image": 1})
    cli_args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "4096",
        "--max-num-seqs",
        "5",
        "--enforce-eager",
        "--limit-mm-per-prompt",
        mm_limits,
    ]
    with RemoteOpenAIServer(MODEL_NAME, cli_args) as vllm_server:
        yield vllm_server


def test_tokenize_chat_expands_image_placeholders(
    server: RemoteOpenAIServer,
    local_asset_server,
):
    """Check that ``tokenize`` reports the expanded image token count.

    Sends one user message holding an image part (``stop_sign.jpg``) and a
    text part to the server's ``tokenize`` endpoint, then asserts on the
    ``count`` field of the JSON response.

    Args:
        server: Running server fixture; supplies the endpoint URL.
        local_asset_server: Fixture defined outside this file; its
            ``url_for`` is used to build the image URL.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    image_url = local_asset_server.url_for("stop_sign.jpg")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]

    # Without a timeout, requests waits forever; a wedged server would hang
    # the whole test run instead of failing this test. The bound is generous
    # because the server has to fetch and preprocess the image.
    request_timeout_s = 120
    response = requests.post(
        server.url_for("tokenize"),
        json={"model": MODEL_NAME, "messages": messages},
        timeout=request_timeout_s,
    )
    response.raise_for_status()

    # stop_sign.jpg (1300x876) produces 1451 tokens after expansion.
    # Without expansion the count would be ~26 (text + one placeholder).
    expected_count = 1451
    token_count = response.json()["count"]
    assert token_count == expected_count, (
        f"expected {expected_count} tokens with image placeholders expanded, "
        f"got {token_count}"
    )