test_multimodal.py 2.51 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
5
6

import openai
import pytest

7
from vllm.multimodal.utils import encode_image_url
8
9
from vllm.platforms import current_platform

10
from ...entrypoints.openai.test_vision import TEST_IMAGE_ASSETS
11
12
13
14
from ...utils import RemoteOpenAIServer


@pytest.fixture(scope="session")
def url_encoded_image(local_asset_server) -> dict[str, str]:
    """Map every test image asset name to its encoded image URL.

    Each entry of ``TEST_IMAGE_ASSETS`` is fetched through
    ``local_asset_server.get_image_asset`` and passed to
    ``encode_image_url``. The fixture is session-scoped, so the mapping is
    built once and shared by all tests that request it.

    Args:
        local_asset_server: Fixture exposing ``get_image_asset(name)``
            (defined outside this file).

    Returns:
        A dict keyed by asset name whose values are the results of
        ``encode_image_url`` for that asset.
    """
    return {
        image_asset: encode_image_url(local_asset_server.get_image_asset(image_asset))
        for image_asset in TEST_IMAGE_ASSETS
    }


@pytest.mark.asyncio
@pytest.mark.skipif(not current_platform.is_tpu(), reason="This test needs a TPU")
@pytest.mark.parametrize("model_name", ["llava-hf/llava-1.5-7b-hf"])
async def test_basic_vision(model_name: str, url_encoded_image: dict[str, str]):
    """Check image chat completions against a served vision model.

    Launches a ``RemoteOpenAIServer`` for ``model_name``, sends one
    "What's in this image?" chat request per entry of ``TEST_IMAGE_ASSETS``
    and validates the shape of every reply.

    NOTE: the test is currently skipped unconditionally by the
    ``pytest.skip`` call at the top of the body; nothing after that call
    runs until it is removed.

    Args:
        model_name: Model identifier passed to the server and to each
            chat completion request.
        url_encoded_image: Mapping from asset name to encoded image URL,
            as produced by the ``url_encoded_image`` fixture.
    """
    pytest.skip("Skip this test until it's fixed.")

    def whats_in_this_image_msg(url):
        """Build a single-turn user message asking about the image at ``url``."""
        return [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {"type": "image_url", "image_url": {"url": url}},
                ],
            }
        ]

    server_args = [
        "--max-model-len",
        "1024",
        "--max-num-seqs",
        "16",
        "--gpu-memory-utilization",
        "0.95",
        "--trust-remote-code",
        "--max-num-batched-tokens",
        "576",
        # NOTE: max-num-batched-tokens>=mm_item_size
        "--disable_chunked_mm_input",
    ]

    # Server will pre-compile on first startup (takes a long time), hence the
    # generous startup timeout.
    with RemoteOpenAIServer(
        model_name, server_args, max_wait_seconds=600
    ) as remote_server:
        client: openai.AsyncOpenAI = remote_server.get_async_client()

        # Send one request per test image.
        for image_asset in TEST_IMAGE_ASSETS:
            # Keep the asset name and its encoded URL in separate variables
            # instead of rebinding the loop variable.
            image_url = url_encoded_image[image_asset]
            result = await client.chat.completions.create(
                model=model_name,
                messages=whats_in_this_image_msg(image_url),
                max_completion_tokens=24,
                temperature=0.0,
            )
            assert result

            choice = result.choices[0]
            # The reply is expected to be cut off by the 24-token cap above.
            assert choice.finish_reason == "length"

            message = choice.message
            assert message.content is not None and len(message.content) >= 10
            assert message.role == "assistant"