test_pixtral.py 7.73 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
Patrick von Platen's avatar
Patrick von Platen committed
2
3
4
5
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.

Run `pytest` on this file (`test_pixtral.py`).
"""
6
7
import json
from dataclasses import asdict
8
from typing import TYPE_CHECKING, Any, Optional
9

Patrick von Platen's avatar
Patrick von Platen committed
10
import pytest
11
from mistral_common.multimodal import download_image
12
13
14
15
from mistral_common.protocol.instruct.messages import ImageURLChunk
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
16
from transformers import AutoProcessor
17

18
from vllm import RequestOutput, SamplingParams, TextPrompt, TokensPrompt
19
from vllm.multimodal import MultiModalDataBuiltins
20
from vllm.multimodal.inputs import PlaceholderRange
21
from vllm.sequence import Logprob, SampleLogprobs
Patrick von Platen's avatar
Patrick von Platen committed
22

23
from ....utils import VLLM_PATH, large_gpu_test
24
from ...utils import check_logprobs_close
Patrick von Platen's avatar
Patrick von Platen committed
25

26
27
if TYPE_CHECKING:
    from _typeshed import StrPath
Patrick von Platen's avatar
Patrick von Platen committed
28

29
30
31
32
33
# Model identifiers exercised by the parametrized chat test.
PIXTRAL_ID = "mistralai/Pixtral-12B-2409"
MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"

# `test_chat` looks every entry up in FIXTURE_LOGPROBS_CHAT, so each model
# listed here needs a matching fixture.
MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID]

34
35
36
37
38
39
40
41
42
# Remote images attached to the test prompts; slices of this list are used
# to build requests with different numbers of images.
IMG_URLS = [
    "https://picsum.photos/id/237/400/300",
    "https://picsum.photos/id/231/200/300",
    "https://picsum.photos/id/27/500/500",
    "https://picsum.photos/id/17/150/600",
]
# Text instruction placed before the images in every message.
PROMPT = "Describe each image in one short sentence."


43
def _create_msg_format(urls: list[str]) -> list[dict[str, Any]]:
    """Build a single user message holding ``PROMPT`` and one image per URL.

    Args:
        urls: Image URLs to reference; each becomes an ``image_url`` chunk,
            in the given order. May be empty.

    Returns:
        A one-element message list whose ``content`` starts with the text
        chunk and is followed by the image chunks.
    """
    text_chunk = {"type": "text", "text": PROMPT}
    image_chunks = [{
        "type": "image_url",
        "image_url": {
            "url": url
        },
    } for url in urls]

    return [{"role": "user", "content": [text_chunk, *image_chunks]}]


59
def _create_msg_format_hf(urls: list[str]) -> list[dict[str, Any]]:
    """Build a single user message in the layout used by the HF processor.

    Unlike ``_create_msg_format``, the text chunk stores the prompt under the
    ``content`` key and every image is downloaded eagerly with
    ``download_image`` instead of being referenced by URL.

    Args:
        urls: Image URLs to download, in order. May be empty.

    Returns:
        A one-element message list whose ``content`` starts with the text
        chunk and is followed by one ``image`` chunk per URL.
    """
    text_chunk = {"type": "text", "content": PROMPT}
    image_chunks = [{
        "type": "image",
        "image": download_image(url),
    } for url in urls]

    return [{"role": "user", "content": [text_chunk, *image_chunks]}]


73
def _create_engine_inputs(urls: list[str]) -> TokensPrompt:
    """Create a pre-tokenized prompt with the images as multi-modal data.

    The message from ``_create_msg_format`` is encoded with the
    ``mistral_common`` tokenizer for ``"pixtral"``; the images are taken from
    the ``ImageURLChunk`` entries of the validated request.

    Args:
        urls: Image URLs to include in the prompt.

    Returns:
        A ``TokensPrompt`` with ``prompt_token_ids`` and ``multi_modal_data``.
    """
    msg = _create_msg_format(urls)

    tokenizer = MistralTokenizer.from_model("pixtral")

    request = ChatCompletionRequest(messages=msg)  # type: ignore[type-var]
    tokenized = tokenizer.encode_chat_completion(request)

    # Read the chunks back from the request (not from `msg`) so that they are
    # the parsed chunk objects rather than the raw dicts.
    images = [
        image_from_chunk(chunk) for chunk in request.messages[0].content
        if isinstance(chunk, ImageURLChunk)
    ]

    return TokensPrompt(
        prompt_token_ids=tokenized.tokens,
        multi_modal_data=MultiModalDataBuiltins(image=images),
    )


94
def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
    """Create a text prompt with images using the HF chat template.

    The message from ``_create_msg_format_hf`` is rendered with the chat
    template of the ``mistral-community/pixtral-12b`` processor, and the
    already-downloaded images are attached as multi-modal data.

    Args:
        urls: Image URLs to include in the prompt.

    Returns:
        A ``TextPrompt`` with ``prompt`` and ``multi_modal_data``.
    """
    msg = _create_msg_format_hf(urls)

    processor = AutoProcessor.from_pretrained("mistral-community/pixtral-12b")
    prompt = processor.apply_chat_template(msg)

    images = [
        chunk["image"] for chunk in msg[0]["content"]
        if chunk["type"] == "image"
    ]

    return TextPrompt(
        prompt=prompt,
        multi_modal_data=MultiModalDataBuiltins(image=images),
    )


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Requests with one image, two images and every image in IMG_URLS, first in
# chat-message form and then as pre-tokenized engine inputs.
MSGS = [
    _create_msg_format(IMG_URLS[:num_images])
    for num_images in (1, 2, len(IMG_URLS))
]
ENGINE_INPUTS = [
    _create_engine_inputs(IMG_URLS[:num_images])
    for num_images in (1, 2, len(IMG_URLS))
]

# temperature=0.0 selects greedy decoding; the top-5 logprobs are requested
# for every generated token.
SAMPLING_PARAMS = SamplingParams(temperature=0.0, max_tokens=512, logprobs=5)
LIMIT_MM_PER_PROMPT = {"image": 4}

# Context lengths the chat test is parametrized over.
MAX_MODEL_LEN = [8192, 65536]
126
127
128
129

# Directory with the stored reference outputs; the assert makes the module
# fail at import time if it is missing.
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
assert FIXTURES_PATH.exists()

130
131
132
133
# Reference chat outputs per model, loaded by `test_chat`.
FIXTURE_LOGPROBS_CHAT = {
    PIXTRAL_ID: FIXTURES_PATH / "pixtral_chat.json",
    MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
}
134

135
# One (tokens, text, logprobs) tuple per stored output; logprobs may be None.
OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
136

137
138

# For the test author to store golden output in JSON
def _dump_outputs_w_logprobs(
    outputs: OutputsLogprobs,
    filename: "StrPath",
) -> None:
    """Write ``outputs`` to ``filename`` as JSON.

    Each entry is stored as ``[tokens, text, logprobs]`` where every logprob
    value is converted to a plain dict with ``dataclasses.asdict``. Missing
    logprobs (``None``) are stored as an empty list.

    Args:
        outputs: ``(tokens, text, logprobs)`` tuples to serialize.
        filename: Destination path; an existing file is overwritten.
    """
    json_data = []
    for tokens, text, logprobs in outputs:
        serialized_logprobs = [{
            token_id: asdict(logprob)
            for token_id, logprob in token_logprobs.items()
        } for token_logprobs in (logprobs or [])]
        json_data.append((tokens, text, serialized_logprobs))

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(json_data, f)


153
def load_outputs_w_logprobs(filename: "StrPath") -> OutputsLogprobs:
    """Load outputs previously stored as JSON.

    Every stored ``[tokens, text, logprobs]`` entry is returned as a tuple.
    JSON object keys are strings, so the keys of each logprob mapping are
    converted back to ``int`` and the values are rebuilt as ``Logprob``
    instances. A ``null`` logprobs entry is returned as ``None``.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The list of ``(tokens, text, logprobs)`` tuples.
    """
    with open(filename, "rb") as f:
        json_data = json.load(f)

    outputs = []
    for tokens, text, logprobs in json_data:
        if logprobs is not None:
            logprobs = [{
                int(token_id): Logprob(**fields)
                for token_id, fields in token_logprobs.items()
            } for token_logprobs in logprobs]
        outputs.append((tokens, text, logprobs))

    return outputs
Patrick von Platen's avatar
Patrick von Platen committed
161
162


163
@large_gpu_test(min_gb=80)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN)
@pytest.mark.parametrize("dtype", ["bfloat16"])
def test_chat(
    vllm_runner,
    max_model_len: int,
    model: str,
    dtype: str,
) -> None:
    """Check chat outputs against the stored reference logprobs.

    Every message in ``MSGS`` is sent through ``chat`` with
    ``SAMPLING_PARAMS`` and the collected results are compared with the
    fixture registered for ``model`` in ``FIXTURE_LOGPROBS_CHAT``.
    """
    expected_chat_logprobs = load_outputs_w_logprobs(
        FIXTURE_LOGPROBS_CHAT[model])

    with vllm_runner(
            model,
            dtype=dtype,
            tokenizer_mode="mistral",
            load_format="mistral",
            config_format="mistral",
            max_model_len=max_model_len,
            limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
    ) as vllm_model:
        outputs = []
        for msg in MSGS:
            outputs.extend(
                vllm_model.model.chat(msg, sampling_params=SAMPLING_PARAMS))

    outputs_w_prompt_logprobs = vllm_runner._final_steps_generate_w_logprobs(
        outputs)

    # Remove last `None` prompt_logprobs to compare with fixture
    logprobs = []
    for entry in outputs_w_prompt_logprobs:
        assert entry[-1] is None
        logprobs.append(entry[:-1])

    check_logprobs_close(outputs_0_lst=expected_chat_logprobs,
                         outputs_1_lst=logprobs,
                         name_0="h100_ref",
                         name_1="output")
200
201


202
@large_gpu_test(min_gb=48)
@pytest.mark.parametrize("prompt,expected_ranges",
                         [(_create_engine_inputs_hf(IMG_URLS[:1]),
                           [PlaceholderRange(offset=11, length=494)]),
                          (_create_engine_inputs_hf(IMG_URLS[1:4]), [
                              PlaceholderRange(offset=11, length=266),
                              PlaceholderRange(offset=277, length=1056),
                              PlaceholderRange(offset=1333, length=418)
                          ])])
def test_multi_modal_placeholders(vllm_runner, prompt,
                                  expected_ranges: list[PlaceholderRange],
                                  monkeypatch) -> None:
    """Check the image placeholder ranges reported for a generated prompt.

    The single ``RequestOutput`` produced for ``prompt`` must expose
    ``multi_modal_placeholders["image"]`` with exactly the ranges listed in
    ``expected_ranges``, in the same order.
    """
    # This placeholder checking test only works with V0 engine
    # where `multi_modal_placeholders` is returned with `RequestOutput`
    monkeypatch.setenv("VLLM_USE_V1", "0")

    with vllm_runner(
            "mistral-community/pixtral-12b",
            max_model_len=8192,
            limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
    ) as vllm_model:
        outputs = vllm_model.model.generate(prompt)

        assert len(outputs) == 1, f"{len(outputs)=}"
        output: RequestOutput = outputs[0]
        assert hasattr(output,
                       "multi_modal_placeholders"), f"{output.__dict__=}"
        assert "image" in output.multi_modal_placeholders, \
            f"{output.multi_modal_placeholders.keys()=}"

        image_placeholder_ranges: list[
            PlaceholderRange] = output.multi_modal_placeholders["image"]
        # Compare lengths first: `zip` below would silently drop extras.
        assert len(image_placeholder_ranges) == len(
            expected_ranges), f"{image_placeholder_ranges=}"
        for real_range, expected_range in zip(image_placeholder_ranges,
                                              expected_ranges):
            assert real_range == expected_range, \
                f"{real_range=} {expected_range=}"