test_vision.py 22.3 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
5
import json

6
7
import openai
import pytest
8
import pytest_asyncio
pansicheng's avatar
pansicheng committed
9
from transformers import AutoProcessor
10

11
from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
12
from vllm.multimodal.media import MediaWithBytes
13
from vllm.multimodal.utils import encode_image_url, fetch_image
14
from vllm.platforms import current_platform
15

16
17
# Model identifier handed to RemoteOpenAIServer and used as the `model_name`
# parameter of every test in this module.
MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
# Per-prompt image cap: serialized into the server's --limit-mm-per-prompt
# argument and used by test_multi_image_input to decide whether a request is
# expected to be rejected.
MAXIMUM_IMAGES = 2
18

19
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA).
# Entries are asset file names; the trailing comment on each line keeps the
# corresponding public URL for reference.
# Order matters: REQUIRED_BEAM_SEARCH_TERMS is indexed by position in this list.
TEST_IMAGE_ASSETS = [
    "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
    "Grayscale_8bits_palette_sample_image.png",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/Grayscale_8bits_palette_sample_image.png",
    "1280px-Venn_diagram_rgb.svg.png",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/1280px-Venn_diagram_rgb.svg.png",
    "RGBA_comp.png",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/RGBA_comp.png",
]

27
28
29
30
31
32
33
34
35
36
37
38
39
# Required terms for beam search validation
# Each entry is a list of term groups - ALL groups must match
# Each group is a list of alternatives - at least ONE term in the group must appear
# This provides semantic validation while allowing wording variation
# Entries are looked up by the same index as TEST_IMAGE_ASSETS.
REQUIRED_BEAM_SEARCH_TERMS = [
    # Boardwalk image: must have "boardwalk" AND ("wooden" or "wood")
    [["boardwalk"], ["wooden", "wood"]],
    # Parrots image: must have ("parrot" or "bird") AND "two"
    [["parrot", "bird"], ["two"]],
    # Venn diagram: must have "venn" AND "diagram"
    [["venn"], ["diagram"]],
    # Gradient image: must have "gradient" AND ("color" or "spectrum")
    [["gradient"], ["color", "spectrum"]],
]

42
43
44
45
46
47
48
49

def check_output_matches_terms(content: str, term_groups: list[list[str]]) -> bool:
    """Return True when *content* satisfies every group in *term_groups*.

    Matching is a case-insensitive substring test. A group is satisfied when
    at least one of its terms occurs in *content*; the overall result is True
    only when all groups are satisfied.

    An empty *term_groups* yields True (nothing is required); an empty group
    can never be satisfied and therefore yields False.
    """
    content_lower = content.lower()
    return all(
        any(term.lower() in content_lower for term in group) for group in term_groups
    )


def assert_non_empty_content(chat_completion, *, context: str = "") -> str:
    """Return the first choice's message content after checking it is usable.

    The content must be a non-empty ``str``. On failure the assertion message
    carries the finish reason, the full message and the usage block of the
    response so that flaky or model-quality failures can be diagnosed from the
    test log alone. *context*, when given, is prepended in square brackets.
    """
    tag = f"[{context}] " if context else ""
    first = chat_completion.choices[0]
    text = first.message.content

    def diagnostics() -> str:
        # Built lazily: only rendered when an assertion actually fails.
        return (
            f"finish_reason={first.finish_reason!r}, "
            f"full message={first.message!r}, "
            f"usage={chat_completion.usage!r}"
        )

    assert text is not None, (
        f"{tag}Expected non-None content but got None. {diagnostics()}"
    )
    assert isinstance(text, str), (
        f"{tag}Expected str content, got {type(text).__name__}: {text!r}"
    )
    assert text, (
        f"{tag}Expected non-empty content but got empty string. {diagnostics()}"
    )
    return text
81

82

83
@pytest.fixture(scope="module")
def server():
    """Start one RemoteOpenAIServer for MODEL_NAME and share it module-wide.

    The server is launched with a 2048-token context, at most 5 concurrent
    sequences, eager mode, remote code enabled and a per-prompt image limit of
    MAXIMUM_IMAGES, plus any ROCm-specific extra arguments. The fixture yields
    the running server and leaves the ``with`` block on teardown.
    """
    args = [
        "--runner",
        "generate",
        "--max-model-len",
        "2048",
        "--max-num-seqs",
        "5",
        "--enforce-eager",
        "--trust-remote-code",
        "--limit-mm-per-prompt",
        json.dumps({"image": MAXIMUM_IMAGES}),
        *ROCM_EXTRA_ARGS,
    ]

    # ROCm: Increase timeouts to handle potential network delays and slower
    # video processing when downloading multiple videos from external sources
    # NOTE(review): this module only sends images, yet the comment above and
    # VLLM_VIDEO_FETCH_TIMEOUT refer to video - confirm whether an image fetch
    # timeout was intended instead.
    env_overrides = {
        **ROCM_ENV_OVERRIDES,
        **(
            {
                "VLLM_VIDEO_FETCH_TIMEOUT": "120",
                "VLLM_ENGINE_ITERATION_TIMEOUT_S": "300",
            }
            if current_platform.is_rocm()
            else {}
        ),
    }

    with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_overrides) as remote_server:
        yield remote_server
115
116


117
118
119
120
@pytest_asyncio.fixture
async def client(server):
    """Yield an async client obtained from *server* for the duration of a test.

    The client is created through ``server.get_async_client()`` and used as an
    async context manager, so it is released when the test finishes.
    """
    async with server.get_async_client() as openai_client:
        yield openai_client
121
122


123
@pytest.fixture(scope="session")
def url_encoded_image(local_asset_server) -> dict[str, str]:
    """Map every name in TEST_IMAGE_ASSETS to its encoded image URL.

    Each asset is obtained from *local_asset_server* via ``get_image_asset``
    and passed through ``encode_image_url``. Computed once per test session.
    """
    return {
        image_asset: encode_image_url(local_asset_server.get_image_asset(image_asset))
        for image_asset in TEST_IMAGE_ASSETS
    }


131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def dummy_messages_from_image_url(
    image_urls: str | list[str],
    content_text: str = "What's in this image?",
):
    if isinstance(image_urls, str):
        image_urls = [image_urls]

    return [
        {
            "role": "user",
            "content": [
                *(
                    {"type": "image_url", "image_url": {"url": image_url}}
                    for image_url in image_urls
                ),
                {"type": "text", "text": content_text},
            ],
        }
    ]


152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def describe_image_messages(
    image_url: str, *, extra_image_fields: dict | None = None
) -> list[dict]:
    """Build the system + user messages used by the completions-with-image
    family of tests. *extra_image_fields* is merged into the top-level
    image content block (for uuid / bad-key tests)."""
    image_block: dict = {
        "type": "image_url",
        "image_url": {"url": image_url},
    }
    if extra_image_fields:
        image_block.update(extra_image_fields)

    return [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                image_block,
            ],
        },
    ]


async def complete_and_check(
    client: openai.AsyncOpenAI,
    model_name: str,
    messages: list[dict],
    *,
    context: str,
    max_completion_tokens: int = 50,
    temperature: float = 0.0,
) -> str:
    """Send *messages* as a chat completion and return its non-empty content.

    The response is validated with ``assert_non_empty_content``; *context* is
    forwarded so a failure message identifies the calling scenario.
    """
    request = {
        "model": model_name,
        "messages": messages,
        "max_completion_tokens": max_completion_tokens,
        "temperature": temperature,
    }
    response = await client.chat.completions.create(**request)
    return assert_non_empty_content(response, context=context)


pansicheng's avatar
pansicheng committed
197
def get_hf_prompt_tokens(model_name, content, image_url):
    """Return the prompt length in tokens as computed by the HF processor.

    Loads the ``AutoProcessor`` for *model_name* (with ``num_crops=4``), builds
    a one-image user prompt made of the ``<|image_1|>`` placeholder followed by
    *content*, fetches the image at *image_url* and runs the processor on the
    pair. The result is the second dimension of the produced ``input_ids``.
    """
    processor = AutoProcessor.from_pretrained(
        model_name, trust_remote_code=True, num_crops=4
    )

    placeholder = "<|image_1|>\n"
    messages = [
        {
            "role": "user",
            "content": f"{placeholder}{content}",
        }
    ]
    image = fetch_image(image_url)
    # Unwrap MediaWithBytes if present
    if isinstance(image, MediaWithBytes):
        image = image.media
    images = [image]

    prompt = processor.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(prompt, images, return_tensors="pt")

    return inputs.input_ids.shape[1]


223
224
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_single_chat_session_image(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """Check a single image chat completion and a follow-up turn.

    The first request is capped at 10 completion tokens and must finish with
    ``finish_reason == "length"``; its usage must match the prompt length
    computed by the HF processor. The assistant reply is then appended to the
    conversation and a second request must return non-empty content.

    ``image_url`` is parametrized indirectly, i.e. it is supplied by a fixture
    of that name which is not shown in this part of the file.
    """
    content_text = "What's in this image?"
    messages = dummy_messages_from_image_url(image_url, content_text)

    max_completion_tokens = 10
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        logprobs=True,
        temperature=0.0,
        top_logprobs=5,
    )
    assert len(chat_completion.choices) == 1, (
        f"Expected 1 choice, got {len(chat_completion.choices)}"
    )

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length", (
        f"Expected finish_reason='length' (capped at {max_completion_tokens} "
        f"tokens), got {choice.finish_reason!r}. "
        f"content={choice.message.content!r}"
    )

    hf_prompt_tokens = get_hf_prompt_tokens(model_name, content_text, image_url)
    expected_usage = openai.types.CompletionUsage(
        completion_tokens=max_completion_tokens,
        prompt_tokens=hf_prompt_tokens,
        total_tokens=hf_prompt_tokens + max_completion_tokens,
    )
    assert chat_completion.usage == expected_usage, (
        f"Usage mismatch: got {chat_completion.usage!r}, expected {expected_usage!r}"
    )

    message = choice.message
    assert message.content is not None and len(message.content) >= 10, (
        f"Expected content with >=10 chars, got {message.content!r}"
    )
    assert message.role == "assistant", (
        f"Expected role='assistant', got {message.role!r}"
    )

    messages.append({"role": "assistant", "content": message.content})

    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    await complete_and_check(
        client,
        model_name,
        messages,
        context=f"multi-turn follow-up for {image_url}",
        max_completion_tokens=10,
    )


283
284
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_error_on_invalid_image_url_type(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """Check that a bare-string ``image_url`` part is rejected.

    The request deliberately passes the URL string directly instead of the
    ``{"url": ...}`` mapping and must raise ``openai.BadRequestError``.
    """
    content_text = "What's in this image?"
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": image_url},
                {"type": "text", "text": content_text},
            ],
        }
    ]

    # image_url should be a dict {"url": "some url"}, not directly a string
    with pytest.raises(openai.BadRequestError):
        await client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_completion_tokens=10,
            temperature=0.0,
        )
308
309


310
311
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_single_chat_session_image_beamsearch(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """Check that beam search with ``n=2`` returns two distinct outputs.

    Beam search is enabled through ``extra_body`` (``use_beam_search=True``).
    """
    content_text = "What's in this image?"
    messages = dummy_messages_from_image_url(image_url, content_text)

    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=2,
        max_completion_tokens=10,
        logprobs=True,
        top_logprobs=5,
        extra_body=dict(use_beam_search=True),
    )
    assert len(chat_completion.choices) == 2, (
        f"Expected 2 beam search choices, got {len(chat_completion.choices)}"
    )

    content_0 = chat_completion.choices[0].message.content
    content_1 = chat_completion.choices[1].message.content
    assert content_0 != content_1, (
        f"Beam search should produce different outputs for {image_url}, "
        f"but both returned: {content_0!r}"
    )
338
339


340
341
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("raw_image_url", TEST_IMAGE_ASSETS)
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_single_chat_session_image_base64encoded(
    client: openai.AsyncOpenAI,
    model_name: str,
    raw_image_url: str,
    image_url: str,
    url_encoded_image: dict[str, str],
):
    """Check a chat completion whose image comes from ``url_encoded_image``.

    Mirrors the plain-URL single-session test: a 10-token capped completion
    with usage validation, followed by a second turn that must return
    non-empty content.

    NOTE(review): the request sends ``url_encoded_image[raw_image_url]`` while
    the expected prompt-token count is computed from ``image_url``. The two
    are parametrized independently, so they can name different assets -
    confirm this cross-product is intended.
    """
    content_text = "What's in this image?"
    messages = dummy_messages_from_image_url(
        url_encoded_image[raw_image_url],
        content_text,
    )

    max_completion_tokens = 10
    # test single completion
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        logprobs=True,
        temperature=0.0,
        top_logprobs=5,
    )
    assert len(chat_completion.choices) == 1, (
        f"Expected 1 choice, got {len(chat_completion.choices)}"
    )

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length", (
        f"Expected finish_reason='length', got {choice.finish_reason!r}. "
        f"content={choice.message.content!r}"
    )

    hf_prompt_tokens = get_hf_prompt_tokens(model_name, content_text, image_url)
    expected_usage = openai.types.CompletionUsage(
        completion_tokens=max_completion_tokens,
        prompt_tokens=hf_prompt_tokens,
        total_tokens=hf_prompt_tokens + max_completion_tokens,
    )
    assert chat_completion.usage == expected_usage, (
        f"Usage mismatch: got {chat_completion.usage!r}, expected {expected_usage!r}"
    )

    message = choice.message
    assert message.content is not None and len(message.content) >= 10, (
        f"Expected content with >=10 chars, got {message.content!r}"
    )
    assert message.role == "assistant", (
        f"Expected role='assistant', got {message.role!r}"
    )

    messages.append({"role": "assistant", "content": message.content})

    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    await complete_and_check(
        client,
        model_name,
        messages,
        context=f"multi-turn base64 follow-up for {raw_image_url}",
        max_completion_tokens=10,
        temperature=0.0,
    )


409
410
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_idx", list(range(len(TEST_IMAGE_ASSETS))))
async def test_single_chat_session_image_base64encoded_beamsearch(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_idx: int,
    url_encoded_image: dict[str, str],
):
    """Check beam search output for an encoded image against required terms.

    The test is parametrized by index so that the asset name and its entry in
    REQUIRED_BEAM_SEARCH_TERMS are selected together. Both beams must be
    non-empty, differ from each other, and satisfy every required term group.
    """
    # NOTE: This test validates that we pass MM data through beam search
    raw_image_url = TEST_IMAGE_ASSETS[image_idx]
    required_terms = REQUIRED_BEAM_SEARCH_TERMS[image_idx]

    messages = dummy_messages_from_image_url(url_encoded_image[raw_image_url])

    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=2,
        max_completion_tokens=10,
        temperature=0.0,
        extra_body=dict(use_beam_search=True),
    )
    assert len(chat_completion.choices) == 2, (
        f"Expected 2 beam search choices for image {image_idx} "
        f"({raw_image_url}), got {len(chat_completion.choices)}"
    )

    # Verify beam search produces two different non-empty outputs
    content_0 = chat_completion.choices[0].message.content
    content_1 = chat_completion.choices[1].message.content

    # Emit beam search outputs for debugging
    print(
        f"Beam search outputs for image {image_idx} ({raw_image_url}): "
        f"Output 0: {content_0!r}, Output 1: {content_1!r}"
    )

    assert content_0, (
        f"First beam output is empty for image {image_idx} ({raw_image_url}). "
        f"finish_reason={chat_completion.choices[0].finish_reason!r}"
    )
    assert content_1, (
        f"Second beam output is empty for image {image_idx} "
        f"({raw_image_url}). "
        f"finish_reason={chat_completion.choices[1].finish_reason!r}"
    )
    assert content_0 != content_1, (
        f"Beam search produced identical outputs for image {image_idx} "
        f"({raw_image_url}): {content_0!r}"
    )

    # Verify each output contains the required terms for this image
    for i, content in enumerate([content_0, content_1]):
        assert check_output_matches_terms(content, required_terms), (
            f"Beam output {i} for image {image_idx} ({raw_image_url}) "
            f"doesn't match required terms.\n"
            f"  content: {content!r}\n"
            f"  required (all groups, >=1 per group): {required_terms}"
        )
469
470


471
472
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_chat_streaming_image(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """Check that streaming yields the same text as a non-streamed request.

    Both requests use temperature 0.0 and the same 10-token cap. Exactly one
    streamed chunk may carry a finish reason, it must be the last chunk, and
    it must equal the non-streamed finish reason.
    """
    messages = dummy_messages_from_image_url(image_url)

    # test single completion
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=10,
        temperature=0.0,
    )
    output = chat_completion.choices[0].message.content
    stop_reason = chat_completion.choices[0].finish_reason

    # test streaming
    stream = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=10,
        temperature=0.0,
        stream=True,
    )
    chunks: list[str] = []
    finish_reason_count = 0
    # Finish reason of the most recent chunk. Tracked explicitly so an empty
    # stream fails the assertions below instead of raising NameError on the
    # loop variable.
    final_finish_reason = None
    async for chunk in stream:
        delta = chunk.choices[0].delta
        if delta.role:
            assert delta.role == "assistant", (
                f"Expected role='assistant' in stream delta, got {delta.role!r}"
            )
        if delta.content:
            chunks.append(delta.content)
        final_finish_reason = chunk.choices[0].finish_reason
        if final_finish_reason is not None:
            finish_reason_count += 1
    # finish reason should only return in last block
    assert finish_reason_count == 1, (
        f"Expected exactly 1 finish_reason across stream chunks, "
        f"got {finish_reason_count}"
    )
    assert final_finish_reason == stop_reason, (
        f"Stream finish_reason={final_finish_reason!r} "
        f"doesn't match non-stream finish_reason={stop_reason!r}"
    )

    streamed_text = "".join(chunks)
    assert streamed_text == output, (
        f"Streamed output doesn't match non-streamed for {image_url}.\n"
        f"  streamed:     {streamed_text!r}\n"
        f"  non-streamed: {output!r}"
    )
525
526
527
528


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_multi_image_input(
    client: openai.AsyncOpenAI, model_name: str, image_urls: list[str]
):
    """Check multi-image requests at and above the MAXIMUM_IMAGES limit.

    A request with more than MAXIMUM_IMAGES images must raise
    ``openai.BadRequestError``, after which the server must still answer a
    plain completion. A request within the limit must return non-empty content.
    """
    messages = dummy_messages_from_image_url(image_urls)

    if len(image_urls) > MAXIMUM_IMAGES:
        with pytest.raises(openai.BadRequestError):  # test multi-image input
            await client.chat.completions.create(
                model=model_name,
                messages=messages,
                max_completion_tokens=10,
                temperature=0.0,
            )

        # the server should still work afterwards
        completion = await client.completions.create(
            model=model_name,
            prompt=[0, 0, 0, 0, 0],
            max_tokens=5,
            temperature=0.0,
        )
        assert completion.choices[0].text is not None, (
            "Server failed to produce output after rejecting over-limit "
            "multi-image request"
        )
    else:
        await complete_and_check(
            client,
            model_name,
            messages,
            context=f"multi-image input ({len(image_urls)} images)",
            max_completion_tokens=10,
            temperature=0.0,
        )
568
569
570
571
572
573
574


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_completions_with_image(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Check that each image, sent on its own, yields non-empty content.

    One system + user "Describe this image." request is issued per URL in
    *image_urls*.
    """
    for image_url in image_urls:
        messages = describe_image_messages(image_url)
        await complete_and_check(
            client,
            model_name,
            messages,
            context=f"completions_with_image url={image_url}",
        )


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_completions_with_image_with_uuid(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Check image requests tagged with a ``uuid``, then a uuid-only repeat.

    For every URL, a first request sends the image together with a ``uuid``
    equal to the URL. A second request sends an empty ``image_url`` mapping
    with the same ``uuid``; both must return non-empty content.
    """
    for image_url in image_urls:
        messages = describe_image_messages(
            image_url,
            extra_image_fields={"uuid": image_url},
        )
        await complete_and_check(
            client,
            model_name,
            messages,
            context=f"uuid first request url={image_url}",
        )

        # Same uuid, but no image payload this time.
        cached_messages: list[dict] = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image."},
                    {"type": "image_url", "image_url": {}, "uuid": image_url},
                ],
            },
        ]
        await complete_and_check(
            client,
            model_name,
            cached_messages,
            context=f"uuid cached (empty image) uuid={image_url}",
        )
632
633
634
635
636
637
638
639
640


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_completions_with_empty_image_with_uuid_without_cache_hit(
    client: openai.AsyncOpenAI,
    model_name: str,
):
    """Check that an empty image with a never-seen ``uuid`` is rejected.

    The request carries an empty ``image_url`` mapping and a ``uuid`` that no
    earlier request in this module used; it must raise
    ``openai.BadRequestError``.
    """
    with pytest.raises(openai.BadRequestError):
        await client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image."},
                        {
                            "type": "image_url",
                            "image_url": {},
                            "uuid": "uuid_not_previously_seen",
                        },
                    ],
                },
            ],
            model=model_name,
        )

659
660
661
662
663
664

@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_completions_with_image_with_incorrect_uuid_format(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Check that unrecognized uuid-like keys do not break an image request.

    Each request carries an unexpected key both at the top level of the image
    part and inside its ``image_url`` mapping, and must still return non-empty
    content.
    """
    for image_url in image_urls:
        messages = describe_image_messages(
            image_url,
            extra_image_fields={
                "also_incorrect_uuid_key": image_url,
            },
        )
        # Inject the bad key inside image_url dict too
        messages[1]["content"][1]["image_url"]["incorrect_uuid_key"] = image_url

        await complete_and_check(
            client,
            model_name,
            messages,
            context=f"incorrect uuid format url={image_url}",
        )