"lib/parsers/src/vscode:/vscode.git/clone" did not exist on "f4cd71f388d2490bb14b4e3701fead85ac8d4e94"
Unverified commit e7544f19, authored by KrishnanPrash, committed by GitHub
Browse files

ci: Testing b64 multimodal support with real image (#4280)


Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
Signed-off-by: KrishnanPrash <140860868+KrishnanPrash@users.noreply.github.com>
Co-authored-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
parent 6e241236
...@@ -25,6 +25,7 @@ pytest-benchmark ...@@ -25,6 +25,7 @@ pytest-benchmark
pytest-codeblocks pytest-codeblocks
pytest-cov pytest-cov
pytest-forked pytest-forked
pytest-httpserver
pytest-md-report pytest-md-report
pytest-mypy pytest-mypy
pytest-timeout pytest-timeout
......
...@@ -39,13 +39,18 @@ while [[ $# -gt 0 ]]; do ...@@ -39,13 +39,18 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
# Use TCP transport (instead of default NATS)
# TCP is preferred for multimodal workloads because it overcomes:
# - NATS default 1MB max payload limit (multimodal base64 images can exceed this)
export DYN_REQUEST_PLANE=tcp
# Start frontend with Rust OpenAIPreprocessor # Start frontend with Rust OpenAIPreprocessor
python -m dynamo.frontend --http-port=8000 & python -m dynamo.frontend --http-port=8000 &
# Configure GPU memory optimization for specific models # Configure GPU memory optimization for specific models
EXTRA_ARGS="" EXTRA_ARGS=""
if [[ "$MODEL_NAME" == "Qwen/Qwen2.5-VL-7B-Instruct" ]]; then if [[ "$MODEL_NAME" == "Qwen/Qwen2.5-VL-7B-Instruct" ]]; then
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 2048" EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 4096"
elif [[ "$MODEL_NAME" == "llava-hf/llava-1.5-7b-hf" ]]; then elif [[ "$MODEL_NAME" == "llava-hf/llava-1.5-7b-hf" ]]; then
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 2048" EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 2048"
fi fi
......
...@@ -69,7 +69,9 @@ python -m dynamo.frontend --http-port=8000 & ...@@ -69,7 +69,9 @@ python -m dynamo.frontend --http-port=8000 &
# To make Qwen2.5-VL fit in A100 40GB, set the following extra arguments # To make Qwen2.5-VL fit in A100 40GB, set the following extra arguments
EXTRA_ARGS="" EXTRA_ARGS=""
if [[ "$MODEL_NAME" == "Qwen/Qwen2.5-VL-7B-Instruct" ]]; then if [[ "$MODEL_NAME" == "Qwen/Qwen2.5-VL-7B-Instruct" ]]; then
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 2048" EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 4096"
elif [[ "$MODEL_NAME" == "llava-hf/llava-1.5-7b-hf" ]]; then
EXTRA_ARGS="--gpu-memory-utilization 0.85 --max-model-len 4096"
fi fi
# Start processor (Python-based preprocessing, handles prompt templating) # Start processor (Python-based preprocessing, handles prompt templating)
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import os
import pytest
from pytest_httpserver import HTTPServer
from dynamo.common.utils.paths import WORKSPACE_DIR
# Constants shared by the multimodal tests.

# Port the local image HTTP server listens on (see httpserver_listen_address).
IMAGE_SERVER_PORT = 8765

# On-disk location of the PNG used as the multimodal test image.
MULTIMODAL_IMG_PATH = os.path.join(
    WORKSPACE_DIR,
    "lib/llm/tests/data/media/llm-optimize-deploy-graphic.png",
)

# URL under which the image_server fixture exposes that PNG.
# NOTE(review): the server binds to 127.0.0.1 while this URL uses "localhost";
# confirm "localhost" resolves to IPv4 on the CI hosts.
MULTIMODAL_IMG_URL = "http://localhost:{}/llm-graphic.png".format(IMAGE_SERVER_PORT)
@pytest.fixture(scope="session")
def httpserver_listen_address():
    """Return the (host, port) pair the pytest-httpserver instance binds to.

    Pinning the port to IMAGE_SERVER_PORT keeps the served image URL stable,
    so test payloads can reference it as a module-level constant.
    """
    bind_host = "127.0.0.1"
    bind_port = IMAGE_SERVER_PORT
    return (bind_host, bind_port)
@pytest.fixture(scope="function")
def image_server(httpserver: HTTPServer):
    """
    Serve the shared test image over HTTP for multimodal inference tests.

    The fixture reads the PNG at MULTIMODAL_IMG_PATH and registers it on the
    pytest-httpserver instance, so that a model under test can fetch the
    image through a plain HTTP URL.

    Routes registered:
        - /llm-graphic.png - the LLM diagram image, served as image/png

    Usage:
        def test_multimodal(image_server):
            url = "http://localhost:8765/llm-graphic.png"
            # ... use url in your test payload

    Returns:
        The configured ``httpserver`` instance.
    """
    # Read the whole image up front; the bytes are handed to the handler below.
    with open(MULTIMODAL_IMG_PATH, "rb") as image_file:
        png_bytes = image_file.read()

    # Register the route, then attach the canned PNG response to it.
    handler = httpserver.expect_request("/llm-graphic.png")
    handler.respond_with_data(png_bytes, content_type="image/png")

    return httpserver
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import base64
import logging import logging
import os import os
from dataclasses import dataclass, field from dataclasses import dataclass, field
...@@ -12,6 +13,7 @@ from tests.serve.common import ( ...@@ -12,6 +13,7 @@ from tests.serve.common import (
params_with_model_mark, params_with_model_mark,
run_serve_deployment, run_serve_deployment,
) )
from tests.serve.conftest import MULTIMODAL_IMG_PATH, MULTIMODAL_IMG_URL
from tests.utils.engine_process import EngineConfig from tests.utils.engine_process import EngineConfig
from tests.utils.payload_builder import ( from tests.utils.payload_builder import (
chat_payload, chat_payload,
...@@ -34,6 +36,7 @@ vllm_dir = os.environ.get("VLLM_DIR") or os.path.join( ...@@ -34,6 +36,7 @@ vllm_dir = os.environ.get("VLLM_DIR") or os.path.join(
WORKSPACE_DIR, "examples/backends/vllm" WORKSPACE_DIR, "examples/backends/vllm"
) )
# vLLM test configurations # vLLM test configurations
vllm_configs = { vllm_configs = {
"aggregated": VLLMConfig( "aggregated": VLLMConfig(
...@@ -151,17 +154,19 @@ vllm_configs = { ...@@ -151,17 +154,19 @@ vllm_configs = {
request_payloads=[ request_payloads=[
chat_payload( chat_payload(
[ [
{"type": "text", "text": "What is in this image?"},
{ {
"type": "image_url", "type": "text",
"image_url": { "text": "What colors are in the following image? Respond only with the colors.",
"url": "http://images.cocodataset.org/test2017/000000155781.jpg"
}, },
{
"type": "image_url",
"image_url": {"url": MULTIMODAL_IMG_URL},
}, },
], ],
repeat_count=1, repeat_count=1,
expected_response=["bus"], expected_response=["purple"],
temperature=0.0, temperature=0.0,
max_tokens=100,
) )
], ],
), ),
...@@ -177,16 +182,18 @@ vllm_configs = { ...@@ -177,16 +182,18 @@ vllm_configs = {
request_payloads=[ request_payloads=[
chat_payload( chat_payload(
[ [
{"type": "text", "text": "What is in this image?"},
{ {
"type": "image_url", "type": "text",
"image_url": { "text": "What colors are in the following image? Respond only with the colors.",
"url": "http://images.cocodataset.org/test2017/000000155781.jpg"
}, },
{
"type": "image_url",
"image_url": {"url": MULTIMODAL_IMG_URL},
}, },
], ],
repeat_count=1, repeat_count=1,
expected_response=["bus"], expected_response=["purple"],
max_tokens=100,
) )
], ],
), ),
...@@ -200,33 +207,20 @@ vllm_configs = { ...@@ -200,33 +207,20 @@ vllm_configs = {
delayed_start=0, delayed_start=0,
timeout=360, timeout=360,
request_payloads=[ request_payloads=[
# HTTP URL test
chat_payload( chat_payload(
[ [
{"type": "text", "text": "What is in this image?"},
{ {
"type": "image_url", "type": "text",
"image_url": { "text": "What colors are in the following image? Respond only with the colors.",
"url": "http://images.cocodataset.org/test2017/000000155781.jpg"
},
}, },
],
repeat_count=1,
expected_response=["bus"],
),
# Base64 data URL test (1x1 PNG inline, avoids network fetch)
chat_payload(
[
{"type": "text", "text": "What do you see in this image?"},
{ {
"type": "image_url", "type": "image_url",
"image_url": { "image_url": {"url": MULTIMODAL_IMG_URL},
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAAAAAA6fptVAAAACklEQVR4nGNoAAAAggCBd81ytgAAAABJRU5ErkJggg=="
},
}, },
], ],
repeat_count=1, repeat_count=1,
expected_response=[], # Just validate no error expected_response=["purple"],
max_tokens=100,
), ),
], ],
), ),
...@@ -343,10 +337,57 @@ def vllm_config_test(request): ...@@ -343,10 +337,57 @@ def vllm_config_test(request):
@pytest.mark.vllm @pytest.mark.vllm
@pytest.mark.e2e @pytest.mark.e2e
def test_serve_deployment( def test_serve_deployment(
vllm_config_test, request, runtime_services, predownload_models vllm_config_test, request, runtime_services, predownload_models, image_server
): ):
""" """
Test dynamo serve deployments with different graph configurations. Test dynamo serve deployments with different graph configurations.
""" """
config = vllm_config_test config = vllm_config_test
run_serve_deployment(config, request) run_serve_deployment(config, request)
@pytest.mark.vllm
@pytest.mark.e2e
@pytest.mark.gpu_2
def test_multimodal_b64(request, runtime_services, predownload_models):
    """
    Exercise multimodal inference with the image passed as a base64 data URL.

    The image is read inside the test body rather than in a module-level
    config, so a missing image file only fails this test when it actually
    runs instead of breaking collection.
    """
    model_name = "Qwen/Qwen2.5-VL-7B-Instruct"

    # Read and encode the image at execution time.
    with open(MULTIMODAL_IMG_PATH, "rb") as image_file:
        raw_image = image_file.read()
    data_url = "data:image/png;base64," + base64.b64encode(raw_image).decode()

    # Content parts: the text question followed by the inline image.
    question_part = {
        "type": "text",
        "text": "What colors are in the following image? Respond only with the colors.",
    }
    image_part = {
        "type": "image_url",
        "image_url": {"url": data_url},
    }

    run_serve_deployment(
        VLLMConfig(
            name="test_multimodal_b64",
            directory=vllm_dir,
            script_name="agg_multimodal.sh",
            marks=[],  # markers at function-level
            model=model_name,
            script_args=["--model", model_name],
            delayed_start=0,
            timeout=360,
            request_payloads=[
                chat_payload(
                    [question_part, image_part],
                    repeat_count=1,
                    expected_response=["purple"],
                    max_tokens=100,
                )
            ],
        ),
        request,
    )
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
import json import json
import logging import logging
import re
import time import time
from copy import deepcopy
from typing import Any, Dict from typing import Any, Dict
import requests import requests
...@@ -11,6 +13,39 @@ import requests ...@@ -11,6 +13,39 @@ import requests
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Matches "data:image/<subtype>;base64,<payload>". DOTALL lets the payload
# group span newlines, so line-wrapped base64 is recognised (and truncated)
# instead of falling through and being logged in full.
_BASE64_IMAGE_URL_RE = re.compile(r"^(data:image/[^;]+;base64,)(.+)$", re.DOTALL)


def _truncate_base64_url(url: str, max_length: int = 100) -> str:
    """Shorten the payload of a base64 image data URL for logging.

    Args:
        url: Any URL string. Only ``data:image/...;base64,...`` URLs are
            ever modified.
        max_length: Maximum number of payload characters to keep.

    Returns:
        ``url`` unchanged when it is not a base64 image data URL or its
        payload has at most ``max_length`` characters. Otherwise the data URL
        prefix, the first ``max_length`` payload characters, and a marker
        stating the original payload length.
    """
    match = _BASE64_IMAGE_URL_RE.match(url)
    if match is None:
        return url

    prefix, data = match.groups()
    if len(data) <= max_length:
        return url
    return f"{prefix}{data[:max_length]}...<{len(data)} chars, truncated>"
def _sanitize_payload_for_logging(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a copy of ``payload`` with base64 image data shortened for logging.

    Multimodal chat payloads can carry base64 images of several MB inside
    ``{"type": "image_url", ...}`` content parts. Both spellings of such a
    part are handled:

        {"type": "image_url", "image_url": {"url": "data:..."}}
        {"type": "image_url", "image_url": "data:..."}

    This helper only feeds log output, so any part of the payload that does
    not have the expected shape is left untouched rather than raising.

    Args:
        payload: Request payload; it is never mutated.

    Returns:
        A deep copy of ``payload`` in which every image URL string found in a
        message's content list has been passed through
        ``_truncate_base64_url``.
    """
    sanitized = deepcopy(payload)

    messages = sanitized.get("messages") if isinstance(sanitized, dict) else None
    if not isinstance(messages, list):
        return sanitized

    for message in messages:
        if not isinstance(message, dict):
            continue
        # Content is either a plain string or a list of content parts
        # (multimodal); only the list form can contain images.
        content = message.get("content")
        if not isinstance(content, list):
            continue
        for part in content:
            if not isinstance(part, dict) or part.get("type") != "image_url":
                continue
            image_url = part.get("image_url")
            if isinstance(image_url, str):
                # Bare-string form: replace the value on the part itself.
                part["image_url"] = _truncate_base64_url(image_url)
            elif isinstance(image_url, dict) and isinstance(
                image_url.get("url"), str
            ):
                image_url["url"] = _truncate_base64_url(image_url["url"])

    return sanitized
def send_request( def send_request(
url: str, url: str,
payload: Dict[str, Any], payload: Dict[str, Any],
...@@ -35,7 +70,10 @@ def send_request( ...@@ -35,7 +70,10 @@ def send_request(
""" """
method_upper = method.upper() method_upper = method.upper()
payload_json = json.dumps(payload, indent=2)
# Sanitize payload for logging (truncate base64 images)
sanitized_payload = _sanitize_payload_for_logging(payload)
payload_json = json.dumps(sanitized_payload, indent=2)
curl_command = "" curl_command = ""
if method_upper == "GET": if method_upper == "GET":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment