Unverified commit 6b8545fc, authored by Qi Wang and committed by GitHub
Browse files

perf(multimodal): strip inline base64 from TCP transport in frontend decoding (#7895)


Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent da670d44
...@@ -380,6 +380,32 @@ impl OpenAIPreprocessor { ...@@ -380,6 +380,32 @@ impl OpenAIPreprocessor {
} }
} }
/// Replace inline `data:` URLs with empty strings in message content parts.
/// Preserves HTTP(S) URLs, text content, and overall message structure.
fn strip_inline_data_urls(messages: &mut serde_json::Value) {
let Some(arr) = messages.as_array_mut() else {
return;
};
for msg in arr {
let Some(content) = msg.get_mut("content") else {
continue;
};
let Some(parts) = content.as_array_mut() else {
continue;
};
for part in parts {
for key in ["image_url", "video_url", "audio_url"] {
if let Some(media) = part.get_mut(key)
&& let Some(url) = media.get_mut("url")
&& url.as_str().is_some_and(|s| s.starts_with("data:"))
{
*url = serde_json::Value::String(String::new());
}
}
}
}
}
pub async fn gather_multi_modal_data<R: OAIChatLikeRequest>( pub async fn gather_multi_modal_data<R: OAIChatLikeRequest>(
&self, &self,
request: &R, request: &R,
...@@ -458,6 +484,14 @@ impl OpenAIPreprocessor { ...@@ -458,6 +484,14 @@ impl OpenAIPreprocessor {
let mut extra_args = serde_json::json!({ let mut extra_args = serde_json::json!({
"messages": messages_json "messages": messages_json
}); });
// Strip redundant inline data: URLs only when frontend decoding is active
// (media_loader decoded the images into RDMA descriptors). TRT-LLM and
// other backends that pass URLs through still need the original data: URIs.
if self.media_loader.is_some() {
Self::strip_inline_data_urls(&mut extra_args["messages"]);
}
if let Some(ref prompt) = formatted_prompt { if let Some(ref prompt) = formatted_prompt {
extra_args["formatted_prompt"] = serde_json::Value::String(prompt.clone()); extra_args["formatted_prompt"] = serde_json::Value::String(prompt.clone());
} }
...@@ -1549,6 +1583,64 @@ impl ...@@ -1549,6 +1583,64 @@ impl
// Note: tests for jailing and parser detection live in `lib/llm/tests/test_jail.rs` // Note: tests for jailing and parser detection live in `lib/llm/tests/test_jail.rs`
/// Unit tests for `OpenAIPreprocessor::strip_inline_data_urls`.
///
/// Covers stripping for each supported media key, preservation of non-`data:`
/// URLs and text, and the inputs the function is expected to leave untouched.
#[cfg(test)]
mod strip_tests {
    use super::OpenAIPreprocessor;

    /// `data:` image URLs are blanked; text and HTTPS image URLs survive.
    #[test]
    fn test_strip_inline_data_urls_replaces_data_urls() {
        let mut messages = serde_json::json!([{
            "role": "user",
            "content": [
                {"type": "text", "text": "What is this?"},
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR...longdata..."}},
                {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}
            ]
        }]);
        OpenAIPreprocessor::strip_inline_data_urls(&mut messages);
        let parts = messages[0]["content"].as_array().unwrap();
        assert_eq!(parts[0]["text"], "What is this?");
        assert_eq!(parts[1]["image_url"]["url"], "");
        assert_eq!(parts[2]["image_url"]["url"], "https://example.com/img.png");
    }

    /// Video and audio parts are handled in both directions: `data:` URLs are
    /// blanked and HTTPS URLs are preserved for each media key.
    #[test]
    fn test_strip_inline_data_urls_handles_video_audio() {
        let mut messages = serde_json::json!([{
            "role": "user",
            "content": [
                {"type": "video_url", "video_url": {"url": "data:video/mp4;base64,AAAA..."}},
                {"type": "audio_url", "audio_url": {"url": "https://example.com/audio.wav"}},
                {"type": "audio_url", "audio_url": {"url": "data:audio/wav;base64,BBBB..."}},
                {"type": "video_url", "video_url": {"url": "https://example.com/video.mp4"}}
            ]
        }]);
        OpenAIPreprocessor::strip_inline_data_urls(&mut messages);
        let parts = messages[0]["content"].as_array().unwrap();
        assert_eq!(parts[0]["video_url"]["url"], "");
        assert_eq!(
            parts[1]["audio_url"]["url"],
            "https://example.com/audio.wav"
        );
        assert_eq!(parts[2]["audio_url"]["url"], "");
        assert_eq!(
            parts[3]["video_url"]["url"],
            "https://example.com/video.mp4"
        );
    }

    /// String-valued `content` is not an array of parts and must be untouched.
    #[test]
    fn test_strip_inline_data_urls_preserves_text_only() {
        let mut messages = serde_json::json!([{
            "role": "user",
            "content": "plain text message"
        }]);
        let original = messages.clone();
        OpenAIPreprocessor::strip_inline_data_urls(&mut messages);
        assert_eq!(messages, original);
    }

    /// An empty message list stays empty.
    #[test]
    fn test_strip_inline_data_urls_empty_messages() {
        let mut messages = serde_json::json!([]);
        OpenAIPreprocessor::strip_inline_data_urls(&mut messages);
        assert_eq!(messages, serde_json::json!([]));
    }

    /// A top-level value that is not an array is returned unchanged, even if
    /// it happens to contain a `data:` URL.
    #[test]
    fn test_strip_inline_data_urls_ignores_non_array_input() {
        let mut messages = serde_json::json!({
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,CCCC..."}}
            ]
        });
        let original = messages.clone();
        OpenAIPreprocessor::strip_inline_data_urls(&mut messages);
        assert_eq!(messages, original);
    }

    /// Parts whose media entry is not an object with a string `url` are left
    /// exactly as they were.
    #[test]
    fn test_strip_inline_data_urls_ignores_malformed_parts() {
        let mut messages = serde_json::json!([{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": 42}},
                {"type": "image_url", "image_url": "data:image/png;base64,DDDD..."},
                {"type": "image_url", "image_url": {"detail": "low"}}
            ]
        }]);
        let original = messages.clone();
        OpenAIPreprocessor::strip_inline_data_urls(&mut messages);
        assert_eq!(messages, original);
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
......
...@@ -905,6 +905,65 @@ def test_multimodal_b64( ...@@ -905,6 +905,65 @@ def test_multimodal_b64(
run_serve_deployment(config, request, ports=dynamo_dynamic_ports) run_serve_deployment(config, request, ports=dynamo_dynamic_ports)
@pytest.mark.vllm
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.pre_merge
@pytest.mark.timeout(220)
def test_multimodal_b64_frontend_decoding(
    request,
    runtime_services_dynamic_ports,
    dynamo_dynamic_ports,
    predownload_models,
):
    """
    End-to-end check of base64 image input with frontend decoding enabled.

    The request carries the test image as an inline ``data:`` URI (not an
    HTTP URL) and the deployment is launched with ``--frontend-decoding``.
    The expected response must still mention the image's color, guarding
    against regressions from the strip_inline_data_urls optimization.
    """
    # Build the inline data URI from the shared multimodal test image.
    b64_img = base64.b64encode(get_multimodal_test_image_bytes()).decode()
    text_part = {
        "type": "text",
        "text": "What colors are in the following image? Respond only with the colors.",
    }
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{b64_img}"},
    }
    b64_payload = chat_payload(
        [text_part, image_part],
        repeat_count=1,
        expected_response=["green"],
        temperature=0.0,
        max_tokens=100,
    )

    # Same model for the launch script argument and the config's model field.
    model_name = "Qwen/Qwen3-VL-2B-Instruct"
    launch_args = ["--model", model_name, "--frontend-decoding"]
    base_config = VLLMConfig(
        name="test_multimodal_b64_frontend_decoding",
        directory=vllm_dir,
        script_name="agg_multimodal.sh",
        marks=[],
        model=model_name,
        script_args=launch_args,
        delayed_start=0,
        timeout=220,
        request_payloads=[b64_payload],
    )

    # Point the config at the dynamically allocated frontend port.
    port_bound_config = dataclasses.replace(
        base_config, frontend_port=dynamo_dynamic_ports.frontend_port
    )
    run_serve_deployment(port_bound_config, request, ports=dynamo_dynamic_ports)
# LoRA Test Directory # LoRA Test Directory
lora_dir = os.path.join(vllm_dir, "launch/lora") lora_dir = os.path.join(vllm_dir, "launch/lora")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment