Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
95a395db
Unverified
Commit
95a395db
authored
Feb 28, 2026
by
Martin Vit
Committed by
GitHub
Feb 28, 2026
Browse files
[Bugfix] Fix Anthropic API base64 image handling in Messages endpoint (#35557)
Signed-off-by:
Martin Vit
<
martin@voipmonitor.org
>
parent
e94b263b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
389 additions
and
5 deletions
+389
-5
tests/entrypoints/openai/test_anthropic_messages_conversion.py
.../entrypoints/openai/test_anthropic_messages_conversion.py
+326
-0
vllm/entrypoints/anthropic/serving.py
vllm/entrypoints/anthropic/serving.py
+63
-5
No files found.
tests/entrypoints/openai/test_anthropic_messages_conversion.py
0 → 100644
View file @
95a395db
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Unit tests for Anthropic-to-OpenAI request conversion.
Tests the image source handling and tool_result content parsing in
AnthropicServingMessages._convert_anthropic_to_openai_request().
"""
from
vllm.entrypoints.anthropic.protocol
import
(
AnthropicMessagesRequest
,
)
from
vllm.entrypoints.anthropic.serving
import
AnthropicServingMessages
# Short module-level aliases for the two AnthropicServingMessages helpers
# exercised by the tests in this file; both are called here directly on the
# class, without constructing a serving instance.
_convert
=
AnthropicServingMessages
.
_convert_anthropic_to_openai_request
_img_url
=
AnthropicServingMessages
.
_convert_image_source_to_url
def _make_request(
    messages: list[dict],
    **kwargs,
) -> AnthropicMessagesRequest:
    """Wrap ``messages`` in a minimal ``AnthropicMessagesRequest``.

    The model name and token budget are fixed placeholder values; any
    extra keyword arguments are forwarded to the request constructor.
    """
    fixed_fields = {"model": "test-model", "max_tokens": 128}
    return AnthropicMessagesRequest(messages=messages, **fixed_fields, **kwargs)
# ======================================================================
# _convert_image_source_to_url
# ======================================================================
class TestConvertImageSourceToUrl:
    """Direct checks of the image-source to URL helper, one shape per test."""

    def test_base64_source(self):
        """A JPEG base64 source is rendered as a ``data:`` URI."""
        jpeg_source = {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": "iVBORw0KGgo=",
        }
        converted = _img_url(jpeg_source)
        assert converted == "data:image/jpeg;base64,iVBORw0KGgo="

    def test_base64_png(self):
        """The declared media type is carried into the ``data:`` URI."""
        png_source = {
            "type": "base64",
            "media_type": "image/png",
            "data": "AAAA",
        }
        converted = _img_url(png_source)
        assert converted == "data:image/png;base64,AAAA"

    def test_url_source(self):
        """A URL source is returned unchanged."""
        remote_source = {
            "type": "url",
            "url": "https://example.com/image.jpg",
        }
        converted = _img_url(remote_source)
        assert converted == "https://example.com/image.jpg"

    def test_missing_type_defaults_to_base64(self):
        """A source without a 'type' key is handled as base64."""
        untyped_source = {
            "media_type": "image/webp",
            "data": "UklGR",
        }
        converted = _img_url(untyped_source)
        assert converted == "data:image/webp;base64,UklGR"

    def test_missing_media_type_defaults_to_jpeg(self):
        """A base64 source without 'media_type' is labelled image/jpeg."""
        bare_source = {"type": "base64", "data": "abc123"}
        converted = _img_url(bare_source)
        assert converted == "data:image/jpeg;base64,abc123"

    def test_url_source_missing_url_returns_empty(self):
        """A URL source without a 'url' key yields an empty string."""
        incomplete_source = {"type": "url"}
        converted = _img_url(incomplete_source)
        assert converted == ""

    def test_empty_source_returns_data_uri_shell(self):
        """An empty source yields a data URI with defaults and no payload."""
        empty_source: dict = {}
        converted = _img_url(empty_source)
        assert converted == "data:image/jpeg;base64,"
# ======================================================================
# Image blocks inside user messages
# ======================================================================
class TestImageContentBlocks:
    """Image blocks in user messages are converted to ``image_url`` parts."""

    def test_base64_image_in_user_message(self):
        """Text plus a base64 image yields a text part and a data-URI part."""
        text_block = {"type": "text", "text": "Describe this image"}
        image_block = {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": "iVBORw0KGgo=",
            },
        }
        request = _make_request(
            [{"role": "user", "content": [text_block, image_block]}]
        )

        converted = _convert(request)

        first_message = converted.messages[0]
        assert first_message["role"] == "user"

        content = first_message["content"]
        assert len(content) == 2
        assert content[0] == {"type": "text", "text": "Describe this image"}
        assert content[1] == {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,iVBORw0KGgo="},
        }

    def test_url_image_in_user_message(self):
        """A URL image source is passed through as the ``image_url`` URL."""
        text_block = {"type": "text", "text": "What is this?"}
        image_block = {
            "type": "image",
            "source": {
                "type": "url",
                "url": "https://example.com/cat.png",
            },
        }
        request = _make_request(
            [{"role": "user", "content": [text_block, image_block]}]
        )

        converted = _convert(request)

        content = converted.messages[0]["content"]
        assert content[1] == {
            "type": "image_url",
            "image_url": {"url": "https://example.com/cat.png"},
        }
# ======================================================================
# tool_result content handling
# ======================================================================
class TestToolResultContent:
    """Conversion of ``tool_result`` blocks into OpenAI-style messages."""

    @staticmethod
    def _tool_messages(result):
        """Return the converted messages whose role is ``"tool"``."""
        return [msg for msg in result.messages if msg["role"] == "tool"]

    @staticmethod
    def _user_messages_with_list_content(result):
        """Return converted ``"user"`` messages whose content is a list."""
        matches = []
        for msg in result.messages:
            if msg["role"] != "user":
                continue
            if isinstance(msg.get("content"), list):
                matches.append(msg)
        return matches

    def _make_tool_result_request(
        self, tool_result_content
    ) -> AnthropicMessagesRequest:
        """Build a two-turn request: an assistant ``tool_use`` block followed
        by a user ``tool_result`` block carrying ``tool_result_content``."""
        assistant_turn = {
            "role": "assistant",
            "content": [
                {
                    "type": "tool_use",
                    "id": "call_001",
                    "name": "read_file",
                    "input": {"path": "/tmp/img.png"},
                }
            ],
        }
        user_turn = {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "tool_use_id": "call_001",
                    "content": tool_result_content,
                }
            ],
        }
        return _make_request([assistant_turn, user_turn])

    def test_tool_result_string_content(self):
        """A plain string result becomes the tool message content."""
        request = self._make_tool_result_request("file contents here")
        converted = _convert(request)

        tool_messages = self._tool_messages(converted)
        assert len(tool_messages) == 1

        only_tool_message = tool_messages[0]
        assert only_tool_message["content"] == "file contents here"
        assert only_tool_message["tool_call_id"] == "call_001"

    def test_tool_result_text_blocks(self):
        """Several text blocks are joined with newlines."""
        text_blocks = [
            {"type": "text", "text": "line 1"},
            {"type": "text", "text": "line 2"},
        ]
        request = self._make_tool_result_request(text_blocks)
        converted = _convert(request)

        tool_messages = self._tool_messages(converted)
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == "line 1\nline 2"

    def test_tool_result_with_image(self):
        """An image in a tool_result should produce a follow-up user
        message."""
        image_block = {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": "AAAA",
            },
        }
        request = self._make_tool_result_request([image_block])
        converted = _convert(request)

        tool_messages = self._tool_messages(converted)
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == ""

        # The image is expected in a separate user message after the tool
        # message.
        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 1

        image_parts = follow_ups[0]["content"]
        assert len(image_parts) == 1
        assert image_parts[0] == {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,AAAA"},
        }

    def test_tool_result_with_text_and_image(self):
        """Mixed text+image tool_result: the text stays in the tool message,
        the image goes to a user message."""
        mixed_blocks = [
            {"type": "text", "text": "Here is the screenshot"},
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": "QUFB",
                },
            },
        ]
        request = self._make_tool_result_request(mixed_blocks)
        converted = _convert(request)

        tool_messages = self._tool_messages(converted)
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == "Here is the screenshot"

        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 1

        first_part = follow_ups[0]["content"][0]
        assert first_part["image_url"]["url"] == ("data:image/jpeg;base64,QUFB")

    def test_tool_result_with_multiple_images(self):
        """All images of one tool_result share a single follow-up message,
        in their original order."""
        image_blocks = [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": "IMG1",
                },
            },
            {
                "type": "image",
                "source": {
                    "type": "url",
                    "url": "https://example.com/img2.jpg",
                },
            },
        ]
        request = self._make_tool_result_request(image_blocks)
        converted = _convert(request)

        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 1

        urls = []
        for part in follow_ups[0]["content"]:
            urls.append(part["image_url"]["url"])
        assert urls == [
            "data:image/png;base64,IMG1",
            "https://example.com/img2.jpg",
        ]

    def test_tool_result_none_content(self):
        """A ``None`` result yields a tool message with empty content."""
        request = self._make_tool_result_request(None)
        converted = _convert(request)

        tool_messages = self._tool_messages(converted)
        assert len(tool_messages) == 1
        assert tool_messages[0]["content"] == ""

    def test_tool_result_no_follow_up_when_no_images(self):
        """No extra user message is added when the result has no images."""
        text_only = [
            {"type": "text", "text": "just text"},
        ]
        request = self._make_tool_result_request(text_only)
        converted = _convert(request)

        follow_ups = self._user_messages_with_list_content(converted)
        assert len(follow_ups) == 0
vllm/entrypoints/anthropic/serving.py
View file @
95a395db
...
...
@@ -86,8 +86,30 @@ class AnthropicServingMessages(OpenAIServingChat):
"tool_calls"
:
"tool_use"
,
}
@staticmethod
def _convert_image_source_to_url(source: dict[str, Any]) -> str:
    """Convert an Anthropic image source to an OpenAI-compatible URL.

    Anthropic supports two image source types:
    - base64: {"type": "base64", "media_type": "image/jpeg", "data": "..."}
    - url: {"type": "url", "url": "https://..."}

    For base64 sources, this constructs a data URI of the form
    ``data:<media_type>;base64,<data>``.

    Args:
        source: The ``source`` mapping of an Anthropic image block.

    Returns:
        The URL for ``"url"`` sources (an empty string when it is missing
        or empty); otherwise a base64 data URI. Any source whose type is
        not ``"url"`` (including a missing type) is treated as base64,
        with ``image/jpeg`` as the fallback media type and an empty
        payload as the fallback data.
    """
    if source.get("type") == "url":
        # ``or`` rather than a ``.get`` default: a key that is present but
        # explicitly None must still produce a ``str``.
        return source.get("url") or ""

    # Same reasoning here: an explicit None (or empty) value must not be
    # rendered as the literal text "None" inside the data URI.
    media_type = source.get("media_type") or "image/jpeg"
    data = source.get("data") or ""
    return f"data:{media_type};base64,{data}"
@
classmethod
def
_convert_anthropic_to_openai_request
(
self
,
anthropic_request
:
AnthropicMessagesRequest
cls
,
anthropic_request
:
AnthropicMessagesRequest
)
->
ChatCompletionRequest
:
"""Convert Anthropic message format to OpenAI format"""
openai_messages
=
[]
...
...
@@ -119,10 +141,11 @@ class AnthropicServingMessages(OpenAIServingChat):
if
block
.
type
==
"text"
and
block
.
text
:
content_parts
.
append
({
"type"
:
"text"
,
"text"
:
block
.
text
})
elif
block
.
type
==
"image"
and
block
.
source
:
image_url
=
cls
.
_convert_image_source_to_url
(
block
.
source
)
content_parts
.
append
(
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
block
.
source
.
get
(
"data"
,
""
)
},
"image_url"
:
{
"url"
:
image_url
},
}
)
elif
block
.
type
==
"thinking"
and
block
.
thinking
is
not
None
:
...
...
@@ -140,13 +163,48 @@ class AnthropicServingMessages(OpenAIServingChat):
tool_calls
.
append
(
tool_call
)
elif
block
.
type
==
"tool_result"
:
if
msg
.
role
==
"user"
:
# Parse tool_result content which can be
# a string or a list of content blocks
# (text, image, etc.)
tool_text
=
""
tool_image_urls
:
list
[
str
]
=
[]
if
isinstance
(
block
.
content
,
str
):
tool_text
=
block
.
content
elif
isinstance
(
block
.
content
,
list
):
text_parts
:
list
[
str
]
=
[]
for
item
in
block
.
content
:
if
not
isinstance
(
item
,
dict
):
continue
item_type
=
item
.
get
(
"type"
)
if
item_type
==
"text"
:
text_parts
.
append
(
item
.
get
(
"text"
,
""
))
elif
item_type
==
"image"
:
source
=
item
.
get
(
"source"
,
{})
url
=
cls
.
_convert_image_source_to_url
(
source
)
if
url
:
tool_image_urls
.
append
(
url
)
tool_text
=
"
\n
"
.
join
(
text_parts
)
openai_messages
.
append
(
{
"role"
:
"tool"
,
"tool_call_id"
:
block
.
tool_use_id
or
""
,
"content"
:
str
(
block
.
content
)
if
block
.
content
else
""
,
"content"
:
tool_text
or
""
,
}
)
# OpenAI tool messages only support string
# content, so inject images from tool
# results as a follow-up user message
if
tool_image_urls
:
openai_messages
.
append
(
{
"role"
:
"user"
,
"content"
:
[
# type: ignore[dict-item]
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
img
},
}
for
img
in
tool_image_urls
],
}
)
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment