Commit fca3f466 (unverified), authored by Benjamin Bartels, committed by GitHub
Browse files

[Frontend] Fixes anthropic /v1/messages streaming not containing input_tokens...


[Frontend] Fixes anthropic /v1/messages streaming not containing input_tokens on first chunk (#29971)
Signed-off-by: bbartels <benjamin@bartels.dev>
parent 28097d56
...@@ -69,9 +69,20 @@ async def test_anthropic_streaming(client: anthropic.AsyncAnthropic): ...@@ -69,9 +69,20 @@ async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
stream=True, stream=True,
) )
first_chunk = None
chunk_count = 0
async for chunk in resp: async for chunk in resp:
chunk_count += 1
if first_chunk is None and chunk.type == "message_start":
first_chunk = chunk
print(chunk.model_dump_json()) print(chunk.model_dump_json())
assert chunk_count > 0
assert first_chunk is not None, "message_start chunk was never observed"
assert first_chunk.usage is not None, "first chunk should include usage stats"
assert first_chunk.usage["output_tokens"] == 0
assert first_chunk.usage["input_tokens"] > 5
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic): async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
......
...@@ -183,7 +183,9 @@ class AnthropicServingMessages(OpenAIServingChat): ...@@ -183,7 +183,9 @@ class AnthropicServingMessages(OpenAIServingChat):
if anthropic_request.stream: if anthropic_request.stream:
req.stream = anthropic_request.stream req.stream = anthropic_request.stream
req.stream_options = StreamOptions.validate({"include_usage": True}) req.stream_options = StreamOptions.validate(
{"include_usage": True, "continuous_usage_stats": True}
)
if anthropic_request.tool_choice is None: if anthropic_request.tool_choice is None:
req.tool_choice = None req.tool_choice = None
...@@ -323,6 +325,12 @@ class AnthropicServingMessages(OpenAIServingChat): ...@@ -323,6 +325,12 @@ class AnthropicServingMessages(OpenAIServingChat):
content=[], content=[],
model=origin_chunk.model, model=origin_chunk.model,
), ),
usage=AnthropicUsage(
input_tokens=origin_chunk.usage.prompt_tokens
if origin_chunk.usage
else 0,
output_tokens=0,
),
) )
first_item = False first_item = False
data = chunk.model_dump_json(exclude_unset=True) data = chunk.model_dump_json(exclude_unset=True)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment