Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2887cd1c
Unverified
Commit
2887cd1c
authored
Mar 30, 2026
by
ishandhanani
Committed by
GitHub
Mar 30, 2026
Browse files
refactor(1/3): move `nvext` to `dynamo-llm` and move `anthropic` to `dynamo-async-openai` (#7564)
parent
d6136f4a
Changes
32
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1379 additions
and
1322 deletions
+1379
-1322
lib/async-openai/src/types/anthropic.rs
lib/async-openai/src/types/anthropic.rs
+869
-0
lib/async-openai/src/types/chat.rs
lib/async-openai/src/types/chat.rs
+0
-8
lib/async-openai/src/types/completion.rs
lib/async-openai/src/types/completion.rs
+0
-4
lib/async-openai/src/types/mod.rs
lib/async-openai/src/types/mod.rs
+1
-0
lib/llm/src/audit/stream.rs
lib/llm/src/audit/stream.rs
+95
-81
lib/llm/src/entrypoint/input/batch.rs
lib/llm/src/entrypoint/input/batch.rs
+3
-2
lib/llm/src/entrypoint/input/text.rs
lib/llm/src/entrypoint/input/text.rs
+3
-2
lib/llm/src/http/service/openai.rs
lib/llm/src/http/service/openai.rs
+24
-20
lib/llm/src/perf/logprobs.rs
lib/llm/src/perf/logprobs.rs
+76
-68
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+1
-1
lib/llm/src/preprocessor/speculative_prefill.rs
lib/llm/src/preprocessor/speculative_prefill.rs
+1
-1
lib/llm/src/protocols/anthropic/stream_converter.rs
lib/llm/src/protocols/anthropic/stream_converter.rs
+81
-75
lib/llm/src/protocols/anthropic/types.rs
lib/llm/src/protocols/anthropic/types.rs
+44
-897
lib/llm/src/protocols/openai/chat_completions.rs
lib/llm/src/protocols/openai/chat_completions.rs
+16
-13
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+107
-97
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+25
-21
lib/llm/src/protocols/openai/chat_completions/jail.rs
lib/llm/src/protocols/openai/chat_completions/jail.rs
+19
-17
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+3
-2
lib/llm/src/protocols/openai/completions/aggregator.rs
lib/llm/src/protocols/openai/completions/aggregator.rs
+8
-8
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+3
-5
No files found.
lib/async-openai/src/types/anthropic.rs
0 → 100644
View file @
2887cd1c
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Anthropic Messages API types.
//!
//! Pure protocol types for the `/v1/messages` endpoint -- request, response,
//! streaming events, error shapes, and count-tokens types.
use
serde
::{
Deserialize
,
Serialize
};
use
utoipa
::
ToSchema
;
/// Anthropic-style cache control hint for prefix pinning with TTL.
///
/// On the wire this is an object with a `type` key and an optional `ttl` key;
/// `ttl` is left out of the serialized output when it is `None`.
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
pub struct CacheControl {
    /// Kind of cache control, carried in the JSON `type` field.
    #[serde(rename = "type")]
    pub control_type: CacheControlType,
    /// TTL as seconds (integer) or shorthand ("5m" = 300s, "1h" = 3600s). Clamped to [300, 3600].
    ///
    /// The value is stored as the raw string; it is only interpreted when
    /// converted to seconds.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ttl: Option<String>,
}
/// Kind of cache control requested, written in lowercase on the wire.
///
/// Deserialization never fails on an unfamiliar kind: any string other than
/// `"ephemeral"` maps to [`CacheControlType::Unknown`].
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    /// The default kind, serialized as `"ephemeral"`.
    #[default]
    Ephemeral,
    /// Fallback for every other `type` string.
    #[serde(other)]
    Unknown,
}
/// Lower bound (and fallback) for a cache TTL, in seconds.
const MIN_TTL_SECONDS: u64 = 300;
/// Upper bound for a cache TTL, in seconds.
const MAX_TTL_SECONDS: u64 = 3600;

impl CacheControl {
    /// Resolve the configured TTL to a number of seconds within [300, 3600].
    ///
    /// * No TTL set: returns 300.
    /// * `"5m"` / `"1h"`: 300 / 3600.
    /// * Any other string is parsed as an unsigned integer number of seconds
    ///   and then limited to the [300, 3600] range.
    /// * A string that is neither shorthand nor an integer logs a warning and
    ///   returns 300.
    pub fn ttl_seconds(&self) -> u64 {
        let Some(ttl) = self.ttl.as_deref() else {
            return MIN_TTL_SECONDS;
        };

        let seconds = match ttl {
            "5m" => 300,
            "1h" => 3600,
            numeric => {
                let Ok(parsed) = numeric.parse::<u64>() else {
                    tracing::warn!("Unrecognized TTL '{}', defaulting to 300s", numeric);
                    return MIN_TTL_SECONDS;
                };
                parsed
            }
        };

        // Raise to the minimum first, then cap at the maximum.
        seconds.max(MIN_TTL_SECONDS).min(MAX_TTL_SECONDS)
    }
}
/// Parsed system prompt content, preserving cache_control from block arrays.
///
/// This is the normalized form of the request's `system` field regardless of
/// whether the client sent a plain string or an array of blocks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemContent {
    /// The concatenated text from all system blocks (or the plain string).
    pub text: String,
    /// Cache control from the last system block that had one.
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,
}
/// Deserialize `system` from either a plain string or an array of text blocks.
/// The Anthropic API accepts both `"system": "text"` and
/// `"system": [{"type": "text", "text": "...", "cache_control": {...}}]`.
fn
deserialize_system_prompt
<
'de
,
D
>
(
deserializer
:
D
)
->
Result
<
Option
<
SystemContent
>
,
D
::
Error
>
where
D
:
serde
::
Deserializer
<
'de
>
,
{
#[derive(Deserialize)]
#[serde(untagged)]
enum
SystemPrompt
{
Text
(
String
),
Blocks
(
Vec
<
SystemBlock
>
),
}
#[derive(Deserialize)]
struct
SystemBlock
{
text
:
String
,
#[serde(default)]
cache_control
:
Option
<
CacheControl
>
,
}
let
maybe
:
Option
<
SystemPrompt
>
=
Option
::
deserialize
(
deserializer
)
?
;
Ok
(
maybe
.map
(|
sp
|
match
sp
{
SystemPrompt
::
Text
(
s
)
=>
SystemContent
{
text
:
s
,
cache_control
:
None
,
},
SystemPrompt
::
Blocks
(
blocks
)
=>
{
let
cache_control
=
blocks
.iter
()
.rev
()
.find_map
(|
b
|
b
.cache_control
.clone
());
let
text
=
blocks
.into_iter
()
.map
(|
b
|
b
.text
)
.collect
::
<
Vec
<
_
>>
()
.join
(
"
\n
"
);
SystemContent
{
text
,
cache_control
,
}
}
}))
}
/// Top-level request body for `POST /v1/messages`.
///
/// `model`, `max_tokens` and `messages` are required. Every `Option` field is
/// left out of the serialized output when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicCreateMessageRequest {
    /// The model to use (e.g. "claude-sonnet-4-20250514").
    pub model: String,
    /// The maximum number of tokens to generate.
    pub max_tokens: u32,
    /// The conversation messages.
    pub messages: Vec<AnthropicMessage>,
    /// Optional system prompt (string or array of `{"type":"text","text":"..."}` blocks).
    /// Both shapes are normalized into a [`SystemContent`] while deserializing.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_system_prompt"
    )]
    pub system: Option<SystemContent>,
    /// Sampling temperature (0.0 - 1.0).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Nucleus sampling parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
    /// Top-K sampling parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Custom stop sequences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequences: Option<Vec<String>>,
    /// Whether to stream the response. Defaults to `false` when absent.
    #[serde(default)]
    pub stream: bool,
    /// Optional metadata (e.g. user_id). Kept as raw JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,
    /// Tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<AnthropicTool>>,
    /// How the model should choose which tool to call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<AnthropicToolChoice>,
    /// Top-level cache control for automatic prompt prefix caching.
    /// When present, the system caches all content up to the last cacheable block.
    /// Matches the Anthropic Messages API automatic caching mode.
    /// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#automatic-caching
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,
    /// Extended thinking configuration. When enabled, the model produces
    /// `thinking` content blocks containing its internal reasoning before
    /// the final response. The `budget_tokens` field controls how many tokens
    /// the model may use for thinking (must be >= 1024 and < max_tokens).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<ThinkingConfig>,
    /// Service tier selection: `"auto"` or `"standard_only"`.
    /// Stored as a free-form string; the value is not validated by this type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub service_tier: Option<String>,
    /// Container identifier for stateful sandbox sessions.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub container: Option<String>,
    /// Output configuration: effort level and optional JSON schema format.
    /// `effort` can be `"low"`, `"medium"`, `"high"`, or `"max"`.
    /// `format` specifies structured JSON output constraints.
    /// Kept as raw JSON, so no structure is enforced by this type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<serde_json::Value>,
}
/// Extended thinking configuration for the request.
///
/// When `type` is `"enabled"`, the model will produce `thinking` content blocks
/// with its internal reasoning. `budget_tokens` controls the maximum tokens
/// available for thinking (minimum 1024, must be less than `max_tokens`).
/// When `type` is `"disabled"`, no thinking blocks are produced.
///
/// Neither the `type` string nor the budget bounds are checked by this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThinkingConfig {
    /// Either `"enabled"` or `"disabled"`. Carried in the JSON `type` field.
    #[serde(rename = "type")]
    pub thinking_type: String,
    /// Maximum tokens for internal reasoning. Only relevant when type is "enabled".
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub budget_tokens: Option<u32>,
}
/// A single message in the conversation.
///
/// The content enum is flattened, so its `content` key sits next to `role`
/// in the same JSON object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicMessage {
    /// Who sent the message.
    pub role: AnthropicRole,
    /// The message body, either a plain string or an array of content blocks.
    #[serde(flatten)]
    pub content: AnthropicMessageContent,
}
/// The role of a message sender.
///
/// Serialized in lowercase (`"user"` / `"assistant"`). No other role string
/// is accepted when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum AnthropicRole {
    /// Message sent by the user.
    User,
    /// Message sent by the assistant.
    Assistant,
}
/// Message content -- either a plain string or an array of content blocks.
///
/// The enum is untagged: both variants expose a `content` field, and the
/// string form is tried before the array form when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum AnthropicMessageContent {
    /// Plain text content.
    Text { content: String },
    /// Array of structured content blocks.
    Blocks { content: Vec<AnthropicContentBlock> },
}
/// A single content block within a message.
///
/// Serialization is derived and internally tagged by `type`. Deserialization
/// is hand-written so that block types not listed here are captured as
/// `Other(Value)` instead of causing a hard deserialization failure. This is
/// important because Claude Code may send block types that we don't yet handle.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type")]
pub enum AnthropicContentBlock {
    /// Text content block. May optionally include `citations` -- references to
    /// source documents that support the text content. Citations are generated
    /// by the model when document/PDF content is provided and citation mode is enabled.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        citations: Option<Vec<serde_json::Value>>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Image content block.
    #[serde(rename = "image")]
    Image { source: AnthropicImageSource },
    /// Tool use request from assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Tool result from user.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        content: Option<ToolResultContent>,
        #[serde(skip_serializing_if = "Option::is_none")]
        is_error: Option<bool>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Thinking content block from assistant (extended thinking / reasoning).
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Redacted thinking block from assistant. Contains encrypted reasoning data
    /// that is opaque to the client but must be passed back verbatim in multi-turn
    /// conversations so the model can maintain its chain of thought.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// Server-initiated tool use block. Represents a tool call that the API
    /// executes server-side (e.g., web search). The client receives the result
    /// via a corresponding `web_search_tool_result` or similar block.
    #[serde(rename = "server_tool_use")]
    ServerToolUse {
        id: String,
        name: String,
        #[serde(default)]
        input: serde_json::Value,
    },
    /// Result from a server-initiated tool (e.g., web search results).
    /// Contains structured content returned by the server-side tool execution.
    #[serde(rename = "web_search_tool_result")]
    WebSearchToolResult {
        tool_use_id: String,
        #[serde(default)]
        content: serde_json::Value,
    },
    /// Catch-all for unrecognized block types. Preserves the full JSON value
    /// so that new Anthropic features don't break the endpoint and can be
    /// round-tripped or inspected.
    #[serde(untagged)]
    Other(serde_json::Value),
}
/// Content of a `tool_result` block -- either a plain string or an array of
/// content blocks (the Anthropic API accepts both).
///
/// The enum is untagged; the string form is tried before the array form.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    /// Result given as a single string.
    Text(String),
    /// Result given as an array of blocks.
    Blocks(Vec<ToolResultContentBlock>),
}
impl ToolResultContent {
    /// Consume the content and return its text.
    ///
    /// A plain string is returned unchanged. For the array form, the `text` of
    /// every text block is appended in order with no separator; non-text
    /// blocks contribute nothing.
    pub fn into_text(self) -> String {
        let blocks = match self {
            ToolResultContent::Text(text) => return text,
            ToolResultContent::Blocks(blocks) => blocks,
        };

        let mut joined = String::new();
        for block in blocks {
            match block {
                ToolResultContentBlock::Text { text } => joined.push_str(&text),
                ToolResultContentBlock::Other(_) => {}
            }
        }
        joined
    }
}
/// A content block within a `tool_result.content` array.
///
/// The enum is untagged: a value that deserializes as an object with a string
/// `text` field becomes `Text`; anything else is kept verbatim as `Other`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContentBlock {
    /// A block carrying text. Only the `text` field is retained.
    Text {
        text: String,
    },
    /// Catch-all for non-text blocks (images, etc.) in tool results.
    Other(serde_json::Value),
}
/// Custom deserializer for `AnthropicContentBlock` that handles unknown types
/// gracefully. Since serde's `#[serde(other)]` is not supported on internally
/// tagged enums, we deserialize as `Value` first and dispatch manually.
impl
<
'de
>
Deserialize
<
'de
>
for
AnthropicContentBlock
{
fn
deserialize
<
D
>
(
deserializer
:
D
)
->
Result
<
Self
,
D
::
Error
>
where
D
:
serde
::
Deserializer
<
'de
>
,
{
let
value
=
serde_json
::
Value
::
deserialize
(
deserializer
)
?
;
let
block_type
=
value
.get
(
"type"
)
.and_then
(|
t
|
t
.as_str
())
.unwrap_or
(
""
)
.to_string
();
match
block_type
.as_str
()
{
"text"
=>
{
let
text
=
value
.get
(
"text"
)
.and_then
(|
t
|
t
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"text"
))
?
.to_string
();
let
citations
:
Option
<
Vec
<
serde_json
::
Value
>>
=
value
.get
(
"citations"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
Text
{
text
,
citations
,
cache_control
,
})
}
"image"
=>
{
let
source
:
AnthropicImageSource
=
serde_json
::
from_value
(
value
.get
(
"source"
)
.cloned
()
.unwrap_or_default
())
.map_err
(
serde
::
de
::
Error
::
custom
)
?
;
Ok
(
AnthropicContentBlock
::
Image
{
source
})
}
"tool_use"
=>
{
let
id
=
value
.get
(
"id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"id"
))
?
.to_string
();
let
name
=
value
.get
(
"name"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"name"
))
?
.to_string
();
let
input
=
value
.get
(
"input"
)
.cloned
()
.unwrap_or
(
serde_json
::
json!
({}));
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
ToolUse
{
id
,
name
,
input
,
cache_control
,
})
}
"tool_result"
=>
{
let
tool_use_id
=
value
.get
(
"tool_use_id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"tool_use_id"
))
?
.to_string
();
let
content
:
Option
<
ToolResultContent
>
=
value
.get
(
"content"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
let
is_error
=
value
.get
(
"is_error"
)
.and_then
(|
v
|
v
.as_bool
());
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
ToolResult
{
tool_use_id
,
content
,
is_error
,
cache_control
,
})
}
"thinking"
=>
{
let
thinking
=
value
.get
(
"thinking"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"thinking"
))
?
.to_string
();
let
signature
=
value
.get
(
"signature"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"signature"
))
?
.to_string
();
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
Thinking
{
thinking
,
signature
,
cache_control
,
})
}
"redacted_thinking"
=>
{
let
data
=
value
.get
(
"data"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"data"
))
?
.to_string
();
Ok
(
AnthropicContentBlock
::
RedactedThinking
{
data
})
}
"server_tool_use"
=>
{
let
id
=
value
.get
(
"id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"id"
))
?
.to_string
();
let
name
=
value
.get
(
"name"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"name"
))
?
.to_string
();
let
input
=
value
.get
(
"input"
)
.cloned
()
.unwrap_or
(
serde_json
::
json!
({}));
Ok
(
AnthropicContentBlock
::
ServerToolUse
{
id
,
name
,
input
})
}
"web_search_tool_result"
=>
{
let
tool_use_id
=
value
.get
(
"tool_use_id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"tool_use_id"
))
?
.to_string
();
let
content
=
value
.get
(
"content"
)
.cloned
()
.unwrap_or
(
serde_json
::
json!
([]));
Ok
(
AnthropicContentBlock
::
WebSearchToolResult
{
tool_use_id
,
content
,
})
}
other
=>
{
tracing
::
debug!
(
"Unrecognized Anthropic content block type '{}', preserving as Other"
,
other
);
Ok
(
AnthropicContentBlock
::
Other
(
value
))
}
}
}
}
/// Image source for image content blocks.
///
/// NOTE(review): all three fields are required, so a source object lacking
/// `media_type` or `data` fails to deserialize -- confirm whether other
/// source shapes need to be accepted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicImageSource {
    /// Source kind, carried in the JSON `type` field.
    #[serde(rename = "type")]
    pub source_type: String,
    /// Media type string of the image (presumably a MIME type; not validated here).
    pub media_type: String,
    /// Image payload as a string (presumably base64-encoded; not validated here).
    pub data: String,
}
/// A tool definition.
///
/// Client tools (custom) require `name` + `input_schema`. Server tools
/// (web_search, bash, text_editor, code_execution, etc.) are discriminated
/// by their `type` field (e.g. `"web_search_20260209"`) and may not have
/// `input_schema`. We keep all fields optional beyond `name` so both
/// kinds deserialize successfully and pass through to the backend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicTool {
    /// Tool name (required for client tools, present on server tools too).
    pub name: String,
    /// Tool type discriminator. Client tools use `"custom"` (or omit).
    /// Server tools use versioned types like `"web_search_20260209"`.
    #[serde(default, rename = "type", skip_serializing_if = "Option::is_none")]
    pub tool_type: Option<String>,
    /// Human-readable description of the tool. Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// JSON Schema for the tool input. Required for client tools, absent on
    /// server tools (which define their own input shape server-side).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_schema: Option<serde_json::Value>,
    /// Cache control breakpoint on this tool definition.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,
}
/// Tool choice specification.
///
/// The enum is untagged, so variants are tried in declaration order: an object
/// that has a `name` field matches `Named`, otherwise it falls back to `Simple`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum AnthropicToolChoice {
    /// Named tool: `{type: "tool", name: "..."}`
    /// Must be listed before Simple so serde tries the stricter shape first.
    Named(AnthropicToolChoiceNamed),
    /// Simple mode: "auto", "any", or "none".
    Simple(AnthropicToolChoiceSimple),
}
/// Simple tool choice modes.
///
/// A tool choice that carries only a mode and no tool name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicToolChoiceSimple {
    /// The selected mode, carried in the JSON `type` field.
    #[serde(rename = "type")]
    pub choice_type: AnthropicToolChoiceMode,
    /// When true, the model will call tools one at a time instead of
    /// potentially issuing multiple tool calls in a single response.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub disable_parallel_tool_use: Option<bool>,
}
/// Mode string used in the `type` field of a tool choice.
///
/// Serialized in lowercase: `"auto"`, `"any"`, `"none"`, `"tool"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum AnthropicToolChoiceMode {
    /// `"auto"`.
    Auto,
    /// `"any"`.
    Any,
    /// `"none"`.
    None,
    /// `"tool"` -- the mode used together with a tool name.
    Tool,
}
/// Named tool choice.
///
/// Selects one specific tool by name. The mode is not restricted to `tool` by
/// this type; any [`AnthropicToolChoiceMode`] value deserializes here as long
/// as a `name` is present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicToolChoiceNamed {
    /// The selected mode, carried in the JSON `type` field.
    #[serde(rename = "type")]
    pub choice_type: AnthropicToolChoiceMode,
    /// Name of the tool to call.
    pub name: String,
    /// When true, the model will call tools one at a time instead of
    /// potentially issuing multiple tool calls in a single response.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub disable_parallel_tool_use: Option<bool>,
}
/// Response body for `POST /v1/messages` (non-streaming).
///
/// The same shape is also embedded in the `message_start` streaming event.
/// `stop_reason` and `stop_sequence` have no skip attribute, so they serialize
/// as `null` when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicMessageResponse {
    /// Message identifier.
    pub id: String,
    /// Object kind, carried in the JSON `type` field.
    #[serde(rename = "type")]
    pub object_type: String,
    /// Role of the message author, as a free-form string.
    pub role: String,
    /// The generated content blocks, in order.
    pub content: Vec<AnthropicResponseContentBlock>,
    /// Model name reported for this response.
    pub model: String,
    /// Why generation stopped, if it has.
    pub stop_reason: Option<AnthropicStopReason>,
    /// The stop sequence associated with the stop, if any.
    pub stop_sequence: Option<String>,
    /// Token accounting for the request.
    pub usage: AnthropicUsage,
}
/// A content block in the response.
///
/// The Anthropic API returns up to 12 different block types. We model the
/// common ones explicitly and catch the rest as `Other` so the proxy can
/// forward them without losing data.
///
/// Variants are internally tagged by `type`; `Other` is untagged and holds
/// the raw JSON value.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AnthropicResponseContentBlock {
    /// Reasoning text together with its signature.
    #[serde(rename = "thinking")]
    Thinking { thinking: String, signature: String },
    /// Generated text, optionally with citations (kept as raw JSON).
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        citations: Option<Vec<serde_json::Value>>,
    },
    /// A tool call: the call id, tool name and JSON input.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
    /// Redacted thinking, carried as an opaque `data` string.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A server-side tool call; `input` defaults to JSON `null` when absent.
    #[serde(rename = "server_tool_use")]
    ServerToolUse {
        id: String,
        name: String,
        #[serde(default)]
        input: serde_json::Value,
    },
    /// Result of a web search tool call; `content` defaults to JSON `null` when absent.
    #[serde(rename = "web_search_tool_result")]
    WebSearchToolResult {
        tool_use_id: String,
        #[serde(default)]
        content: serde_json::Value,
    },
    /// Catch-all for new/uncommon block types (web_fetch_tool_result,
    /// code_execution_tool_result, container_upload, etc.) so the proxy
    /// can serialize them back without data loss.
    #[serde(untagged)]
    Other(serde_json::Value),
}
/// Token usage information.
///
/// Used both in full message responses and in `message_delta` streaming
/// events. Every field tolerates being absent on input: the two counters fall
/// back to `0` and the cache counters to `None`, so a partial usage object
/// (for example one that reports only `output_tokens`) still deserializes.
/// Serialized output always contains both counters.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AnthropicUsage {
    /// Number of input (prompt) tokens. Defaults to `0` when absent.
    #[serde(default)]
    pub input_tokens: u32,
    /// Number of generated tokens. Defaults to `0` when absent.
    #[serde(default)]
    pub output_tokens: u32,
    /// Number of input tokens used to create a new cache entry.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u32>,
    /// Number of input tokens read from the prompt cache (prefix cache hits).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u32>,
}
/// Reason the model stopped generating.
///
/// Serialized in snake_case (`"end_turn"`, `"max_tokens"`, ...). There is no
/// catch-all variant, so an unlisted reason string fails to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicStopReason {
    /// `"end_turn"`.
    EndTurn,
    /// `"max_tokens"`.
    MaxTokens,
    /// `"stop_sequence"`.
    StopSequence,
    /// `"tool_use"`.
    ToolUse,
    /// The model paused to yield control in an agentic loop, intending to
    /// continue in a subsequent turn. Used with extended thinking / tool use.
    PauseTurn,
    /// The model refused to generate content (safety refusal).
    Refusal,
}
/// SSE event types for the Anthropic streaming API.
///
/// Internally tagged by `type`; each variant's rename is the event name that
/// appears on the wire.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AnthropicStreamEvent {
    /// `message_start`: carries a message object.
    #[serde(rename = "message_start")]
    MessageStart { message: AnthropicMessageResponse },
    /// `content_block_start`: a block begins at position `index`.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: u32,
        content_block: AnthropicResponseContentBlock,
    },
    /// `content_block_delta`: an incremental update for the block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: u32, delta: AnthropicDelta },
    /// `content_block_stop`: the block at `index` is complete.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: u32 },
    /// `message_delta`: message-level update with stop information and usage.
    #[serde(rename = "message_delta")]
    MessageDelta {
        delta: AnthropicMessageDeltaBody,
        usage: AnthropicUsage,
    },
    /// `message_stop`: carries no fields.
    #[serde(rename = "message_stop")]
    MessageStop {},
    /// `ping`: carries no fields.
    #[serde(rename = "ping")]
    Ping {},
    /// `error`: carries an error body.
    #[serde(rename = "error")]
    Error { error: AnthropicErrorBody },
}
/// Delta content in a streaming content_block_delta event.
///
/// Internally tagged by `type`. There is no catch-all variant, so an unlisted
/// delta type fails to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AnthropicDelta {
    /// Incremental thinking text.
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// Incremental response text.
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// A fragment of JSON text, carried as a string in `partial_json`.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
    /// Incremental signature for a thinking block (sent at the end).
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// Incremental citation attached to a text block.
    #[serde(rename = "citations_delta")]
    CitationsDelta { citation: serde_json::Value },
}
/// The delta body in a message_delta event.
///
/// `stop_reason` serializes as `null` when `None`, while `stop_sequence` is
/// left out entirely when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicMessageDeltaBody {
    /// Why generation stopped, if known.
    pub stop_reason: Option<AnthropicStopReason>,
    /// The stop sequence associated with the stop, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
}
/// Anthropic API error response wrapper.
///
/// Serializes as `{"type": ..., "error": {...}}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicErrorResponse {
    /// Object kind, carried in the JSON `type` field. The constructors on this
    /// type always set it to `"error"`.
    #[serde(rename = "type")]
    pub object_type: String,
    /// The error details.
    pub error: AnthropicErrorBody,
}
/// Error body within an error response.
///
/// Also used as the payload of the streaming `error` event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicErrorBody {
    /// Error category (e.g. `"invalid_request_error"`), carried in the JSON `type` field.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable description of the error.
    pub message: String,
}
impl
AnthropicErrorResponse
{
/// Create an `invalid_request_error` response.
pub
fn
invalid_request
(
message
:
impl
Into
<
String
>
)
->
Self
{
Self
{
object_type
:
"error"
.to_string
(),
error
:
AnthropicErrorBody
{
error_type
:
"invalid_request_error"
.to_string
(),
message
:
message
.into
(),
},
}
}
/// Create an `api_error` (internal server error) response.
pub
fn
api_error
(
message
:
impl
Into
<
String
>
)
->
Self
{
Self
{
object_type
:
"error"
.to_string
(),
error
:
AnthropicErrorBody
{
error_type
:
"api_error"
.to_string
(),
message
:
message
.into
(),
},
}
}
/// Create a `not_found_error` response.
pub
fn
not_found
(
message
:
impl
Into
<
String
>
)
->
Self
{
Self
{
object_type
:
"error"
.to_string
(),
error
:
AnthropicErrorBody
{
error_type
:
"not_found_error"
.to_string
(),
message
:
message
.into
(),
},
}
}
}
/// Request body for `POST /v1/messages/count_tokens`.
///
/// This type is deserialize-only, so its field attributes carry no
/// serialization options.
#[derive(Debug, Clone, Deserialize)]
pub struct AnthropicCountTokensRequest {
    /// The model the count is requested for.
    pub model: String,
    /// The conversation messages to count.
    pub messages: Vec<AnthropicMessage>,
    /// Optional system prompt, accepted as a plain string or an array of text
    /// blocks and normalized into a [`SystemContent`].
    #[serde(default, deserialize_with = "deserialize_system_prompt")]
    pub system: Option<SystemContent>,
    /// Optional tool definitions to include in the count.
    #[serde(default)]
    pub tools: Option<Vec<AnthropicTool>>,
}
/// Response body for `POST /v1/messages/count_tokens`.
///
/// Serialize-only: it derives `Serialize` but not `Deserialize`.
#[derive(Debug, Clone, Serialize)]
pub struct AnthropicCountTokensResponse {
    /// The input token count reported to the client.
    pub input_tokens: u32,
}
impl AnthropicCountTokensRequest {
    /// Estimate input token count using a `len/3` heuristic.
    ///
    /// Sums the byte lengths of the system prompt text, a fixed per-message
    /// role cost, each message's content, and each tool's name, description
    /// and input schema (rendered to a JSON string), then divides by three.
    ///
    /// Returns `0` only for a request with no measurable content; any
    /// non-empty request yields at least `1`. The result saturates at
    /// `u32::MAX` instead of wrapping if the total is too large for a `u32`.
    pub fn estimate_tokens(&self) -> u32 {
        let mut total_len: usize = 0;

        if let Some(system) = &self.system {
            total_len += system.text.len();
        }

        for msg in &self.messages {
            // Role cost: 4 and 9 are the byte lengths of "user" and "assistant".
            total_len += match msg.role {
                AnthropicRole::User => 4,
                AnthropicRole::Assistant => 9,
            };

            match &msg.content {
                AnthropicMessageContent::Text { content } => total_len += content.len(),
                AnthropicMessageContent::Blocks { content } => {
                    for block in content {
                        total_len += estimate_block_len(block);
                    }
                }
            }
        }

        if let Some(tools) = &self.tools {
            for tool in tools {
                total_len += tool.name.len();
                if let Some(desc) = &tool.description {
                    total_len += desc.len();
                }
                if let Some(schema) = &tool.input_schema {
                    total_len += schema.to_string().len();
                }
            }
        }

        if total_len == 0 {
            return 0;
        }

        // Round tiny non-empty inputs up to one token.
        let tokens = (total_len / 3).max(1);
        // Cap before narrowing so the cast can never truncate.
        tokens.min(u32::MAX as usize) as u32
    }
}
fn
estimate_block_len
(
block
:
&
AnthropicContentBlock
)
->
usize
{
match
block
{
AnthropicContentBlock
::
Text
{
text
,
..
}
=>
text
.len
(),
AnthropicContentBlock
::
ToolUse
{
name
,
input
,
..
}
=>
name
.len
()
+
input
.to_string
()
.len
(),
AnthropicContentBlock
::
ToolResult
{
content
,
..
}
=>
content
.as_ref
()
.map
(|
c
|
match
c
{
ToolResultContent
::
Text
(
s
)
=>
s
.len
(),
ToolResultContent
::
Blocks
(
blocks
)
=>
blocks
.iter
()
.map
(|
b
|
match
b
{
ToolResultContentBlock
::
Text
{
text
}
=>
text
.len
(),
ToolResultContentBlock
::
Other
(
v
)
=>
v
.to_string
()
.len
(),
})
.sum
(),
})
.unwrap_or
(
0
),
AnthropicContentBlock
::
Thinking
{
thinking
,
..
}
=>
thinking
.len
(),
AnthropicContentBlock
::
RedactedThinking
{
data
,
..
}
=>
data
.len
(),
AnthropicContentBlock
::
ServerToolUse
{
name
,
input
,
..
}
=>
{
name
.len
()
+
input
.to_string
()
.len
()
}
AnthropicContentBlock
::
WebSearchToolResult
{
content
,
..
}
=>
content
.to_string
()
.len
(),
AnthropicContentBlock
::
Image
{
..
}
=>
256
,
// rough estimate for image metadata
AnthropicContentBlock
::
Other
(
v
)
=>
v
.to_string
()
.len
(),
}
}
lib/async-openai/src/types/chat.rs
View file @
2887cd1c
...
@@ -1182,10 +1182,6 @@ pub struct CreateChatCompletionResponse {
...
@@ -1182,10 +1182,6 @@ pub struct CreateChatCompletionResponse {
/// The object type, which is always `chat.completion`.
/// The object type, which is always `chat.completion`.
pub
object
:
String
,
pub
object
:
String
,
pub
usage
:
Option
<
CompletionUsage
>
,
pub
usage
:
Option
<
CompletionUsage
>
,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
/// Parsed server side events stream until an \[DONE\] is received from server.
/// Parsed server side events stream until an \[DONE\] is received from server.
...
@@ -1281,10 +1277,6 @@ pub struct CreateChatCompletionStreamResponse {
...
@@ -1281,10 +1277,6 @@ pub struct CreateChatCompletionStreamResponse {
/// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request.
/// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request.
/// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.
/// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.
pub
usage
:
Option
<
CompletionUsage
>
,
pub
usage
:
Option
<
CompletionUsage
>
,
/// NVIDIA extension field for response metadata
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
#[cfg(test)]
#[cfg(test)]
...
...
lib/async-openai/src/types/completion.rs
View file @
2887cd1c
...
@@ -224,10 +224,6 @@ pub struct CreateCompletionResponse {
...
@@ -224,10 +224,6 @@ pub struct CreateCompletionResponse {
/// The object type, which is always "text_completion"
/// The object type, which is always "text_completion"
pub
object
:
String
,
pub
object
:
String
,
pub
usage
:
Option
<
CompletionUsage
>
,
pub
usage
:
Option
<
CompletionUsage
>
,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
/// Parsed server side events stream until an \[DONE\] is received from server.
/// Parsed server side events stream until an \[DONE\] is received from server.
...
...
lib/async-openai/src/types/mod.rs
View file @
2887cd1c
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
//! Types used in OpenAI API requests and responses.
//! Types used in OpenAI API requests and responses.
//! These types are created from component schemas in the [OpenAPI spec](https://github.com/openai/openai-openapi)
//! These types are created from component schemas in the [OpenAPI spec](https://github.com/openai/openai-openapi)
pub
mod
anthropic
;
mod
assistant
;
mod
assistant
;
mod
assistant_impls
;
mod
assistant_impls
;
mod
assistant_stream
;
mod
assistant_stream
;
...
...
lib/llm/src/audit/stream.rs
View file @
2887cd1c
...
@@ -90,14 +90,16 @@ where
...
@@ -90,14 +90,16 @@ where
tracing
::
warn!
(
"audit: aggregation future canceled/failed"
);
tracing
::
warn!
(
"audit: aggregation future canceled/failed"
);
// Return minimal response if aggregation failed
// Return minimal response if aggregation failed
NvCreateChatCompletionResponse
{
NvCreateChatCompletionResponse
{
id
:
String
::
new
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
0
,
id
:
String
::
new
(),
usage
:
None
,
created
:
0
,
model
:
String
::
new
(),
usage
:
None
,
object
:
"chat.completion"
.to_string
(),
model
:
String
::
new
(),
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[],
system_fingerprint
:
None
,
service_tier
:
None
,
choices
:
vec!
[],
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
})
})
...
@@ -125,14 +127,16 @@ where
...
@@ -125,14 +127,16 @@ where
Err
(
e
)
=>
{
Err
(
e
)
=>
{
tracing
::
warn!
(
"fold aggregation failed: {e}"
);
tracing
::
warn!
(
"fold aggregation failed: {e}"
);
let
fallback
=
NvCreateChatCompletionResponse
{
let
fallback
=
NvCreateChatCompletionResponse
{
id
:
String
::
new
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
0
,
id
:
String
::
new
(),
usage
:
None
,
created
:
0
,
model
:
String
::
new
(),
usage
:
None
,
object
:
"chat.completion"
.to_string
(),
model
:
String
::
new
(),
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[],
system_fingerprint
:
None
,
service_tier
:
None
,
choices
:
vec!
[],
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
let
_
=
tx
.send
(
fallback
.clone
());
let
_
=
tx
.send
(
fallback
.clone
());
...
@@ -145,14 +149,16 @@ where
...
@@ -145,14 +149,16 @@ where
rx
.await
.unwrap_or_else
(|
_
|
{
rx
.await
.unwrap_or_else
(|
_
|
{
tracing
::
warn!
(
"fold aggregation future canceled"
);
tracing
::
warn!
(
"fold aggregation future canceled"
);
NvCreateChatCompletionResponse
{
NvCreateChatCompletionResponse
{
id
:
String
::
new
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
0
,
id
:
String
::
new
(),
usage
:
None
,
created
:
0
,
model
:
String
::
new
(),
usage
:
None
,
object
:
"chat.completion"
.to_string
(),
model
:
String
::
new
(),
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[],
system_fingerprint
:
None
,
service_tier
:
None
,
choices
:
vec!
[],
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
})
})
...
@@ -171,8 +177,8 @@ pub fn final_response_to_one_chunk_stream(
...
@@ -171,8 +177,8 @@ pub fn final_response_to_one_chunk_stream(
)
->
std
::
pin
::
Pin
<
)
->
std
::
pin
::
Pin
<
Box
<
dyn
futures
::
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
+
Send
>
,
Box
<
dyn
futures
::
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
+
Send
>
,
>
{
>
{
let
mut
choices
:
Vec
<
ChatChoiceStream
>
=
Vec
::
with_capacity
(
resp
.choices
.len
());
let
mut
choices
:
Vec
<
ChatChoiceStream
>
=
Vec
::
with_capacity
(
resp
.
inner.
choices
.len
());
for
(
idx
,
ch
)
in
resp
.choices
.iter
()
.enumerate
()
{
for
(
idx
,
ch
)
in
resp
.
inner.
choices
.iter
()
.enumerate
()
{
// Convert FunctionCall to FunctionCallStream if present
// Convert FunctionCall to FunctionCallStream if present
#[allow(deprecated)]
#[allow(deprecated)]
let
function_call
=
ch
.message.function_call
.as_ref
()
.map
(|
fc
|
{
let
function_call
=
ch
.message.function_call
.as_ref
()
.map
(|
fc
|
{
...
@@ -222,14 +228,16 @@ pub fn final_response_to_one_chunk_stream(
...
@@ -222,14 +228,16 @@ pub fn final_response_to_one_chunk_stream(
}
}
let
chunk
=
NvCreateChatCompletionStreamResponse
{
let
chunk
=
NvCreateChatCompletionStreamResponse
{
id
:
resp
.id
.clone
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
"chat.completion.chunk"
.to_string
(),
id
:
resp
.inner.id
.clone
(),
created
:
resp
.created
,
object
:
"chat.completion.chunk"
.to_string
(),
model
:
resp
.model
.clone
(),
created
:
resp
.inner.created
,
system_fingerprint
:
resp
.system_fingerprint
.clone
(),
model
:
resp
.inner.model
.clone
(),
service_tier
:
resp
.service_tier
.clone
(),
system_fingerprint
:
resp
.inner.system_fingerprint
.clone
(),
choices
,
service_tier
:
resp
.inner.service_tier
.clone
(),
usage
:
resp
.usage
.clone
(),
choices
,
usage
:
resp
.inner.usage
.clone
(),
},
nvext
:
resp
.nvext
.clone
(),
nvext
:
resp
.nvext
.clone
(),
};
};
...
@@ -275,14 +283,16 @@ mod tests {
...
@@ -275,14 +283,16 @@ mod tests {
};
};
let
response
=
NvCreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
choice
],
id
:
"test-id"
.to_string
(),
created
:
1234567890
,
choices
:
vec!
[
choice
],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -314,14 +324,16 @@ mod tests {
...
@@ -314,14 +324,16 @@ mod tests {
};
};
let
response
=
NvCreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
choice
],
id
:
"test-id"
.to_string
(),
created
:
1234567890
,
choices
:
vec!
[
choice
],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -339,7 +351,7 @@ mod tests {
...
@@ -339,7 +351,7 @@ mod tests {
chunk
chunk
.data
.data
.as_ref
()
.as_ref
()
.and_then
(|
d
|
d
.choices
.first
())
.and_then
(|
d
|
d
.
inner.
choices
.first
())
.and_then
(|
c
|
c
.delta.content
.as_ref
())
.and_then
(|
c
|
c
.delta.content
.as_ref
())
.and_then
(|
content
|
match
content
{
.and_then
(|
content
|
match
content
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.clone
()),
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.clone
()),
...
@@ -396,7 +408,7 @@ mod tests {
...
@@ -396,7 +408,7 @@ mod tests {
assert_eq!
(
results
.len
(),
0
,
"Empty stream should produce no chunks"
);
assert_eq!
(
results
.len
(),
0
,
"Empty stream should produce no chunks"
);
// Verify fallback response (aggregation will fail on empty stream)
// Verify fallback response (aggregation will fail on empty stream)
assert_eq!
(
final_resp
.object
,
"chat.completion"
);
assert_eq!
(
final_resp
.
inner.
object
,
"chat.completion"
);
// Should get fallback response, not panic
// Should get fallback response, not panic
}
}
...
@@ -415,7 +427,7 @@ mod tests {
...
@@ -415,7 +427,7 @@ mod tests {
assert_eq!
(
extract_content
(
&
results
[
0
]),
"Single chunk"
);
assert_eq!
(
extract_content
(
&
results
[
0
]),
"Single chunk"
);
// Verify aggregation
// Verify aggregation
assert_eq!
(
final_resp
.object
,
"chat.completion"
);
assert_eq!
(
final_resp
.
inner.
object
,
"chat.completion"
);
}
}
#[tokio::test]
#[tokio::test]
...
@@ -423,32 +435,34 @@ mod tests {
...
@@ -423,32 +435,34 @@ mod tests {
// Test that metadata (id, event, comment) is preserved through passthrough
// Test that metadata (id, event, comment) is preserved through passthrough
let
chunk_with_metadata
=
Annotated
{
let
chunk_with_metadata
=
Annotated
{
data
:
Some
(
NvCreateChatCompletionStreamResponse
{
data
:
Some
(
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[{
id
:
"test-id"
.to_string
(),
#[allow(deprecated)]
choices
:
vec!
[{
ChatChoiceStream
{
#[allow(deprecated)]
index
:
0
,
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
role
:
Some
(
Role
::
Assistant
),
delta
:
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
role
:
Some
(
Role
::
Assistant
),
"Content"
.to_string
(),
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
)),
"Content"
.to_string
(),
tool_calls
:
None
,
)),
function_call
:
None
,
tool_calls
:
None
,
refusal
:
None
,
function_call
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}
logprobs
:
None
,
}],
}
created
:
1234567890
,
}],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
system_fingerprint
:
None
,
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}),
}),
id
:
Some
(
"correlation-123"
.to_string
()),
id
:
Some
(
"correlation-123"
.to_string
()),
...
@@ -481,7 +495,7 @@ mod tests {
...
@@ -481,7 +495,7 @@ mod tests {
let
(
resp1
,
resp2
)
=
tokio
::
join!
(
future1
,
future2
);
let
(
resp1
,
resp2
)
=
tokio
::
join!
(
future1
,
future2
);
// Both should complete successfully
// Both should complete successfully
assert_eq!
(
resp1
.object
,
"chat.completion"
);
assert_eq!
(
resp1
.
inner.
object
,
"chat.completion"
);
assert_eq!
(
resp2
.object
,
"chat.completion"
);
assert_eq!
(
resp2
.
inner.
object
,
"chat.completion"
);
}
}
}
}
lib/llm/src/entrypoint/input/batch.rs
View file @
2887cd1c
...
@@ -238,8 +238,9 @@ async fn evaluate(
...
@@ -238,8 +238,9 @@ async fn evaluate(
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
(
Some
(
data
),
_
)
=>
{
(
Some
(
data
),
_
)
=>
{
// Normal case
// Normal case
let
choice
=
data
.choices
.first
();
let
Some
(
chat_comp
)
=
data
.inner.choices
.first
()
else
{
let
chat_comp
=
choice
.as_ref
()
.unwrap
();
continue
;
};
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
match
c
{
match
c
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
...
...
lib/llm/src/entrypoint/input/text.rs
View file @
2887cd1c
...
@@ -138,8 +138,9 @@ async fn main_loop(
...
@@ -138,8 +138,9 @@ async fn main_loop(
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
(
Some
(
data
),
_
)
=>
{
(
Some
(
data
),
_
)
=>
{
// Normal case
// Normal case
let
entry
=
data
.choices
.first
();
let
Some
(
chat_comp
)
=
data
.inner.choices
.first
()
else
{
let
chat_comp
=
entry
.as_ref
()
.unwrap
();
continue
;
};
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
match
c
{
match
c
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
...
...
lib/llm/src/http/service/openai.rs
View file @
2887cd1c
...
@@ -991,7 +991,7 @@ fn streaming_tool_dispatch_events(
...
@@ -991,7 +991,7 @@ fn streaming_tool_dispatch_events(
};
};
let
mut
events
=
vec!
[];
let
mut
events
=
vec!
[];
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
let
Some
(
tool_calls
)
=
&
choice
.delta.tool_calls
else
{
let
Some
(
tool_calls
)
=
&
choice
.delta.tool_calls
else
{
continue
;
continue
;
};
};
...
@@ -1034,7 +1034,7 @@ fn accumulate_reasoning_dispatch(
...
@@ -1034,7 +1034,7 @@ fn accumulate_reasoning_dispatch(
};
};
let
mut
events
=
vec!
[];
let
mut
events
=
vec!
[];
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
let
buffer
=
buffers
.entry
(
choice
.index
)
.or_default
();
let
buffer
=
buffers
.entry
(
choice
.index
)
.or_default
();
let
has_reasoning
=
choice
let
has_reasoning
=
choice
.delta
.delta
...
@@ -2892,15 +2892,17 @@ mod tests {
...
@@ -2892,15 +2892,17 @@ mod tests {
// Create a normal data event
// Create a normal data event
let
normal_event
=
Annotated
::
<
NvCreateChatCompletionStreamResponse
>
{
let
normal_event
=
Annotated
::
<
NvCreateChatCompletionStreamResponse
>
{
data
:
Some
(
CreateChatCompletionStreamResponse
{
data
:
Some
(
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
CreateChatCompletionStreamResponse
{
choices
:
vec!
[],
id
:
"test-id"
.to_string
(),
created
:
0
,
choices
:
vec!
[],
model
:
"test-model"
.to_string
(),
created
:
0
,
system_fingerprint
:
None
,
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
service_tier
:
None
,
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}),
}),
id
:
Some
(
"msg-1"
.to_string
()),
id
:
Some
(
"msg-1"
.to_string
()),
...
@@ -3162,15 +3164,17 @@ mod tests {
...
@@ -3162,15 +3164,17 @@ mod tests {
fn
make_stream_response
(
fn
make_stream_response
(
choices
:
Vec
<
ChatChoiceStream
>
,
choices
:
Vec
<
ChatChoiceStream
>
,
)
->
Annotated
<
NvCreateChatCompletionStreamResponse
>
{
)
->
Annotated
<
NvCreateChatCompletionStreamResponse
>
{
let
response
=
CreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
CreateChatCompletionStreamResponse
{
choices
,
id
:
"test-id"
.to_string
(),
created
:
0
,
choices
,
model
:
"test-model"
.to_string
(),
created
:
0
,
system_fingerprint
:
None
,
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
Annotated
{
Annotated
{
...
...
lib/llm/src/perf/logprobs.rs
View file @
2887cd1c
...
@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse {
...
@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse {
fn
extract_logprobs_by_choice
(
&
self
)
->
HashMap
<
u32
,
Vec
<
TokenLogProbs
>>
{
fn
extract_logprobs_by_choice
(
&
self
)
->
HashMap
<
u32
,
Vec
<
TokenLogProbs
>>
{
let
mut
result
=
HashMap
::
new
();
let
mut
result
=
HashMap
::
new
();
for
choice
in
&
self
.choices
{
for
choice
in
&
self
.
inner.
choices
{
let
choice_index
=
choice
.index
;
let
choice_index
=
choice
.index
;
let
choice_logprobs
=
choice
let
choice_logprobs
=
choice
...
@@ -949,34 +949,36 @@ mod tests {
...
@@ -949,34 +949,36 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
)
->
NvCreateChatCompletionStreamResponse
{
#[expect(deprecated)]
#[expect(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"test_id"
.to_string
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
Some
(
delta
:
ChatCompletionStreamResponseDelta
{
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
:
Some
(
"test"
.to_string
(),
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"test"
.to_string
(),
),
),
),
)
,
function_call
:
None
,
function
_call
:
None
,
tool
_call
s
:
None
,
tool_calls
:
None
,
role
:
Some
(
Role
::
Assistant
)
,
role
:
Some
(
Role
::
Assistant
)
,
refusal
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
}
,
}
,
finish_reason
:
Some
(
FinishReason
::
Stop
)
,
finish
_reason
:
Some
(
FinishReason
::
Stop
)
,
stop
_reason
:
None
,
stop_reason
:
None
,
logprobs
:
Some
(
ChatChoiceLogprobs
{
logprobs
:
Some
(
ChatChoiceL
ogprobs
{
content
:
Some
(
token_l
ogprobs
),
content
:
Some
(
token_logprobs
)
,
refusal
:
None
,
refusal
:
None
,
})
,
}
)
,
}
]
,
}]
,
created
:
1234567890
,
created
:
1234567890
,
model
:
"test-model"
.to_string
()
,
model
:
"test-model"
.to_string
()
,
service_tier
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
()
,
object
:
"chat.completion.chunk"
.to_string
()
,
usage
:
None
,
usage
:
None
,
}
,
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -1012,14 +1014,16 @@ mod tests {
...
@@ -1012,14 +1014,16 @@ mod tests {
.collect
();
.collect
();
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
,
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
choices
,
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
service_tier
:
None
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -1341,31 +1345,33 @@ mod tests {
...
@@ -1341,31 +1345,33 @@ mod tests {
// Test with choice that has no logprobs
// Test with choice that has no logprobs
#[expect(deprecated)]
#[expect(deprecated)]
let
response
=
NvCreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"test_id"
.to_string
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
Some
(
delta
:
ChatCompletionStreamResponseDelta
{
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
:
Some
(
"test"
.to_string
(),
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"test"
.to_string
(),
),
),
),
)
,
function_call
:
None
,
function
_call
:
None
,
tool
_call
s
:
None
,
tool_calls
:
None
,
role
:
Some
(
Role
::
Assistant
)
,
role
:
Some
(
Role
::
Assistant
)
,
refusal
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
}
,
}
,
finish_reason
:
Some
(
FinishReason
::
Stop
)
,
finish
_reason
:
Some
(
FinishReason
::
Stop
)
,
stop
_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
// No logprobs
logprobs
:
None
,
// No logprobs
}],
}]
,
created
:
1234567890
,
created
:
1234567890
,
model
:
"test-model"
.to_string
()
,
model
:
"test-model"
.to_string
()
,
service_tier
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
()
,
object
:
"chat.completion.chunk"
.to_string
()
,
usage
:
None
,
usage
:
None
,
}
,
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -1573,14 +1579,16 @@ mod tests {
...
@@ -1573,14 +1579,16 @@ mod tests {
// In practice, this would have real logprobs data
// In practice, this would have real logprobs data
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[],
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
choices
:
vec!
[],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
service_tier
:
None
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
...
lib/llm/src/preprocessor.rs
View file @
2887cd1c
...
@@ -1217,7 +1217,7 @@ impl OpenAIPreprocessor {
...
@@ -1217,7 +1217,7 @@ impl OpenAIPreprocessor {
let
processed_response
=
if
let
Some
(
ref
mut
parser
)
=
state
.reasoning_parser
{
let
processed_response
=
if
let
Some
(
ref
mut
parser
)
=
state
.reasoning_parser
{
response
.map_data
(|
mut
data
|
{
response
.map_data
(|
mut
data
|
{
// Process all choices, not just the first one
// Process all choices, not just the first one
for
choice
in
data
.choices
.iter_mut
()
{
for
choice
in
data
.
inner.
choices
.iter_mut
()
{
// Reasoning parsing only applies to text content
// Reasoning parsing only applies to text content
if
let
Some
(
if
let
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
...
...
lib/llm/src/preprocessor/speculative_prefill.rs
View file @
2887cd1c
...
@@ -111,7 +111,7 @@ pub fn maybe_wrap_stream(
...
@@ -111,7 +111,7 @@ pub fn maybe_wrap_stream(
let
mut
prefill_tx
=
Some
(
tx
);
let
mut
prefill_tx
=
Some
(
tx
);
Box
::
pin
(
stream
.map
(
move
|
item
|
{
Box
::
pin
(
stream
.map
(
move
|
item
|
{
if
let
Some
(
ref
resp
)
=
item
.data
{
if
let
Some
(
ref
resp
)
=
item
.data
{
for
choice
in
&
resp
.choices
{
for
choice
in
&
resp
.
inner.
choices
{
if
let
Some
(
ChatCompletionMessageContent
::
Text
(
ref
text
))
=
choice
.delta.content
{
if
let
Some
(
ChatCompletionMessageContent
::
Text
(
ref
text
))
=
choice
.delta.content
{
accumulated_text
.push_str
(
text
);
accumulated_text
.push_str
(
text
);
}
}
...
...
lib/llm/src/protocols/anthropic/stream_converter.rs
View file @
2887cd1c
...
@@ -106,7 +106,7 @@ impl AnthropicStreamConverter {
...
@@ -106,7 +106,7 @@ impl AnthropicStreamConverter {
let
mut
events
=
Vec
::
new
();
let
mut
events
=
Vec
::
new
();
// Capture real token usage from engine when available (typically on the final chunk).
// Capture real token usage from engine when available (typically on the final chunk).
if
let
Some
(
usage
)
=
&
chunk
.usage
{
if
let
Some
(
usage
)
=
&
chunk
.
inner.
usage
{
self
.input_token_count
=
usage
.prompt_tokens
;
self
.input_token_count
=
usage
.prompt_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.cached_token_count
=
usage
self
.cached_token_count
=
usage
...
@@ -115,7 +115,7 @@ impl AnthropicStreamConverter {
...
@@ -115,7 +115,7 @@ impl AnthropicStreamConverter {
.and_then
(|
d
|
d
.cached_tokens
);
.and_then
(|
d
|
d
.cached_tokens
);
}
}
for
choice
in
&
chunk
.choices
{
for
choice
in
&
chunk
.
inner.
choices
{
let
delta
=
&
choice
.delta
;
let
delta
=
&
choice
.delta
;
// Track finish reason
// Track finish reason
...
@@ -444,7 +444,7 @@ impl AnthropicStreamConverter {
...
@@ -444,7 +444,7 @@ impl AnthropicStreamConverter {
)
->
Vec
<
TaggedEvent
>
{
)
->
Vec
<
TaggedEvent
>
{
let
mut
events
=
Vec
::
new
();
let
mut
events
=
Vec
::
new
();
if
let
Some
(
usage
)
=
&
chunk
.usage
{
if
let
Some
(
usage
)
=
&
chunk
.
inner.
usage
{
self
.input_token_count
=
usage
.prompt_tokens
;
self
.input_token_count
=
usage
.prompt_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.cached_token_count
=
usage
self
.cached_token_count
=
usage
...
@@ -453,7 +453,7 @@ impl AnthropicStreamConverter {
...
@@ -453,7 +453,7 @@ impl AnthropicStreamConverter {
.and_then
(|
d
|
d
.cached_tokens
);
.and_then
(|
d
|
d
.cached_tokens
);
}
}
for
choice
in
&
chunk
.choices
{
for
choice
in
&
chunk
.
inner.
choices
{
let
delta
=
&
choice
.delta
;
let
delta
=
&
choice
.delta
;
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
...
@@ -722,27 +722,29 @@ mod tests {
...
@@ -722,27 +722,29 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"chat-1"
.into
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
delta
:
ChatCompletionStreamResponseDelta
{
function_call
:
None
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
tool_calls
:
None
,
function_call
:
None
,
role
:
None
,
tool_calls
:
None
,
refusal
:
None
,
role
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}],
logprobs
:
None
,
created
:
0
,
}],
model
:
"test"
.into
(),
created
:
0
,
service_tier
:
None
,
model
:
"test"
.into
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.into
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -755,35 +757,37 @@ mod tests {
...
@@ -755,35 +757,37 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"chat-1"
.into
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
None
,
delta
:
ChatCompletionStreamResponseDelta
{
function_call
:
None
,
content
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCallChunk
{
function_call
:
None
,
index
:
tc_index
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCallChunk
{
id
:
id
.map
(
String
::
from
),
index
:
tc_index
,
r
#
type
:
Some
(
ChatCompletionToolType
::
Function
),
id
:
id
.map
(
String
::
from
),
function
:
Some
(
FunctionCallStream
{
r
#
type
:
Some
(
ChatCompletionToolType
::
Function
),
name
:
name
.map
(
String
::
from
),
function
:
Some
(
FunctionCallStream
{
arguments
:
args
.map
(
String
::
from
),
name
:
name
.map
(
String
::
from
),
}),
arguments
:
args
.map
(
String
::
from
),
}]),
}),
role
:
None
,
}]),
refusal
:
None
,
role
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}],
logprobs
:
None
,
created
:
0
,
}],
model
:
"test"
.into
(),
created
:
0
,
service_tier
:
None
,
model
:
"test"
.into
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.into
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -908,27 +912,29 @@ mod tests {
...
@@ -908,27 +912,29 @@ mod tests {
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"chat-1"
.into
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
None
,
delta
:
ChatCompletionStreamResponseDelta
{
function_call
:
None
,
content
:
None
,
tool_calls
:
None
,
function_call
:
None
,
role
:
None
,
tool_calls
:
None
,
refusal
:
None
,
role
:
None
,
reasoning_content
:
Some
(
text
.into
()),
refusal
:
None
,
},
reasoning_content
:
Some
(
text
.into
()),
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}],
logprobs
:
None
,
created
:
0
,
}],
model
:
"test"
.into
(),
created
:
0
,
service_tier
:
None
,
model
:
"test"
.into
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.into
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
...
lib/llm/src/protocols/anthropic/types.rs
View file @
2887cd1c
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
//! Anthropic Messages API
types and
conversion logic.
//! Anthropic Messages API conversion logic.
//!
//!
//! All request/response types for the `/v1/messages` endpoint, plus
//! Pure protocol types live in `dynamo_async_openai::types::anthropic`.
//! bidirectional conversion to/from the internal chat completions format.
//! This module provides bidirectional conversion to/from the internal
//! chat completions format used by the Dynamo engine.
// Re-export all pure Anthropic protocol types so existing `use crate::protocols::anthropic::*`
// continues to work throughout dynamo-llm.
pub
use
dynamo_async_openai
::
types
::
anthropic
::
*
;
use
dynamo_async_openai
::
types
::{
use
dynamo_async_openai
::
types
::{
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
...
@@ -17,764 +22,13 @@ use dynamo_async_openai::types::{
...
@@ -17,764 +22,13 @@ use dynamo_async_openai::types::{
ChatCompletionTool
,
ChatCompletionToolChoiceOption
,
ChatCompletionToolType
,
FunctionName
,
ChatCompletionTool
,
ChatCompletionToolChoiceOption
,
ChatCompletionToolType
,
FunctionName
,
FunctionObject
,
ImageUrl
,
ReasoningContent
,
FunctionObject
,
ImageUrl
,
ReasoningContent
,
};
};
use
serde
::{
Deserialize
,
Serialize
};
use
uuid
::
Uuid
;
use
uuid
::
Uuid
;
use
crate
::
protocols
::
openai
::
chat_completions
::{
use
crate
::
protocols
::
openai
::
chat_completions
::{
NvCreateChatCompletionRequest
,
NvCreateChatCompletionResponse
,
NvCreateChatCompletionRequest
,
NvCreateChatCompletionResponse
,
};
};
use
crate
::
protocols
::
openai
::
common_ext
::
CommonExt
;
use
crate
::
protocols
::
openai
::
common_ext
::
CommonExt
;
use
crate
::
protocols
::
openai
::
nvext
::{
CacheControl
,
NvExt
};
use
crate
::
protocols
::
openai
::
nvext
::
NvExt
;
// ---------------------------------------------------------------------------
// Custom deserializers
// ---------------------------------------------------------------------------
/// Normalized system prompt that keeps any `cache_control` supplied through block arrays.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemContent {
    /// Either the plain prompt string or the text of every system block joined together.
    pub text: String,
    /// Cache control taken from the last system block that carried one, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,
}
/// Deserialize `system` from either a plain string or an array of text blocks.
/// The Anthropic API accepts both `"system": "text"` and
/// `"system": [{"type": "text", "text": "...", "cache_control": {...}}]`.
fn
deserialize_system_prompt
<
'de
,
D
>
(
deserializer
:
D
)
->
Result
<
Option
<
SystemContent
>
,
D
::
Error
>
where
D
:
serde
::
Deserializer
<
'de
>
,
{
#[derive(Deserialize)]
#[serde(untagged)]
enum
SystemPrompt
{
Text
(
String
),
Blocks
(
Vec
<
SystemBlock
>
),
}
#[derive(Deserialize)]
struct
SystemBlock
{
text
:
String
,
#[serde(default)]
cache_control
:
Option
<
CacheControl
>
,
}
let
maybe
:
Option
<
SystemPrompt
>
=
Option
::
deserialize
(
deserializer
)
?
;
Ok
(
maybe
.map
(|
sp
|
match
sp
{
SystemPrompt
::
Text
(
s
)
=>
SystemContent
{
text
:
s
,
cache_control
:
None
,
},
SystemPrompt
::
Blocks
(
blocks
)
=>
{
let
cache_control
=
blocks
.iter
()
.rev
()
.find_map
(|
b
|
b
.cache_control
.clone
());
let
text
=
blocks
.into_iter
()
.map
(|
b
|
b
.text
)
.collect
::
<
Vec
<
_
>>
()
.join
(
"
\n
"
);
SystemContent
{
text
,
cache_control
,
}
}
}))
}
// ---------------------------------------------------------------------------
// Request types
// ---------------------------------------------------------------------------
/// Request body accepted by `POST /v1/messages`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicCreateMessageRequest {
    /// Model identifier (e.g. "claude-sonnet-4-20250514").
    pub model: String,

    /// Upper bound on the number of generated tokens.
    pub max_tokens: u32,

    /// Conversation turns.
    pub messages: Vec<AnthropicMessage>,

    /// Optional system prompt; the wire format is a string or an array of
    /// `{"type":"text","text":"..."}` blocks.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_system_prompt"
    )]
    pub system: Option<SystemContent>,

    /// Sampling temperature (0.0 - 1.0).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// Nucleus sampling parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// Top-K sampling parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,

    /// Custom stop sequences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequences: Option<Vec<String>>,

    /// Whether the response should be streamed; defaults to `false` when absent.
    #[serde(default)]
    pub stream: bool,

    /// Optional free-form metadata (e.g. user_id).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<serde_json::Value>,

    /// Tools the model may call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<AnthropicTool>>,

    /// How the model should choose which tool to call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<AnthropicToolChoice>,

    /// Top-level cache control for automatic prompt prefix caching.
    /// When present, all content up to the last cacheable block is cached,
    /// matching the Anthropic Messages API automatic caching mode.
    /// See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#automatic-caching
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,

    /// Extended thinking configuration. When enabled, the model emits
    /// `thinking` content blocks with its internal reasoning ahead of the
    /// final response. `budget_tokens` caps the tokens spent on thinking
    /// (must be ≥ 1024 and < max_tokens).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<ThinkingConfig>,

    /// Service tier selection: `"auto"` or `"standard_only"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub service_tier: Option<String>,

    /// Container identifier for stateful sandbox sessions.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub container: Option<String>,

    /// Output configuration: effort level and optional JSON schema format.
    /// `effort` can be `"low"`, `"medium"`, `"high"`, or `"max"`;
    /// `format` specifies structured JSON output constraints.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<serde_json::Value>,
}
/// Request-level extended thinking settings.
///
/// With `type` set to `"enabled"` the model produces `thinking` content
/// blocks holding its internal reasoning, and `budget_tokens` bounds the
/// tokens available for that reasoning (minimum 1024, must be less than
/// `max_tokens`). With `type` set to `"disabled"` no thinking blocks are
/// produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThinkingConfig {
    /// Either `"enabled"` or `"disabled"`; carried as `type` on the wire.
    #[serde(rename = "type")]
    pub thinking_type: String,
    /// Token budget for internal reasoning. Only meaningful when type is "enabled".
    #[serde(skip_serializing_if = "Option::is_none")]
    pub budget_tokens: Option<u32>,
}
/// One conversation turn.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicMessage {
    /// Who sent the message.
    pub role: AnthropicRole,
    /// Message body; flattened so its `content` key sits directly on the message object.
    #[serde(flatten)]
    pub content: AnthropicMessageContent,
}
/// Sender of a message; serialized in lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum AnthropicRole {
    User,
    Assistant,
}
/// Body of a message: a bare string or a list of structured content blocks.
///
/// The enum is untagged, so the variant is picked from the JSON shape of
/// the `content` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum AnthropicMessageContent {
    /// `content` is a plain string.
    Text { content: String },
    /// `content` is an array of content blocks.
    Blocks { content: Vec<AnthropicContentBlock> },
}
/// One content block inside a message.
///
/// Serialization is derived (internally tagged on `type`, snake_case variant
/// names). Deserialization is hand-written so that block types without a
/// dedicated variant land in [`AnthropicContentBlock::Other`] instead of
/// failing the whole request, since clients such as Claude Code may send
/// block types that are not handled yet.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum AnthropicContentBlock {
    /// Text block. May carry `citations`: references to source documents
    /// that support the text, produced when document/PDF content is provided
    /// and citation mode is enabled.
    Text {
        text: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        citations: Option<Vec<serde_json::Value>>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },

    /// Image block.
    Image { source: AnthropicImageSource },

    /// Tool invocation requested by the assistant.
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },

    /// Tool output supplied by the user.
    ToolResult {
        tool_use_id: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        content: Option<ToolResultContent>,
        #[serde(skip_serializing_if = "Option::is_none")]
        is_error: Option<bool>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },

    /// Assistant thinking block (extended thinking / reasoning).
    Thinking {
        thinking: String,
        signature: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },

    /// Redacted assistant thinking. Holds encrypted reasoning data that is
    /// opaque to the client but has to be sent back verbatim in multi-turn
    /// conversations so the model can keep its chain of thought.
    RedactedThinking { data: String },

    /// Tool call executed server-side by the API (e.g. web search). The
    /// client gets the outcome through a matching `web_search_tool_result`
    /// or similar block.
    ServerToolUse {
        id: String,
        name: String,
        #[serde(default)]
        input: serde_json::Value,
    },

    /// Structured result of a server-side tool (e.g. web search results).
    WebSearchToolResult {
        tool_use_id: String,
        #[serde(default)]
        content: serde_json::Value,
    },

    /// Fallback for unrecognized block types. Keeps the complete JSON value
    /// so new Anthropic features neither break the endpoint nor get lost.
    #[serde(untagged)]
    Other(serde_json::Value),
}
/// Payload of a `tool_result` block. The Anthropic API allows either a plain
/// string or an array of content blocks, so the enum is untagged.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    /// Plain string payload.
    Text(String),
    /// Array-of-blocks payload.
    Blocks(Vec<ToolResultContentBlock>),
}
impl ToolResultContent {
    /// Consume the content and return its text.
    ///
    /// A plain string is returned unchanged. For the array form, the text of
    /// every text block is concatenated with no separator and non-text blocks
    /// are dropped.
    pub fn into_text(self) -> String {
        match self {
            Self::Text(text) => text,
            Self::Blocks(blocks) => blocks
                .into_iter()
                .filter_map(|block| match block {
                    ToolResultContentBlock::Text { text } => Some(text),
                    ToolResultContentBlock::Other(_) => None,
                })
                .collect(),
        }
    }
}
/// Element of a `tool_result.content` array.
///
/// Untagged: an object with a string `text` field becomes `Text`, and
/// anything else is preserved as raw JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContentBlock {
    /// Block carrying text.
    Text { text: String },
    /// Fallback for non-text blocks (images, etc.) in tool results.
    Other(serde_json::Value),
}
/// Custom deserializer for `AnthropicContentBlock` that handles unknown types
/// gracefully. Since serde's `#[serde(other)]` is not supported on internally
/// tagged enums, we deserialize as `Value` first and dispatch manually.
impl
<
'de
>
Deserialize
<
'de
>
for
AnthropicContentBlock
{
fn
deserialize
<
D
>
(
deserializer
:
D
)
->
Result
<
Self
,
D
::
Error
>
where
D
:
serde
::
Deserializer
<
'de
>
,
{
let
value
=
serde_json
::
Value
::
deserialize
(
deserializer
)
?
;
let
block_type
=
value
.get
(
"type"
)
.and_then
(|
t
|
t
.as_str
())
.unwrap_or
(
""
)
.to_string
();
match
block_type
.as_str
()
{
"text"
=>
{
let
text
=
value
.get
(
"text"
)
.and_then
(|
t
|
t
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"text"
))
?
.to_string
();
let
citations
:
Option
<
Vec
<
serde_json
::
Value
>>
=
value
.get
(
"citations"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
Text
{
text
,
citations
,
cache_control
,
})
}
"image"
=>
{
let
source
:
AnthropicImageSource
=
serde_json
::
from_value
(
value
.get
(
"source"
)
.cloned
()
.unwrap_or_default
())
.map_err
(
serde
::
de
::
Error
::
custom
)
?
;
Ok
(
AnthropicContentBlock
::
Image
{
source
})
}
"tool_use"
=>
{
let
id
=
value
.get
(
"id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"id"
))
?
.to_string
();
let
name
=
value
.get
(
"name"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"name"
))
?
.to_string
();
let
input
=
value
.get
(
"input"
)
.cloned
()
.unwrap_or
(
serde_json
::
json!
({}));
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
ToolUse
{
id
,
name
,
input
,
cache_control
,
})
}
"tool_result"
=>
{
let
tool_use_id
=
value
.get
(
"tool_use_id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"tool_use_id"
))
?
.to_string
();
let
content
:
Option
<
ToolResultContent
>
=
value
.get
(
"content"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
let
is_error
=
value
.get
(
"is_error"
)
.and_then
(|
v
|
v
.as_bool
());
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
ToolResult
{
tool_use_id
,
content
,
is_error
,
cache_control
,
})
}
"thinking"
=>
{
let
thinking
=
value
.get
(
"thinking"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"thinking"
))
?
.to_string
();
let
signature
=
value
.get
(
"signature"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"signature"
))
?
.to_string
();
let
cache_control
:
Option
<
CacheControl
>
=
value
.get
(
"cache_control"
)
.cloned
()
.and_then
(|
v
|
serde_json
::
from_value
(
v
)
.ok
());
Ok
(
AnthropicContentBlock
::
Thinking
{
thinking
,
signature
,
cache_control
,
})
}
"redacted_thinking"
=>
{
let
data
=
value
.get
(
"data"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"data"
))
?
.to_string
();
Ok
(
AnthropicContentBlock
::
RedactedThinking
{
data
})
}
"server_tool_use"
=>
{
let
id
=
value
.get
(
"id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"id"
))
?
.to_string
();
let
name
=
value
.get
(
"name"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"name"
))
?
.to_string
();
let
input
=
value
.get
(
"input"
)
.cloned
()
.unwrap_or
(
serde_json
::
json!
({}));
Ok
(
AnthropicContentBlock
::
ServerToolUse
{
id
,
name
,
input
})
}
"web_search_tool_result"
=>
{
let
tool_use_id
=
value
.get
(
"tool_use_id"
)
.and_then
(|
v
|
v
.as_str
())
.ok_or_else
(||
serde
::
de
::
Error
::
missing_field
(
"tool_use_id"
))
?
.to_string
();
let
content
=
value
.get
(
"content"
)
.cloned
()
.unwrap_or
(
serde_json
::
json!
([]));
Ok
(
AnthropicContentBlock
::
WebSearchToolResult
{
tool_use_id
,
content
,
})
}
other
=>
{
tracing
::
debug!
(
"Unrecognized Anthropic content block type '{}', preserving as Other"
,
other
);
Ok
(
AnthropicContentBlock
::
Other
(
value
))
}
}
}
}
/// Source descriptor of an image content block.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicImageSource {
    /// Source kind; carried as `type` on the wire.
    #[serde(rename = "type")]
    pub source_type: String,
    /// Media type string of the image.
    pub media_type: String,
    /// Image payload.
    pub data: String,
}
/// Definition of a tool offered to the model.
///
/// Client (custom) tools need `name` + `input_schema`. Server tools
/// (web_search, bash, text_editor, code_execution, etc.) are told apart by
/// their `type` field (e.g. `"web_search_20260209"`) and may lack
/// `input_schema`. Everything except `name` is optional so that both kinds
/// deserialize and can be passed through to the backend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicTool {
    /// Tool name (required for client tools, also present on server tools).
    pub name: String,

    /// Tool type discriminator. Client tools use `"custom"` (or omit it);
    /// server tools use versioned types such as `"web_search_20260209"`.
    #[serde(default, rename = "type", skip_serializing_if = "Option::is_none")]
    pub tool_type: Option<String>,

    /// Optional human-readable description.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// JSON Schema of the tool input. Required for client tools, absent on
    /// server tools (which define their input shape server-side).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_schema: Option<serde_json::Value>,

    /// Cache control breakpoint attached to this tool definition.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,
}
/// How the model should pick a tool.
///
/// Untagged; variant order matters because serde tries them top to bottom.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum AnthropicToolChoice {
    /// A specific tool: `{type: "tool", name: "..."}`.
    /// Listed ahead of `Simple` so the stricter shape is attempted first.
    Named(AnthropicToolChoiceNamed),
    /// Mode only: "auto", "any", or "none".
    Simple(AnthropicToolChoiceSimple),
}
/// Tool choice expressed as a mode without a tool name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicToolChoiceSimple {
    /// Selected mode; carried as `type` on the wire.
    #[serde(rename = "type")]
    pub choice_type: AnthropicToolChoiceMode,
    /// When true, the model calls tools one at a time rather than possibly
    /// issuing several tool calls in one response.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub disable_parallel_tool_use: Option<bool>,
}
/// Value of the `type` field of a tool choice; serialized in lowercase
/// (`"auto"`, `"any"`, `"none"`, `"tool"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum AnthropicToolChoiceMode {
    Auto,
    Any,
    None,
    Tool,
}
/// Tool choice that names a specific tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicToolChoiceNamed {
    /// Selected mode; carried as `type` on the wire.
    #[serde(rename = "type")]
    pub choice_type: AnthropicToolChoiceMode,
    /// Name of the tool to call.
    pub name: String,
    /// When true, the model calls tools one at a time rather than possibly
    /// issuing several tool calls in one response.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub disable_parallel_tool_use: Option<bool>,
}
// ---------------------------------------------------------------------------
// Response types
// ---------------------------------------------------------------------------
/// Non-streaming response body of `POST /v1/messages`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicMessageResponse {
    /// Message identifier.
    pub id: String,
    /// Object kind; carried as `type` on the wire.
    #[serde(rename = "type")]
    pub object_type: String,
    /// Role of the message author.
    pub role: String,
    /// Generated content blocks.
    pub content: Vec<AnthropicResponseContentBlock>,
    /// Model that produced the response.
    pub model: String,
    /// Why generation stopped, when known.
    pub stop_reason: Option<AnthropicStopReason>,
    /// Stop sequence reported with the response, if any.
    pub stop_sequence: Option<String>,
    /// Token accounting.
    pub usage: AnthropicUsage,
}
/// One content block of a response.
///
/// The Anthropic API returns up to 12 different block types. The common ones
/// get explicit variants; the rest fall into `Other` so the proxy can forward
/// them without losing data. Internally tagged on `type` with snake_case
/// variant names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum AnthropicResponseContentBlock {
    /// Reasoning text together with its signature.
    Thinking { thinking: String, signature: String },

    /// Text, optionally accompanied by citations.
    Text {
        text: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        citations: Option<Vec<serde_json::Value>>,
    },

    /// Tool invocation requested by the model.
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },

    /// Redacted reasoning payload.
    RedactedThinking { data: String },

    /// Server-side tool invocation.
    ServerToolUse {
        id: String,
        name: String,
        #[serde(default)]
        input: serde_json::Value,
    },

    /// Result of a server-side web search tool call.
    WebSearchToolResult {
        tool_use_id: String,
        #[serde(default)]
        content: serde_json::Value,
    },

    /// Fallback for new/uncommon block types (web_fetch_tool_result,
    /// code_execution_tool_result, container_upload, etc.) so the proxy can
    /// serialize them back without data loss.
    #[serde(untagged)]
    Other(serde_json::Value),
}
/// Token accounting attached to responses and stream events.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AnthropicUsage {
    /// Tokens consumed by the input.
    pub input_tokens: u32,
    /// Tokens produced in the output.
    pub output_tokens: u32,
    /// Input tokens spent creating a new cache entry.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u32>,
    /// Input tokens served from the prompt cache (prefix cache hits).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u32>,
}
/// Why generation ended; serialized in snake_case (e.g. `"end_turn"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicStopReason {
    EndTurn,
    MaxTokens,
    StopSequence,
    ToolUse,
    /// The model paused to hand back control in an agentic loop and intends
    /// to continue in a later turn. Used with extended thinking / tool use.
    PauseTurn,
    /// The model declined to generate content (safety refusal).
    Refusal,
}
// ---------------------------------------------------------------------------
// Streaming types
// ---------------------------------------------------------------------------
/// SSE event types for the Anthropic streaming API.
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
#[serde(tag
=
"type"
)]
pub
enum
AnthropicStreamEvent
{
#[serde(rename
=
"message_start"
)]
MessageStart
{
message
:
AnthropicMessageResponse
},
#[serde(rename
=
"content_block_start"
)]
ContentBlockStart
{
index
:
u32
,
content_block
:
AnthropicResponseContentBlock
,
},
#[serde(rename
=
"content_block_delta"
)]
ContentBlockDelta
{
index
:
u32
,
delta
:
AnthropicDelta
},
#[serde(rename
=
"content_block_stop"
)]
ContentBlockStop
{
index
:
u32
},
#[serde(rename
=
"message_delta"
)]
MessageDelta
{
delta
:
AnthropicMessageDeltaBody
,
usage
:
AnthropicUsage
,
},
#[serde(rename
=
"message_stop"
)]
MessageStop
{},
#[serde(rename
=
"ping"
)]
Ping
{},
#[serde(rename
=
"error"
)]
Error
{
error
:
AnthropicErrorBody
},
}
/// Payload of a streaming `content_block_delta` event.
///
/// Internally tagged on `type` with snake_case names (`text_delta`, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum AnthropicDelta {
    /// Additional thinking text.
    ThinkingDelta { thinking: String },
    /// Additional response text.
    TextDelta { text: String },
    /// Fragment of a JSON input document.
    InputJsonDelta { partial_json: String },
    /// Incremental signature for a thinking block (sent at the end).
    SignatureDelta { signature: String },
    /// Incremental citation attached to a text block.
    CitationsDelta { citation: serde_json::Value },
}
/// `delta` object carried by a `message_delta` event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicMessageDeltaBody {
    /// Why generation stopped, when known.
    pub stop_reason: Option<AnthropicStopReason>,
    /// Stop sequence reported with the delta; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
}
// ---------------------------------------------------------------------------
// Error types
// ---------------------------------------------------------------------------
/// Anthropic API error response wrapper.
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
pub
struct
AnthropicErrorResponse
{
#[serde(rename
=
"type"
)]
pub
object_type
:
String
,
pub
error
:
AnthropicErrorBody
,
}
/// Details object nested in an error response or an `error` stream event.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnthropicErrorBody {
    /// Error category; carried as `type` on the wire.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable description.
    pub message: String,
}
impl
AnthropicErrorResponse
{
/// Create an `invalid_request_error` response.
pub
fn
invalid_request
(
message
:
impl
Into
<
String
>
)
->
Self
{
Self
{
object_type
:
"error"
.to_string
(),
error
:
AnthropicErrorBody
{
error_type
:
"invalid_request_error"
.to_string
(),
message
:
message
.into
(),
},
}
}
/// Create an `api_error` (internal server error) response.
pub
fn
api_error
(
message
:
impl
Into
<
String
>
)
->
Self
{
Self
{
object_type
:
"error"
.to_string
(),
error
:
AnthropicErrorBody
{
error_type
:
"api_error"
.to_string
(),
message
:
message
.into
(),
},
}
}
/// Create a `not_found_error` response.
pub
fn
not_found
(
message
:
impl
Into
<
String
>
)
->
Self
{
Self
{
object_type
:
"error"
.to_string
(),
error
:
AnthropicErrorBody
{
error_type
:
"not_found_error"
.to_string
(),
message
:
message
.into
(),
},
}
}
}
// ---------------------------------------------------------------------------
// Conversion: AnthropicCreateMessageRequest -> NvCreateChatCompletionRequest
// ---------------------------------------------------------------------------
impl
TryFrom
<
AnthropicCreateMessageRequest
>
for
NvCreateChatCompletionRequest
{
impl
TryFrom
<
AnthropicCreateMessageRequest
>
for
NvCreateChatCompletionRequest
{
type
Error
=
anyhow
::
Error
;
type
Error
=
anyhow
::
Error
;
...
@@ -1199,11 +453,6 @@ fn convert_anthropic_tool_choice(tc: &AnthropicToolChoice) -> ChatCompletionTool
...
@@ -1199,11 +453,6 @@ fn convert_anthropic_tool_choice(tc: &AnthropicToolChoice) -> ChatCompletionTool
}
}
}
}
}
}
// ---------------------------------------------------------------------------
// Conversion: NvCreateChatCompletionResponse -> AnthropicMessageResponse
// ---------------------------------------------------------------------------
/// Convert a completed chat completion response into an Anthropic Messages response.
/// Convert a completed chat completion response into an Anthropic Messages response.
pub
fn
chat_completion_to_anthropic_response
(
pub
fn
chat_completion_to_anthropic_response
(
chat_resp
:
NvCreateChatCompletionResponse
,
chat_resp
:
NvCreateChatCompletionResponse
,
...
@@ -1211,7 +460,7 @@ pub fn chat_completion_to_anthropic_response(
...
@@ -1211,7 +460,7 @@ pub fn chat_completion_to_anthropic_response(
)
->
AnthropicMessageResponse
{
)
->
AnthropicMessageResponse
{
let
msg_id
=
format!
(
"msg_{}"
,
Uuid
::
new_v4
()
.simple
());
let
msg_id
=
format!
(
"msg_{}"
,
Uuid
::
new_v4
()
.simple
());
let
choice
=
chat_resp
.choices
.into_iter
()
.next
();
let
choice
=
chat_resp
.
inner.
choices
.into_iter
()
.next
();
let
mut
content
=
Vec
::
new
();
let
mut
content
=
Vec
::
new
();
let
mut
stop_reason
=
None
;
let
mut
stop_reason
=
None
;
...
@@ -1282,6 +531,7 @@ pub fn chat_completion_to_anthropic_response(
...
@@ -1282,6 +531,7 @@ pub fn chat_completion_to_anthropic_response(
// Map usage
// Map usage
let
usage
=
chat_resp
let
usage
=
chat_resp
.inner
.usage
.usage
.map
(|
u
|
{
.map
(|
u
|
{
let
cache_read_input_tokens
=
u
let
cache_read_input_tokens
=
u
...
@@ -1308,111 +558,6 @@ pub fn chat_completion_to_anthropic_response(
...
@@ -1308,111 +558,6 @@ pub fn chat_completion_to_anthropic_response(
usage
,
usage
,
}
}
}
}
// ---------------------------------------------------------------------------
// Count tokens
// ---------------------------------------------------------------------------
/// Request body accepted by `POST /v1/messages/count_tokens`.
#[derive(Debug, Clone, Deserialize)]
pub struct AnthropicCountTokensRequest {
    /// Model identifier.
    pub model: String,
    /// Conversation turns to count.
    pub messages: Vec<AnthropicMessage>,
    /// Optional system prompt, in string or block-array form.
    #[serde(
        default,
        skip_serializing_if = "Option::is_none",
        deserialize_with = "deserialize_system_prompt"
    )]
    pub system: Option<SystemContent>,
    /// Optional tool definitions to include in the count.
    #[serde(default)]
    pub tools: Option<Vec<AnthropicTool>>,
}
/// Response body returned by `POST /v1/messages/count_tokens`.
#[derive(Debug, Clone, Serialize)]
pub struct AnthropicCountTokensResponse {
    /// Input token count reported to the client.
    pub input_tokens: u32,
}
impl
AnthropicCountTokensRequest
{
/// Estimate input token count using a `len/3` heuristic.
pub
fn
estimate_tokens
(
&
self
)
->
u32
{
let
mut
total_len
:
usize
=
0
;
if
let
Some
(
system
)
=
&
self
.system
{
total_len
+=
system
.text
.len
();
}
for
msg
in
&
self
.messages
{
// Count role
total_len
+=
match
msg
.role
{
AnthropicRole
::
User
=>
4
,
AnthropicRole
::
Assistant
=>
9
,
};
// Count content
match
&
msg
.content
{
AnthropicMessageContent
::
Text
{
content
}
=>
total_len
+=
content
.len
(),
AnthropicMessageContent
::
Blocks
{
content
}
=>
{
for
block
in
content
{
total_len
+=
estimate_block_len
(
block
);
}
}
}
}
if
let
Some
(
tools
)
=
&
self
.tools
{
for
tool
in
tools
{
total_len
+=
tool
.name
.len
();
if
let
Some
(
desc
)
=
&
tool
.description
{
total_len
+=
desc
.len
();
}
if
let
Some
(
schema
)
=
&
tool
.input_schema
{
total_len
+=
schema
.to_string
()
.len
();
}
}
}
let
tokens
=
total_len
/
3
;
if
tokens
==
0
&&
total_len
>
0
{
1
}
else
{
tokens
as
u32
}
}
}
fn
estimate_block_len
(
block
:
&
AnthropicContentBlock
)
->
usize
{
match
block
{
AnthropicContentBlock
::
Text
{
text
,
..
}
=>
text
.len
(),
AnthropicContentBlock
::
ToolUse
{
name
,
input
,
..
}
=>
name
.len
()
+
input
.to_string
()
.len
(),
AnthropicContentBlock
::
ToolResult
{
content
,
..
}
=>
content
.as_ref
()
.map
(|
c
|
match
c
{
ToolResultContent
::
Text
(
s
)
=>
s
.len
(),
ToolResultContent
::
Blocks
(
blocks
)
=>
blocks
.iter
()
.map
(|
b
|
match
b
{
ToolResultContentBlock
::
Text
{
text
}
=>
text
.len
(),
ToolResultContentBlock
::
Other
(
v
)
=>
v
.to_string
()
.len
(),
})
.sum
(),
})
.unwrap_or
(
0
),
AnthropicContentBlock
::
Thinking
{
thinking
,
..
}
=>
thinking
.len
(),
AnthropicContentBlock
::
RedactedThinking
{
data
,
..
}
=>
data
.len
(),
AnthropicContentBlock
::
ServerToolUse
{
name
,
input
,
..
}
=>
{
name
.len
()
+
input
.to_string
()
.len
()
}
AnthropicContentBlock
::
WebSearchToolResult
{
content
,
..
}
=>
content
.to_string
()
.len
(),
AnthropicContentBlock
::
Image
{
..
}
=>
256
,
// rough estimate for image metadata
AnthropicContentBlock
::
Other
(
v
)
=>
v
.to_string
()
.len
(),
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
#[cfg(test)]
mod
tests
{
mod
tests
{
use
super
::
*
;
use
super
::
*
;
...
@@ -1656,38 +801,40 @@ mod tests {
...
@@ -1656,38 +801,40 @@ mod tests {
#[test]
#[test]
fn
test_chat_completion_to_anthropic_response
()
{
fn
test_chat_completion_to_anthropic_response
()
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[
dynamo_async_openai
::
types
::
ChatChoice
{
id
:
"chatcmpl-xyz"
.into
(),
index
:
0
,
choices
:
vec!
[
dynamo_async_openai
::
types
::
ChatChoice
{
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
index
:
0
,
content
:
Some
(
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
:
Some
(
"Hello!"
.to_string
(),
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"Hello!"
.to_string
(),
),
),
),
)
,
refusal
:
None
,
refusal
:
None
,
tool_calls
:
None
,
tool_calls
:
None
,
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
function_call
:
None
,
function_call
:
None
,
audio
:
None
,
audio
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
}
,
}
,
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
,
finish
_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
,
stop
_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
logprobs
:
None
,
}]
,
}]
,
created
:
1726000000
,
created
:
1726000000
,
model
:
"test-model"
.into
()
,
model
:
"test-model"
.into
()
,
service_tier
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
()
,
object
:
"chat.completion"
.to_string
(),
usage
:
Some
(
dynamo_async_openai
::
types
::
CompletionUsage
{
usage
:
Some
(
dynamo_async_openai
::
types
::
CompletionUsage
{
prompt_tokens
:
10
,
prompt
_tokens
:
10
,
completion
_tokens
:
5
,
completion
_tokens
:
5
,
total
_tokens
:
1
5
,
total_tokens
:
15
,
prompt_tokens_details
:
None
,
prompt
_tokens_details
:
None
,
completion
_tokens_details
:
None
,
completion_tokens_details
:
None
,
})
,
}
)
,
},
nvext
:
None
,
nvext
:
None
,
};
};
...
...
lib/llm/src/protocols/openai/chat_completions.rs
View file @
2887cd1c
...
@@ -64,21 +64,24 @@ pub struct NvCreateChatCompletionRequest {
...
@@ -64,21 +64,24 @@ pub struct NvCreateChatCompletionRequest {
}
}
/// A response structure for unary chat completion responses, embedding OpenAI's
/// A response structure for unary chat completion responses, embedding OpenAI's
/// `CreateChatCompletionResponse`.
/// `CreateChatCompletionResponse` with optional NVIDIA extension metadata.
///
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
/// # Fields
pub
struct
NvCreateChatCompletionResponse
{
/// - `inner`: The base OpenAI unary chat completion response, embedded
#[serde(flatten)]
/// using `serde(flatten)`.
pub
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
,
pub
type
NvCreateChatCompletionResponse
=
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
;
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
/// A response structure for streamed chat completions, embedding OpenAI's
/// A response structure for streamed chat completions, embedding OpenAI's
/// `CreateChatCompletionStreamResponse`.
/// `CreateChatCompletionStreamResponse` with optional NVIDIA extension metadata.
///
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
/// # Fields
pub
struct
NvCreateChatCompletionStreamResponse
{
/// - `inner`: The base OpenAI streaming chat completion response, embedded
#[serde(flatten)]
/// using `serde(flatten)`.
pub
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
,
pub
type
NvCreateChatCompletionStreamResponse
=
#[serde(skip_serializing_if
=
"Option::is_none"
)]
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
;
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
/// providing access to NVIDIA-specific extensions.
/// providing access to NVIDIA-specific extensions.
...
...
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
2887cd1c
...
@@ -136,16 +136,16 @@ impl DeltaAggregator {
...
@@ -136,16 +136,16 @@ impl DeltaAggregator {
if
aggregator
.error
.is_none
()
if
aggregator
.error
.is_none
()
&&
let
Some
(
delta
)
=
delta
.data
&&
let
Some
(
delta
)
=
delta
.data
{
{
aggregator
.id
=
delta
.id
;
aggregator
.id
=
delta
.
inner.
id
;
aggregator
.model
=
delta
.model
;
aggregator
.model
=
delta
.
inner.
model
;
aggregator
.created
=
delta
.created
;
aggregator
.created
=
delta
.
inner.
created
;
aggregator
.service_tier
=
delta
.service_tier
;
aggregator
.service_tier
=
delta
.
inner.
service_tier
;
// Aggregate usage statistics if available.
// Aggregate usage statistics if available.
if
let
Some
(
usage
)
=
delta
.usage
{
if
let
Some
(
usage
)
=
delta
.
inner.
usage
{
aggregator
.usage
=
Some
(
usage
);
aggregator
.usage
=
Some
(
usage
);
}
}
if
let
Some
(
system_fingerprint
)
=
delta
.system_fingerprint
{
if
let
Some
(
system_fingerprint
)
=
delta
.
inner.
system_fingerprint
{
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
}
}
...
@@ -155,7 +155,7 @@ impl DeltaAggregator {
...
@@ -155,7 +155,7 @@ impl DeltaAggregator {
}
}
// Aggregate choices incrementally.
// Aggregate choices incrementally.
for
choice
in
delta
.choices
{
for
choice
in
delta
.
inner.
choices
{
let
state_choice
=
let
state_choice
=
aggregator
aggregator
.choices
.choices
...
@@ -267,14 +267,16 @@ impl DeltaAggregator {
...
@@ -267,14 +267,16 @@ impl DeltaAggregator {
// Construct the final response object.
// Construct the final response object.
let
response
=
NvCreateChatCompletionResponse
{
let
response
=
NvCreateChatCompletionResponse
{
id
:
aggregator
.id
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
aggregator
.created
,
id
:
aggregator
.id
,
usage
:
aggregator
.usage
,
created
:
aggregator
.created
,
model
:
aggregator
.model
,
usage
:
aggregator
.usage
,
object
:
"chat.completion"
.to_string
(),
model
:
aggregator
.model
,
system_fingerprint
:
aggregator
.system_fingerprint
,
object
:
"chat.completion"
.to_string
(),
choices
,
system_fingerprint
:
aggregator
.system_fingerprint
,
service_tier
:
aggregator
.service_tier
,
choices
,
service_tier
:
aggregator
.service_tier
,
},
nvext
:
aggregator
.nvext
,
nvext
:
aggregator
.nvext
,
};
};
...
@@ -360,7 +362,7 @@ pub trait ChatCompletionAggregator {
...
@@ -360,7 +362,7 @@ pub trait ChatCompletionAggregator {
)
->
Result
<
NvCreateChatCompletionResponse
,
String
>
;
)
->
Result
<
NvCreateChatCompletionResponse
,
String
>
;
}
}
impl
ChatCompletionAggregator
for
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
impl
ChatCompletionAggregator
for
Nv
CreateChatCompletionResponse
{
async
fn
from_annotated_stream
(
async
fn
from_annotated_stream
(
stream
:
impl
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
stream
:
impl
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
parsing_options
:
ParsingOptions
,
parsing_options
:
ParsingOptions
,
...
@@ -445,14 +447,16 @@ mod tests {
...
@@ -445,14 +447,16 @@ mod tests {
};
};
let
data
=
NvCreateChatCompletionStreamResponse
{
let
data
=
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
model
:
"meta/llama-3.1-8b-instruct"
.to_string
(),
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
model
:
"meta/llama-3.1-8b-instruct"
.to_string
(),
service_tier
:
None
,
created
:
1234567890
,
usage
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
usage
:
None
,
choices
:
vec!
[
choice
],
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[
choice
],
object
:
"chat.completion"
.to_string
(),
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -479,13 +483,13 @@ mod tests {
...
@@ -479,13 +483,13 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// Verify that the response is empty and has default values
// Verify that the response is empty and has default values
assert_eq!
(
response
.id
,
""
);
assert_eq!
(
response
.
inner.
id
,
""
);
assert_eq!
(
response
.model
,
""
);
assert_eq!
(
response
.
inner.
model
,
""
);
assert_eq!
(
response
.created
,
0
);
assert_eq!
(
response
.
inner.
created
,
0
);
assert
!
(
response
.usage
.is_none
());
assert
!
(
response
.
inner.
usage
.is_none
());
assert
!
(
response
.system_fingerprint
.is_none
());
assert
!
(
response
.
inner.
system_fingerprint
.is_none
());
assert_eq!
(
response
.choices
.len
(),
0
);
assert_eq!
(
response
.
inner.
choices
.len
(),
0
);
assert
!
(
response
.service_tier
.is_none
());
assert
!
(
response
.
inner.
service_tier
.is_none
());
}
}
#[tokio::test]
#[tokio::test]
...
@@ -511,13 +515,13 @@ mod tests {
...
@@ -511,13 +515,13 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// Verify the response fields
// Verify the response fields
assert_eq!
(
response
.id
,
"test_id"
);
assert_eq!
(
response
.
inner.
id
,
"test_id"
);
assert_eq!
(
response
.model
,
"meta/llama-3.1-8b-instruct"
);
assert_eq!
(
response
.
inner.
model
,
"meta/llama-3.1-8b-instruct"
);
assert_eq!
(
response
.created
,
1234567890
);
assert_eq!
(
response
.
inner.
created
,
1234567890
);
assert
!
(
response
.usage
.is_none
());
assert
!
(
response
.
inner.
usage
.is_none
());
assert
!
(
response
.system_fingerprint
.is_none
());
assert
!
(
response
.
inner.
system_fingerprint
.is_none
());
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
assert_eq!
(
choice
.message.content
.as_ref
()
.unwrap
(),
choice
.message.content
.as_ref
()
.unwrap
(),
...
@@ -525,7 +529,7 @@ mod tests {
...
@@ -525,7 +529,7 @@ mod tests {
);
);
assert
!
(
choice
.finish_reason
.is_none
());
assert
!
(
choice
.finish_reason
.is_none
());
assert_eq!
(
choice
.message.role
,
dynamo_async_openai
::
types
::
Role
::
User
);
assert_eq!
(
choice
.message.role
,
dynamo_async_openai
::
types
::
Role
::
User
);
assert
!
(
response
.service_tier
.is_none
());
assert
!
(
response
.
inner.
service_tier
.is_none
());
}
}
#[tokio::test]
#[tokio::test]
...
@@ -562,8 +566,8 @@ mod tests {
...
@@ -562,8 +566,8 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// Verify the response fields
// Verify the response fields
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
assert_eq!
(
choice
.message.content
.as_ref
()
.unwrap
(),
choice
.message.content
.as_ref
()
.unwrap
(),
...
@@ -630,8 +634,8 @@ mod tests {
...
@@ -630,8 +634,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
assert_eq!
(
...
@@ -653,43 +657,49 @@ mod tests {
...
@@ -653,43 +657,49 @@ mod tests {
// Create a delta with multiple choices
// Create a delta with multiple choices
// ALLOW: function_call is deprecated
// ALLOW: function_call is deprecated
let
data
=
NvCreateChatCompletionStreamResponse
{
let
data
=
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
model
:
"test_model"
.to_string
(),
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
model
:
"test_model"
.to_string
(),
service_tier
:
None
,
created
:
1234567890
,
usage
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
usage
:
None
,
choices
:
vec!
[
system_fingerprint
:
None
,
dynamo_async_openai
::
types
::
ChatChoiceStream
{
choices
:
vec!
[
index
:
0
,
dynamo_async_openai
::
types
::
ChatChoiceStream
{
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
index
:
0
,
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
"Choice 0"
.to_string
())),
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
function_call
:
None
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
tool_calls
:
None
,
"Choice 0"
.to_string
(),
refusal
:
None
,
)),
reasoning_content
:
None
,
function_call
:
None
,
tool_calls
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
},
},
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
dynamo_async_openai
::
types
::
ChatChoiceStream
{
stop_reason
:
None
,
index
:
1
,
logprobs
:
None
,
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
},
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
dynamo_async_openai
::
types
::
ChatChoiceStream
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
index
:
1
,
"Choice 1"
.to_string
(),
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
)),
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
function_call
:
None
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
"Choice 1"
.to_string
())),
tool_calls
:
None
,
function_call
:
None
,
refusal
:
None
,
tool_calls
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
},
},
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
],
stop_reason
:
None
,
object
:
"chat.completion"
.to_string
(),
logprobs
:
None
,
},
},
],
object
:
"chat.completion"
.to_string
(),
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -711,9 +721,9 @@ mod tests {
...
@@ -711,9 +721,9 @@ mod tests {
let
mut
response
=
result
.unwrap
();
let
mut
response
=
result
.unwrap
();
// Verify the response fields
// Verify the response fields
assert_eq!
(
response
.choices
.len
(),
2
);
assert_eq!
(
response
.
inner.
choices
.len
(),
2
);
response
.choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
// Ensure the choices are ordered
response
.
inner.
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
// Ensure the choices are ordered
let
choice0
=
&
response
.choices
[
0
];
let
choice0
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice0
.index
,
0
);
assert_eq!
(
choice0
.index
,
0
);
assert_eq!
(
assert_eq!
(
choice0
.message.content
.as_ref
()
.unwrap
(),
choice0
.message.content
.as_ref
()
.unwrap
(),
...
@@ -728,7 +738,7 @@ mod tests {
...
@@ -728,7 +738,7 @@ mod tests {
dynamo_async_openai
::
types
::
Role
::
Assistant
dynamo_async_openai
::
types
::
Role
::
Assistant
);
);
let
choice1
=
&
response
.choices
[
1
];
let
choice1
=
&
response
.
inner.
choices
[
1
];
assert_eq!
(
choice1
.index
,
1
);
assert_eq!
(
choice1
.index
,
1
);
assert_eq!
(
assert_eq!
(
choice1
.message.content
.as_ref
()
.unwrap
(),
choice1
.message.content
.as_ref
()
.unwrap
(),
...
@@ -773,8 +783,8 @@ mod tests {
...
@@ -773,8 +783,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls are present
// Verify tool calls are present
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -816,8 +826,8 @@ mod tests {
...
@@ -816,8 +826,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls are present
// Verify tool calls are present
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -859,8 +869,8 @@ mod tests {
...
@@ -859,8 +869,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls are present
// Verify tool calls are present
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -900,8 +910,8 @@ mod tests {
...
@@ -900,8 +910,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify no tool calls are present
// Verify no tool calls are present
assert
!
(
choice
.message.tool_calls
.is_none
());
assert
!
(
choice
.message.tool_calls
.is_none
());
...
@@ -928,7 +938,7 @@ mod tests {
...
@@ -928,7 +938,7 @@ mod tests {
// Manually set empty tool calls array
// Manually set empty tool calls array
if
let
Some
(
ref
mut
data
)
=
annotated_delta
.data
{
if
let
Some
(
ref
mut
data
)
=
annotated_delta
.data
{
data
.choices
[
0
]
.delta.tool_calls
=
Some
(
vec!
[]);
// Empty tool calls array
data
.
inner.
choices
[
0
]
.delta.tool_calls
=
Some
(
vec!
[]);
// Empty tool calls array
}
}
let
data
=
annotated_delta
.data
.unwrap
();
let
data
=
annotated_delta
.data
.unwrap
();
...
@@ -945,8 +955,8 @@ mod tests {
...
@@ -945,8 +955,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls array is empty
// Verify tool calls array is empty
assert
!
(
choice
.message.tool_calls
.is_none
());
assert
!
(
choice
.message.tool_calls
.is_none
());
...
@@ -992,8 +1002,8 @@ mod tests {
...
@@ -992,8 +1002,8 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// There should be one choice
// There should be one choice
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// The tool_calls field should be present and parsed
// The tool_calls field should be present and parsed
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -1050,8 +1060,8 @@ mod tests {
...
@@ -1050,8 +1060,8 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// There should be one choice
// There should be one choice
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// The finish_reason should be ToolCalls, not Stop, because tool calls are present
// The finish_reason should be ToolCalls, not Stop, because tool calls are present
assert_eq!
(
assert_eq!
(
...
...
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
2887cd1c
...
@@ -278,19 +278,21 @@ impl DeltaGenerator {
...
@@ -278,19 +278,21 @@ impl DeltaGenerator {
// According to OpenAI spec: when stream_options.include_usage is true,
// According to OpenAI spec: when stream_options.include_usage is true,
// all intermediate chunks should have usage: null
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
// The final usage chunk will be sent separately with empty choices
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
self
.object
.clone
(),
id
:
self
.id
.clone
(),
created
:
self
.created
,
object
:
self
.object
.clone
(),
model
:
self
.model
.clone
(),
created
:
self
.created
,
system_fingerprint
:
self
.system_fingerprint
.clone
(),
model
:
self
.model
.clone
(),
choices
,
system_fingerprint
:
self
.system_fingerprint
.clone
(),
usage
:
if
self
.options.enable_usage
&&
self
.options.continuous_usage_stats
{
choices
,
Some
(
self
.get_usage
())
usage
:
if
self
.options.enable_usage
&&
self
.options.continuous_usage_stats
{
}
else
{
Some
(
self
.get_usage
())
None
}
else
{
None
},
service_tier
:
self
.service_tier
.clone
(),
},
},
service_tier
:
self
.service_tier
.clone
(),
nvext
:
None
,
// Will be populated by router layer if needed
nvext
:
None
,
// Will be populated by router layer if needed
}
}
}
}
...
@@ -303,15 +305,17 @@ impl DeltaGenerator {
...
@@ -303,15 +305,17 @@ impl DeltaGenerator {
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateChatCompletionStreamResponse
{
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateChatCompletionStreamResponse
{
let
usage
=
self
.get_usage
();
let
usage
=
self
.get_usage
();
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
self
.object
.clone
(),
id
:
self
.id
.clone
(),
created
:
self
.created
,
object
:
self
.object
.clone
(),
model
:
self
.model
.clone
(),
created
:
self
.created
,
system_fingerprint
:
self
.system_fingerprint
.clone
(),
model
:
self
.model
.clone
(),
choices
:
vec!
[],
// Empty choices for usage-only chunk
system_fingerprint
:
self
.system_fingerprint
.clone
(),
usage
:
Some
(
usage
),
choices
:
vec!
[],
// Empty choices for usage-only chunk
service_tier
:
self
.service_tier
.clone
(),
usage
:
Some
(
usage
),
service_tier
:
self
.service_tier
.clone
(),
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
...
lib/llm/src/protocols/openai/chat_completions/jail.rs
View file @
2887cd1c
...
@@ -525,13 +525,13 @@ impl JailedStream {
...
@@ -525,13 +525,13 @@ impl JailedStream {
// Process each item in the stream
// Process each item in the stream
while
let
Some
(
response
)
=
stream
.next
()
.await
{
while
let
Some
(
response
)
=
stream
.next
()
.await
{
if
let
Some
(
chat_response
)
=
response
.data
.as_ref
()
{
if
let
Some
(
chat_response
)
=
response
.data
.as_ref
()
{
last_stream_id
.clone_from
(
&
chat_response
.id
);
last_stream_id
.clone_from
(
&
chat_response
.
inner.
id
);
last_stream_model
.clone_from
(
&
chat_response
.model
);
last_stream_model
.clone_from
(
&
chat_response
.
inner.
model
);
last_stream_created
=
chat_response
.created
;
last_stream_created
=
chat_response
.
inner.
created
;
let
mut
all_emissions
=
Vec
::
new
();
let
mut
all_emissions
=
Vec
::
new
();
if
chat_response
.choices
.is_empty
()
{
if
chat_response
.
inner.
choices
.is_empty
()
{
// No choices processed (e.g., usage-only chunk)
// No choices processed (e.g., usage-only chunk)
// Pass through as-is to preserve usage and other metadata
// Pass through as-is to preserve usage and other metadata
yield
response
;
yield
response
;
...
@@ -539,7 +539,7 @@ impl JailedStream {
...
@@ -539,7 +539,7 @@ impl JailedStream {
}
}
// Process each choice independently using the new architecture
// Process each choice independently using the new architecture
for
choice
in
&
chat_response
.choices
{
for
choice
in
&
chat_response
.
inner.
choices
{
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
// Jailing only applies to text content
// Jailing only applies to text content
let
text_content
=
match
content
{
let
text_content
=
match
content
{
...
@@ -676,14 +676,16 @@ impl JailedStream {
...
@@ -676,14 +676,16 @@ impl JailedStream {
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
// Create a finalization response carrying forward real stream metadata
// Create a finalization response carrying forward real stream metadata
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
id
:
last_stream_id
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
"chat.completion.chunk"
.to_string
(),
id
:
last_stream_id
,
created
:
last_stream_created
,
object
:
"chat.completion.chunk"
.to_string
(),
model
:
last_stream_model
,
created
:
last_stream_created
,
choices
:
Vec
::
new
(),
model
:
last_stream_model
,
usage
:
None
,
choices
:
Vec
::
new
(),
service_tier
:
None
,
usage
:
None
,
system_fingerprint
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -713,7 +715,7 @@ impl JailedStream {
...
@@ -713,7 +715,7 @@ impl JailedStream {
EmissionMode
::
Packed
=>
{
EmissionMode
::
Packed
=>
{
// Pack all choices into a single response
// Pack all choices into a single response
let
mut
response
=
base_response
.clone
();
let
mut
response
=
base_response
.clone
();
response
.choices
=
emissions
.into_iter
()
.map
(|
e
|
e
.into_choice
())
.collect
();
response
.
inner.
choices
=
emissions
.into_iter
()
.map
(|
e
|
e
.into_choice
())
.collect
();
vec!
[
Annotated
{
vec!
[
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
@@ -729,7 +731,7 @@ impl JailedStream {
...
@@ -729,7 +731,7 @@ impl JailedStream {
.into_iter
()
.into_iter
()
.map
(|
emission
|
{
.map
(|
emission
|
{
let
mut
response
=
base_response
.clone
();
let
mut
response
=
base_response
.clone
();
response
.choices
=
vec!
[
emission
.into_choice
()];
response
.
inner.
choices
=
vec!
[
emission
.into_choice
()];
Annotated
{
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
@@ -1013,7 +1015,7 @@ impl JailedStream {
...
@@ -1013,7 +1015,7 @@ impl JailedStream {
while
let
Some
(
mut
response
)
=
input_stream
.next
()
.await
{
while
let
Some
(
mut
response
)
=
input_stream
.next
()
.await
{
// Track if any choice emitted tool calls
// Track if any choice emitted tool calls
if
let
Some
(
ref
data
)
=
response
.data
{
if
let
Some
(
ref
data
)
=
response
.data
{
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
if
choice
.delta.tool_calls
.is_some
()
{
if
choice
.delta.tool_calls
.is_some
()
{
has_tool_calls_per_choice
.insert
(
choice
.index
,
true
);
has_tool_calls_per_choice
.insert
(
choice
.index
,
true
);
}
}
...
@@ -1022,7 +1024,7 @@ impl JailedStream {
...
@@ -1022,7 +1024,7 @@ impl JailedStream {
// Fix finish_reason based on jail mode and whether tool calls were emitted
// Fix finish_reason based on jail mode and whether tool calls were emitted
if
let
Some
(
ref
mut
data
)
=
response
.data
{
if
let
Some
(
ref
mut
data
)
=
response
.data
{
for
choice
in
&
mut
data
.choices
{
for
choice
in
&
mut
data
.
inner.
choices
{
if
let
Some
(
finish
)
=
choice
.finish_reason
{
if
let
Some
(
finish
)
=
choice
.finish_reason
{
// Only modify Stop finish reason, preserve Length/ContentFilter
// Only modify Stop finish reason, preserve Length/ContentFilter
if
finish
==
FinishReason
::
Stop
{
if
finish
==
FinishReason
::
Stop
{
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
2887cd1c
...
@@ -48,6 +48,8 @@ pub struct NvCreateCompletionRequest {
...
@@ -48,6 +48,8 @@ pub struct NvCreateCompletionRequest {
pub
struct
NvCreateCompletionResponse
{
pub
struct
NvCreateCompletionResponse
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_async_openai
::
types
::
CreateCompletionResponse
,
pub
inner
:
dynamo_async_openai
::
types
::
CreateCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
impl
ContentProvider
for
dynamo_async_openai
::
types
::
Choice
{
impl
ContentProvider
for
dynamo_async_openai
::
types
::
Choice
{
...
@@ -296,9 +298,8 @@ impl ResponseFactory {
...
@@ -296,9 +298,8 @@ impl ResponseFactory {
choices
:
vec!
[
choice
],
choices
:
vec!
[
choice
],
system_fingerprint
:
self
.system_fingerprint
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
usage
,
usage
,
nvext
:
None
,
// Will be populated by router layer if needed
};
};
NvCreateCompletionResponse
{
inner
}
NvCreateCompletionResponse
{
inner
,
nvext
:
None
}
}
}
}
}
...
...
lib/llm/src/protocols/openai/completions/aggregator.rs
View file @
2887cd1c
...
@@ -86,8 +86,8 @@ impl DeltaAggregator {
...
@@ -86,8 +86,8 @@ impl DeltaAggregator {
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
}
}
// Aggregate nvext field (take the last non-None value)
// Aggregate nvext field (take the last non-None value)
if
delta
.
inner.
nvext
.is_some
()
{
if
delta
.nvext
.is_some
()
{
aggregator
.nvext
=
delta
.
inner.
nvext
;
aggregator
.nvext
=
delta
.nvext
;
}
}
// handle the choices
// handle the choices
...
@@ -168,10 +168,12 @@ impl DeltaAggregator {
...
@@ -168,10 +168,12 @@ impl DeltaAggregator {
object
:
"text_completion"
.to_string
(),
object
:
"text_completion"
.to_string
(),
system_fingerprint
:
aggregator
.system_fingerprint
,
system_fingerprint
:
aggregator
.system_fingerprint
,
choices
,
choices
,
nvext
:
aggregator
.nvext
,
};
};
let
response
=
NvCreateCompletionResponse
{
inner
};
let
response
=
NvCreateCompletionResponse
{
inner
,
nvext
:
aggregator
.nvext
,
};
Ok
(
response
)
Ok
(
response
)
}
}
...
@@ -256,10 +258,9 @@ mod tests {
...
@@ -256,10 +258,9 @@ mod tests {
logprobs
,
logprobs
,
}],
}],
object
:
"text_completion"
.to_string
(),
object
:
"text_completion"
.to_string
(),
nvext
:
None
,
};
};
let
response
=
NvCreateCompletionResponse
{
inner
};
let
response
=
NvCreateCompletionResponse
{
inner
,
nvext
:
None
};
Annotated
{
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
@@ -387,10 +388,9 @@ mod tests {
...
@@ -387,10 +388,9 @@ mod tests {
},
},
],
],
object
:
"text_completion"
.to_string
(),
object
:
"text_completion"
.to_string
(),
nvext
:
None
,
};
};
let
response
=
NvCreateCompletionResponse
{
inner
};
let
response
=
NvCreateCompletionResponse
{
inner
,
nvext
:
None
};
let
annotated_delta
=
Annotated
{
let
annotated_delta
=
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
...
lib/llm/src/protocols/openai/completions/delta.rs
View file @
2887cd1c
...
@@ -218,10 +218,9 @@ impl DeltaGenerator {
...
@@ -218,10 +218,9 @@ impl DeltaGenerator {
}
else
{
}
else
{
None
None
},
},
nvext
:
None
,
// Will be populated by router layer if needed
};
};
NvCreateCompletionResponse
{
inner
}
NvCreateCompletionResponse
{
inner
,
nvext
:
None
}
}
}
/// Creates a final usage-only chunk for OpenAI compliance.
/// Creates a final usage-only chunk for OpenAI compliance.
...
@@ -240,10 +239,9 @@ impl DeltaGenerator {
...
@@ -240,10 +239,9 @@ impl DeltaGenerator {
system_fingerprint
:
self
.system_fingerprint
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
choices
:
vec!
[],
// Empty choices for usage-only chunk
choices
:
vec!
[],
// Empty choices for usage-only chunk
usage
:
Some
(
usage
),
usage
:
Some
(
usage
),
nvext
:
None
,
// Will be populated by router layer if needed
};
};
NvCreateCompletionResponse
{
inner
}
NvCreateCompletionResponse
{
inner
,
nvext
:
None
}
}
}
/// Check if usage tracking is enabled
/// Check if usage tracking is enabled
...
@@ -343,7 +341,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
...
@@ -343,7 +341,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
};
};
if
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
{
if
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
{
response
.
inner.
nvext
=
Some
(
nvext_json
);
response
.nvext
=
Some
(
nvext_json
);
if
let
Some
(
ref
info
)
=
worker_id_info
{
if
let
Some
(
ref
info
)
=
worker_id_info
{
tracing
::
debug!
(
tracing
::
debug!
(
"Injected worker_id into completions nvext: prefill={:?}, decode={:?}"
,
"Injected worker_id into completions nvext: prefill={:?}, decode={:?}"
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment