Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b6a3b0c6
"docs/features/disaggregated-serving/README.md" did not exist on "39d645e58647d6adb074650e46be5de25f3f3bc6"
Unverified
Commit
b6a3b0c6
authored
Apr 01, 2026
by
ishandhanani
Committed by
GitHub
Apr 01, 2026
Browse files
refactor(2/3): rename dynamo-async-openai to dynamo-protocols (#7565)
parent
c84c0934
Changes
155
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
314 additions
and
328 deletions
+314
-328
lib/llm/src/preprocessor/speculative_prefill.rs
lib/llm/src/preprocessor/speculative_prefill.rs
+1
-1
lib/llm/src/protocols/anthropic/stream_converter.rs
lib/llm/src/protocols/anthropic/stream_converter.rs
+15
-19
lib/llm/src/protocols/anthropic/types.rs
lib/llm/src/protocols/anthropic/types.rs
+23
-25
lib/llm/src/protocols/common.rs
lib/llm/src/protocols/common.rs
+10
-10
lib/llm/src/protocols/common/llm_backend.rs
lib/llm/src/protocols/common/llm_backend.rs
+2
-2
lib/llm/src/protocols/common/preprocessor.rs
lib/llm/src/protocols/common/preprocessor.rs
+1
-1
lib/llm/src/protocols/openai.rs
lib/llm/src/protocols/openai.rs
+5
-5
lib/llm/src/protocols/openai/chat_completions.rs
lib/llm/src/protocols/openai/chat_completions.rs
+6
-6
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+80
-82
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+25
-27
lib/llm/src/protocols/openai/chat_completions/jail.rs
lib/llm/src/protocols/openai/chat_completions/jail.rs
+9
-9
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+30
-30
lib/llm/src/protocols/openai/completions/aggregator.rs
lib/llm/src/protocols/openai/completions/aggregator.rs
+26
-26
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+12
-12
lib/llm/src/protocols/openai/embeddings.rs
lib/llm/src/protocols/openai/embeddings.rs
+4
-4
lib/llm/src/protocols/openai/embeddings/aggregator.rs
lib/llm/src/protocols/openai/embeddings/aggregator.rs
+6
-6
lib/llm/src/protocols/openai/images.rs
lib/llm/src/protocols/openai/images.rs
+3
-3
lib/llm/src/protocols/openai/nvext.rs
lib/llm/src/protocols/openai/nvext.rs
+1
-1
lib/llm/src/protocols/openai/responses/mod.rs
lib/llm/src/protocols/openai/responses/mod.rs
+50
-54
lib/llm/src/protocols/openai/responses/stream_converter.rs
lib/llm/src/protocols/openai/responses/stream_converter.rs
+5
-5
No files found.
lib/llm/src/preprocessor/speculative_prefill.rs
View file @
b6a3b0c6
...
...
@@ -12,7 +12,7 @@ use std::pin::Pin;
use
std
::
sync
::
Arc
;
use
anyhow
::
Result
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageContent
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
};
...
...
lib/llm/src/protocols/anthropic/stream_converter.rs
View file @
b6a3b0c6
...
...
@@ -10,7 +10,7 @@
use
std
::
collections
::
HashSet
;
use
axum
::
response
::
sse
::
Event
;
use
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
;
use
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
;
use
uuid
::
Uuid
;
use
super
::
types
::{
...
...
@@ -136,17 +136,15 @@ impl AnthropicStreamConverter {
// Track finish reason
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
self
.stop_reason
=
Some
(
match
fr
{
dynamo_async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_async_openai
::
types
::
FinishReason
::
Length
=>
{
AnthropicStopReason
::
MaxTokens
}
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
=>
{
dynamo_protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_protocols
::
types
::
FinishReason
::
ToolCalls
=>
{
AnthropicStopReason
::
ToolUse
}
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
{
AnthropicStopReason
::
EndTurn
}
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
{
AnthropicStopReason
::
ToolUse
}
});
...
...
@@ -478,17 +476,15 @@ impl AnthropicStreamConverter {
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
self
.stop_reason
=
Some
(
match
fr
{
dynamo_async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_async_openai
::
types
::
FinishReason
::
Length
=>
{
AnthropicStopReason
::
MaxTokens
}
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
=>
{
dynamo_protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_protocols
::
types
::
FinishReason
::
ToolCalls
=>
{
AnthropicStopReason
::
ToolUse
}
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
{
AnthropicStopReason
::
EndTurn
}
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
{
AnthropicStopReason
::
ToolUse
}
});
...
...
@@ -734,7 +730,7 @@ impl AnthropicStreamConverter {
#[cfg(test)]
mod
tests
{
use
super
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
};
...
...
@@ -742,7 +738,7 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
@@ -777,7 +773,7 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
@@ -932,7 +928,7 @@ mod tests {
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
lib/llm/src/protocols/anthropic/types.rs
View file @
b6a3b0c6
...
...
@@ -3,15 +3,15 @@
//! Anthropic Messages API conversion logic.
//!
//! Pure protocol types live in `dynamo_
async_openai
::types::anthropic`.
//! Pure protocol types live in `dynamo_
protocols
::types::anthropic`.
//! This module provides bidirectional conversion to/from the internal
//! chat completions format used by the Dynamo engine.
// Re-export all pure Anthropic protocol types so existing `use crate::protocols::anthropic::*`
// continues to work throughout dynamo-llm.
pub
use
dynamo_
async_openai
::
types
::
anthropic
::
*
;
pub
use
dynamo_
protocols
::
types
::
anthropic
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
...
...
@@ -96,10 +96,10 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
// Convert stop_sequences -> stop
let
stop
=
req
.stop_sequences
.map
(
dynamo_
async_openai
::
types
::
Stop
::
StringArray
);
.map
(
dynamo_
protocols
::
types
::
Stop
::
StringArray
);
Ok
(
NvCreateChatCompletionRequest
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionRequest
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionRequest
{
messages
,
model
:
req
.model
,
temperature
:
req
.temperature
,
...
...
@@ -109,7 +109,7 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
tools
,
tool_choice
,
stream
:
Some
(
true
),
// Always stream internally
stream_options
:
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
stream_options
:
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
continuous_usage_stats
:
false
,
}),
...
...
@@ -350,7 +350,7 @@ fn convert_assistant_blocks(
tool_calls
.push
(
ChatCompletionMessageToolCall
{
id
:
id
.clone
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
name
.clone
(),
arguments
:
serde_json
::
to_string
(
input
)
.unwrap_or_default
(),
},
...
...
@@ -487,11 +487,11 @@ pub fn chat_completion_to_anthropic_response(
if
let
Some
(
choice
)
=
choice
{
// Map finish_reason
stop_reason
=
choice
.finish_reason
.map
(|
fr
|
match
fr
{
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
async_openai
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
AnthropicStopReason
::
ToolUse
,
});
// Extract tool calls
...
...
@@ -523,8 +523,8 @@ pub fn chat_completion_to_anthropic_response(
// Extract text content
let
text
=
match
choice
.message.content
{
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
t
))
=>
Some
(
t
),
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
t
))
=>
Some
(
t
),
Some
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
tracing
::
warn!
(
"Multimodal (Parts) content in chat completion response replaced with placeholder text in Anthropic conversion."
);
...
...
@@ -821,24 +821,22 @@ mod tests {
#[test]
fn
test_chat_completion_to_anthropic_response
()
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"Hello!"
.to_string
(),
),
),
message
:
dynamo_protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
"Hello!"
.to_string
(),
)),
refusal
:
None
,
tool_calls
:
None
,
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
}],
...
...
@@ -847,7 +845,7 @@ mod tests {
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
usage
:
Some
(
dynamo_
async_openai
::
types
::
CompletionUsage
{
usage
:
Some
(
dynamo_
protocols
::
types
::
CompletionUsage
{
prompt_tokens
:
10
,
completion_tokens
:
5
,
total_tokens
:
15
,
...
...
lib/llm/src/protocols/common.rs
View file @
b6a3b0c6
...
...
@@ -90,27 +90,27 @@ impl std::str::FromStr for FinishReason {
}
}
impl
From
<
FinishReason
>
for
dynamo_
async_openai
::
types
::
CompletionFinishReason
{
impl
From
<
FinishReason
>
for
dynamo_
protocols
::
types
::
CompletionFinishReason
{
fn
from
(
reason
:
FinishReason
)
->
Self
{
match
reason
{
FinishReason
::
EoS
|
FinishReason
::
Stop
|
FinishReason
::
Cancelled
=>
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
}
FinishReason
::
ContentFilter
=>
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
}
FinishReason
::
Length
=>
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
,
FinishReason
::
Error
(
_
)
=>
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
,
FinishReason
::
Length
=>
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
,
FinishReason
::
Error
(
_
)
=>
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
,
}
}
}
impl
From
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
for
FinishReason
{
fn
from
(
reason
:
dynamo_
async_openai
::
types
::
CompletionFinishReason
)
->
Self
{
impl
From
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
for
FinishReason
{
fn
from
(
reason
:
dynamo_
protocols
::
types
::
CompletionFinishReason
)
->
Self
{
match
reason
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
=>
FinishReason
::
Stop
,
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
=>
FinishReason
::
Length
,
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
=>
FinishReason
::
Stop
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
=>
FinishReason
::
Length
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
=>
{
FinishReason
::
ContentFilter
}
}
...
...
lib/llm/src/protocols/common/llm_backend.rs
View file @
b6a3b0c6
...
...
@@ -6,8 +6,8 @@ use serde::{Deserialize, Serialize};
pub
use
super
::
FinishReason
;
pub
use
super
::
preprocessor
::
PreprocessedRequest
;
use
crate
::
protocols
::
TokenIdType
;
use
dynamo_
async_openai
::
types
::
CompletionUsage
;
use
dynamo_
async_openai
::
types
::
StopReason
;
use
dynamo_
protocols
::
types
::
CompletionUsage
;
use
dynamo_
protocols
::
types
::
StopReason
;
use
dynamo_runtime
::
error
::
DynamoError
;
use
dynamo_runtime
::
protocols
::
maybe_error
::
MaybeError
;
...
...
lib/llm/src/protocols/common/preprocessor.rs
View file @
b6a3b0c6
...
...
@@ -86,7 +86,7 @@ pub struct PrefillResult {
pub
disaggregated_params
:
serde_json
::
Value
,
/// Prompt token details produced during prefill
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
prompt_tokens_details
:
Option
<
dynamo_
async_openai
::
types
::
PromptTokensDetails
>
,
pub
prompt_tokens_details
:
Option
<
dynamo_
protocols
::
types
::
PromptTokensDetails
>
,
}
/// Optional multimodal routing-only data.
...
...
lib/llm/src/protocols/openai.rs
View file @
b6a3b0c6
...
...
@@ -230,15 +230,15 @@ pub(crate) fn convert_backend_top_logprobs(
selected_token
:
&
str
,
selected_token_id
:
TokenIdType
,
selected_logprob
:
f32
,
)
->
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>
{
)
->
Vec
<
dynamo_
protocols
::
types
::
TopLogprobs
>
{
let
mut
found_selected
=
false
;
let
mut
result
:
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>
=
top_lps
let
mut
result
:
Vec
<
dynamo_
protocols
::
types
::
TopLogprobs
>
=
top_lps
.iter
()
.map
(|
top_lp
|
{
let
tok
=
top_lp
.token
.clone
()
.unwrap_or_default
();
found_selected
=
found_selected
||
top_lp
.token_id
==
selected_token_id
;
let
bytes
=
top_lp
.bytes
.clone
()
.or_else
(||
token_to_utf8_bytes
(
&
tok
));
dynamo_
async_openai
::
types
::
TopLogprobs
{
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
tok
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
,
...
...
@@ -247,7 +247,7 @@ pub(crate) fn convert_backend_top_logprobs(
.collect
();
if
!
found_selected
{
result
.push
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
result
.push
(
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
selected_token
.to_string
(),
logprob
:
selected_logprob
,
bytes
:
token_to_utf8_bytes
(
selected_token
),
...
...
@@ -277,7 +277,7 @@ pub trait DeltaGeneratorExt<ResponseType: Send + 'static + std::fmt::Debug>:
fn
is_continuous_usage_enabled
(
&
self
)
->
bool
;
/// Get the current usage statistics with properly calculated total_tokens.
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
;
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
;
/// Returns the request tracker if available, for accessing worker timing metrics.
fn
tracker
(
&
self
)
->
Option
<
std
::
sync
::
Arc
<
common
::
timing
::
RequestTracker
>>
{
...
...
lib/llm/src/protocols/openai/chat_completions.rs
View file @
b6a3b0c6
...
...
@@ -35,7 +35,7 @@ pub use delta::DeltaGenerator;
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateChatCompletionRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionRequest
,
#[serde(flatten,
default)]
pub
common
:
CommonExt
,
...
...
@@ -68,7 +68,7 @@ pub struct NvCreateChatCompletionRequest {
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
pub
struct
NvCreateChatCompletionResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
...
...
@@ -78,7 +78,7 @@ pub struct NvCreateChatCompletionResponse {
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
pub
struct
NvCreateChatCompletionStreamResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
...
...
@@ -202,7 +202,7 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {
// 2) OpenAI `response_format` (applies to assistant content, not tool calls)
if
let
Some
(
response_format
)
=
self
.inner.response_format
.as_ref
()
{
use
dynamo_
async_openai
::
types
::
ResponseFormat
;
use
dynamo_
protocols
::
types
::
ResponseFormat
;
match
response_format
{
ResponseFormat
::
Text
=>
{}
ResponseFormat
::
JsonObject
=>
{
...
...
@@ -289,8 +289,8 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
/// * `None` if no stop conditions are defined.
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
self
.inner.stop
.as_ref
()
.map
(|
stop
|
match
stop
{
dynamo_
async_openai
::
types
::
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
dynamo_
async_openai
::
types
::
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
dynamo_
protocols
::
types
::
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
dynamo_
protocols
::
types
::
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
})
}
...
...
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
b6a3b0c6
...
...
@@ -12,7 +12,7 @@ use crate::protocols::{
openai
::
ParsingOptions
,
};
use
dynamo_
async_openai
::
types
::{
ChatCompletionMessageContent
,
StopReason
};
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageContent
,
StopReason
};
use
dynamo_runtime
::
engine
::
DataStream
;
/// Aggregates a stream of [`NvCreateChatCompletionStreamResponse`]s into a single
...
...
@@ -26,7 +26,7 @@ pub struct DeltaAggregator {
/// Timestamp (Unix epoch) indicating when the response was created.
created
:
u32
,
/// Optional usage statistics for the completion request.
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
usage
:
Option
<
dynamo_
protocols
::
types
::
CompletionUsage
>
,
/// Optional system fingerprint for version tracking.
system_fingerprint
:
Option
<
String
>
,
/// Map of incremental response choices, keyed by index.
...
...
@@ -34,7 +34,7 @@ pub struct DeltaAggregator {
/// Optional error message if an error occurs during aggregation.
error
:
Option
<
String
>
,
/// Optional service tier information for the response.
service_tier
:
Option
<
dynamo_
async_openai
::
types
::
ServiceTierResponse
>
,
service_tier
:
Option
<
dynamo_
protocols
::
types
::
ServiceTierResponse
>
,
/// Aggregated nvext field from stream responses
nvext
:
Option
<
serde_json
::
Value
>
,
}
...
...
@@ -47,21 +47,21 @@ struct DeltaChoice {
/// The accumulated text content for the choice.
text
:
String
,
/// The role associated with this message (e.g., `system`, `user`, `assistant`).
role
:
Option
<
dynamo_
async_openai
::
types
::
Role
>
,
role
:
Option
<
dynamo_
protocols
::
types
::
Role
>
,
/// The reason the completion was finished (if applicable).
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
FinishReason
>
,
/// The stop string or token that triggered the stop condition.
stop_reason
:
Option
<
StopReason
>
,
/// Optional log probabilities for the chat choice.
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
>
,
// Optional tool calls for the chat choice.
tool_calls
:
Option
<
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionMessageToolCall
>>
,
tool_calls
:
Option
<
Vec
<
dynamo_
protocols
::
types
::
ChatCompletionMessageToolCall
>>
,
/// Optional reasoning content for the chat choice.
reasoning_content
:
Option
<
String
>
,
/// Accumulated content parts for multimodal responses
content_parts
:
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionResponseContentPart
>
,
content_parts
:
Vec
<
dynamo_
protocols
::
types
::
ChatCompletionResponseContentPart
>
,
}
impl
Default
for
DeltaAggregator
{
...
...
@@ -72,15 +72,15 @@ impl Default for DeltaAggregator {
}
fn
convert_tool_chunk_to_message_tool_call
(
chunk
:
&
dynamo_
async_openai
::
types
::
ChatCompletionMessageToolCallChunk
,
)
->
Option
<
dynamo_
async_openai
::
types
::
ChatCompletionMessageToolCall
>
{
chunk
:
&
dynamo_
protocols
::
types
::
ChatCompletionMessageToolCallChunk
,
)
->
Option
<
dynamo_
protocols
::
types
::
ChatCompletionMessageToolCall
>
{
// Convert ChatCompletionMessageToolCallChunk to ChatCompletionMessageToolCall
if
let
(
Some
(
id
),
Some
(
r
#
type
),
Some
(
function
))
=
(
&
chunk
.id
,
&
chunk
.r
#
type
,
&
chunk
.function
)
{
if
let
(
Some
(
name
),
Some
(
arguments
))
=
(
&
function
.name
,
&
function
.arguments
)
{
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageToolCall
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionMessageToolCall
{
id
:
id
.clone
(),
r
#
type
:
r
#
type
.clone
(),
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
name
.clone
(),
arguments
:
arguments
.clone
(),
},
...
...
@@ -197,7 +197,7 @@ impl DeltaAggregator {
{
// Convert ChatCompletionMessageToolCallChunk to ChatCompletionMessageToolCall
let
converted_tool_calls
:
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionMessageToolCall
,
dynamo_
protocols
::
types
::
ChatCompletionMessageToolCall
,
>
=
tool_calls
.iter
()
.filter_map
(
convert_tool_chunk_to_message_tool_call
)
...
...
@@ -227,7 +227,7 @@ impl DeltaAggregator {
// Update logprobs
if
let
Some
(
logprobs
)
=
&
choice
.logprobs
{
let
state_lps
=
state_choice
.logprobs
.get_or_insert
(
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
{
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
{
content
:
None
,
refusal
:
None
,
},
...
...
@@ -260,14 +260,14 @@ impl DeltaAggregator {
let
mut
choices
:
Vec
<
_
>
=
aggregator
.choices
.into_values
()
.map
(
dynamo_
async_openai
::
types
::
ChatChoice
::
from
)
.map
(
dynamo_
protocols
::
types
::
ChatChoice
::
from
)
.collect
();
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
// Construct the final response object.
let
response
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
aggregator
.id
,
created
:
aggregator
.created
,
usage
:
aggregator
.usage
,
...
...
@@ -285,8 +285,8 @@ impl DeltaAggregator {
}
#[allow(deprecated)]
impl
From
<
DeltaChoice
>
for
dynamo_
async_openai
::
types
::
ChatChoice
{
/// Converts a [`DeltaChoice`] into an [`dynamo_
async_openai
::types::ChatChoice`].
impl
From
<
DeltaChoice
>
for
dynamo_
protocols
::
types
::
ChatChoice
{
/// Converts a [`DeltaChoice`] into an [`dynamo_
protocols
::types::ChatChoice`].
///
/// # Note
/// The `function_call` field is deprecated.
...
...
@@ -297,7 +297,7 @@ impl From<DeltaChoice> for dynamo_async_openai::types::ChatChoice {
.as_ref
()
.is_some_and
(|
calls
|
!
calls
.is_empty
())
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
)
}
else
{
delta
.finish_reason
};
...
...
@@ -313,8 +313,8 @@ impl From<DeltaChoice> for dynamo_async_openai::types::ChatChoice {
None
};
dynamo_
async_openai
::
types
::
ChatChoice
{
message
:
dynamo_
async_openai
::
types
::
ChatCompletionResponseMessage
{
dynamo_
protocols
::
types
::
ChatChoice
{
message
:
dynamo_
protocols
::
types
::
ChatCompletionResponseMessage
{
role
:
delta
.role
.expect
(
"delta should have a Role"
),
content
,
tool_calls
:
delta
.tool_calls
,
...
...
@@ -390,8 +390,8 @@ mod tests {
fn
create_test_delta
(
index
:
u32
,
text
:
&
str
,
role
:
Option
<
dynamo_
async_openai
::
types
::
Role
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
role
:
Option
<
dynamo_
protocols
::
types
::
Role
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
FinishReason
>
,
logprob
:
Option
<
f32
>
,
tool_calls
:
Option
<&
str
>
,
)
->
Annotated
<
NvCreateChatCompletionStreamResponse
>
{
...
...
@@ -402,11 +402,11 @@ mod tests {
let
tool_call_chunks
=
if
let
Some
(
tool_calls
)
=
tool_calls
{
Some
(
vec!
[
dynamo_
async_openai
::
types
::
ChatCompletionMessageToolCallChunk
{
dynamo_
protocols
::
types
::
ChatCompletionMessageToolCallChunk
{
index
:
0
,
id
:
Some
(
"test_id"
.to_string
()),
r
#
type
:
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionToolType
::
Function
),
function
:
Some
(
dynamo_
async_openai
::
types
::
FunctionCallStream
{
r
#
type
:
Some
(
dynamo_
protocols
::
types
::
ChatCompletionToolType
::
Function
),
function
:
Some
(
dynamo_
protocols
::
types
::
FunctionCallStream
{
name
:
tool_calls
[
"name"
]
.as_str
()
.map
(|
s
|
s
.to_string
()),
arguments
:
Some
(
serde_json
::
to_string
(
&
tool_calls
[
"arguments"
])
.unwrap
()),
}),
...
...
@@ -416,7 +416,7 @@ mod tests {
None
};
let
delta
=
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
let
delta
=
dynamo_
protocols
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.to_string
())),
function_call
:
None
,
tool_calls
:
tool_call_chunks
,
...
...
@@ -426,19 +426,17 @@ mod tests {
};
let
logprobs
=
logprob
.map
(|
lp
|
{
let
token
=
text
.to_string
();
dynamo_async_openai
::
types
::
ChatChoiceLogprobs
{
content
:
Some
(
vec!
[
dynamo_async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
token
.clone
(),
logprob
:
lp
,
bytes
:
token_to_utf8_bytes
(
&
token
),
top_logprobs
:
vec!
[],
},
]),
dynamo_protocols
::
types
::
ChatChoiceLogprobs
{
content
:
Some
(
vec!
[
dynamo_protocols
::
types
::
ChatCompletionTokenLogprob
{
token
:
token
.clone
(),
logprob
:
lp
,
bytes
:
token_to_utf8_bytes
(
&
token
),
top_logprobs
:
vec!
[],
}]),
refusal
:
None
,
}
});
let
choice
=
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
let
choice
=
dynamo_
protocols
::
types
::
ChatChoiceStream
{
index
,
delta
,
finish_reason
,
...
...
@@ -447,7 +445,7 @@ mod tests {
};
let
data
=
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b-instruct"
.to_string
(),
created
:
1234567890
,
...
...
@@ -498,7 +496,7 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
"Hello,"
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
User
),
Some
(
dynamo_
protocols
::
types
::
Role
::
User
),
None
,
None
,
None
,
...
...
@@ -528,7 +526,7 @@ mod tests {
&
ChatCompletionMessageContent
::
Text
(
"Hello,"
.to_string
())
);
assert
!
(
choice
.finish_reason
.is_none
());
assert_eq!
(
choice
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
User
);
assert_eq!
(
choice
.message.role
,
dynamo_
protocols
::
types
::
Role
::
User
);
assert
!
(
response
.inner.service_tier
.is_none
());
}
...
...
@@ -540,7 +538,7 @@ mod tests {
let
annotated_delta1
=
create_test_delta
(
0
,
"Hello,"
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
User
),
Some
(
dynamo_
protocols
::
types
::
Role
::
User
),
None
,
Some
(
-
0.1
),
None
,
...
...
@@ -549,7 +547,7 @@ mod tests {
0
,
" world!"
,
None
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
Some
(
-
0.2
),
None
,
);
...
...
@@ -575,9 +573,9 @@ mod tests {
);
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
);
assert_eq!
(
choice
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
User
);
assert_eq!
(
choice
.message.role
,
dynamo_
protocols
::
types
::
Role
::
User
);
assert_eq!
(
choice
.logprobs
...
...
@@ -608,7 +606,7 @@ mod tests {
let
annotated_delta1
=
create_test_delta
(
0
,
"Hello"
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
User
),
Some
(
dynamo_
protocols
::
types
::
Role
::
User
),
None
,
None
,
None
,
...
...
@@ -619,7 +617,7 @@ mod tests {
0
,
"world"
,
None
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
None
,
None
,
);
...
...
@@ -646,9 +644,9 @@ mod tests {
);
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
);
assert_eq!
(
choice
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
User
);
assert_eq!
(
choice
.message.role
,
dynamo_
protocols
::
types
::
Role
::
User
);
}
#[allow(deprecated)]
...
...
@@ -657,7 +655,7 @@ mod tests {
// Create a delta with multiple choices
// ALLOW: function_call is deprecated
let
data
=
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
model
:
"test_model"
.to_string
(),
created
:
1234567890
,
...
...
@@ -665,10 +663,10 @@ mod tests {
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
dynamo_
protocols
::
types
::
ChatChoiceStream
{
index
:
0
,
delta
:
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
delta
:
dynamo_
protocols
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
"Choice 0"
.to_string
(),
)),
...
...
@@ -677,14 +675,14 @@ mod tests {
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
},
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
dynamo_
protocols
::
types
::
ChatChoiceStream
{
index
:
1
,
delta
:
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
delta
:
dynamo_
protocols
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
"Choice 1"
.to_string
(),
)),
...
...
@@ -693,7 +691,7 @@ mod tests {
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
},
...
...
@@ -731,11 +729,11 @@ mod tests {
);
assert_eq!
(
choice0
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
);
assert_eq!
(
choice0
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
Assistant
dynamo_
protocols
::
types
::
Role
::
Assistant
);
let
choice1
=
&
response
.inner.choices
[
1
];
...
...
@@ -746,11 +744,11 @@ mod tests {
);
assert_eq!
(
choice1
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
);
assert_eq!
(
choice1
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
Assistant
dynamo_
protocols
::
types
::
Role
::
Assistant
);
}
...
...
@@ -763,8 +761,8 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
"I'll check the weather for you."
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
// Original finish reason is Stop
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
// Original finish reason is Stop
None
,
Some
(
tool_call_json
),
);
...
...
@@ -794,7 +792,7 @@ mod tests {
// Most importantly, verify that finish reason was overridden to ToolCalls despite original being Stop
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
)
);
}
...
...
@@ -806,8 +804,8 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
"Let me search for that."
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Length
),
// Original finish reason is Length
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Length
),
// Original finish reason is Length
None
,
Some
(
tool_call_json
),
);
...
...
@@ -837,7 +835,7 @@ mod tests {
// Verify that finish reason was overridden to ToolCalls despite original being Length
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
)
);
}
...
...
@@ -849,7 +847,7 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
"I'll calculate that for you."
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
None
,
// Original finish reason is None
None
,
Some
(
tool_call_json
),
...
...
@@ -880,7 +878,7 @@ mod tests {
// Verify that finish reason was set to ToolCalls despite original being None
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
)
);
}
...
...
@@ -890,8 +888,8 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
"This is a regular response without tool calls."
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
None
,
None
,
// No tool calls
);
...
...
@@ -919,7 +917,7 @@ mod tests {
// Verify that original finish reason (Stop) is preserved
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
);
}
...
...
@@ -930,8 +928,8 @@ mod tests {
let
mut
annotated_delta
=
create_test_delta
(
0
,
"Response with empty tool calls array."
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Length
),
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Length
),
None
,
None
,
);
...
...
@@ -964,7 +962,7 @@ mod tests {
// Verify that original finish reason (Length) is preserved since tool calls are empty
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Length
)
);
}
...
...
@@ -977,8 +975,8 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
"Hey Dude ! What's the weather in San Francisco in Fahrenheit?"
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
),
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
),
None
,
Some
(
tool_call_json
),
);
...
...
@@ -1027,11 +1025,11 @@ mod tests {
// The finish_reason should be ToolCalls
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
)
);
assert_eq!
(
choice
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
Assistant
dynamo_
protocols
::
types
::
Role
::
Assistant
);
}
...
...
@@ -1044,8 +1042,8 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
"Getting weather for New York"
,
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
// This should be overridden
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
// This should be overridden
None
,
Some
(
tool_call_json
),
);
...
...
@@ -1066,7 +1064,7 @@ mod tests {
// The finish_reason should be ToolCalls, not Stop, because tool calls are present
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
)
);
// Verify tool calls are present
...
...
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
b6a3b0c6
...
...
@@ -32,7 +32,7 @@ impl NvCreateChatCompletionRequest {
// For non-streaming requests (stream=false), enable usage by default
if
self
.inner.stream_options
.is_none
()
{
self
.inner.stream_options
=
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
continuous_usage_stats
:
false
,
});
...
...
@@ -116,9 +116,9 @@ pub struct DeltaGenerator {
/// Optional system fingerprint for version tracking.
system_fingerprint
:
Option
<
String
>
,
/// Optional service tier information for the response.
service_tier
:
Option
<
dynamo_
async_openai
::
types
::
ServiceTierResponse
>
,
service_tier
:
Option
<
dynamo_
protocols
::
types
::
ServiceTierResponse
>
,
/// Tracks token usage for the completion request.
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
protocols
::
types
::
CompletionUsage
,
/// Counter tracking the number of messages issued.
msg_counter
:
u64
,
/// Configuration options for response generation.
...
...
@@ -147,7 +147,7 @@ impl DeltaGenerator {
// but this will not be an issue until 2106.
let
now
:
u32
=
now
.try_into
()
.expect
(
"timestamp exceeds u32::MAX"
);
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
protocols
::
types
::
CompletionUsage
{
prompt_tokens
:
0
,
completion_tokens
:
0
,
total_tokens
:
0
,
...
...
@@ -194,7 +194,7 @@ impl DeltaGenerator {
token_ids
:
&
[
TokenIdType
],
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
{
)
->
Option
<
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
}
...
...
@@ -216,7 +216,7 @@ impl DeltaGenerator {
.zip
(
top_logprobs
)
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
let
converted
=
convert_backend_top_logprobs
(
&
top_lps
,
t
,
*
tid
,
lp
);
dynamo_
async_openai
::
types
::
ChatCompletionTokenLogprob
{
dynamo_
protocols
::
types
::
ChatCompletionTokenLogprob
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
token_to_utf8_bytes
(
t
),
...
...
@@ -226,7 +226,7 @@ impl DeltaGenerator {
.collect
()
});
Some
(
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
{
Some
(
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
{
content
,
refusal
:
None
,
})
...
...
@@ -242,22 +242,22 @@ impl DeltaGenerator {
/// * `stop_reason` - Optional stop string or token that triggered the stop.
///
/// # Returns
/// * An [`dynamo_
async_openai
::types::CreateChatCompletionStreamResponse`] instance representing the choice.
/// * An [`dynamo_
protocols
::types::CreateChatCompletionStreamResponse`] instance representing the choice.
#[allow(deprecated)]
pub
fn
create_choice
(
&
mut
self
,
index
:
u32
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
,
stop_reason
:
Option
<
dynamo_
async_openai
::
types
::
StopReason
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
FinishReason
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
>
,
stop_reason
:
Option
<
dynamo_
protocols
::
types
::
StopReason
>
,
)
->
NvCreateChatCompletionStreamResponse
{
let
delta
=
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
text
.map
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
),
let
delta
=
dynamo_
protocols
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
text
.map
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
),
function_call
:
None
,
tool_calls
:
None
,
role
:
if
self
.msg_counter
==
0
{
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
)
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
)
}
else
{
None
},
...
...
@@ -265,7 +265,7 @@ impl DeltaGenerator {
reasoning_content
:
None
,
};
let
choice
=
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
let
choice
=
dynamo_
protocols
::
types
::
ChatChoiceStream
{
index
,
delta
,
finish_reason
,
...
...
@@ -279,7 +279,7 @@ impl DeltaGenerator {
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -306,7 +306,7 @@ impl DeltaGenerator {
let
usage
=
self
.get_usage
();
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -330,7 +330,7 @@ impl DeltaGenerator {
self
.options.continuous_usage_stats
}
pub
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
pub
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
let
mut
usage
=
self
.usage
.clone
();
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
...
...
@@ -387,18 +387,16 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
// Map backend finish reasons to OpenAI's finish reasons.
let
finish_reason
=
match
delta
.finish_reason
{
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Stop
)
=>
{
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
}
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
dynamo_protocols
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Stop
)
=>
Some
(
dynamo_protocols
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Length
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Length
)
}
Some
(
common
::
FinishReason
::
Cancelled
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
}
Some
(
common
::
FinishReason
::
ContentFilter
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
)
}
Some
(
common
::
FinishReason
::
Error
(
err_msg
))
=>
{
return
Err
(
anyhow
::
anyhow!
(
err_msg
));
...
...
@@ -490,7 +488,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
}
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
DeltaGenerator
::
get_usage
(
self
)
}
...
...
@@ -502,7 +500,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
#[cfg(test)]
mod
tests
{
use
super
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessage
,
ChatCompletionRequestUserMessageContent
,
CreateChatCompletionRequest
,
};
...
...
lib/llm/src/protocols/openai/chat_completions/jail.rs
View file @
b6a3b0c6
...
...
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
use
async_stream
::
stream
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceLogprobs
,
ChatChoiceStream
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
FinishReason
,
FunctionCallStream
,
Role
,
};
...
...
@@ -116,7 +116,7 @@ fn create_choice_stream(
content
:
&
str
,
tool_calls
:
Option
<
Vec
<
ChatCompletionMessageToolCallChunk
>>
,
finish_reason
:
Option
<
FinishReason
>
,
stop_reason
:
Option
<
dynamo_
async_openai
::
types
::
StopReason
>
,
stop_reason
:
Option
<
dynamo_
protocols
::
types
::
StopReason
>
,
logprobs
:
Option
<
ChatChoiceLogprobs
>
,
)
->
ChatChoiceStream
{
#[allow(deprecated)]
...
...
@@ -124,9 +124,9 @@ fn create_choice_stream(
index
,
delta
:
ChatCompletionStreamResponseDelta
{
role
,
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
.to_string
()
)
,
),
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
content
.to_string
(),
)
),
tool_calls
,
function_call
:
None
,
refusal
:
None
,
...
...
@@ -543,8 +543,8 @@ impl JailedStream {
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
// Jailing only applies to text content
let
text_content
=
match
content
{
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.as_str
()),
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
)
=>
None
,
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.as_str
()),
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
)
=>
None
,
};
if
let
Some
(
text
)
=
text_content
{
...
...
@@ -676,7 +676,7 @@ impl JailedStream {
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
// Create a finalization response carrying forward real stream metadata
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
last_stream_id
,
object
:
"chat.completion.chunk"
.to_string
(),
created
:
last_stream_created
,
...
...
@@ -932,7 +932,7 @@ impl JailedStream {
ChatCompletionMessageToolCallChunk
{
index
,
id
:
Some
(
format!
(
"call-{}"
,
Uuid
::
new_v4
())),
r
#
type
:
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionToolType
::
Function
),
r
#
type
:
Some
(
dynamo_
protocols
::
types
::
ChatCompletionToolType
::
Function
),
function
:
Some
(
FunctionCallStream
{
name
:
Some
(
name
),
arguments
:
Some
(
arguments
),
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
b6a3b0c6
...
...
@@ -27,7 +27,7 @@ pub use delta::DeltaGenerator;
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateCompletionRequest
,
#[serde(flatten)]
pub
common
:
CommonExt
,
...
...
@@ -47,27 +47,27 @@ pub struct NvCreateCompletionRequest {
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
impl
ContentProvider
for
dynamo_
async_openai
::
types
::
Choice
{
impl
ContentProvider
for
dynamo_
protocols
::
types
::
Choice
{
fn
content
(
&
self
)
->
String
{
self
.text
.clone
()
}
}
pub
fn
prompt_to_string
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
String
{
pub
fn
prompt_to_string
(
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
)
->
String
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
.iter
()
.map
(|
&
num
|
num
.to_string
())
.collect
::
<
Vec
<
_
>>
()
.join
(
" "
),
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.iter
()
.map
(|
inner
|
{
inner
...
...
@@ -82,12 +82,12 @@ pub fn prompt_to_string(prompt: &dynamo_async_openai::types::Prompt) -> String {
}
/// Get the batch size from a prompt (1 for single prompts, array length for batch prompts)
pub
fn
get_prompt_batch_size
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
usize
{
pub
fn
get_prompt_batch_size
(
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
)
->
usize
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
_
)
=>
1
,
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
_
)
=>
1
,
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.len
(),
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.len
(),
dynamo_
protocols
::
types
::
Prompt
::
String
(
_
)
=>
1
,
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
_
)
=>
1
,
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.len
(),
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.len
(),
}
}
...
...
@@ -95,21 +95,21 @@ pub fn get_prompt_batch_size(prompt: &dynamo_async_openai::types::Prompt) -> usi
/// For single prompts, returns a clone regardless of index.
/// For batch prompts, returns the prompt at the specified index.
pub
fn
extract_single_prompt
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
,
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
,
index
:
usize
,
)
->
dynamo_
async_openai
::
types
::
Prompt
{
)
->
dynamo_
protocols
::
types
::
Prompt
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
.clone
())
}
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
.clone
())
}
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
arr
[
index
]
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
String
(
arr
[
index
]
.clone
())
}
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
[
index
]
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
[
index
]
.clone
())
}
}
}
...
...
@@ -241,7 +241,7 @@ impl OpenAIStopConditionsProvider for NvCreateCompletionRequest {
}
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
use
dynamo_
async_openai
::
types
::
Stop
;
use
dynamo_
protocols
::
types
::
Stop
;
self
.inner.stop
.as_ref
()
.map
(|
s
|
match
s
{
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
...
...
@@ -287,10 +287,10 @@ impl ResponseFactory {
pub
fn
make_response
(
&
self
,
choice
:
dynamo_
async_openai
::
types
::
Choice
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
choice
:
dynamo_
protocols
::
types
::
Choice
,
usage
:
Option
<
dynamo_
protocols
::
types
::
CompletionUsage
>
,
)
->
NvCreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -361,7 +361,7 @@ impl TryFrom<NvCreateCompletionRequest> for common::CompletionRequest {
}
}
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
dynamo_
async_openai
::
types
::
Choice
{
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
dynamo_
protocols
::
types
::
Choice
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
response
:
common
::
StreamingCompletionResponse
)
->
Result
<
Self
,
Self
::
Error
>
{
...
...
@@ -382,10 +382,10 @@ impl TryFrom<common::StreamingCompletionResponse> for dynamo_async_openai::types
// TODO handle aggregating logprobs
let
logprobs
=
None
;
let
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
=
let
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
=
response
.delta.finish_reason
.map
(
Into
::
into
);
let
choice
=
dynamo_
async_openai
::
types
::
Choice
{
let
choice
=
dynamo_
protocols
::
types
::
Choice
{
text
,
index
,
logprobs
,
...
...
lib/llm/src/protocols/openai/completions/aggregator.rs
View file @
b6a3b0c6
...
...
@@ -20,7 +20,7 @@ pub struct DeltaAggregator {
id
:
String
,
model
:
String
,
created
:
u32
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
usage
:
Option
<
dynamo_
protocols
::
types
::
CompletionUsage
>
,
system_fingerprint
:
Option
<
String
>
,
choices
:
HashMap
<
u32
,
DeltaChoice
>
,
error
:
Option
<
String
>
,
...
...
@@ -31,7 +31,7 @@ struct DeltaChoice {
index
:
u32
,
text
:
String
,
finish_reason
:
Option
<
FinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
,
}
impl
Default
for
DeltaAggregator
{
...
...
@@ -109,14 +109,14 @@ impl DeltaAggregator {
// Handle CompletionFinishReason -> FinishReason conversation
state_choice
.finish_reason
=
match
choice
.finish_reason
{
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
FinishReason
::
Stop
)
}
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
FinishReason
::
Length
)
}
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
,
)
=>
Some
(
FinishReason
::
ContentFilter
),
None
=>
None
,
};
...
...
@@ -124,7 +124,7 @@ impl DeltaAggregator {
// Update logprobs
if
let
Some
(
logprobs
)
=
&
choice
.logprobs
{
let
state_lps
=
state_choice
.logprobs
.get_or_insert
(
dynamo_
async_openai
::
types
::
Logprobs
{
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
Vec
::
new
(),
token_logprobs
:
Vec
::
new
(),
top_logprobs
:
Vec
::
new
(),
...
...
@@ -155,12 +155,12 @@ impl DeltaAggregator {
let
mut
choices
:
Vec
<
_
>
=
aggregator
.choices
.into_values
()
.map
(
dynamo_
async_openai
::
types
::
Choice
::
from
)
.map
(
dynamo_
protocols
::
types
::
Choice
::
from
)
.collect
();
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
aggregator
.id
,
created
:
aggregator
.created
,
usage
:
aggregator
.usage
,
...
...
@@ -179,11 +179,11 @@ impl DeltaAggregator {
}
}
impl
From
<
DeltaChoice
>
for
dynamo_
async_openai
::
types
::
Choice
{
impl
From
<
DeltaChoice
>
for
dynamo_
protocols
::
types
::
Choice
{
fn
from
(
delta
:
DeltaChoice
)
->
Self
{
let
finish_reason
=
delta
.finish_reason
.map
(
Into
::
into
);
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
delta
.index
,
text
:
delta
.text
,
finish_reason
,
...
...
@@ -231,11 +231,11 @@ mod tests {
.and_then
(|
s
|
FinishReason
::
from_str
(
s
)
.ok
())
.map
(
Into
::
into
);
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_
async_openai
::
types
::
Logprobs
{
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
vec!
[
text
.to_string
()],
token_logprobs
:
vec!
[
Some
(
lp
)],
top_logprobs
:
vec!
[
serde_json
::
to_value
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
serde_json
::
to_value
(
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
text
.to_string
(),
logprob
:
lp
,
bytes
:
None
,
...
...
@@ -245,13 +245,13 @@ mod tests {
text_offset
:
vec!
[
0
],
});
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
Choice
{
index
,
text
:
text
.to_string
(),
finish_reason
,
...
...
@@ -319,11 +319,11 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello,"
.to_string
());
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
);
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
);
assert
!
(
choice
.logprobs
.is_none
());
}
...
...
@@ -355,7 +355,7 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello, world!"
.to_string
());
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice
.logprobs
.as_ref
()
.unwrap
()
.tokens
.len
(),
2
);
assert_eq!
(
...
...
@@ -367,23 +367,23 @@ mod tests {
#[tokio::test]
async
fn
test_multiple_choices
()
{
// Create a delta with multiple choices
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
0
,
text
:
"Choice 0"
.to_string
(),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
},
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
1
,
text
:
"Choice 1"
.to_string
(),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
},
],
...
...
@@ -418,11 +418,11 @@ mod tests {
assert_eq!
(
choice0
.text
,
"Choice 0"
.to_string
());
assert_eq!
(
choice0
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice0
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
let
choice1
=
&
response
.inner.choices
[
1
];
...
...
@@ -430,11 +430,11 @@ mod tests {
assert_eq!
(
choice1
.text
,
"Choice 1"
.to_string
());
assert_eq!
(
choice1
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice1
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
}
}
lib/llm/src/protocols/openai/completions/delta.rs
View file @
b6a3b0c6
...
...
@@ -31,7 +31,7 @@ impl NvCreateCompletionRequest {
// For non-streaming requests (stream=false), enable usage by default
if
self
.inner.stream_options
.is_none
()
{
self
.inner.stream_options
=
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
continuous_usage_stats
:
false
,
});
...
...
@@ -95,7 +95,7 @@ pub struct DeltaGenerator {
created
:
u32
,
model
:
String
,
system_fingerprint
:
Option
<
String
>
,
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
protocols
::
types
::
CompletionUsage
,
options
:
DeltaGeneratorOptions
,
tracker
:
Option
<
Arc
<
RequestTracker
>>
,
}
...
...
@@ -113,7 +113,7 @@ impl DeltaGenerator {
// Previously, our home-rolled CompletionUsage impl'd Default
// PR !387 - https://github.com/64bit/async-openai/pull/387
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
protocols
::
types
::
CompletionUsage
{
completion_tokens
:
0
,
prompt_tokens
:
0
,
total_tokens
:
0
,
...
...
@@ -154,7 +154,7 @@ impl DeltaGenerator {
token_ids
:
Vec
<
TokenIdType
>
,
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
{
)
->
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
}
...
...
@@ -181,7 +181,7 @@ impl DeltaGenerator {
.collect
()
});
Some
(
dynamo_
async_openai
::
types
::
Logprobs
{
Some
(
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
toks
.iter
()
.map
(|(
t
,
_
)|
t
.clone
())
.collect
(),
token_logprobs
:
tok_lps
.into_iter
()
.map
(
Some
)
.collect
(),
text_offset
:
vec!
[],
...
...
@@ -193,21 +193,21 @@ impl DeltaGenerator {
&
self
,
index
:
u32
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
,
)
->
NvCreateCompletionResponse
{
// todo - update for tool calling
// According to OpenAI spec: when stream_options.include_usage is true,
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
model
:
self
.model
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
Choice
{
text
:
text
.unwrap_or_default
(),
index
,
finish_reason
,
...
...
@@ -231,7 +231,7 @@ impl DeltaGenerator {
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateCompletionResponse
{
let
usage
=
self
.get_usage
();
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -254,7 +254,7 @@ impl DeltaGenerator {
self
.options.continuous_usage_stats
}
pub
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
pub
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
let
mut
usage
=
self
.usage
.clone
();
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
...
...
@@ -377,7 +377,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
}
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
DeltaGenerator
::
get_usage
(
self
)
}
...
...
lib/llm/src/protocols/openai/embeddings.rs
View file @
b6a3b0c6
...
...
@@ -15,7 +15,7 @@ pub use nvext::{NvExt, NvExtProvider};
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingRequest
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -30,17 +30,17 @@ pub struct NvCreateEmbeddingRequest {
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
,
}
impl
NvCreateEmbeddingResponse
{
pub
fn
empty
()
->
Self
{
Self
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
model
:
"embedding"
.to_string
(),
data
:
vec!
[],
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
protocols
::
types
::
EmbeddingUsage
{
prompt_tokens
:
0
,
total_tokens
:
0
,
},
...
...
lib/llm/src/protocols/openai/embeddings/aggregator.rs
View file @
b6a3b0c6
...
...
@@ -133,16 +133,16 @@ mod tests {
use
futures
::
stream
;
fn
create_test_embedding_response
(
embeddings
:
Vec
<
dynamo_
async_openai
::
types
::
Embedding
>
,
embeddings
:
Vec
<
dynamo_
protocols
::
types
::
Embedding
>
,
prompt_tokens
:
u32
,
total_tokens
:
u32
,
)
->
Annotated
<
NvCreateEmbeddingResponse
>
{
let
response
=
NvCreateEmbeddingResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
model
:
"test-model"
.to_string
(),
data
:
embeddings
,
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
protocols
::
types
::
EmbeddingUsage
{
prompt_tokens
,
total_tokens
,
},
...
...
@@ -166,7 +166,7 @@ mod tests {
#[tokio::test]
async
fn
test_single_embedding
()
{
let
embedding
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
0
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
...
...
@@ -188,13 +188,13 @@ mod tests {
#[tokio::test]
async
fn
test_multiple_embeddings
()
{
let
embedding1
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding1
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
0
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
};
let
embedding2
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding2
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
1
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.4
,
0.5
,
0.6
],
...
...
lib/llm/src/protocols/openai/images.rs
View file @
b6a3b0c6
...
...
@@ -14,7 +14,7 @@ pub use nvext::{NvExt, NvExtProvider};
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateImageRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateImageRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateImageRequest
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -28,13 +28,13 @@ pub struct NvCreateImageRequest {
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvImagesResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
ImagesResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
ImagesResponse
,
}
impl
NvImagesResponse
{
pub
fn
empty
()
->
Self
{
Self
{
inner
:
dynamo_
async_openai
::
types
::
ImagesResponse
{
inner
:
dynamo_
protocols
::
types
::
ImagesResponse
{
created
:
0
,
data
:
vec!
[],
},
...
...
lib/llm/src/protocols/openai/nvext.rs
View file @
b6a3b0c6
...
...
@@ -216,7 +216,7 @@ pub struct AgentHints {
// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
// alongside the Anthropic protocol types they originate from.
pub
use
dynamo_
async_openai
::
types
::
anthropic
::{
CacheControl
,
CacheControlType
};
pub
use
dynamo_
protocols
::
types
::
anthropic
::{
CacheControl
,
CacheControlType
};
impl
Default
for
NvExt
{
fn
default
()
->
Self
{
...
...
lib/llm/src/protocols/openai/responses/mod.rs
View file @
b6a3b0c6
...
...
@@ -3,7 +3,7 @@
pub
mod
stream_converter
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
AssistantRole
,
FunctionCallOutput
,
FunctionToolCall
,
IncludeEnum
,
InputContent
,
InputItem
,
InputParam
,
InputRole
,
InputTokenDetails
,
Instructions
,
Item
,
MessageItem
,
OutputItem
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
...
...
@@ -11,7 +11,7 @@ use dynamo_async_openai::types::responses::{
ServiceTier
,
Status
,
Summary
,
SummaryPart
,
TextResponseFormatConfiguration
,
Tool
,
ToolChoiceOptions
,
ToolChoiceParam
,
Truncation
,
};
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
...
...
@@ -38,7 +38,7 @@ use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
pub
struct
NvCreateResponse
{
/// Flattened CreateResponse fields (model, input, temperature, etc.)
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
CreateResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
responses
::
CreateResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -48,7 +48,7 @@ pub struct NvCreateResponse {
pub
struct
NvResponse
{
/// Flattened Response fields.
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
Response
,
pub
inner
:
dynamo_
protocols
::
types
::
responses
::
Response
,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
@@ -144,12 +144,12 @@ impl OpenAIStopConditionsProvider for NvCreateResponse {
/// Convert a Responses API ImageDetail to the Chat Completions ImageDetail.
fn
convert_image_detail
(
detail
:
&
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
,
detail
:
&
dynamo_
protocols
::
types
::
responses
::
ImageDetail
,
)
->
ChatImageDetail
{
match
detail
{
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
Auto
=>
ChatImageDetail
::
Auto
,
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
Low
=>
ChatImageDetail
::
Low
,
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
High
=>
ChatImageDetail
::
High
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
Auto
=>
ChatImageDetail
::
Auto
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
Low
=>
ChatImageDetail
::
Low
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
High
=>
ChatImageDetail
::
High
,
}
}
...
...
@@ -316,7 +316,7 @@ fn convert_input_items_to_messages(
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
fc
.call_id
.clone
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
fc
.name
.clone
(),
arguments
:
fc
.arguments
.clone
(),
},
...
...
@@ -349,10 +349,10 @@ fn convert_input_items_to_messages(
InputItem
::
EasyMessage
(
easy
)
=>
{
// Handle easy input messages based on role
let
content_text
=
match
&
easy
.content
{
dynamo_
async_openai
::
types
::
responses
::
EasyInputContent
::
Text
(
text
)
=>
{
dynamo_
protocols
::
types
::
responses
::
EasyInputContent
::
Text
(
text
)
=>
{
text
.clone
()
}
dynamo_
async_openai
::
types
::
responses
::
EasyInputContent
::
ContentList
(
parts
)
=>
{
dynamo_
protocols
::
types
::
responses
::
EasyInputContent
::
ContentList
(
parts
)
=>
{
convert_input_content_to_text
(
parts
)
}
};
...
...
@@ -737,10 +737,8 @@ pub fn chat_completion_to_response(
// Handle text content -- also parse <tool_call> blocks from models
// that emit tool calls as text (e.g. Qwen3)
let
content_text
=
match
choice
.message.content
{
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
text
))
=>
{
Some
(
text
)
}
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
text
))
=>
Some
(
text
),
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
tracing
::
warn!
(
"Multimodal content in responses API not yet supported, using placeholder"
);
...
...
@@ -880,12 +878,12 @@ pub fn chat_completion_to_response(
#[cfg(test)]
mod
tests
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
CreateResponse
,
FunctionCallOutput
,
FunctionCallOutputItemParam
,
FunctionTool
,
FunctionToolCall
,
ImageDetail
,
InputContent
,
InputImageContent
,
InputItem
,
InputMessage
,
InputParam
,
InputRole
,
InputTextContent
,
Item
,
MessageItem
,
Tool
,
};
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessageContent
,
};
...
...
@@ -1167,19 +1165,17 @@ mod tests {
fn
test_into_nvresponse_from_chat_response
()
{
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"This is a reply"
.to_string
(),
),
),
message
:
dynamo_protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
"This is a reply"
.to_string
(),
)),
refusal
:
None
,
tool_calls
:
None
,
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
...
...
@@ -1225,22 +1221,22 @@ mod tests {
fn
test_response_with_tool_calls
()
{
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_
async_openai
::
types
::
ChatCompletionResponseMessage
{
message
:
dynamo_
protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
None
,
refusal
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
"call_abc"
.into
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
"get_weather"
.into
(),
arguments
:
r#"{"location":"SF"}"#
.into
(),
},
}]),
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
...
...
@@ -1335,8 +1331,8 @@ thinking
#[test]
fn
test_reasoning_effort_mapped_to_chat_completion
()
{
use
dynamo_
async_openai
::
types
::
ReasoningEffort
;
use
dynamo_
async_openai
::
types
::
responses
::
Reasoning
;
use
dynamo_
protocols
::
types
::
ReasoningEffort
;
use
dynamo_
protocols
::
types
::
responses
::
Reasoning
;
let
mut
req
=
make_response_with_input
(
"think hard"
);
req
.inner.reasoning
=
Some
(
Reasoning
{
...
...
@@ -1357,8 +1353,8 @@ thinking
#[test]
fn
test_text_format_json_object_mapped
()
{
use
dynamo_
async_openai
::
types
::
ResponseFormat
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
ResponseFormat
;
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
...
...
@@ -1374,10 +1370,10 @@ thinking
#[test]
fn
test_text_format_json_schema_mapped
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
use
dynamo_
async_openai
::
types
::{
ResponseFormat
,
ResponseFormatJsonSchema
};
use
dynamo_
protocols
::
types
::{
ResponseFormat
,
ResponseFormatJsonSchema
};
let
schema
=
ResponseFormatJsonSchema
{
name
:
"city"
.into
(),
...
...
@@ -1402,7 +1398,7 @@ thinking
#[test]
fn
test_text_format_plain_text_leaves_response_format_none
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
...
...
@@ -1418,8 +1414,8 @@ thinking
#[test]
fn
test_service_tier_mapped_to_chat_completion
()
{
use
dynamo_
async_openai
::
types
::
ServiceTier
as
ChatServiceTier
;
use
dynamo_
async_openai
::
types
::
responses
::
ServiceTier
as
RespServiceTier
;
use
dynamo_
protocols
::
types
::
ServiceTier
as
ChatServiceTier
;
use
dynamo_
protocols
::
types
::
responses
::
ServiceTier
as
RespServiceTier
;
let
mut
req
=
make_response_with_input
(
"priority"
);
req
.inner.service_tier
=
Some
(
RespServiceTier
::
Priority
);
...
...
@@ -1430,8 +1426,8 @@ thinking
#[test]
fn
test_response_echoes_reasoning
()
{
use
dynamo_
async_openai
::
types
::
ReasoningEffort
;
use
dynamo_
async_openai
::
types
::
responses
::
Reasoning
;
use
dynamo_
protocols
::
types
::
ReasoningEffort
;
use
dynamo_
protocols
::
types
::
responses
::
Reasoning
;
let
params
=
ResponseParams
{
reasoning
:
Some
(
Reasoning
{
...
...
@@ -1442,7 +1438,7 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
...
...
@@ -1462,7 +1458,7 @@ thinking
#[test]
fn
test_response_echoes_text_format
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
...
...
@@ -1475,7 +1471,7 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
...
...
@@ -1495,7 +1491,7 @@ thinking
#[test]
fn
test_response_echoes_service_tier
()
{
use
dynamo_
async_openai
::
types
::
responses
::
ServiceTier
;
use
dynamo_
protocols
::
types
::
responses
::
ServiceTier
;
let
params
=
ResponseParams
{
service_tier
:
Some
(
ServiceTier
::
Flex
),
...
...
@@ -1503,7 +1499,7 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
...
...
@@ -1522,7 +1518,7 @@ thinking
#[test]
fn
test_output_message_deserializes_without_id_and_status
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
};
use
dynamo_
protocols
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
};
let
json
=
serde_json
::
json!
({
"role"
:
"assistant"
,
...
...
@@ -1544,7 +1540,7 @@ thinking
#[test]
fn
test_output_message_with_id_and_status_still_works
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
,
OutputStatus
};
use
dynamo_
protocols
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
,
OutputStatus
};
let
json
=
serde_json
::
json!
({
"role"
:
"assistant"
,
...
...
@@ -1567,17 +1563,17 @@ thinking
// ── PR2: include filtering + truncation echo-back tests ──
fn
make_chat_resp_with_text
(
text
:
&
str
)
->
NvCreateChatCompletionResponse
{
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoice
,
ChatCompletionMessageContent
,
ChatCompletionResponseMessage
,
FinishReason
,
};
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[
ChatChoice
{
index
:
0
,
#[allow(deprecated)]
message
:
ChatCompletionResponseMessage
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
tool_calls
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
...
...
@@ -1622,7 +1618,7 @@ thinking
#[test]
fn
test_include_logprobs_kept_when_requested
()
{
use
dynamo_
async_openai
::
types
::
responses
::
IncludeEnum
;
use
dynamo_
protocols
::
types
::
responses
::
IncludeEnum
;
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
params
=
ResponseParams
{
...
...
@@ -1650,7 +1646,7 @@ thinking
#[test]
fn
test_truncation_auto_echoed_back
()
{
use
dynamo_
async_openai
::
types
::
responses
::
Truncation
;
use
dynamo_
protocols
::
types
::
responses
::
Truncation
;
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
params
=
ResponseParams
{
...
...
lib/llm/src/protocols/openai/responses/stream_converter.rs
View file @
b6a3b0c6
...
...
@@ -12,7 +12,7 @@
use
std
::
time
::{
SystemTime
,
UNIX_EPOCH
};
use
axum
::
response
::
sse
::
Event
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
AssistantRole
,
FunctionToolCall
,
InputTokenDetails
,
Instructions
,
OutputContent
,
OutputItem
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
Response
,
ResponseCompletedEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
...
...
@@ -24,7 +24,7 @@ use dynamo_async_openai::types::responses::{
};
use
uuid
::
Uuid
;
use
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
;
use
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
;
use
super
::
ResponseParams
;
use
crate
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionStreamResponse
;
...
...
@@ -673,7 +673,7 @@ fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
mod
tests
{
use
super
::
*
;
use
crate
::
protocols
::
unified
::
ResponsesContext
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
};
...
...
@@ -704,7 +704,7 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
@@ -742,7 +742,7 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
Prev
1
2
3
4
5
6
…
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment