Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b6a3b0c6
Unverified
Commit
b6a3b0c6
authored
Apr 01, 2026
by
ishandhanani
Committed by
GitHub
Apr 01, 2026
Browse files
refactor(2/3): rename dynamo-async-openai to dynamo-protocols (#7565)
parent
c84c0934
Changes
155
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
314 additions
and
328 deletions
+314
-328
lib/llm/src/preprocessor/speculative_prefill.rs
lib/llm/src/preprocessor/speculative_prefill.rs
+1
-1
lib/llm/src/protocols/anthropic/stream_converter.rs
lib/llm/src/protocols/anthropic/stream_converter.rs
+15
-19
lib/llm/src/protocols/anthropic/types.rs
lib/llm/src/protocols/anthropic/types.rs
+23
-25
lib/llm/src/protocols/common.rs
lib/llm/src/protocols/common.rs
+10
-10
lib/llm/src/protocols/common/llm_backend.rs
lib/llm/src/protocols/common/llm_backend.rs
+2
-2
lib/llm/src/protocols/common/preprocessor.rs
lib/llm/src/protocols/common/preprocessor.rs
+1
-1
lib/llm/src/protocols/openai.rs
lib/llm/src/protocols/openai.rs
+5
-5
lib/llm/src/protocols/openai/chat_completions.rs
lib/llm/src/protocols/openai/chat_completions.rs
+6
-6
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+80
-82
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+25
-27
lib/llm/src/protocols/openai/chat_completions/jail.rs
lib/llm/src/protocols/openai/chat_completions/jail.rs
+9
-9
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+30
-30
lib/llm/src/protocols/openai/completions/aggregator.rs
lib/llm/src/protocols/openai/completions/aggregator.rs
+26
-26
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+12
-12
lib/llm/src/protocols/openai/embeddings.rs
lib/llm/src/protocols/openai/embeddings.rs
+4
-4
lib/llm/src/protocols/openai/embeddings/aggregator.rs
lib/llm/src/protocols/openai/embeddings/aggregator.rs
+6
-6
lib/llm/src/protocols/openai/images.rs
lib/llm/src/protocols/openai/images.rs
+3
-3
lib/llm/src/protocols/openai/nvext.rs
lib/llm/src/protocols/openai/nvext.rs
+1
-1
lib/llm/src/protocols/openai/responses/mod.rs
lib/llm/src/protocols/openai/responses/mod.rs
+50
-54
lib/llm/src/protocols/openai/responses/stream_converter.rs
lib/llm/src/protocols/openai/responses/stream_converter.rs
+5
-5
No files found.
lib/llm/src/preprocessor/speculative_prefill.rs
View file @
b6a3b0c6
...
...
@@ -12,7 +12,7 @@ use std::pin::Pin;
use
std
::
sync
::
Arc
;
use
anyhow
::
Result
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageContent
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
};
...
...
lib/llm/src/protocols/anthropic/stream_converter.rs
View file @
b6a3b0c6
...
...
@@ -10,7 +10,7 @@
use
std
::
collections
::
HashSet
;
use
axum
::
response
::
sse
::
Event
;
use
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
;
use
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
;
use
uuid
::
Uuid
;
use
super
::
types
::{
...
...
@@ -136,17 +136,15 @@ impl AnthropicStreamConverter {
// Track finish reason
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
self
.stop_reason
=
Some
(
match
fr
{
dynamo_async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_async_openai
::
types
::
FinishReason
::
Length
=>
{
AnthropicStopReason
::
MaxTokens
}
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
=>
{
dynamo_protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_protocols
::
types
::
FinishReason
::
ToolCalls
=>
{
AnthropicStopReason
::
ToolUse
}
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
{
AnthropicStopReason
::
EndTurn
}
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
{
AnthropicStopReason
::
ToolUse
}
});
...
...
@@ -478,17 +476,15 @@ impl AnthropicStreamConverter {
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
self
.stop_reason
=
Some
(
match
fr
{
dynamo_async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_async_openai
::
types
::
FinishReason
::
Length
=>
{
AnthropicStopReason
::
MaxTokens
}
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
=>
{
dynamo_protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_protocols
::
types
::
FinishReason
::
ToolCalls
=>
{
AnthropicStopReason
::
ToolUse
}
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
{
AnthropicStopReason
::
EndTurn
}
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
{
AnthropicStopReason
::
ToolUse
}
});
...
...
@@ -734,7 +730,7 @@ impl AnthropicStreamConverter {
#[cfg(test)]
mod
tests
{
use
super
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
};
...
...
@@ -742,7 +738,7 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
@@ -777,7 +773,7 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
@@ -932,7 +928,7 @@ mod tests {
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
lib/llm/src/protocols/anthropic/types.rs
View file @
b6a3b0c6
...
...
@@ -3,15 +3,15 @@
//! Anthropic Messages API conversion logic.
//!
//! Pure protocol types live in `dynamo_
async_openai
::types::anthropic`.
//! Pure protocol types live in `dynamo_
protocols
::types::anthropic`.
//! This module provides bidirectional conversion to/from the internal
//! chat completions format used by the Dynamo engine.
// Re-export all pure Anthropic protocol types so existing `use crate::protocols::anthropic::*`
// continues to work throughout dynamo-llm.
pub
use
dynamo_
async_openai
::
types
::
anthropic
::
*
;
pub
use
dynamo_
protocols
::
types
::
anthropic
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
...
...
@@ -96,10 +96,10 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
// Convert stop_sequences -> stop
let
stop
=
req
.stop_sequences
.map
(
dynamo_
async_openai
::
types
::
Stop
::
StringArray
);
.map
(
dynamo_
protocols
::
types
::
Stop
::
StringArray
);
Ok
(
NvCreateChatCompletionRequest
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionRequest
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionRequest
{
messages
,
model
:
req
.model
,
temperature
:
req
.temperature
,
...
...
@@ -109,7 +109,7 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
tools
,
tool_choice
,
stream
:
Some
(
true
),
// Always stream internally
stream_options
:
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
stream_options
:
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
continuous_usage_stats
:
false
,
}),
...
...
@@ -350,7 +350,7 @@ fn convert_assistant_blocks(
tool_calls
.push
(
ChatCompletionMessageToolCall
{
id
:
id
.clone
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
name
.clone
(),
arguments
:
serde_json
::
to_string
(
input
)
.unwrap_or_default
(),
},
...
...
@@ -487,11 +487,11 @@ pub fn chat_completion_to_anthropic_response(
if
let
Some
(
choice
)
=
choice
{
// Map finish_reason
stop_reason
=
choice
.finish_reason
.map
(|
fr
|
match
fr
{
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
async_openai
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
AnthropicStopReason
::
ToolUse
,
});
// Extract tool calls
...
...
@@ -523,8 +523,8 @@ pub fn chat_completion_to_anthropic_response(
// Extract text content
let
text
=
match
choice
.message.content
{
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
t
))
=>
Some
(
t
),
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
t
))
=>
Some
(
t
),
Some
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
tracing
::
warn!
(
"Multimodal (Parts) content in chat completion response replaced with placeholder text in Anthropic conversion."
);
...
...
@@ -821,24 +821,22 @@ mod tests {
#[test]
fn
test_chat_completion_to_anthropic_response
()
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
message
:
dynamo_protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
"Hello!"
.to_string
(),
),
),
)),
refusal
:
None
,
tool_calls
:
None
,
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
}],
...
...
@@ -847,7 +845,7 @@ mod tests {
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
usage
:
Some
(
dynamo_
async_openai
::
types
::
CompletionUsage
{
usage
:
Some
(
dynamo_
protocols
::
types
::
CompletionUsage
{
prompt_tokens
:
10
,
completion_tokens
:
5
,
total_tokens
:
15
,
...
...
lib/llm/src/protocols/common.rs
View file @
b6a3b0c6
...
...
@@ -90,27 +90,27 @@ impl std::str::FromStr for FinishReason {
}
}
impl
From
<
FinishReason
>
for
dynamo_
async_openai
::
types
::
CompletionFinishReason
{
impl
From
<
FinishReason
>
for
dynamo_
protocols
::
types
::
CompletionFinishReason
{
fn
from
(
reason
:
FinishReason
)
->
Self
{
match
reason
{
FinishReason
::
EoS
|
FinishReason
::
Stop
|
FinishReason
::
Cancelled
=>
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
}
FinishReason
::
ContentFilter
=>
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
}
FinishReason
::
Length
=>
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
,
FinishReason
::
Error
(
_
)
=>
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
,
FinishReason
::
Length
=>
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
,
FinishReason
::
Error
(
_
)
=>
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
,
}
}
}
impl
From
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
for
FinishReason
{
fn
from
(
reason
:
dynamo_
async_openai
::
types
::
CompletionFinishReason
)
->
Self
{
impl
From
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
for
FinishReason
{
fn
from
(
reason
:
dynamo_
protocols
::
types
::
CompletionFinishReason
)
->
Self
{
match
reason
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
=>
FinishReason
::
Stop
,
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
=>
FinishReason
::
Length
,
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
=>
FinishReason
::
Stop
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
=>
FinishReason
::
Length
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
=>
{
FinishReason
::
ContentFilter
}
}
...
...
lib/llm/src/protocols/common/llm_backend.rs
View file @
b6a3b0c6
...
...
@@ -6,8 +6,8 @@ use serde::{Deserialize, Serialize};
pub
use
super
::
FinishReason
;
pub
use
super
::
preprocessor
::
PreprocessedRequest
;
use
crate
::
protocols
::
TokenIdType
;
use
dynamo_
async_openai
::
types
::
CompletionUsage
;
use
dynamo_
async_openai
::
types
::
StopReason
;
use
dynamo_
protocols
::
types
::
CompletionUsage
;
use
dynamo_
protocols
::
types
::
StopReason
;
use
dynamo_runtime
::
error
::
DynamoError
;
use
dynamo_runtime
::
protocols
::
maybe_error
::
MaybeError
;
...
...
lib/llm/src/protocols/common/preprocessor.rs
View file @
b6a3b0c6
...
...
@@ -86,7 +86,7 @@ pub struct PrefillResult {
pub
disaggregated_params
:
serde_json
::
Value
,
/// Prompt token details produced during prefill
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
prompt_tokens_details
:
Option
<
dynamo_
async_openai
::
types
::
PromptTokensDetails
>
,
pub
prompt_tokens_details
:
Option
<
dynamo_
protocols
::
types
::
PromptTokensDetails
>
,
}
/// Optional multimodal routing-only data.
...
...
lib/llm/src/protocols/openai.rs
View file @
b6a3b0c6
...
...
@@ -230,15 +230,15 @@ pub(crate) fn convert_backend_top_logprobs(
selected_token
:
&
str
,
selected_token_id
:
TokenIdType
,
selected_logprob
:
f32
,
)
->
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>
{
)
->
Vec
<
dynamo_
protocols
::
types
::
TopLogprobs
>
{
let
mut
found_selected
=
false
;
let
mut
result
:
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>
=
top_lps
let
mut
result
:
Vec
<
dynamo_
protocols
::
types
::
TopLogprobs
>
=
top_lps
.iter
()
.map
(|
top_lp
|
{
let
tok
=
top_lp
.token
.clone
()
.unwrap_or_default
();
found_selected
=
found_selected
||
top_lp
.token_id
==
selected_token_id
;
let
bytes
=
top_lp
.bytes
.clone
()
.or_else
(||
token_to_utf8_bytes
(
&
tok
));
dynamo_
async_openai
::
types
::
TopLogprobs
{
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
tok
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
,
...
...
@@ -247,7 +247,7 @@ pub(crate) fn convert_backend_top_logprobs(
.collect
();
if
!
found_selected
{
result
.push
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
result
.push
(
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
selected_token
.to_string
(),
logprob
:
selected_logprob
,
bytes
:
token_to_utf8_bytes
(
selected_token
),
...
...
@@ -277,7 +277,7 @@ pub trait DeltaGeneratorExt<ResponseType: Send + 'static + std::fmt::Debug>:
fn
is_continuous_usage_enabled
(
&
self
)
->
bool
;
/// Get the current usage statistics with properly calculated total_tokens.
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
;
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
;
/// Returns the request tracker if available, for accessing worker timing metrics.
fn
tracker
(
&
self
)
->
Option
<
std
::
sync
::
Arc
<
common
::
timing
::
RequestTracker
>>
{
...
...
lib/llm/src/protocols/openai/chat_completions.rs
View file @
b6a3b0c6
...
...
@@ -35,7 +35,7 @@ pub use delta::DeltaGenerator;
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateChatCompletionRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionRequest
,
#[serde(flatten,
default)]
pub
common
:
CommonExt
,
...
...
@@ -68,7 +68,7 @@ pub struct NvCreateChatCompletionRequest {
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
pub
struct
NvCreateChatCompletionResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
...
...
@@ -78,7 +78,7 @@ pub struct NvCreateChatCompletionResponse {
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
pub
struct
NvCreateChatCompletionStreamResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
...
...
@@ -202,7 +202,7 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {
// 2) OpenAI `response_format` (applies to assistant content, not tool calls)
if
let
Some
(
response_format
)
=
self
.inner.response_format
.as_ref
()
{
use
dynamo_
async_openai
::
types
::
ResponseFormat
;
use
dynamo_
protocols
::
types
::
ResponseFormat
;
match
response_format
{
ResponseFormat
::
Text
=>
{}
ResponseFormat
::
JsonObject
=>
{
...
...
@@ -289,8 +289,8 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
/// * `None` if no stop conditions are defined.
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
self
.inner.stop
.as_ref
()
.map
(|
stop
|
match
stop
{
dynamo_
async_openai
::
types
::
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
dynamo_
async_openai
::
types
::
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
dynamo_
protocols
::
types
::
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
dynamo_
protocols
::
types
::
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
})
}
...
...
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
b6a3b0c6
This diff is collapsed.
Click to expand it.
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
b6a3b0c6
...
...
@@ -32,7 +32,7 @@ impl NvCreateChatCompletionRequest {
// For non-streaming requests (stream=false), enable usage by default
if
self
.inner.stream_options
.is_none
()
{
self
.inner.stream_options
=
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
continuous_usage_stats
:
false
,
});
...
...
@@ -116,9 +116,9 @@ pub struct DeltaGenerator {
/// Optional system fingerprint for version tracking.
system_fingerprint
:
Option
<
String
>
,
/// Optional service tier information for the response.
service_tier
:
Option
<
dynamo_
async_openai
::
types
::
ServiceTierResponse
>
,
service_tier
:
Option
<
dynamo_
protocols
::
types
::
ServiceTierResponse
>
,
/// Tracks token usage for the completion request.
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
protocols
::
types
::
CompletionUsage
,
/// Counter tracking the number of messages issued.
msg_counter
:
u64
,
/// Configuration options for response generation.
...
...
@@ -147,7 +147,7 @@ impl DeltaGenerator {
// but this will not be an issue until 2106.
let
now
:
u32
=
now
.try_into
()
.expect
(
"timestamp exceeds u32::MAX"
);
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
protocols
::
types
::
CompletionUsage
{
prompt_tokens
:
0
,
completion_tokens
:
0
,
total_tokens
:
0
,
...
...
@@ -194,7 +194,7 @@ impl DeltaGenerator {
token_ids
:
&
[
TokenIdType
],
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
{
)
->
Option
<
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
}
...
...
@@ -216,7 +216,7 @@ impl DeltaGenerator {
.zip
(
top_logprobs
)
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
let
converted
=
convert_backend_top_logprobs
(
&
top_lps
,
t
,
*
tid
,
lp
);
dynamo_
async_openai
::
types
::
ChatCompletionTokenLogprob
{
dynamo_
protocols
::
types
::
ChatCompletionTokenLogprob
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
token_to_utf8_bytes
(
t
),
...
...
@@ -226,7 +226,7 @@ impl DeltaGenerator {
.collect
()
});
Some
(
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
{
Some
(
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
{
content
,
refusal
:
None
,
})
...
...
@@ -242,22 +242,22 @@ impl DeltaGenerator {
/// * `stop_reason` - Optional stop string or token that triggered the stop.
///
/// # Returns
/// * An [`dynamo_
async_openai
::types::CreateChatCompletionStreamResponse`] instance representing the choice.
/// * An [`dynamo_
protocols
::types::CreateChatCompletionStreamResponse`] instance representing the choice.
#[allow(deprecated)]
pub
fn
create_choice
(
&
mut
self
,
index
:
u32
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
,
stop_reason
:
Option
<
dynamo_
async_openai
::
types
::
StopReason
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
FinishReason
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
>
,
stop_reason
:
Option
<
dynamo_
protocols
::
types
::
StopReason
>
,
)
->
NvCreateChatCompletionStreamResponse
{
let
delta
=
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
text
.map
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
),
let
delta
=
dynamo_
protocols
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
text
.map
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
),
function_call
:
None
,
tool_calls
:
None
,
role
:
if
self
.msg_counter
==
0
{
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
)
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
)
}
else
{
None
},
...
...
@@ -265,7 +265,7 @@ impl DeltaGenerator {
reasoning_content
:
None
,
};
let
choice
=
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
let
choice
=
dynamo_
protocols
::
types
::
ChatChoiceStream
{
index
,
delta
,
finish_reason
,
...
...
@@ -279,7 +279,7 @@ impl DeltaGenerator {
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -306,7 +306,7 @@ impl DeltaGenerator {
let
usage
=
self
.get_usage
();
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -330,7 +330,7 @@ impl DeltaGenerator {
self
.options.continuous_usage_stats
}
pub
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
pub
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
let
mut
usage
=
self
.usage
.clone
();
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
...
...
@@ -387,18 +387,16 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
// Map backend finish reasons to OpenAI's finish reasons.
let
finish_reason
=
match
delta
.finish_reason
{
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Stop
)
=>
{
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
}
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
dynamo_protocols
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Stop
)
=>
Some
(
dynamo_protocols
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Length
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Length
)
}
Some
(
common
::
FinishReason
::
Cancelled
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
}
Some
(
common
::
FinishReason
::
ContentFilter
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
)
}
Some
(
common
::
FinishReason
::
Error
(
err_msg
))
=>
{
return
Err
(
anyhow
::
anyhow!
(
err_msg
));
...
...
@@ -490,7 +488,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
}
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
DeltaGenerator
::
get_usage
(
self
)
}
...
...
@@ -502,7 +500,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
#[cfg(test)]
mod
tests
{
use
super
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessage
,
ChatCompletionRequestUserMessageContent
,
CreateChatCompletionRequest
,
};
...
...
lib/llm/src/protocols/openai/chat_completions/jail.rs
View file @
b6a3b0c6
...
...
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
use
async_stream
::
stream
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceLogprobs
,
ChatChoiceStream
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
FinishReason
,
FunctionCallStream
,
Role
,
};
...
...
@@ -116,7 +116,7 @@ fn create_choice_stream(
content
:
&
str
,
tool_calls
:
Option
<
Vec
<
ChatCompletionMessageToolCallChunk
>>
,
finish_reason
:
Option
<
FinishReason
>
,
stop_reason
:
Option
<
dynamo_
async_openai
::
types
::
StopReason
>
,
stop_reason
:
Option
<
dynamo_
protocols
::
types
::
StopReason
>
,
logprobs
:
Option
<
ChatChoiceLogprobs
>
,
)
->
ChatChoiceStream
{
#[allow(deprecated)]
...
...
@@ -124,9 +124,9 @@ fn create_choice_stream(
index
,
delta
:
ChatCompletionStreamResponseDelta
{
role
,
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
.to_string
()
)
,
),
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
content
.to_string
(),
)
),
tool_calls
,
function_call
:
None
,
refusal
:
None
,
...
...
@@ -543,8 +543,8 @@ impl JailedStream {
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
// Jailing only applies to text content
let
text_content
=
match
content
{
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.as_str
()),
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
)
=>
None
,
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.as_str
()),
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
)
=>
None
,
};
if
let
Some
(
text
)
=
text_content
{
...
...
@@ -676,7 +676,7 @@ impl JailedStream {
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
// Create a finalization response carrying forward real stream metadata
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
last_stream_id
,
object
:
"chat.completion.chunk"
.to_string
(),
created
:
last_stream_created
,
...
...
@@ -932,7 +932,7 @@ impl JailedStream {
ChatCompletionMessageToolCallChunk
{
index
,
id
:
Some
(
format!
(
"call-{}"
,
Uuid
::
new_v4
())),
r
#
type
:
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionToolType
::
Function
),
r
#
type
:
Some
(
dynamo_
protocols
::
types
::
ChatCompletionToolType
::
Function
),
function
:
Some
(
FunctionCallStream
{
name
:
Some
(
name
),
arguments
:
Some
(
arguments
),
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
b6a3b0c6
...
...
@@ -27,7 +27,7 @@ pub use delta::DeltaGenerator;
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateCompletionRequest
,
#[serde(flatten)]
pub
common
:
CommonExt
,
...
...
@@ -47,27 +47,27 @@ pub struct NvCreateCompletionRequest {
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
impl
ContentProvider
for
dynamo_
async_openai
::
types
::
Choice
{
impl
ContentProvider
for
dynamo_
protocols
::
types
::
Choice
{
fn
content
(
&
self
)
->
String
{
self
.text
.clone
()
}
}
pub
fn
prompt_to_string
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
String
{
pub
fn
prompt_to_string
(
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
)
->
String
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
.iter
()
.map
(|
&
num
|
num
.to_string
())
.collect
::
<
Vec
<
_
>>
()
.join
(
" "
),
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.iter
()
.map
(|
inner
|
{
inner
...
...
@@ -82,12 +82,12 @@ pub fn prompt_to_string(prompt: &dynamo_async_openai::types::Prompt) -> String {
}
/// Get the batch size from a prompt (1 for single prompts, array length for batch prompts)
pub
fn
get_prompt_batch_size
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
usize
{
pub
fn
get_prompt_batch_size
(
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
)
->
usize
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
_
)
=>
1
,
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
_
)
=>
1
,
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.len
(),
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.len
(),
dynamo_
protocols
::
types
::
Prompt
::
String
(
_
)
=>
1
,
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
_
)
=>
1
,
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.len
(),
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.len
(),
}
}
...
...
@@ -95,21 +95,21 @@ pub fn get_prompt_batch_size(prompt: &dynamo_async_openai::types::Prompt) -> usi
/// For single prompts, returns a clone regardless of index.
/// For batch prompts, returns the prompt at the specified index.
pub
fn
extract_single_prompt
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
,
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
,
index
:
usize
,
)
->
dynamo_
async_openai
::
types
::
Prompt
{
)
->
dynamo_
protocols
::
types
::
Prompt
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
.clone
())
}
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
.clone
())
}
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
arr
[
index
]
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
String
(
arr
[
index
]
.clone
())
}
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
[
index
]
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
[
index
]
.clone
())
}
}
}
...
...
@@ -241,7 +241,7 @@ impl OpenAIStopConditionsProvider for NvCreateCompletionRequest {
}
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
use
dynamo_
async_openai
::
types
::
Stop
;
use
dynamo_
protocols
::
types
::
Stop
;
self
.inner.stop
.as_ref
()
.map
(|
s
|
match
s
{
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
...
...
@@ -287,10 +287,10 @@ impl ResponseFactory {
pub
fn
make_response
(
&
self
,
choice
:
dynamo_
async_openai
::
types
::
Choice
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
choice
:
dynamo_
protocols
::
types
::
Choice
,
usage
:
Option
<
dynamo_
protocols
::
types
::
CompletionUsage
>
,
)
->
NvCreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -361,7 +361,7 @@ impl TryFrom<NvCreateCompletionRequest> for common::CompletionRequest {
}
}
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
dynamo_
async_openai
::
types
::
Choice
{
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
dynamo_
protocols
::
types
::
Choice
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
response
:
common
::
StreamingCompletionResponse
)
->
Result
<
Self
,
Self
::
Error
>
{
...
...
@@ -382,10 +382,10 @@ impl TryFrom<common::StreamingCompletionResponse> for dynamo_async_openai::types
// TODO handle aggregating logprobs
let
logprobs
=
None
;
let
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
=
let
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
=
response
.delta.finish_reason
.map
(
Into
::
into
);
let
choice
=
dynamo_
async_openai
::
types
::
Choice
{
let
choice
=
dynamo_
protocols
::
types
::
Choice
{
text
,
index
,
logprobs
,
...
...
lib/llm/src/protocols/openai/completions/aggregator.rs
View file @
b6a3b0c6
...
...
@@ -20,7 +20,7 @@ pub struct DeltaAggregator {
id
:
String
,
model
:
String
,
created
:
u32
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
usage
:
Option
<
dynamo_
protocols
::
types
::
CompletionUsage
>
,
system_fingerprint
:
Option
<
String
>
,
choices
:
HashMap
<
u32
,
DeltaChoice
>
,
error
:
Option
<
String
>
,
...
...
@@ -31,7 +31,7 @@ struct DeltaChoice {
index
:
u32
,
text
:
String
,
finish_reason
:
Option
<
FinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
,
}
impl
Default
for
DeltaAggregator
{
...
...
@@ -109,14 +109,14 @@ impl DeltaAggregator {
// Handle CompletionFinishReason -> FinishReason conversation
state_choice
.finish_reason
=
match
choice
.finish_reason
{
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
FinishReason
::
Stop
)
}
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
FinishReason
::
Length
)
}
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
,
)
=>
Some
(
FinishReason
::
ContentFilter
),
None
=>
None
,
};
...
...
@@ -124,7 +124,7 @@ impl DeltaAggregator {
// Update logprobs
if
let
Some
(
logprobs
)
=
&
choice
.logprobs
{
let
state_lps
=
state_choice
.logprobs
.get_or_insert
(
dynamo_
async_openai
::
types
::
Logprobs
{
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
Vec
::
new
(),
token_logprobs
:
Vec
::
new
(),
top_logprobs
:
Vec
::
new
(),
...
...
@@ -155,12 +155,12 @@ impl DeltaAggregator {
let
mut
choices
:
Vec
<
_
>
=
aggregator
.choices
.into_values
()
.map
(
dynamo_
async_openai
::
types
::
Choice
::
from
)
.map
(
dynamo_
protocols
::
types
::
Choice
::
from
)
.collect
();
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
aggregator
.id
,
created
:
aggregator
.created
,
usage
:
aggregator
.usage
,
...
...
@@ -179,11 +179,11 @@ impl DeltaAggregator {
}
}
impl
From
<
DeltaChoice
>
for
dynamo_
async_openai
::
types
::
Choice
{
impl
From
<
DeltaChoice
>
for
dynamo_
protocols
::
types
::
Choice
{
fn
from
(
delta
:
DeltaChoice
)
->
Self
{
let
finish_reason
=
delta
.finish_reason
.map
(
Into
::
into
);
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
delta
.index
,
text
:
delta
.text
,
finish_reason
,
...
...
@@ -231,11 +231,11 @@ mod tests {
.and_then
(|
s
|
FinishReason
::
from_str
(
s
)
.ok
())
.map
(
Into
::
into
);
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_
async_openai
::
types
::
Logprobs
{
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
vec!
[
text
.to_string
()],
token_logprobs
:
vec!
[
Some
(
lp
)],
top_logprobs
:
vec!
[
serde_json
::
to_value
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
serde_json
::
to_value
(
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
text
.to_string
(),
logprob
:
lp
,
bytes
:
None
,
...
...
@@ -245,13 +245,13 @@ mod tests {
text_offset
:
vec!
[
0
],
});
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
Choice
{
index
,
text
:
text
.to_string
(),
finish_reason
,
...
...
@@ -319,11 +319,11 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello,"
.to_string
());
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
);
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
);
assert
!
(
choice
.logprobs
.is_none
());
}
...
...
@@ -355,7 +355,7 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello, world!"
.to_string
());
assert_eq!
(
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice
.logprobs
.as_ref
()
.unwrap
()
.tokens
.len
(),
2
);
assert_eq!
(
...
...
@@ -367,23 +367,23 @@ mod tests {
#[tokio::test]
async
fn
test_multiple_choices
()
{
// Create a delta with multiple choices
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
0
,
text
:
"Choice 0"
.to_string
(),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
},
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
1
,
text
:
"Choice 1"
.to_string
(),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
},
],
...
...
@@ -418,11 +418,11 @@ mod tests {
assert_eq!
(
choice0
.text
,
"Choice 0"
.to_string
());
assert_eq!
(
choice0
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice0
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
let
choice1
=
&
response
.inner.choices
[
1
];
...
...
@@ -430,11 +430,11 @@ mod tests {
assert_eq!
(
choice1
.text
,
"Choice 1"
.to_string
());
assert_eq!
(
choice1
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice1
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
}
}
lib/llm/src/protocols/openai/completions/delta.rs
View file @
b6a3b0c6
...
...
@@ -31,7 +31,7 @@ impl NvCreateCompletionRequest {
// For non-streaming requests (stream=false), enable usage by default
if
self
.inner.stream_options
.is_none
()
{
self
.inner.stream_options
=
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
continuous_usage_stats
:
false
,
});
...
...
@@ -95,7 +95,7 @@ pub struct DeltaGenerator {
created
:
u32
,
model
:
String
,
system_fingerprint
:
Option
<
String
>
,
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
protocols
::
types
::
CompletionUsage
,
options
:
DeltaGeneratorOptions
,
tracker
:
Option
<
Arc
<
RequestTracker
>>
,
}
...
...
@@ -113,7 +113,7 @@ impl DeltaGenerator {
// Previously, our home-rolled CompletionUsage impl'd Default
// PR !387 - https://github.com/64bit/async-openai/pull/387
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
protocols
::
types
::
CompletionUsage
{
completion_tokens
:
0
,
prompt_tokens
:
0
,
total_tokens
:
0
,
...
...
@@ -154,7 +154,7 @@ impl DeltaGenerator {
token_ids
:
Vec
<
TokenIdType
>
,
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
{
)
->
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
}
...
...
@@ -181,7 +181,7 @@ impl DeltaGenerator {
.collect
()
});
Some
(
dynamo_
async_openai
::
types
::
Logprobs
{
Some
(
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
toks
.iter
()
.map
(|(
t
,
_
)|
t
.clone
())
.collect
(),
token_logprobs
:
tok_lps
.into_iter
()
.map
(
Some
)
.collect
(),
text_offset
:
vec!
[],
...
...
@@ -193,21 +193,21 @@ impl DeltaGenerator {
&
self
,
index
:
u32
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
,
)
->
NvCreateCompletionResponse
{
// todo - update for tool calling
// According to OpenAI spec: when stream_options.include_usage is true,
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
model
:
self
.model
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
Choice
{
text
:
text
.unwrap_or_default
(),
index
,
finish_reason
,
...
...
@@ -231,7 +231,7 @@ impl DeltaGenerator {
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateCompletionResponse
{
let
usage
=
self
.get_usage
();
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -254,7 +254,7 @@ impl DeltaGenerator {
self
.options.continuous_usage_stats
}
pub
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
pub
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
let
mut
usage
=
self
.usage
.clone
();
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
...
...
@@ -377,7 +377,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
}
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
DeltaGenerator
::
get_usage
(
self
)
}
...
...
lib/llm/src/protocols/openai/embeddings.rs
View file @
b6a3b0c6
...
...
@@ -15,7 +15,7 @@ pub use nvext::{NvExt, NvExtProvider};
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingRequest
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -30,17 +30,17 @@ pub struct NvCreateEmbeddingRequest {
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
,
}
impl
NvCreateEmbeddingResponse
{
pub
fn
empty
()
->
Self
{
Self
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
model
:
"embedding"
.to_string
(),
data
:
vec!
[],
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
protocols
::
types
::
EmbeddingUsage
{
prompt_tokens
:
0
,
total_tokens
:
0
,
},
...
...
lib/llm/src/protocols/openai/embeddings/aggregator.rs
View file @
b6a3b0c6
...
...
@@ -133,16 +133,16 @@ mod tests {
use
futures
::
stream
;
fn
create_test_embedding_response
(
embeddings
:
Vec
<
dynamo_
async_openai
::
types
::
Embedding
>
,
embeddings
:
Vec
<
dynamo_
protocols
::
types
::
Embedding
>
,
prompt_tokens
:
u32
,
total_tokens
:
u32
,
)
->
Annotated
<
NvCreateEmbeddingResponse
>
{
let
response
=
NvCreateEmbeddingResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
model
:
"test-model"
.to_string
(),
data
:
embeddings
,
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
protocols
::
types
::
EmbeddingUsage
{
prompt_tokens
,
total_tokens
,
},
...
...
@@ -166,7 +166,7 @@ mod tests {
#[tokio::test]
async
fn
test_single_embedding
()
{
let
embedding
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
0
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
...
...
@@ -188,13 +188,13 @@ mod tests {
#[tokio::test]
async
fn
test_multiple_embeddings
()
{
let
embedding1
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding1
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
0
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
};
let
embedding2
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding2
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
1
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.4
,
0.5
,
0.6
],
...
...
lib/llm/src/protocols/openai/images.rs
View file @
b6a3b0c6
...
...
@@ -14,7 +14,7 @@ pub use nvext::{NvExt, NvExtProvider};
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateImageRequest
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateImageRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateImageRequest
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -28,13 +28,13 @@ pub struct NvCreateImageRequest {
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvImagesResponse
{
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
ImagesResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
ImagesResponse
,
}
impl
NvImagesResponse
{
pub
fn
empty
()
->
Self
{
Self
{
inner
:
dynamo_
async_openai
::
types
::
ImagesResponse
{
inner
:
dynamo_
protocols
::
types
::
ImagesResponse
{
created
:
0
,
data
:
vec!
[],
},
...
...
lib/llm/src/protocols/openai/nvext.rs
View file @
b6a3b0c6
...
...
@@ -216,7 +216,7 @@ pub struct AgentHints {
// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
// alongside the Anthropic protocol types they originate from.
pub
use
dynamo_
async_openai
::
types
::
anthropic
::{
CacheControl
,
CacheControlType
};
pub
use
dynamo_
protocols
::
types
::
anthropic
::{
CacheControl
,
CacheControlType
};
impl
Default
for
NvExt
{
fn
default
()
->
Self
{
...
...
lib/llm/src/protocols/openai/responses/mod.rs
View file @
b6a3b0c6
...
...
@@ -3,7 +3,7 @@
pub
mod
stream_converter
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
AssistantRole
,
FunctionCallOutput
,
FunctionToolCall
,
IncludeEnum
,
InputContent
,
InputItem
,
InputParam
,
InputRole
,
InputTokenDetails
,
Instructions
,
Item
,
MessageItem
,
OutputItem
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
...
...
@@ -11,7 +11,7 @@ use dynamo_async_openai::types::responses::{
ServiceTier
,
Status
,
Summary
,
SummaryPart
,
TextResponseFormatConfiguration
,
Tool
,
ToolChoiceOptions
,
ToolChoiceParam
,
Truncation
,
};
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
...
...
@@ -38,7 +38,7 @@ use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
pub
struct
NvCreateResponse
{
/// Flattened CreateResponse fields (model, input, temperature, etc.)
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
CreateResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
responses
::
CreateResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -48,7 +48,7 @@ pub struct NvCreateResponse {
pub
struct
NvResponse
{
/// Flattened Response fields.
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
Response
,
pub
inner
:
dynamo_
protocols
::
types
::
responses
::
Response
,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
@@ -144,12 +144,12 @@ impl OpenAIStopConditionsProvider for NvCreateResponse {
/// Convert a Responses API ImageDetail to the Chat Completions ImageDetail.
fn
convert_image_detail
(
detail
:
&
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
,
detail
:
&
dynamo_
protocols
::
types
::
responses
::
ImageDetail
,
)
->
ChatImageDetail
{
match
detail
{
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
Auto
=>
ChatImageDetail
::
Auto
,
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
Low
=>
ChatImageDetail
::
Low
,
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
High
=>
ChatImageDetail
::
High
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
Auto
=>
ChatImageDetail
::
Auto
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
Low
=>
ChatImageDetail
::
Low
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
High
=>
ChatImageDetail
::
High
,
}
}
...
...
@@ -316,7 +316,7 @@ fn convert_input_items_to_messages(
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
fc
.call_id
.clone
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
fc
.name
.clone
(),
arguments
:
fc
.arguments
.clone
(),
},
...
...
@@ -349,10 +349,10 @@ fn convert_input_items_to_messages(
InputItem
::
EasyMessage
(
easy
)
=>
{
// Handle easy input messages based on role
let
content_text
=
match
&
easy
.content
{
dynamo_
async_openai
::
types
::
responses
::
EasyInputContent
::
Text
(
text
)
=>
{
dynamo_
protocols
::
types
::
responses
::
EasyInputContent
::
Text
(
text
)
=>
{
text
.clone
()
}
dynamo_
async_openai
::
types
::
responses
::
EasyInputContent
::
ContentList
(
parts
)
=>
{
dynamo_
protocols
::
types
::
responses
::
EasyInputContent
::
ContentList
(
parts
)
=>
{
convert_input_content_to_text
(
parts
)
}
};
...
...
@@ -737,10 +737,8 @@ pub fn chat_completion_to_response(
// Handle text content -- also parse <tool_call> blocks from models
// that emit tool calls as text (e.g. Qwen3)
let
content_text
=
match
choice
.message.content
{
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
text
))
=>
{
Some
(
text
)
}
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
text
))
=>
Some
(
text
),
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
tracing
::
warn!
(
"Multimodal content in responses API not yet supported, using placeholder"
);
...
...
@@ -880,12 +878,12 @@ pub fn chat_completion_to_response(
#[cfg(test)]
mod
tests
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
CreateResponse
,
FunctionCallOutput
,
FunctionCallOutputItemParam
,
FunctionTool
,
FunctionToolCall
,
ImageDetail
,
InputContent
,
InputImageContent
,
InputItem
,
InputMessage
,
InputParam
,
InputRole
,
InputTextContent
,
Item
,
MessageItem
,
Tool
,
};
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessageContent
,
};
...
...
@@ -1167,19 +1165,17 @@ mod tests {
fn
test_into_nvresponse_from_chat_response
()
{
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
message
:
dynamo_protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
"This is a reply"
.to_string
(),
),
),
)),
refusal
:
None
,
tool_calls
:
None
,
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
...
...
@@ -1225,22 +1221,22 @@ mod tests {
fn
test_response_with_tool_calls
()
{
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_
async_openai
::
types
::
ChatCompletionResponseMessage
{
message
:
dynamo_
protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
None
,
refusal
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
"call_abc"
.into
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
"get_weather"
.into
(),
arguments
:
r#"{"location":"SF"}"#
.into
(),
},
}]),
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
...
...
@@ -1335,8 +1331,8 @@ thinking
#[test]
fn
test_reasoning_effort_mapped_to_chat_completion
()
{
use
dynamo_
async_openai
::
types
::
ReasoningEffort
;
use
dynamo_
async_openai
::
types
::
responses
::
Reasoning
;
use
dynamo_
protocols
::
types
::
ReasoningEffort
;
use
dynamo_
protocols
::
types
::
responses
::
Reasoning
;
let
mut
req
=
make_response_with_input
(
"think hard"
);
req
.inner.reasoning
=
Some
(
Reasoning
{
...
...
@@ -1357,8 +1353,8 @@ thinking
#[test]
fn
test_text_format_json_object_mapped
()
{
use
dynamo_
async_openai
::
types
::
ResponseFormat
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
ResponseFormat
;
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
...
...
@@ -1374,10 +1370,10 @@ thinking
#[test]
fn
test_text_format_json_schema_mapped
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
use
dynamo_
async_openai
::
types
::{
ResponseFormat
,
ResponseFormatJsonSchema
};
use
dynamo_
protocols
::
types
::{
ResponseFormat
,
ResponseFormatJsonSchema
};
let
schema
=
ResponseFormatJsonSchema
{
name
:
"city"
.into
(),
...
...
@@ -1402,7 +1398,7 @@ thinking
#[test]
fn
test_text_format_plain_text_leaves_response_format_none
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
...
...
@@ -1418,8 +1414,8 @@ thinking
#[test]
fn
test_service_tier_mapped_to_chat_completion
()
{
use
dynamo_
async_openai
::
types
::
ServiceTier
as
ChatServiceTier
;
use
dynamo_
async_openai
::
types
::
responses
::
ServiceTier
as
RespServiceTier
;
use
dynamo_
protocols
::
types
::
ServiceTier
as
ChatServiceTier
;
use
dynamo_
protocols
::
types
::
responses
::
ServiceTier
as
RespServiceTier
;
let
mut
req
=
make_response_with_input
(
"priority"
);
req
.inner.service_tier
=
Some
(
RespServiceTier
::
Priority
);
...
...
@@ -1430,8 +1426,8 @@ thinking
#[test]
fn
test_response_echoes_reasoning
()
{
use
dynamo_
async_openai
::
types
::
ReasoningEffort
;
use
dynamo_
async_openai
::
types
::
responses
::
Reasoning
;
use
dynamo_
protocols
::
types
::
ReasoningEffort
;
use
dynamo_
protocols
::
types
::
responses
::
Reasoning
;
let
params
=
ResponseParams
{
reasoning
:
Some
(
Reasoning
{
...
...
@@ -1442,7 +1438,7 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
...
...
@@ -1462,7 +1458,7 @@ thinking
#[test]
fn
test_response_echoes_text_format
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
...
...
@@ -1475,7 +1471,7 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
...
...
@@ -1495,7 +1491,7 @@ thinking
#[test]
fn
test_response_echoes_service_tier
()
{
use
dynamo_
async_openai
::
types
::
responses
::
ServiceTier
;
use
dynamo_
protocols
::
types
::
responses
::
ServiceTier
;
let
params
=
ResponseParams
{
service_tier
:
Some
(
ServiceTier
::
Flex
),
...
...
@@ -1503,7 +1499,7 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
...
...
@@ -1522,7 +1518,7 @@ thinking
#[test]
fn
test_output_message_deserializes_without_id_and_status
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
};
use
dynamo_
protocols
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
};
let
json
=
serde_json
::
json!
({
"role"
:
"assistant"
,
...
...
@@ -1544,7 +1540,7 @@ thinking
#[test]
fn
test_output_message_with_id_and_status_still_works
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
,
OutputStatus
};
use
dynamo_
protocols
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
,
OutputStatus
};
let
json
=
serde_json
::
json!
({
"role"
:
"assistant"
,
...
...
@@ -1567,17 +1563,17 @@ thinking
// ── PR2: include filtering + truncation echo-back tests ──
fn
make_chat_resp_with_text
(
text
:
&
str
)
->
NvCreateChatCompletionResponse
{
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoice
,
ChatCompletionMessageContent
,
ChatCompletionResponseMessage
,
FinishReason
,
};
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[
ChatChoice
{
index
:
0
,
#[allow(deprecated)]
message
:
ChatCompletionResponseMessage
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
tool_calls
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
...
...
@@ -1622,7 +1618,7 @@ thinking
#[test]
fn
test_include_logprobs_kept_when_requested
()
{
use
dynamo_
async_openai
::
types
::
responses
::
IncludeEnum
;
use
dynamo_
protocols
::
types
::
responses
::
IncludeEnum
;
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
params
=
ResponseParams
{
...
...
@@ -1650,7 +1646,7 @@ thinking
#[test]
fn
test_truncation_auto_echoed_back
()
{
use
dynamo_
async_openai
::
types
::
responses
::
Truncation
;
use
dynamo_
protocols
::
types
::
responses
::
Truncation
;
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
params
=
ResponseParams
{
...
...
lib/llm/src/protocols/openai/responses/stream_converter.rs
View file @
b6a3b0c6
...
...
@@ -12,7 +12,7 @@
use
std
::
time
::{
SystemTime
,
UNIX_EPOCH
};
use
axum
::
response
::
sse
::
Event
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
AssistantRole
,
FunctionToolCall
,
InputTokenDetails
,
Instructions
,
OutputContent
,
OutputItem
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
Response
,
ResponseCompletedEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
...
...
@@ -24,7 +24,7 @@ use dynamo_async_openai::types::responses::{
};
use
uuid
::
Uuid
;
use
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
;
use
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
;
use
super
::
ResponseParams
;
use
crate
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionStreamResponse
;
...
...
@@ -673,7 +673,7 @@ fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
mod
tests
{
use
super
::
*
;
use
crate
::
protocols
::
unified
::
ResponsesContext
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
};
...
...
@@ -704,7 +704,7 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
@@ -742,7 +742,7 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
...
...
Prev
1
2
3
4
5
6
…
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment