Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b6a3b0c6
Unverified
Commit
b6a3b0c6
authored
Apr 01, 2026
by
ishandhanani
Committed by
GitHub
Apr 01, 2026
Browse files
refactor(2/3): rename dynamo-async-openai to dynamo-protocols (#7565)
parent
c84c0934
Changes
155
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
314 additions
and
328 deletions
+314
-328
lib/llm/src/preprocessor/speculative_prefill.rs
lib/llm/src/preprocessor/speculative_prefill.rs
+1
-1
lib/llm/src/protocols/anthropic/stream_converter.rs
lib/llm/src/protocols/anthropic/stream_converter.rs
+15
-19
lib/llm/src/protocols/anthropic/types.rs
lib/llm/src/protocols/anthropic/types.rs
+23
-25
lib/llm/src/protocols/common.rs
lib/llm/src/protocols/common.rs
+10
-10
lib/llm/src/protocols/common/llm_backend.rs
lib/llm/src/protocols/common/llm_backend.rs
+2
-2
lib/llm/src/protocols/common/preprocessor.rs
lib/llm/src/protocols/common/preprocessor.rs
+1
-1
lib/llm/src/protocols/openai.rs
lib/llm/src/protocols/openai.rs
+5
-5
lib/llm/src/protocols/openai/chat_completions.rs
lib/llm/src/protocols/openai/chat_completions.rs
+6
-6
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+80
-82
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+25
-27
lib/llm/src/protocols/openai/chat_completions/jail.rs
lib/llm/src/protocols/openai/chat_completions/jail.rs
+9
-9
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+30
-30
lib/llm/src/protocols/openai/completions/aggregator.rs
lib/llm/src/protocols/openai/completions/aggregator.rs
+26
-26
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+12
-12
lib/llm/src/protocols/openai/embeddings.rs
lib/llm/src/protocols/openai/embeddings.rs
+4
-4
lib/llm/src/protocols/openai/embeddings/aggregator.rs
lib/llm/src/protocols/openai/embeddings/aggregator.rs
+6
-6
lib/llm/src/protocols/openai/images.rs
lib/llm/src/protocols/openai/images.rs
+3
-3
lib/llm/src/protocols/openai/nvext.rs
lib/llm/src/protocols/openai/nvext.rs
+1
-1
lib/llm/src/protocols/openai/responses/mod.rs
lib/llm/src/protocols/openai/responses/mod.rs
+50
-54
lib/llm/src/protocols/openai/responses/stream_converter.rs
lib/llm/src/protocols/openai/responses/stream_converter.rs
+5
-5
No files found.
lib/llm/src/preprocessor/speculative_prefill.rs
View file @
b6a3b0c6
...
@@ -12,7 +12,7 @@ use std::pin::Pin;
...
@@ -12,7 +12,7 @@ use std::pin::Pin;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
anyhow
::
Result
;
use
anyhow
::
Result
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageContent
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionMessageContent
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
};
};
...
...
lib/llm/src/protocols/anthropic/stream_converter.rs
View file @
b6a3b0c6
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
use
std
::
collections
::
HashSet
;
use
std
::
collections
::
HashSet
;
use
axum
::
response
::
sse
::
Event
;
use
axum
::
response
::
sse
::
Event
;
use
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
;
use
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
;
use
uuid
::
Uuid
;
use
uuid
::
Uuid
;
use
super
::
types
::{
use
super
::
types
::{
...
@@ -136,17 +136,15 @@ impl AnthropicStreamConverter {
...
@@ -136,17 +136,15 @@ impl AnthropicStreamConverter {
// Track finish reason
// Track finish reason
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
self
.stop_reason
=
Some
(
match
fr
{
self
.stop_reason
=
Some
(
match
fr
{
dynamo_async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_async_openai
::
types
::
FinishReason
::
Length
=>
{
dynamo_protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
AnthropicStopReason
::
MaxTokens
dynamo_protocols
::
types
::
FinishReason
::
ToolCalls
=>
{
}
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
=>
{
AnthropicStopReason
::
ToolUse
AnthropicStopReason
::
ToolUse
}
}
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
{
AnthropicStopReason
::
EndTurn
AnthropicStopReason
::
EndTurn
}
}
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
{
AnthropicStopReason
::
ToolUse
AnthropicStopReason
::
ToolUse
}
}
});
});
...
@@ -478,17 +476,15 @@ impl AnthropicStreamConverter {
...
@@ -478,17 +476,15 @@ impl AnthropicStreamConverter {
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
self
.stop_reason
=
Some
(
match
fr
{
self
.stop_reason
=
Some
(
match
fr
{
dynamo_async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_async_openai
::
types
::
FinishReason
::
Length
=>
{
dynamo_protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
AnthropicStopReason
::
MaxTokens
dynamo_protocols
::
types
::
FinishReason
::
ToolCalls
=>
{
}
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
=>
{
AnthropicStopReason
::
ToolUse
AnthropicStopReason
::
ToolUse
}
}
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
{
AnthropicStopReason
::
EndTurn
AnthropicStopReason
::
EndTurn
}
}
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
{
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
{
AnthropicStopReason
::
ToolUse
AnthropicStopReason
::
ToolUse
}
}
});
});
...
@@ -734,7 +730,7 @@ impl AnthropicStreamConverter {
...
@@ -734,7 +730,7 @@ impl AnthropicStreamConverter {
#[cfg(test)]
#[cfg(test)]
mod
tests
{
mod
tests
{
use
super
::
*
;
use
super
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
};
};
...
@@ -742,7 +738,7 @@ mod tests {
...
@@ -742,7 +738,7 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
index
:
0
,
...
@@ -777,7 +773,7 @@ mod tests {
...
@@ -777,7 +773,7 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
index
:
0
,
...
@@ -932,7 +928,7 @@ mod tests {
...
@@ -932,7 +928,7 @@ mod tests {
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
index
:
0
,
...
...
lib/llm/src/protocols/anthropic/types.rs
View file @
b6a3b0c6
...
@@ -3,15 +3,15 @@
...
@@ -3,15 +3,15 @@
//! Anthropic Messages API conversion logic.
//! Anthropic Messages API conversion logic.
//!
//!
//! Pure protocol types live in `dynamo_
async_openai
::types::anthropic`.
//! Pure protocol types live in `dynamo_
protocols
::types::anthropic`.
//! This module provides bidirectional conversion to/from the internal
//! This module provides bidirectional conversion to/from the internal
//! chat completions format used by the Dynamo engine.
//! chat completions format used by the Dynamo engine.
// Re-export all pure Anthropic protocol types so existing `use crate::protocols::anthropic::*`
// Re-export all pure Anthropic protocol types so existing `use crate::protocols::anthropic::*`
// continues to work throughout dynamo-llm.
// continues to work throughout dynamo-llm.
pub
use
dynamo_
async_openai
::
types
::
anthropic
::
*
;
pub
use
dynamo_
protocols
::
types
::
anthropic
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
...
@@ -96,10 +96,10 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
...
@@ -96,10 +96,10 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
// Convert stop_sequences -> stop
// Convert stop_sequences -> stop
let
stop
=
req
let
stop
=
req
.stop_sequences
.stop_sequences
.map
(
dynamo_
async_openai
::
types
::
Stop
::
StringArray
);
.map
(
dynamo_
protocols
::
types
::
Stop
::
StringArray
);
Ok
(
NvCreateChatCompletionRequest
{
Ok
(
NvCreateChatCompletionRequest
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionRequest
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionRequest
{
messages
,
messages
,
model
:
req
.model
,
model
:
req
.model
,
temperature
:
req
.temperature
,
temperature
:
req
.temperature
,
...
@@ -109,7 +109,7 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
...
@@ -109,7 +109,7 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
tools
,
tools
,
tool_choice
,
tool_choice
,
stream
:
Some
(
true
),
// Always stream internally
stream
:
Some
(
true
),
// Always stream internally
stream_options
:
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
stream_options
:
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
include_usage
:
true
,
continuous_usage_stats
:
false
,
continuous_usage_stats
:
false
,
}),
}),
...
@@ -350,7 +350,7 @@ fn convert_assistant_blocks(
...
@@ -350,7 +350,7 @@ fn convert_assistant_blocks(
tool_calls
.push
(
ChatCompletionMessageToolCall
{
tool_calls
.push
(
ChatCompletionMessageToolCall
{
id
:
id
.clone
(),
id
:
id
.clone
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
name
.clone
(),
name
:
name
.clone
(),
arguments
:
serde_json
::
to_string
(
input
)
.unwrap_or_default
(),
arguments
:
serde_json
::
to_string
(
input
)
.unwrap_or_default
(),
},
},
...
@@ -487,11 +487,11 @@ pub fn chat_completion_to_anthropic_response(
...
@@ -487,11 +487,11 @@ pub fn chat_completion_to_anthropic_response(
if
let
Some
(
choice
)
=
choice
{
if
let
Some
(
choice
)
=
choice
{
// Map finish_reason
// Map finish_reason
stop_reason
=
choice
.finish_reason
.map
(|
fr
|
match
fr
{
stop_reason
=
choice
.finish_reason
.map
(|
fr
|
match
fr
{
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
protocols
::
types
::
FinishReason
::
Stop
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
async_openai
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_
protocols
::
types
::
FinishReason
::
Length
=>
AnthropicStopReason
::
MaxTokens
,
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
protocols
::
types
::
FinishReason
::
ToolCalls
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
=>
AnthropicStopReason
::
EndTurn
,
dynamo_
async_openai
::
types
::
FinishReason
::
FunctionCall
=>
AnthropicStopReason
::
ToolUse
,
dynamo_
protocols
::
types
::
FinishReason
::
FunctionCall
=>
AnthropicStopReason
::
ToolUse
,
});
});
// Extract tool calls
// Extract tool calls
...
@@ -523,8 +523,8 @@ pub fn chat_completion_to_anthropic_response(
...
@@ -523,8 +523,8 @@ pub fn chat_completion_to_anthropic_response(
// Extract text content
// Extract text content
let
text
=
match
choice
.message.content
{
let
text
=
match
choice
.message.content
{
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
t
))
=>
Some
(
t
),
Some
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
t
))
=>
Some
(
t
),
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
tracing
::
warn!
(
tracing
::
warn!
(
"Multimodal (Parts) content in chat completion response replaced with placeholder text in Anthropic conversion."
"Multimodal (Parts) content in chat completion response replaced with placeholder text in Anthropic conversion."
);
);
...
@@ -821,24 +821,22 @@ mod tests {
...
@@ -821,24 +821,22 @@ mod tests {
#[test]
#[test]
fn
test_chat_completion_to_anthropic_response
()
{
fn
test_chat_completion_to_anthropic_response
()
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
message
:
dynamo_protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"Hello!"
.to_string
(),
"Hello!"
.to_string
(),
),
)),
),
refusal
:
None
,
refusal
:
None
,
tool_calls
:
None
,
tool_calls
:
None
,
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
function_call
:
None
,
audio
:
None
,
audio
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
},
},
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
logprobs
:
None
,
}],
}],
...
@@ -847,7 +845,7 @@ mod tests {
...
@@ -847,7 +845,7 @@ mod tests {
service_tier
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
object
:
"chat.completion"
.to_string
(),
usage
:
Some
(
dynamo_
async_openai
::
types
::
CompletionUsage
{
usage
:
Some
(
dynamo_
protocols
::
types
::
CompletionUsage
{
prompt_tokens
:
10
,
prompt_tokens
:
10
,
completion_tokens
:
5
,
completion_tokens
:
5
,
total_tokens
:
15
,
total_tokens
:
15
,
...
...
lib/llm/src/protocols/common.rs
View file @
b6a3b0c6
...
@@ -90,27 +90,27 @@ impl std::str::FromStr for FinishReason {
...
@@ -90,27 +90,27 @@ impl std::str::FromStr for FinishReason {
}
}
}
}
impl
From
<
FinishReason
>
for
dynamo_
async_openai
::
types
::
CompletionFinishReason
{
impl
From
<
FinishReason
>
for
dynamo_
protocols
::
types
::
CompletionFinishReason
{
fn
from
(
reason
:
FinishReason
)
->
Self
{
fn
from
(
reason
:
FinishReason
)
->
Self
{
match
reason
{
match
reason
{
FinishReason
::
EoS
|
FinishReason
::
Stop
|
FinishReason
::
Cancelled
=>
{
FinishReason
::
EoS
|
FinishReason
::
Stop
|
FinishReason
::
Cancelled
=>
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
}
}
FinishReason
::
ContentFilter
=>
{
FinishReason
::
ContentFilter
=>
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
}
}
FinishReason
::
Length
=>
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
,
FinishReason
::
Length
=>
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
,
FinishReason
::
Error
(
_
)
=>
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
,
FinishReason
::
Error
(
_
)
=>
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
,
}
}
}
}
}
}
impl
From
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
for
FinishReason
{
impl
From
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
for
FinishReason
{
fn
from
(
reason
:
dynamo_
async_openai
::
types
::
CompletionFinishReason
)
->
Self
{
fn
from
(
reason
:
dynamo_
protocols
::
types
::
CompletionFinishReason
)
->
Self
{
match
reason
{
match
reason
{
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
=>
FinishReason
::
Stop
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
=>
FinishReason
::
Stop
,
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
=>
FinishReason
::
Length
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
=>
FinishReason
::
Length
,
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
=>
{
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
=>
{
FinishReason
::
ContentFilter
FinishReason
::
ContentFilter
}
}
}
}
...
...
lib/llm/src/protocols/common/llm_backend.rs
View file @
b6a3b0c6
...
@@ -6,8 +6,8 @@ use serde::{Deserialize, Serialize};
...
@@ -6,8 +6,8 @@ use serde::{Deserialize, Serialize};
pub
use
super
::
FinishReason
;
pub
use
super
::
FinishReason
;
pub
use
super
::
preprocessor
::
PreprocessedRequest
;
pub
use
super
::
preprocessor
::
PreprocessedRequest
;
use
crate
::
protocols
::
TokenIdType
;
use
crate
::
protocols
::
TokenIdType
;
use
dynamo_
async_openai
::
types
::
CompletionUsage
;
use
dynamo_
protocols
::
types
::
CompletionUsage
;
use
dynamo_
async_openai
::
types
::
StopReason
;
use
dynamo_
protocols
::
types
::
StopReason
;
use
dynamo_runtime
::
error
::
DynamoError
;
use
dynamo_runtime
::
error
::
DynamoError
;
use
dynamo_runtime
::
protocols
::
maybe_error
::
MaybeError
;
use
dynamo_runtime
::
protocols
::
maybe_error
::
MaybeError
;
...
...
lib/llm/src/protocols/common/preprocessor.rs
View file @
b6a3b0c6
...
@@ -86,7 +86,7 @@ pub struct PrefillResult {
...
@@ -86,7 +86,7 @@ pub struct PrefillResult {
pub
disaggregated_params
:
serde_json
::
Value
,
pub
disaggregated_params
:
serde_json
::
Value
,
/// Prompt token details produced during prefill
/// Prompt token details produced during prefill
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
prompt_tokens_details
:
Option
<
dynamo_
async_openai
::
types
::
PromptTokensDetails
>
,
pub
prompt_tokens_details
:
Option
<
dynamo_
protocols
::
types
::
PromptTokensDetails
>
,
}
}
/// Optional multimodal routing-only data.
/// Optional multimodal routing-only data.
...
...
lib/llm/src/protocols/openai.rs
View file @
b6a3b0c6
...
@@ -230,15 +230,15 @@ pub(crate) fn convert_backend_top_logprobs(
...
@@ -230,15 +230,15 @@ pub(crate) fn convert_backend_top_logprobs(
selected_token
:
&
str
,
selected_token
:
&
str
,
selected_token_id
:
TokenIdType
,
selected_token_id
:
TokenIdType
,
selected_logprob
:
f32
,
selected_logprob
:
f32
,
)
->
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>
{
)
->
Vec
<
dynamo_
protocols
::
types
::
TopLogprobs
>
{
let
mut
found_selected
=
false
;
let
mut
found_selected
=
false
;
let
mut
result
:
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>
=
top_lps
let
mut
result
:
Vec
<
dynamo_
protocols
::
types
::
TopLogprobs
>
=
top_lps
.iter
()
.iter
()
.map
(|
top_lp
|
{
.map
(|
top_lp
|
{
let
tok
=
top_lp
.token
.clone
()
.unwrap_or_default
();
let
tok
=
top_lp
.token
.clone
()
.unwrap_or_default
();
found_selected
=
found_selected
||
top_lp
.token_id
==
selected_token_id
;
found_selected
=
found_selected
||
top_lp
.token_id
==
selected_token_id
;
let
bytes
=
top_lp
.bytes
.clone
()
.or_else
(||
token_to_utf8_bytes
(
&
tok
));
let
bytes
=
top_lp
.bytes
.clone
()
.or_else
(||
token_to_utf8_bytes
(
&
tok
));
dynamo_
async_openai
::
types
::
TopLogprobs
{
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
tok
,
token
:
tok
,
logprob
:
top_lp
.logprob
as
f32
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
,
bytes
,
...
@@ -247,7 +247,7 @@ pub(crate) fn convert_backend_top_logprobs(
...
@@ -247,7 +247,7 @@ pub(crate) fn convert_backend_top_logprobs(
.collect
();
.collect
();
if
!
found_selected
{
if
!
found_selected
{
result
.push
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
result
.push
(
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
selected_token
.to_string
(),
token
:
selected_token
.to_string
(),
logprob
:
selected_logprob
,
logprob
:
selected_logprob
,
bytes
:
token_to_utf8_bytes
(
selected_token
),
bytes
:
token_to_utf8_bytes
(
selected_token
),
...
@@ -277,7 +277,7 @@ pub trait DeltaGeneratorExt<ResponseType: Send + 'static + std::fmt::Debug>:
...
@@ -277,7 +277,7 @@ pub trait DeltaGeneratorExt<ResponseType: Send + 'static + std::fmt::Debug>:
fn
is_continuous_usage_enabled
(
&
self
)
->
bool
;
fn
is_continuous_usage_enabled
(
&
self
)
->
bool
;
/// Get the current usage statistics with properly calculated total_tokens.
/// Get the current usage statistics with properly calculated total_tokens.
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
;
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
;
/// Returns the request tracker if available, for accessing worker timing metrics.
/// Returns the request tracker if available, for accessing worker timing metrics.
fn
tracker
(
&
self
)
->
Option
<
std
::
sync
::
Arc
<
common
::
timing
::
RequestTracker
>>
{
fn
tracker
(
&
self
)
->
Option
<
std
::
sync
::
Arc
<
common
::
timing
::
RequestTracker
>>
{
...
...
lib/llm/src/protocols/openai/chat_completions.rs
View file @
b6a3b0c6
...
@@ -35,7 +35,7 @@ pub use delta::DeltaGenerator;
...
@@ -35,7 +35,7 @@ pub use delta::DeltaGenerator;
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateChatCompletionRequest
{
pub
struct
NvCreateChatCompletionRequest
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionRequest
,
#[serde(flatten,
default)]
#[serde(flatten,
default)]
pub
common
:
CommonExt
,
pub
common
:
CommonExt
,
...
@@ -68,7 +68,7 @@ pub struct NvCreateChatCompletionRequest {
...
@@ -68,7 +68,7 @@ pub struct NvCreateChatCompletionRequest {
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
pub
struct
NvCreateChatCompletionResponse
{
pub
struct
NvCreateChatCompletionResponse
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
...
@@ -78,7 +78,7 @@ pub struct NvCreateChatCompletionResponse {
...
@@ -78,7 +78,7 @@ pub struct NvCreateChatCompletionResponse {
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
pub
struct
NvCreateChatCompletionStreamResponse
{
pub
struct
NvCreateChatCompletionStreamResponse
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
...
@@ -202,7 +202,7 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {
...
@@ -202,7 +202,7 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {
// 2) OpenAI `response_format` (applies to assistant content, not tool calls)
// 2) OpenAI `response_format` (applies to assistant content, not tool calls)
if
let
Some
(
response_format
)
=
self
.inner.response_format
.as_ref
()
{
if
let
Some
(
response_format
)
=
self
.inner.response_format
.as_ref
()
{
use
dynamo_
async_openai
::
types
::
ResponseFormat
;
use
dynamo_
protocols
::
types
::
ResponseFormat
;
match
response_format
{
match
response_format
{
ResponseFormat
::
Text
=>
{}
ResponseFormat
::
Text
=>
{}
ResponseFormat
::
JsonObject
=>
{
ResponseFormat
::
JsonObject
=>
{
...
@@ -289,8 +289,8 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
...
@@ -289,8 +289,8 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
/// * `None` if no stop conditions are defined.
/// * `None` if no stop conditions are defined.
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
self
.inner.stop
.as_ref
()
.map
(|
stop
|
match
stop
{
self
.inner.stop
.as_ref
()
.map
(|
stop
|
match
stop
{
dynamo_
async_openai
::
types
::
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
dynamo_
protocols
::
types
::
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
dynamo_
async_openai
::
types
::
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
dynamo_
protocols
::
types
::
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
})
})
}
}
...
...
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
b6a3b0c6
This diff is collapsed.
Click to expand it.
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
b6a3b0c6
...
@@ -32,7 +32,7 @@ impl NvCreateChatCompletionRequest {
...
@@ -32,7 +32,7 @@ impl NvCreateChatCompletionRequest {
// For non-streaming requests (stream=false), enable usage by default
// For non-streaming requests (stream=false), enable usage by default
if
self
.inner.stream_options
.is_none
()
{
if
self
.inner.stream_options
.is_none
()
{
self
.inner.stream_options
=
self
.inner.stream_options
=
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
include_usage
:
true
,
continuous_usage_stats
:
false
,
continuous_usage_stats
:
false
,
});
});
...
@@ -116,9 +116,9 @@ pub struct DeltaGenerator {
...
@@ -116,9 +116,9 @@ pub struct DeltaGenerator {
/// Optional system fingerprint for version tracking.
/// Optional system fingerprint for version tracking.
system_fingerprint
:
Option
<
String
>
,
system_fingerprint
:
Option
<
String
>
,
/// Optional service tier information for the response.
/// Optional service tier information for the response.
service_tier
:
Option
<
dynamo_
async_openai
::
types
::
ServiceTierResponse
>
,
service_tier
:
Option
<
dynamo_
protocols
::
types
::
ServiceTierResponse
>
,
/// Tracks token usage for the completion request.
/// Tracks token usage for the completion request.
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
protocols
::
types
::
CompletionUsage
,
/// Counter tracking the number of messages issued.
/// Counter tracking the number of messages issued.
msg_counter
:
u64
,
msg_counter
:
u64
,
/// Configuration options for response generation.
/// Configuration options for response generation.
...
@@ -147,7 +147,7 @@ impl DeltaGenerator {
...
@@ -147,7 +147,7 @@ impl DeltaGenerator {
// but this will not be an issue until 2106.
// but this will not be an issue until 2106.
let
now
:
u32
=
now
.try_into
()
.expect
(
"timestamp exceeds u32::MAX"
);
let
now
:
u32
=
now
.try_into
()
.expect
(
"timestamp exceeds u32::MAX"
);
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
protocols
::
types
::
CompletionUsage
{
prompt_tokens
:
0
,
prompt_tokens
:
0
,
completion_tokens
:
0
,
completion_tokens
:
0
,
total_tokens
:
0
,
total_tokens
:
0
,
...
@@ -194,7 +194,7 @@ impl DeltaGenerator {
...
@@ -194,7 +194,7 @@ impl DeltaGenerator {
token_ids
:
&
[
TokenIdType
],
token_ids
:
&
[
TokenIdType
],
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
{
)
->
Option
<
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
return
None
;
}
}
...
@@ -216,7 +216,7 @@ impl DeltaGenerator {
...
@@ -216,7 +216,7 @@ impl DeltaGenerator {
.zip
(
top_logprobs
)
.zip
(
top_logprobs
)
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
.map
(|(((
t
,
tid
),
lp
),
top_lps
)|
{
let
converted
=
convert_backend_top_logprobs
(
&
top_lps
,
t
,
*
tid
,
lp
);
let
converted
=
convert_backend_top_logprobs
(
&
top_lps
,
t
,
*
tid
,
lp
);
dynamo_
async_openai
::
types
::
ChatCompletionTokenLogprob
{
dynamo_
protocols
::
types
::
ChatCompletionTokenLogprob
{
token
:
t
.clone
(),
token
:
t
.clone
(),
logprob
:
lp
,
logprob
:
lp
,
bytes
:
token_to_utf8_bytes
(
t
),
bytes
:
token_to_utf8_bytes
(
t
),
...
@@ -226,7 +226,7 @@ impl DeltaGenerator {
...
@@ -226,7 +226,7 @@ impl DeltaGenerator {
.collect
()
.collect
()
});
});
Some
(
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
{
Some
(
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
{
content
,
content
,
refusal
:
None
,
refusal
:
None
,
})
})
...
@@ -242,22 +242,22 @@ impl DeltaGenerator {
...
@@ -242,22 +242,22 @@ impl DeltaGenerator {
/// * `stop_reason` - Optional stop string or token that triggered the stop.
/// * `stop_reason` - Optional stop string or token that triggered the stop.
///
///
/// # Returns
/// # Returns
/// * An [`dynamo_
async_openai
::types::CreateChatCompletionStreamResponse`] instance representing the choice.
/// * An [`dynamo_
protocols
::types::CreateChatCompletionStreamResponse`] instance representing the choice.
#[allow(deprecated)]
#[allow(deprecated)]
pub
fn
create_choice
(
pub
fn
create_choice
(
&
mut
self
,
&
mut
self
,
index
:
u32
,
index
:
u32
,
text
:
Option
<
String
>
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
FinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
ChatChoiceLogprobs
>
,
stop_reason
:
Option
<
dynamo_
async_openai
::
types
::
StopReason
>
,
stop_reason
:
Option
<
dynamo_
protocols
::
types
::
StopReason
>
,
)
->
NvCreateChatCompletionStreamResponse
{
)
->
NvCreateChatCompletionStreamResponse
{
let
delta
=
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
let
delta
=
dynamo_
protocols
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
text
.map
(
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
),
content
:
text
.map
(
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
),
function_call
:
None
,
function_call
:
None
,
tool_calls
:
None
,
tool_calls
:
None
,
role
:
if
self
.msg_counter
==
0
{
role
:
if
self
.msg_counter
==
0
{
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
)
Some
(
dynamo_
protocols
::
types
::
Role
::
Assistant
)
}
else
{
}
else
{
None
None
},
},
...
@@ -265,7 +265,7 @@ impl DeltaGenerator {
...
@@ -265,7 +265,7 @@ impl DeltaGenerator {
reasoning_content
:
None
,
reasoning_content
:
None
,
};
};
let
choice
=
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
let
choice
=
dynamo_
protocols
::
types
::
ChatChoiceStream
{
index
,
index
,
delta
,
delta
,
finish_reason
,
finish_reason
,
...
@@ -279,7 +279,7 @@ impl DeltaGenerator {
...
@@ -279,7 +279,7 @@ impl DeltaGenerator {
// all intermediate chunks should have usage: null
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
// The final usage chunk will be sent separately with empty choices
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
created
:
self
.created
,
...
@@ -306,7 +306,7 @@ impl DeltaGenerator {
...
@@ -306,7 +306,7 @@ impl DeltaGenerator {
let
usage
=
self
.get_usage
();
let
usage
=
self
.get_usage
();
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
created
:
self
.created
,
...
@@ -330,7 +330,7 @@ impl DeltaGenerator {
...
@@ -330,7 +330,7 @@ impl DeltaGenerator {
self
.options.continuous_usage_stats
self
.options.continuous_usage_stats
}
}
pub
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
pub
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
let
mut
usage
=
self
.usage
.clone
();
let
mut
usage
=
self
.usage
.clone
();
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
usage
...
@@ -387,18 +387,16 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
...
@@ -387,18 +387,16 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
// Map backend finish reasons to OpenAI's finish reasons.
// Map backend finish reasons to OpenAI's finish reasons.
let
finish_reason
=
match
delta
.finish_reason
{
let
finish_reason
=
match
delta
.finish_reason
{
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
dynamo_protocols
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Stop
)
=>
{
Some
(
common
::
FinishReason
::
Stop
)
=>
Some
(
dynamo_protocols
::
types
::
FinishReason
::
Stop
),
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
}
Some
(
common
::
FinishReason
::
Length
)
=>
{
Some
(
common
::
FinishReason
::
Length
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Length
)
}
}
Some
(
common
::
FinishReason
::
Cancelled
)
=>
{
Some
(
common
::
FinishReason
::
Cancelled
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
Stop
)
}
}
Some
(
common
::
FinishReason
::
ContentFilter
)
=>
{
Some
(
common
::
FinishReason
::
ContentFilter
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
)
Some
(
dynamo_
protocols
::
types
::
FinishReason
::
ContentFilter
)
}
}
Some
(
common
::
FinishReason
::
Error
(
err_msg
))
=>
{
Some
(
common
::
FinishReason
::
Error
(
err_msg
))
=>
{
return
Err
(
anyhow
::
anyhow!
(
err_msg
));
return
Err
(
anyhow
::
anyhow!
(
err_msg
));
...
@@ -490,7 +488,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
...
@@ -490,7 +488,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
}
}
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
DeltaGenerator
::
get_usage
(
self
)
DeltaGenerator
::
get_usage
(
self
)
}
}
...
@@ -502,7 +500,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
...
@@ -502,7 +500,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
#[cfg(test)]
#[cfg(test)]
mod
tests
{
mod
tests
{
use
super
::
*
;
use
super
::
*
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessage
,
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessage
,
ChatCompletionRequestUserMessageContent
,
CreateChatCompletionRequest
,
ChatCompletionRequestUserMessageContent
,
CreateChatCompletionRequest
,
};
};
...
...
lib/llm/src/protocols/openai/chat_completions/jail.rs
View file @
b6a3b0c6
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-License-Identifier: Apache-2.0
use
async_stream
::
stream
;
use
async_stream
::
stream
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceLogprobs
,
ChatChoiceStream
,
ChatCompletionMessageToolCallChunk
,
ChatChoiceLogprobs
,
ChatChoiceStream
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
FinishReason
,
FunctionCallStream
,
Role
,
ChatCompletionStreamResponseDelta
,
FinishReason
,
FunctionCallStream
,
Role
,
};
};
...
@@ -116,7 +116,7 @@ fn create_choice_stream(
...
@@ -116,7 +116,7 @@ fn create_choice_stream(
content
:
&
str
,
content
:
&
str
,
tool_calls
:
Option
<
Vec
<
ChatCompletionMessageToolCallChunk
>>
,
tool_calls
:
Option
<
Vec
<
ChatCompletionMessageToolCallChunk
>>
,
finish_reason
:
Option
<
FinishReason
>
,
finish_reason
:
Option
<
FinishReason
>
,
stop_reason
:
Option
<
dynamo_
async_openai
::
types
::
StopReason
>
,
stop_reason
:
Option
<
dynamo_
protocols
::
types
::
StopReason
>
,
logprobs
:
Option
<
ChatChoiceLogprobs
>
,
logprobs
:
Option
<
ChatChoiceLogprobs
>
,
)
->
ChatChoiceStream
{
)
->
ChatChoiceStream
{
#[allow(deprecated)]
#[allow(deprecated)]
...
@@ -124,9 +124,9 @@ fn create_choice_stream(
...
@@ -124,9 +124,9 @@ fn create_choice_stream(
index
,
index
,
delta
:
ChatCompletionStreamResponseDelta
{
delta
:
ChatCompletionStreamResponseDelta
{
role
,
role
,
content
:
Some
(
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
.to_string
()
)
,
content
.to_string
(),
),
)
),
tool_calls
,
tool_calls
,
function_call
:
None
,
function_call
:
None
,
refusal
:
None
,
refusal
:
None
,
...
@@ -543,8 +543,8 @@ impl JailedStream {
...
@@ -543,8 +543,8 @@ impl JailedStream {
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
// Jailing only applies to text content
// Jailing only applies to text content
let
text_content
=
match
content
{
let
text_content
=
match
content
{
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.as_str
()),
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.as_str
()),
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
)
=>
None
,
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
)
=>
None
,
};
};
if
let
Some
(
text
)
=
text_content
{
if
let
Some
(
text
)
=
text_content
{
...
@@ -676,7 +676,7 @@ impl JailedStream {
...
@@ -676,7 +676,7 @@ impl JailedStream {
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
// Create a finalization response carrying forward real stream metadata
// Create a finalization response carrying forward real stream metadata
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
last_stream_id
,
id
:
last_stream_id
,
object
:
"chat.completion.chunk"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
created
:
last_stream_created
,
created
:
last_stream_created
,
...
@@ -932,7 +932,7 @@ impl JailedStream {
...
@@ -932,7 +932,7 @@ impl JailedStream {
ChatCompletionMessageToolCallChunk
{
ChatCompletionMessageToolCallChunk
{
index
,
index
,
id
:
Some
(
format!
(
"call-{}"
,
Uuid
::
new_v4
())),
id
:
Some
(
format!
(
"call-{}"
,
Uuid
::
new_v4
())),
r
#
type
:
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionToolType
::
Function
),
r
#
type
:
Some
(
dynamo_
protocols
::
types
::
ChatCompletionToolType
::
Function
),
function
:
Some
(
FunctionCallStream
{
function
:
Some
(
FunctionCallStream
{
name
:
Some
(
name
),
name
:
Some
(
name
),
arguments
:
Some
(
arguments
),
arguments
:
Some
(
arguments
),
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
b6a3b0c6
...
@@ -27,7 +27,7 @@ pub use delta::DeltaGenerator;
...
@@ -27,7 +27,7 @@ pub use delta::DeltaGenerator;
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionRequest
{
pub
struct
NvCreateCompletionRequest
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateCompletionRequest
,
#[serde(flatten)]
#[serde(flatten)]
pub
common
:
CommonExt
,
pub
common
:
CommonExt
,
...
@@ -47,27 +47,27 @@ pub struct NvCreateCompletionRequest {
...
@@ -47,27 +47,27 @@ pub struct NvCreateCompletionRequest {
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionResponse
{
pub
struct
NvCreateCompletionResponse
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
impl
ContentProvider
for
dynamo_
async_openai
::
types
::
Choice
{
impl
ContentProvider
for
dynamo_
protocols
::
types
::
Choice
{
fn
content
(
&
self
)
->
String
{
fn
content
(
&
self
)
->
String
{
self
.text
.clone
()
self
.text
.clone
()
}
}
}
}
pub
fn
prompt_to_string
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
String
{
pub
fn
prompt_to_string
(
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
)
->
String
{
match
prompt
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
.iter
()
.iter
()
.map
(|
&
num
|
num
.to_string
())
.map
(|
&
num
|
num
.to_string
())
.collect
::
<
Vec
<
_
>>
()
.collect
::
<
Vec
<
_
>>
()
.join
(
" "
),
.join
(
" "
),
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.iter
()
.iter
()
.map
(|
inner
|
{
.map
(|
inner
|
{
inner
inner
...
@@ -82,12 +82,12 @@ pub fn prompt_to_string(prompt: &dynamo_async_openai::types::Prompt) -> String {
...
@@ -82,12 +82,12 @@ pub fn prompt_to_string(prompt: &dynamo_async_openai::types::Prompt) -> String {
}
}
/// Get the batch size from a prompt (1 for single prompts, array length for batch prompts)
/// Get the batch size from a prompt (1 for single prompts, array length for batch prompts)
pub
fn
get_prompt_batch_size
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
usize
{
pub
fn
get_prompt_batch_size
(
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
)
->
usize
{
match
prompt
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
_
)
=>
1
,
dynamo_
protocols
::
types
::
Prompt
::
String
(
_
)
=>
1
,
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
_
)
=>
1
,
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
_
)
=>
1
,
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.len
(),
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.len
(),
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.len
(),
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.len
(),
}
}
}
}
...
@@ -95,21 +95,21 @@ pub fn get_prompt_batch_size(prompt: &dynamo_async_openai::types::Prompt) -> usi
...
@@ -95,21 +95,21 @@ pub fn get_prompt_batch_size(prompt: &dynamo_async_openai::types::Prompt) -> usi
/// For single prompts, returns a clone regardless of index.
/// For single prompts, returns a clone regardless of index.
/// For batch prompts, returns the prompt at the specified index.
/// For batch prompts, returns the prompt at the specified index.
pub
fn
extract_single_prompt
(
pub
fn
extract_single_prompt
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
,
prompt
:
&
dynamo_
protocols
::
types
::
Prompt
,
index
:
usize
,
index
:
usize
,
)
->
dynamo_
async_openai
::
types
::
Prompt
{
)
->
dynamo_
protocols
::
types
::
Prompt
{
match
prompt
{
match
prompt
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
String
(
s
.clone
())
}
}
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
.clone
())
}
}
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
arr
[
index
]
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
String
(
arr
[
index
]
.clone
())
}
}
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
dynamo_
protocols
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
[
index
]
.clone
())
dynamo_
protocols
::
types
::
Prompt
::
IntegerArray
(
arr
[
index
]
.clone
())
}
}
}
}
}
}
...
@@ -241,7 +241,7 @@ impl OpenAIStopConditionsProvider for NvCreateCompletionRequest {
...
@@ -241,7 +241,7 @@ impl OpenAIStopConditionsProvider for NvCreateCompletionRequest {
}
}
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
fn
get_stop
(
&
self
)
->
Option
<
Vec
<
String
>>
{
use
dynamo_
async_openai
::
types
::
Stop
;
use
dynamo_
protocols
::
types
::
Stop
;
self
.inner.stop
.as_ref
()
.map
(|
s
|
match
s
{
self
.inner.stop
.as_ref
()
.map
(|
s
|
match
s
{
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
Stop
::
String
(
s
)
=>
vec!
[
s
.clone
()],
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
Stop
::
StringArray
(
arr
)
=>
arr
.clone
(),
...
@@ -287,10 +287,10 @@ impl ResponseFactory {
...
@@ -287,10 +287,10 @@ impl ResponseFactory {
pub
fn
make_response
(
pub
fn
make_response
(
&
self
,
&
self
,
choice
:
dynamo_
async_openai
::
types
::
Choice
,
choice
:
dynamo_
protocols
::
types
::
Choice
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
usage
:
Option
<
dynamo_
protocols
::
types
::
CompletionUsage
>
,
)
->
NvCreateCompletionResponse
{
)
->
NvCreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
created
:
self
.created
,
...
@@ -361,7 +361,7 @@ impl TryFrom<NvCreateCompletionRequest> for common::CompletionRequest {
...
@@ -361,7 +361,7 @@ impl TryFrom<NvCreateCompletionRequest> for common::CompletionRequest {
}
}
}
}
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
dynamo_
async_openai
::
types
::
Choice
{
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
dynamo_
protocols
::
types
::
Choice
{
type
Error
=
anyhow
::
Error
;
type
Error
=
anyhow
::
Error
;
fn
try_from
(
response
:
common
::
StreamingCompletionResponse
)
->
Result
<
Self
,
Self
::
Error
>
{
fn
try_from
(
response
:
common
::
StreamingCompletionResponse
)
->
Result
<
Self
,
Self
::
Error
>
{
...
@@ -382,10 +382,10 @@ impl TryFrom<common::StreamingCompletionResponse> for dynamo_async_openai::types
...
@@ -382,10 +382,10 @@ impl TryFrom<common::StreamingCompletionResponse> for dynamo_async_openai::types
// TODO handle aggregating logprobs
// TODO handle aggregating logprobs
let
logprobs
=
None
;
let
logprobs
=
None
;
let
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
=
let
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
=
response
.delta.finish_reason
.map
(
Into
::
into
);
response
.delta.finish_reason
.map
(
Into
::
into
);
let
choice
=
dynamo_
async_openai
::
types
::
Choice
{
let
choice
=
dynamo_
protocols
::
types
::
Choice
{
text
,
text
,
index
,
index
,
logprobs
,
logprobs
,
...
...
lib/llm/src/protocols/openai/completions/aggregator.rs
View file @
b6a3b0c6
...
@@ -20,7 +20,7 @@ pub struct DeltaAggregator {
...
@@ -20,7 +20,7 @@ pub struct DeltaAggregator {
id
:
String
,
id
:
String
,
model
:
String
,
model
:
String
,
created
:
u32
,
created
:
u32
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
usage
:
Option
<
dynamo_
protocols
::
types
::
CompletionUsage
>
,
system_fingerprint
:
Option
<
String
>
,
system_fingerprint
:
Option
<
String
>
,
choices
:
HashMap
<
u32
,
DeltaChoice
>
,
choices
:
HashMap
<
u32
,
DeltaChoice
>
,
error
:
Option
<
String
>
,
error
:
Option
<
String
>
,
...
@@ -31,7 +31,7 @@ struct DeltaChoice {
...
@@ -31,7 +31,7 @@ struct DeltaChoice {
index
:
u32
,
index
:
u32
,
text
:
String
,
text
:
String
,
finish_reason
:
Option
<
FinishReason
>
,
finish_reason
:
Option
<
FinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
,
}
}
impl
Default
for
DeltaAggregator
{
impl
Default
for
DeltaAggregator
{
...
@@ -109,14 +109,14 @@ impl DeltaAggregator {
...
@@ -109,14 +109,14 @@ impl DeltaAggregator {
// Handle CompletionFinishReason -> FinishReason conversation
// Handle CompletionFinishReason -> FinishReason conversation
state_choice
.finish_reason
=
match
choice
.finish_reason
{
state_choice
.finish_reason
=
match
choice
.finish_reason
{
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
FinishReason
::
Stop
)
Some
(
FinishReason
::
Stop
)
}
}
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
FinishReason
::
Length
)
Some
(
FinishReason
::
Length
)
}
}
Some
(
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
,
dynamo_
protocols
::
types
::
CompletionFinishReason
::
ContentFilter
,
)
=>
Some
(
FinishReason
::
ContentFilter
),
)
=>
Some
(
FinishReason
::
ContentFilter
),
None
=>
None
,
None
=>
None
,
};
};
...
@@ -124,7 +124,7 @@ impl DeltaAggregator {
...
@@ -124,7 +124,7 @@ impl DeltaAggregator {
// Update logprobs
// Update logprobs
if
let
Some
(
logprobs
)
=
&
choice
.logprobs
{
if
let
Some
(
logprobs
)
=
&
choice
.logprobs
{
let
state_lps
=
state_choice
.logprobs
.get_or_insert
(
let
state_lps
=
state_choice
.logprobs
.get_or_insert
(
dynamo_
async_openai
::
types
::
Logprobs
{
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
Vec
::
new
(),
tokens
:
Vec
::
new
(),
token_logprobs
:
Vec
::
new
(),
token_logprobs
:
Vec
::
new
(),
top_logprobs
:
Vec
::
new
(),
top_logprobs
:
Vec
::
new
(),
...
@@ -155,12 +155,12 @@ impl DeltaAggregator {
...
@@ -155,12 +155,12 @@ impl DeltaAggregator {
let
mut
choices
:
Vec
<
_
>
=
aggregator
let
mut
choices
:
Vec
<
_
>
=
aggregator
.choices
.choices
.into_values
()
.into_values
()
.map
(
dynamo_
async_openai
::
types
::
Choice
::
from
)
.map
(
dynamo_
protocols
::
types
::
Choice
::
from
)
.collect
();
.collect
();
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
aggregator
.id
,
id
:
aggregator
.id
,
created
:
aggregator
.created
,
created
:
aggregator
.created
,
usage
:
aggregator
.usage
,
usage
:
aggregator
.usage
,
...
@@ -179,11 +179,11 @@ impl DeltaAggregator {
...
@@ -179,11 +179,11 @@ impl DeltaAggregator {
}
}
}
}
impl
From
<
DeltaChoice
>
for
dynamo_
async_openai
::
types
::
Choice
{
impl
From
<
DeltaChoice
>
for
dynamo_
protocols
::
types
::
Choice
{
fn
from
(
delta
:
DeltaChoice
)
->
Self
{
fn
from
(
delta
:
DeltaChoice
)
->
Self
{
let
finish_reason
=
delta
.finish_reason
.map
(
Into
::
into
);
let
finish_reason
=
delta
.finish_reason
.map
(
Into
::
into
);
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
delta
.index
,
index
:
delta
.index
,
text
:
delta
.text
,
text
:
delta
.text
,
finish_reason
,
finish_reason
,
...
@@ -231,11 +231,11 @@ mod tests {
...
@@ -231,11 +231,11 @@ mod tests {
.and_then
(|
s
|
FinishReason
::
from_str
(
s
)
.ok
())
.and_then
(|
s
|
FinishReason
::
from_str
(
s
)
.ok
())
.map
(
Into
::
into
);
.map
(
Into
::
into
);
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_
async_openai
::
types
::
Logprobs
{
let
logprobs
=
logprob
.map
(|
lp
|
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
vec!
[
text
.to_string
()],
tokens
:
vec!
[
text
.to_string
()],
token_logprobs
:
vec!
[
Some
(
lp
)],
token_logprobs
:
vec!
[
Some
(
lp
)],
top_logprobs
:
vec!
[
top_logprobs
:
vec!
[
serde_json
::
to_value
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
serde_json
::
to_value
(
dynamo_
protocols
::
types
::
TopLogprobs
{
token
:
text
.to_string
(),
token
:
text
.to_string
(),
logprob
:
lp
,
logprob
:
lp
,
bytes
:
None
,
bytes
:
None
,
...
@@ -245,13 +245,13 @@ mod tests {
...
@@ -245,13 +245,13 @@ mod tests {
text_offset
:
vec!
[
0
],
text_offset
:
vec!
[
0
],
});
});
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
created
:
1234567890
,
usage
:
None
,
usage
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
Choice
{
index
,
index
,
text
:
text
.to_string
(),
text
:
text
.to_string
(),
finish_reason
,
finish_reason
,
...
@@ -319,11 +319,11 @@ mod tests {
...
@@ -319,11 +319,11 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello,"
.to_string
());
assert_eq!
(
choice
.text
,
"Hello,"
.to_string
());
assert_eq!
(
assert_eq!
(
choice
.finish_reason
,
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
);
);
assert_eq!
(
assert_eq!
(
choice
.finish_reason
,
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Length
)
);
);
assert
!
(
choice
.logprobs
.is_none
());
assert
!
(
choice
.logprobs
.is_none
());
}
}
...
@@ -355,7 +355,7 @@ mod tests {
...
@@ -355,7 +355,7 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello, world!"
.to_string
());
assert_eq!
(
choice
.text
,
"Hello, world!"
.to_string
());
assert_eq!
(
assert_eq!
(
choice
.finish_reason
,
choice
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
);
assert_eq!
(
choice
.logprobs
.as_ref
()
.unwrap
()
.tokens
.len
(),
2
);
assert_eq!
(
choice
.logprobs
.as_ref
()
.unwrap
()
.tokens
.len
(),
2
);
assert_eq!
(
assert_eq!
(
...
@@ -367,23 +367,23 @@ mod tests {
...
@@ -367,23 +367,23 @@ mod tests {
#[tokio::test]
#[tokio::test]
async
fn
test_multiple_choices
()
{
async
fn
test_multiple_choices
()
{
// Create a delta with multiple choices
// Create a delta with multiple choices
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
created
:
1234567890
,
usage
:
None
,
usage
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
0
,
index
:
0
,
text
:
"Choice 0"
.to_string
(),
text
:
"Choice 0"
.to_string
(),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
logprobs
:
None
,
},
},
dynamo_
async_openai
::
types
::
Choice
{
dynamo_
protocols
::
types
::
Choice
{
index
:
1
,
index
:
1
,
text
:
"Choice 1"
.to_string
(),
text
:
"Choice 1"
.to_string
(),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
logprobs
:
None
,
},
},
],
],
...
@@ -418,11 +418,11 @@ mod tests {
...
@@ -418,11 +418,11 @@ mod tests {
assert_eq!
(
choice0
.text
,
"Choice 0"
.to_string
());
assert_eq!
(
choice0
.text
,
"Choice 0"
.to_string
());
assert_eq!
(
assert_eq!
(
choice0
.finish_reason
,
choice0
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
);
assert_eq!
(
assert_eq!
(
choice0
.finish_reason
,
choice0
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
);
let
choice1
=
&
response
.inner.choices
[
1
];
let
choice1
=
&
response
.inner.choices
[
1
];
...
@@ -430,11 +430,11 @@ mod tests {
...
@@ -430,11 +430,11 @@ mod tests {
assert_eq!
(
choice1
.text
,
"Choice 1"
.to_string
());
assert_eq!
(
choice1
.text
,
"Choice 1"
.to_string
());
assert_eq!
(
assert_eq!
(
choice1
.finish_reason
,
choice1
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
);
assert_eq!
(
assert_eq!
(
choice1
.finish_reason
,
choice1
.finish_reason
,
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
protocols
::
types
::
CompletionFinishReason
::
Stop
)
);
);
}
}
}
}
lib/llm/src/protocols/openai/completions/delta.rs
View file @
b6a3b0c6
...
@@ -31,7 +31,7 @@ impl NvCreateCompletionRequest {
...
@@ -31,7 +31,7 @@ impl NvCreateCompletionRequest {
// For non-streaming requests (stream=false), enable usage by default
// For non-streaming requests (stream=false), enable usage by default
if
self
.inner.stream_options
.is_none
()
{
if
self
.inner.stream_options
.is_none
()
{
self
.inner.stream_options
=
self
.inner.stream_options
=
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionStreamOptions
{
Some
(
dynamo_
protocols
::
types
::
ChatCompletionStreamOptions
{
include_usage
:
true
,
include_usage
:
true
,
continuous_usage_stats
:
false
,
continuous_usage_stats
:
false
,
});
});
...
@@ -95,7 +95,7 @@ pub struct DeltaGenerator {
...
@@ -95,7 +95,7 @@ pub struct DeltaGenerator {
created
:
u32
,
created
:
u32
,
model
:
String
,
model
:
String
,
system_fingerprint
:
Option
<
String
>
,
system_fingerprint
:
Option
<
String
>
,
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
protocols
::
types
::
CompletionUsage
,
options
:
DeltaGeneratorOptions
,
options
:
DeltaGeneratorOptions
,
tracker
:
Option
<
Arc
<
RequestTracker
>>
,
tracker
:
Option
<
Arc
<
RequestTracker
>>
,
}
}
...
@@ -113,7 +113,7 @@ impl DeltaGenerator {
...
@@ -113,7 +113,7 @@ impl DeltaGenerator {
// Previously, our home-rolled CompletionUsage impl'd Default
// Previously, our home-rolled CompletionUsage impl'd Default
// PR !387 - https://github.com/64bit/async-openai/pull/387
// PR !387 - https://github.com/64bit/async-openai/pull/387
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
protocols
::
types
::
CompletionUsage
{
completion_tokens
:
0
,
completion_tokens
:
0
,
prompt_tokens
:
0
,
prompt_tokens
:
0
,
total_tokens
:
0
,
total_tokens
:
0
,
...
@@ -154,7 +154,7 @@ impl DeltaGenerator {
...
@@ -154,7 +154,7 @@ impl DeltaGenerator {
token_ids
:
Vec
<
TokenIdType
>
,
token_ids
:
Vec
<
TokenIdType
>
,
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
{
)
->
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
return
None
;
}
}
...
@@ -181,7 +181,7 @@ impl DeltaGenerator {
...
@@ -181,7 +181,7 @@ impl DeltaGenerator {
.collect
()
.collect
()
});
});
Some
(
dynamo_
async_openai
::
types
::
Logprobs
{
Some
(
dynamo_
protocols
::
types
::
Logprobs
{
tokens
:
toks
.iter
()
.map
(|(
t
,
_
)|
t
.clone
())
.collect
(),
tokens
:
toks
.iter
()
.map
(|(
t
,
_
)|
t
.clone
())
.collect
(),
token_logprobs
:
tok_lps
.into_iter
()
.map
(
Some
)
.collect
(),
token_logprobs
:
tok_lps
.into_iter
()
.map
(
Some
)
.collect
(),
text_offset
:
vec!
[],
text_offset
:
vec!
[],
...
@@ -193,21 +193,21 @@ impl DeltaGenerator {
...
@@ -193,21 +193,21 @@ impl DeltaGenerator {
&
self
,
&
self
,
index
:
u32
,
index
:
u32
,
text
:
Option
<
String
>
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
,
finish_reason
:
Option
<
dynamo_
protocols
::
types
::
CompletionFinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
logprobs
:
Option
<
dynamo_
protocols
::
types
::
Logprobs
>
,
)
->
NvCreateCompletionResponse
{
)
->
NvCreateCompletionResponse
{
// todo - update for tool calling
// todo - update for tool calling
// According to OpenAI spec: when stream_options.include_usage is true,
// According to OpenAI spec: when stream_options.include_usage is true,
// all intermediate chunks should have usage: null
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
// The final usage chunk will be sent separately with empty choices
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
created
:
self
.created
,
model
:
self
.model
.clone
(),
model
:
self
.model
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
Choice
{
text
:
text
.unwrap_or_default
(),
text
:
text
.unwrap_or_default
(),
index
,
index
,
finish_reason
,
finish_reason
,
...
@@ -231,7 +231,7 @@ impl DeltaGenerator {
...
@@ -231,7 +231,7 @@ impl DeltaGenerator {
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateCompletionResponse
{
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateCompletionResponse
{
let
usage
=
self
.get_usage
();
let
usage
=
self
.get_usage
();
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
protocols
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
created
:
self
.created
,
...
@@ -254,7 +254,7 @@ impl DeltaGenerator {
...
@@ -254,7 +254,7 @@ impl DeltaGenerator {
self
.options.continuous_usage_stats
self
.options.continuous_usage_stats
}
}
pub
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
pub
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
let
mut
usage
=
self
.usage
.clone
();
let
mut
usage
=
self
.usage
.clone
();
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
.total_tokens
=
usage
.prompt_tokens
.saturating_add
(
usage
.completion_tokens
);
usage
usage
...
@@ -377,7 +377,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
...
@@ -377,7 +377,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
DeltaGenerator
::
is_continuous_usage_enabled
(
self
)
}
}
fn
get_usage
(
&
self
)
->
dynamo_
async_openai
::
types
::
CompletionUsage
{
fn
get_usage
(
&
self
)
->
dynamo_
protocols
::
types
::
CompletionUsage
{
DeltaGenerator
::
get_usage
(
self
)
DeltaGenerator
::
get_usage
(
self
)
}
}
...
...
lib/llm/src/protocols/openai/embeddings.rs
View file @
b6a3b0c6
...
@@ -15,7 +15,7 @@ pub use nvext::{NvExt, NvExtProvider};
...
@@ -15,7 +15,7 @@ pub use nvext::{NvExt, NvExtProvider};
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingRequest
{
pub
struct
NvCreateEmbeddingRequest
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingRequest
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
pub
nvext
:
Option
<
NvExt
>
,
...
@@ -30,17 +30,17 @@ pub struct NvCreateEmbeddingRequest {
...
@@ -30,17 +30,17 @@ pub struct NvCreateEmbeddingRequest {
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
#[derive(ToSchema,
Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingResponse
{
pub
struct
NvCreateEmbeddingResponse
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
,
}
}
impl
NvCreateEmbeddingResponse
{
impl
NvCreateEmbeddingResponse
{
pub
fn
empty
()
->
Self
{
pub
fn
empty
()
->
Self
{
Self
{
Self
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
object
:
"list"
.to_string
(),
model
:
"embedding"
.to_string
(),
model
:
"embedding"
.to_string
(),
data
:
vec!
[],
data
:
vec!
[],
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
protocols
::
types
::
EmbeddingUsage
{
prompt_tokens
:
0
,
prompt_tokens
:
0
,
total_tokens
:
0
,
total_tokens
:
0
,
},
},
...
...
lib/llm/src/protocols/openai/embeddings/aggregator.rs
View file @
b6a3b0c6
...
@@ -133,16 +133,16 @@ mod tests {
...
@@ -133,16 +133,16 @@ mod tests {
use
futures
::
stream
;
use
futures
::
stream
;
fn
create_test_embedding_response
(
fn
create_test_embedding_response
(
embeddings
:
Vec
<
dynamo_
async_openai
::
types
::
Embedding
>
,
embeddings
:
Vec
<
dynamo_
protocols
::
types
::
Embedding
>
,
prompt_tokens
:
u32
,
prompt_tokens
:
u32
,
total_tokens
:
u32
,
total_tokens
:
u32
,
)
->
Annotated
<
NvCreateEmbeddingResponse
>
{
)
->
Annotated
<
NvCreateEmbeddingResponse
>
{
let
response
=
NvCreateEmbeddingResponse
{
let
response
=
NvCreateEmbeddingResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
object
:
"list"
.to_string
(),
model
:
"test-model"
.to_string
(),
model
:
"test-model"
.to_string
(),
data
:
embeddings
,
data
:
embeddings
,
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
protocols
::
types
::
EmbeddingUsage
{
prompt_tokens
,
prompt_tokens
,
total_tokens
,
total_tokens
,
},
},
...
@@ -166,7 +166,7 @@ mod tests {
...
@@ -166,7 +166,7 @@ mod tests {
#[tokio::test]
#[tokio::test]
async
fn
test_single_embedding
()
{
async
fn
test_single_embedding
()
{
let
embedding
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
0
,
index
:
0
,
object
:
"embedding"
.to_string
(),
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
...
@@ -188,13 +188,13 @@ mod tests {
...
@@ -188,13 +188,13 @@ mod tests {
#[tokio::test]
#[tokio::test]
async
fn
test_multiple_embeddings
()
{
async
fn
test_multiple_embeddings
()
{
let
embedding1
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding1
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
0
,
index
:
0
,
object
:
"embedding"
.to_string
(),
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
};
};
let
embedding2
=
dynamo_
async_openai
::
types
::
Embedding
{
let
embedding2
=
dynamo_
protocols
::
types
::
Embedding
{
index
:
1
,
index
:
1
,
object
:
"embedding"
.to_string
(),
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.4
,
0.5
,
0.6
],
embedding
:
vec!
[
0.4
,
0.5
,
0.6
],
...
...
lib/llm/src/protocols/openai/images.rs
View file @
b6a3b0c6
...
@@ -14,7 +14,7 @@ pub use nvext::{NvExt, NvExtProvider};
...
@@ -14,7 +14,7 @@ pub use nvext::{NvExt, NvExtProvider};
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateImageRequest
{
pub
struct
NvCreateImageRequest
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
CreateImageRequest
,
pub
inner
:
dynamo_
protocols
::
types
::
CreateImageRequest
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
pub
nvext
:
Option
<
NvExt
>
,
...
@@ -28,13 +28,13 @@ pub struct NvCreateImageRequest {
...
@@ -28,13 +28,13 @@ pub struct NvCreateImageRequest {
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvImagesResponse
{
pub
struct
NvImagesResponse
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
ImagesResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
ImagesResponse
,
}
}
impl
NvImagesResponse
{
impl
NvImagesResponse
{
pub
fn
empty
()
->
Self
{
pub
fn
empty
()
->
Self
{
Self
{
Self
{
inner
:
dynamo_
async_openai
::
types
::
ImagesResponse
{
inner
:
dynamo_
protocols
::
types
::
ImagesResponse
{
created
:
0
,
created
:
0
,
data
:
vec!
[],
data
:
vec!
[],
},
},
...
...
lib/llm/src/protocols/openai/nvext.rs
View file @
b6a3b0c6
...
@@ -216,7 +216,7 @@ pub struct AgentHints {
...
@@ -216,7 +216,7 @@ pub struct AgentHints {
// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
// alongside the Anthropic protocol types they originate from.
// alongside the Anthropic protocol types they originate from.
pub
use
dynamo_
async_openai
::
types
::
anthropic
::{
CacheControl
,
CacheControlType
};
pub
use
dynamo_
protocols
::
types
::
anthropic
::{
CacheControl
,
CacheControlType
};
impl
Default
for
NvExt
{
impl
Default
for
NvExt
{
fn
default
()
->
Self
{
fn
default
()
->
Self
{
...
...
lib/llm/src/protocols/openai/responses/mod.rs
View file @
b6a3b0c6
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
pub
mod
stream_converter
;
pub
mod
stream_converter
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
AssistantRole
,
FunctionCallOutput
,
FunctionToolCall
,
IncludeEnum
,
InputContent
,
InputItem
,
AssistantRole
,
FunctionCallOutput
,
FunctionToolCall
,
IncludeEnum
,
InputContent
,
InputItem
,
InputParam
,
InputRole
,
InputTokenDetails
,
Instructions
,
Item
,
MessageItem
,
OutputItem
,
InputParam
,
InputRole
,
InputTokenDetails
,
Instructions
,
Item
,
MessageItem
,
OutputItem
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
...
@@ -11,7 +11,7 @@ use dynamo_async_openai::types::responses::{
...
@@ -11,7 +11,7 @@ use dynamo_async_openai::types::responses::{
ServiceTier
,
Status
,
Summary
,
SummaryPart
,
TextResponseFormatConfiguration
,
Tool
,
ServiceTier
,
Status
,
Summary
,
SummaryPart
,
TextResponseFormatConfiguration
,
Tool
,
ToolChoiceOptions
,
ToolChoiceParam
,
Truncation
,
ToolChoiceOptions
,
ToolChoiceParam
,
Truncation
,
};
};
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionMessageToolCall
,
ChatCompletionNamedToolChoice
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestAssistantMessage
,
ChatCompletionRequestAssistantMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
ChatCompletionRequestMessage
,
ChatCompletionRequestMessageContentPartImage
,
...
@@ -38,7 +38,7 @@ use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
...
@@ -38,7 +38,7 @@ use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
pub
struct
NvCreateResponse
{
pub
struct
NvCreateResponse
{
/// Flattened CreateResponse fields (model, input, temperature, etc.)
/// Flattened CreateResponse fields (model, input, temperature, etc.)
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
CreateResponse
,
pub
inner
:
dynamo_
protocols
::
types
::
responses
::
CreateResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
pub
nvext
:
Option
<
NvExt
>
,
...
@@ -48,7 +48,7 @@ pub struct NvCreateResponse {
...
@@ -48,7 +48,7 @@ pub struct NvCreateResponse {
pub
struct
NvResponse
{
pub
struct
NvResponse
{
/// Flattened Response fields.
/// Flattened Response fields.
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
Response
,
pub
inner
:
dynamo_
protocols
::
types
::
responses
::
Response
,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
@@ -144,12 +144,12 @@ impl OpenAIStopConditionsProvider for NvCreateResponse {
...
@@ -144,12 +144,12 @@ impl OpenAIStopConditionsProvider for NvCreateResponse {
/// Convert a Responses API ImageDetail to the Chat Completions ImageDetail.
/// Convert a Responses API ImageDetail to the Chat Completions ImageDetail.
fn
convert_image_detail
(
fn
convert_image_detail
(
detail
:
&
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
,
detail
:
&
dynamo_
protocols
::
types
::
responses
::
ImageDetail
,
)
->
ChatImageDetail
{
)
->
ChatImageDetail
{
match
detail
{
match
detail
{
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
Auto
=>
ChatImageDetail
::
Auto
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
Auto
=>
ChatImageDetail
::
Auto
,
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
Low
=>
ChatImageDetail
::
Low
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
Low
=>
ChatImageDetail
::
Low
,
dynamo_
async_openai
::
types
::
responses
::
ImageDetail
::
High
=>
ChatImageDetail
::
High
,
dynamo_
protocols
::
types
::
responses
::
ImageDetail
::
High
=>
ChatImageDetail
::
High
,
}
}
}
}
...
@@ -316,7 +316,7 @@ fn convert_input_items_to_messages(
...
@@ -316,7 +316,7 @@ fn convert_input_items_to_messages(
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
fc
.call_id
.clone
(),
id
:
fc
.call_id
.clone
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
fc
.name
.clone
(),
name
:
fc
.name
.clone
(),
arguments
:
fc
.arguments
.clone
(),
arguments
:
fc
.arguments
.clone
(),
},
},
...
@@ -349,10 +349,10 @@ fn convert_input_items_to_messages(
...
@@ -349,10 +349,10 @@ fn convert_input_items_to_messages(
InputItem
::
EasyMessage
(
easy
)
=>
{
InputItem
::
EasyMessage
(
easy
)
=>
{
// Handle easy input messages based on role
// Handle easy input messages based on role
let
content_text
=
match
&
easy
.content
{
let
content_text
=
match
&
easy
.content
{
dynamo_
async_openai
::
types
::
responses
::
EasyInputContent
::
Text
(
text
)
=>
{
dynamo_
protocols
::
types
::
responses
::
EasyInputContent
::
Text
(
text
)
=>
{
text
.clone
()
text
.clone
()
}
}
dynamo_
async_openai
::
types
::
responses
::
EasyInputContent
::
ContentList
(
parts
)
=>
{
dynamo_
protocols
::
types
::
responses
::
EasyInputContent
::
ContentList
(
parts
)
=>
{
convert_input_content_to_text
(
parts
)
convert_input_content_to_text
(
parts
)
}
}
};
};
...
@@ -737,10 +737,8 @@ pub fn chat_completion_to_response(
...
@@ -737,10 +737,8 @@ pub fn chat_completion_to_response(
// Handle text content -- also parse <tool_call> blocks from models
// Handle text content -- also parse <tool_call> blocks from models
// that emit tool calls as text (e.g. Qwen3)
// that emit tool calls as text (e.g. Qwen3)
let
content_text
=
match
choice
.message.content
{
let
content_text
=
match
choice
.message.content
{
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
text
))
=>
{
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
text
))
=>
Some
(
text
),
Some
(
text
)
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
}
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Parts
(
_
))
=>
{
tracing
::
warn!
(
tracing
::
warn!
(
"Multimodal content in responses API not yet supported, using placeholder"
"Multimodal content in responses API not yet supported, using placeholder"
);
);
...
@@ -880,12 +878,12 @@ pub fn chat_completion_to_response(
...
@@ -880,12 +878,12 @@ pub fn chat_completion_to_response(
#[cfg(test)]
#[cfg(test)]
mod
tests
{
mod
tests
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
CreateResponse
,
FunctionCallOutput
,
FunctionCallOutputItemParam
,
FunctionTool
,
CreateResponse
,
FunctionCallOutput
,
FunctionCallOutputItemParam
,
FunctionTool
,
FunctionToolCall
,
ImageDetail
,
InputContent
,
InputImageContent
,
InputItem
,
InputMessage
,
FunctionToolCall
,
ImageDetail
,
InputContent
,
InputImageContent
,
InputItem
,
InputMessage
,
InputParam
,
InputRole
,
InputTextContent
,
Item
,
MessageItem
,
Tool
,
InputParam
,
InputRole
,
InputTextContent
,
Item
,
MessageItem
,
Tool
,
};
};
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessageContent
,
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessageContent
,
};
};
...
@@ -1167,19 +1165,17 @@ mod tests {
...
@@ -1167,19 +1165,17 @@ mod tests {
fn
test_into_nvresponse_from_chat_response
()
{
fn
test_into_nvresponse_from_chat_response
()
{
let
now
=
1_726_000_000
;
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
message
:
dynamo_protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
content
:
Some
(
dynamo_protocols
::
types
::
ChatCompletionMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"This is a reply"
.to_string
(),
"This is a reply"
.to_string
(),
),
)),
),
refusal
:
None
,
refusal
:
None
,
tool_calls
:
None
,
tool_calls
:
None
,
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
function_call
:
None
,
audio
:
None
,
audio
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
...
@@ -1225,22 +1221,22 @@ mod tests {
...
@@ -1225,22 +1221,22 @@ mod tests {
fn
test_response_with_tool_calls
()
{
fn
test_response_with_tool_calls
()
{
let
now
=
1_726_000_000
;
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
protocols
::
types
::
ChatChoice
{
index
:
0
,
index
:
0
,
message
:
dynamo_
async_openai
::
types
::
ChatCompletionResponseMessage
{
message
:
dynamo_
protocols
::
types
::
ChatCompletionResponseMessage
{
content
:
None
,
content
:
None
,
refusal
:
None
,
refusal
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
"call_abc"
.into
(),
id
:
"call_abc"
.into
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_
async_openai
::
types
::
FunctionCall
{
function
:
dynamo_
protocols
::
types
::
FunctionCall
{
name
:
"get_weather"
.into
(),
name
:
"get_weather"
.into
(),
arguments
:
r#"{"location":"SF"}"#
.into
(),
arguments
:
r#"{"location":"SF"}"#
.into
(),
},
},
}]),
}]),
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
function_call
:
None
,
function_call
:
None
,
audio
:
None
,
audio
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
...
@@ -1335,8 +1331,8 @@ thinking
...
@@ -1335,8 +1331,8 @@ thinking
#[test]
#[test]
fn
test_reasoning_effort_mapped_to_chat_completion
()
{
fn
test_reasoning_effort_mapped_to_chat_completion
()
{
use
dynamo_
async_openai
::
types
::
ReasoningEffort
;
use
dynamo_
protocols
::
types
::
ReasoningEffort
;
use
dynamo_
async_openai
::
types
::
responses
::
Reasoning
;
use
dynamo_
protocols
::
types
::
responses
::
Reasoning
;
let
mut
req
=
make_response_with_input
(
"think hard"
);
let
mut
req
=
make_response_with_input
(
"think hard"
);
req
.inner.reasoning
=
Some
(
Reasoning
{
req
.inner.reasoning
=
Some
(
Reasoning
{
...
@@ -1357,8 +1353,8 @@ thinking
...
@@ -1357,8 +1353,8 @@ thinking
#[test]
#[test]
fn
test_text_format_json_object_mapped
()
{
fn
test_text_format_json_object_mapped
()
{
use
dynamo_
async_openai
::
types
::
ResponseFormat
;
use
dynamo_
protocols
::
types
::
ResponseFormat
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
};
...
@@ -1374,10 +1370,10 @@ thinking
...
@@ -1374,10 +1370,10 @@ thinking
#[test]
#[test]
fn
test_text_format_json_schema_mapped
()
{
fn
test_text_format_json_schema_mapped
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
};
use
dynamo_
async_openai
::
types
::{
ResponseFormat
,
ResponseFormatJsonSchema
};
use
dynamo_
protocols
::
types
::{
ResponseFormat
,
ResponseFormatJsonSchema
};
let
schema
=
ResponseFormatJsonSchema
{
let
schema
=
ResponseFormatJsonSchema
{
name
:
"city"
.into
(),
name
:
"city"
.into
(),
...
@@ -1402,7 +1398,7 @@ thinking
...
@@ -1402,7 +1398,7 @@ thinking
#[test]
#[test]
fn
test_text_format_plain_text_leaves_response_format_none
()
{
fn
test_text_format_plain_text_leaves_response_format_none
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
};
...
@@ -1418,8 +1414,8 @@ thinking
...
@@ -1418,8 +1414,8 @@ thinking
#[test]
#[test]
fn
test_service_tier_mapped_to_chat_completion
()
{
fn
test_service_tier_mapped_to_chat_completion
()
{
use
dynamo_
async_openai
::
types
::
ServiceTier
as
ChatServiceTier
;
use
dynamo_
protocols
::
types
::
ServiceTier
as
ChatServiceTier
;
use
dynamo_
async_openai
::
types
::
responses
::
ServiceTier
as
RespServiceTier
;
use
dynamo_
protocols
::
types
::
responses
::
ServiceTier
as
RespServiceTier
;
let
mut
req
=
make_response_with_input
(
"priority"
);
let
mut
req
=
make_response_with_input
(
"priority"
);
req
.inner.service_tier
=
Some
(
RespServiceTier
::
Priority
);
req
.inner.service_tier
=
Some
(
RespServiceTier
::
Priority
);
...
@@ -1430,8 +1426,8 @@ thinking
...
@@ -1430,8 +1426,8 @@ thinking
#[test]
#[test]
fn
test_response_echoes_reasoning
()
{
fn
test_response_echoes_reasoning
()
{
use
dynamo_
async_openai
::
types
::
ReasoningEffort
;
use
dynamo_
protocols
::
types
::
ReasoningEffort
;
use
dynamo_
async_openai
::
types
::
responses
::
Reasoning
;
use
dynamo_
protocols
::
types
::
responses
::
Reasoning
;
let
params
=
ResponseParams
{
let
params
=
ResponseParams
{
reasoning
:
Some
(
Reasoning
{
reasoning
:
Some
(
Reasoning
{
...
@@ -1442,7 +1438,7 @@ thinking
...
@@ -1442,7 +1438,7 @@ thinking
};
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
choices
:
vec!
[],
created
:
0
,
created
:
0
,
id
:
"test"
.into
(),
id
:
"test"
.into
(),
...
@@ -1462,7 +1458,7 @@ thinking
...
@@ -1462,7 +1458,7 @@ thinking
#[test]
#[test]
fn
test_response_echoes_text_format
()
{
fn
test_response_echoes_text_format
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
ResponseTextParam
,
TextResponseFormatConfiguration
,
ResponseTextParam
,
TextResponseFormatConfiguration
,
};
};
...
@@ -1475,7 +1471,7 @@ thinking
...
@@ -1475,7 +1471,7 @@ thinking
};
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
choices
:
vec!
[],
created
:
0
,
created
:
0
,
id
:
"test"
.into
(),
id
:
"test"
.into
(),
...
@@ -1495,7 +1491,7 @@ thinking
...
@@ -1495,7 +1491,7 @@ thinking
#[test]
#[test]
fn
test_response_echoes_service_tier
()
{
fn
test_response_echoes_service_tier
()
{
use
dynamo_
async_openai
::
types
::
responses
::
ServiceTier
;
use
dynamo_
protocols
::
types
::
responses
::
ServiceTier
;
let
params
=
ResponseParams
{
let
params
=
ResponseParams
{
service_tier
:
Some
(
ServiceTier
::
Flex
),
service_tier
:
Some
(
ServiceTier
::
Flex
),
...
@@ -1503,7 +1499,7 @@ thinking
...
@@ -1503,7 +1499,7 @@ thinking
};
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
choices
:
vec!
[],
created
:
0
,
created
:
0
,
id
:
"test"
.into
(),
id
:
"test"
.into
(),
...
@@ -1522,7 +1518,7 @@ thinking
...
@@ -1522,7 +1518,7 @@ thinking
#[test]
#[test]
fn
test_output_message_deserializes_without_id_and_status
()
{
fn
test_output_message_deserializes_without_id_and_status
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
};
use
dynamo_
protocols
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
};
let
json
=
serde_json
::
json!
({
let
json
=
serde_json
::
json!
({
"role"
:
"assistant"
,
"role"
:
"assistant"
,
...
@@ -1544,7 +1540,7 @@ thinking
...
@@ -1544,7 +1540,7 @@ thinking
#[test]
#[test]
fn
test_output_message_with_id_and_status_still_works
()
{
fn
test_output_message_with_id_and_status_still_works
()
{
use
dynamo_
async_openai
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
,
OutputStatus
};
use
dynamo_
protocols
::
types
::
responses
::{
InputItem
,
Item
,
MessageItem
,
OutputStatus
};
let
json
=
serde_json
::
json!
({
let
json
=
serde_json
::
json!
({
"role"
:
"assistant"
,
"role"
:
"assistant"
,
...
@@ -1567,17 +1563,17 @@ thinking
...
@@ -1567,17 +1563,17 @@ thinking
// ── PR2: include filtering + truncation echo-back tests ──
// ── PR2: include filtering + truncation echo-back tests ──
fn
make_chat_resp_with_text
(
text
:
&
str
)
->
NvCreateChatCompletionResponse
{
fn
make_chat_resp_with_text
(
text
:
&
str
)
->
NvCreateChatCompletionResponse
{
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoice
,
ChatCompletionMessageContent
,
ChatCompletionResponseMessage
,
FinishReason
,
ChatChoice
,
ChatCompletionMessageContent
,
ChatCompletionResponseMessage
,
FinishReason
,
};
};
NvCreateChatCompletionResponse
{
NvCreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[
ChatChoice
{
choices
:
vec!
[
ChatChoice
{
index
:
0
,
index
:
0
,
#[allow(deprecated)]
#[allow(deprecated)]
message
:
ChatCompletionResponseMessage
{
message
:
ChatCompletionResponseMessage
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
protocols
::
types
::
Role
::
Assistant
,
tool_calls
:
None
,
tool_calls
:
None
,
refusal
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
...
@@ -1622,7 +1618,7 @@ thinking
...
@@ -1622,7 +1618,7 @@ thinking
#[test]
#[test]
fn
test_include_logprobs_kept_when_requested
()
{
fn
test_include_logprobs_kept_when_requested
()
{
use
dynamo_
async_openai
::
types
::
responses
::
IncludeEnum
;
use
dynamo_
protocols
::
types
::
responses
::
IncludeEnum
;
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
params
=
ResponseParams
{
let
params
=
ResponseParams
{
...
@@ -1650,7 +1646,7 @@ thinking
...
@@ -1650,7 +1646,7 @@ thinking
#[test]
#[test]
fn
test_truncation_auto_echoed_back
()
{
fn
test_truncation_auto_echoed_back
()
{
use
dynamo_
async_openai
::
types
::
responses
::
Truncation
;
use
dynamo_
protocols
::
types
::
responses
::
Truncation
;
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
chat_resp
=
make_chat_resp_with_text
(
"hello"
);
let
params
=
ResponseParams
{
let
params
=
ResponseParams
{
...
...
lib/llm/src/protocols/openai/responses/stream_converter.rs
View file @
b6a3b0c6
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
use
std
::
time
::{
SystemTime
,
UNIX_EPOCH
};
use
std
::
time
::{
SystemTime
,
UNIX_EPOCH
};
use
axum
::
response
::
sse
::
Event
;
use
axum
::
response
::
sse
::
Event
;
use
dynamo_
async_openai
::
types
::
responses
::{
use
dynamo_
protocols
::
types
::
responses
::{
AssistantRole
,
FunctionToolCall
,
InputTokenDetails
,
Instructions
,
OutputContent
,
OutputItem
,
AssistantRole
,
FunctionToolCall
,
InputTokenDetails
,
Instructions
,
OutputContent
,
OutputItem
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
OutputMessage
,
OutputMessageContent
,
OutputStatus
,
OutputTextContent
,
OutputTokenDetails
,
Response
,
ResponseCompletedEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
Response
,
ResponseCompletedEvent
,
ResponseContentPartAddedEvent
,
ResponseContentPartDoneEvent
,
...
@@ -24,7 +24,7 @@ use dynamo_async_openai::types::responses::{
...
@@ -24,7 +24,7 @@ use dynamo_async_openai::types::responses::{
};
};
use
uuid
::
Uuid
;
use
uuid
::
Uuid
;
use
dynamo_
async_openai
::
types
::
ChatCompletionMessageContent
;
use
dynamo_
protocols
::
types
::
ChatCompletionMessageContent
;
use
super
::
ResponseParams
;
use
super
::
ResponseParams
;
use
crate
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionStreamResponse
;
use
crate
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionStreamResponse
;
...
@@ -673,7 +673,7 @@ fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
...
@@ -673,7 +673,7 @@ fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
mod
tests
{
mod
tests
{
use
super
::
*
;
use
super
::
*
;
use
crate
::
protocols
::
unified
::
ResponsesContext
;
use
crate
::
protocols
::
unified
::
ResponsesContext
;
use
dynamo_
async_openai
::
types
::{
use
dynamo_
protocols
::
types
::{
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionMessageToolCallChunk
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
ChatCompletionStreamResponseDelta
,
ChatCompletionToolType
,
FunctionCallStream
,
};
};
...
@@ -704,7 +704,7 @@ mod tests {
...
@@ -704,7 +704,7 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
index
:
0
,
...
@@ -742,7 +742,7 @@ mod tests {
...
@@ -742,7 +742,7 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
inner
:
dynamo_
protocols
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
index
:
0
,
...
...
Prev
1
2
3
4
5
6
…
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment