Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2887cd1c
Unverified
Commit
2887cd1c
authored
Mar 30, 2026
by
ishandhanani
Committed by
GitHub
Mar 30, 2026
Browse files
refactor(1/3): move `nvext` to `dynamo-llm` and move `anthropic` to `dynamo-async-openai` (#7564)
parent
d6136f4a
Changes
32
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
519 additions
and
382 deletions
+519
-382
lib/llm/src/protocols/openai/nvext.rs
lib/llm/src/protocols/openai/nvext.rs
+3
-44
lib/llm/src/protocols/openai/responses/mod.rs
lib/llm/src/protocols/openai/responses/mod.rs
+115
-103
lib/llm/src/protocols/openai/responses/stream_converter.rs
lib/llm/src/protocols/openai/responses/stream_converter.rs
+56
-52
lib/llm/tests/aggregators.rs
lib/llm/tests/aggregators.rs
+122
-3
lib/llm/tests/logprob_analysis_integration.rs
lib/llm/tests/logprob_analysis_integration.rs
+20
-16
lib/llm/tests/postprocessor_parsing_stream.rs
lib/llm/tests/postprocessor_parsing_stream.rs
+1
-1
lib/llm/tests/test_jail.rs
lib/llm/tests/test_jail.rs
+124
-97
lib/llm/tests/test_reasoning_parser.rs
lib/llm/tests/test_reasoning_parser.rs
+25
-23
lib/llm/tests/test_streaming_tool_parsers.rs
lib/llm/tests/test_streaming_tool_parsers.rs
+12
-10
lib/llm/tests/test_streaming_usage.rs
lib/llm/tests/test_streaming_usage.rs
+22
-18
lib/llm/tests/tool_choice.rs
lib/llm/tests/tool_choice.rs
+13
-9
lib/llm/tests/tool_choice_finish_reasons.rs
lib/llm/tests/tool_choice_finish_reasons.rs
+6
-6
No files found.
lib/llm/src/protocols/openai/nvext.rs
View file @
2887cd1c
...
...
@@ -214,50 +214,9 @@ pub struct AgentHints {
pub
latency_sensitivity
:
Option
<
f64
>
,
}
/// Anthropic-style cache control hint for prefix pinning with TTL.
#[derive(ToSchema,
Serialize,
Deserialize,
Debug,
Clone,
Default,
PartialEq)]
pub
struct
CacheControl
{
#[serde(rename
=
"type"
)]
pub
control_type
:
CacheControlType
,
/// TTL as seconds (integer) or shorthand ("5m" = 300s, "1h" = 3600s). Clamped to [300, 3600].
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
ttl
:
Option
<
String
>
,
}
#[derive(ToSchema,
Serialize,
Deserialize,
Debug,
Clone,
Default,
PartialEq)]
#[serde(rename_all
=
"lowercase"
)]
pub
enum
CacheControlType
{
#[default]
Ephemeral
,
#[serde(other)]
Unknown
,
}
const
MIN_TTL_SECONDS
:
u64
=
300
;
const
MAX_TTL_SECONDS
:
u64
=
3600
;
impl
CacheControl
{
/// Parse TTL string to seconds, clamped to [300, 3600].
///
/// Accepts integer seconds ("120", "600") or shorthand ("5m", "1h").
/// Values below 300 are clamped to 300; values above 3600 are clamped to 3600.
/// Unrecognized strings default to 300s.
pub
fn
ttl_seconds
(
&
self
)
->
u64
{
let
raw
=
match
self
.ttl
.as_deref
()
{
None
=>
return
MIN_TTL_SECONDS
,
Some
(
"5m"
)
=>
300
,
Some
(
"1h"
)
=>
3600
,
Some
(
other
)
=>
match
other
.parse
::
<
u64
>
()
{
Ok
(
secs
)
=>
secs
,
Err
(
_
)
=>
{
tracing
::
warn!
(
"Unrecognized TTL '{}', defaulting to 300s"
,
other
);
return
MIN_TTL_SECONDS
;
}
},
};
raw
.clamp
(
MIN_TTL_SECONDS
,
MAX_TTL_SECONDS
)
}
}
// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
// alongside the Anthropic protocol types they originate from.
pub
use
dynamo_async_openai
::
types
::
anthropic
::{
CacheControl
,
CacheControlType
};
impl
Default
for
NvExt
{
fn
default
()
->
Self
{
...
...
lib/llm/src/protocols/openai/responses/mod.rs
View file @
2887cd1c
...
...
@@ -696,8 +696,8 @@ pub fn chat_completion_to_response(
nv_resp
:
NvCreateChatCompletionResponse
,
params
:
&
ResponseParams
,
)
->
Result
<
NvResponse
,
anyhow
::
Error
>
{
let
chat_resp
=
nv_resp
;
let
nvext
=
chat_resp
.nvext
.clone
()
;
let
nvext
=
nv_resp
.nvext
.clone
()
;
let
chat_resp
=
nv_resp
.inner
;
let
message_id
=
format!
(
"msg_{}"
,
Uuid
::
new_v4
()
.simple
());
let
response_id
=
format!
(
"resp_{}"
,
Uuid
::
new_v4
()
.simple
());
...
...
@@ -1163,32 +1163,34 @@ mod tests {
fn
test_into_nvresponse_from_chat_response
()
{
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_async_openai
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"This is a reply"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_async_openai
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"This is a reply"
.to_string
(),
),
),
)
,
refusal
:
None
,
tool_calls
:
None
,
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
}
,
finish
_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}]
,
created
:
now
,
model
:
"llama-3.1-8b-instruct"
.into
()
,
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
()
,
usage
:
None
,
refusal
:
None
,
tool_calls
:
None
,
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
}
,
finish_reason
:
None
,
stop
_reason
:
None
,
logprobs
:
None
,
}]
,
created
:
now
,
model
:
"llama-3.1-8b-instruct"
.into
()
,
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
()
,
usage
:
None
,
}
,
nvext
:
None
,
};
...
...
@@ -1218,35 +1220,37 @@ mod tests {
fn
test_response_with_tool_calls
()
{
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_async_openai
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
None
,
refusal
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
"call_abc"
.into
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_async_openai
::
types
::
FunctionCall
{
name
:
"get_weather"
.into
(),
arguments
:
r#"{"location":"SF"}"#
.into
(),
},
}]),
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
now
,
model
:
"test-model"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
dynamo_async_openai
::
types
::
ChatChoice
{
index
:
0
,
message
:
dynamo_async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
None
,
refusal
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCall
{
id
:
"call_abc"
.into
(),
r
#
type
:
ChatCompletionToolType
::
Function
,
function
:
dynamo_async_openai
::
types
::
FunctionCall
{
name
:
"get_weather"
.into
(),
arguments
:
r#"{"location":"SF"}"#
.into
(),
},
}]),
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
now
,
model
:
"test-model"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
usage
:
None
,
},
nvext
:
None
,
};
...
...
@@ -1432,14 +1436,16 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
},
nvext
:
None
,
};
...
...
@@ -1463,14 +1469,16 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
},
nvext
:
None
,
};
...
...
@@ -1489,14 +1497,16 @@ thinking
};
let
chat_resp
=
NvCreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
},
nvext
:
None
,
};
...
...
@@ -1555,29 +1565,31 @@ thinking
ChatChoice
,
ChatCompletionMessageContent
,
ChatCompletionResponseMessage
,
FinishReason
,
};
NvCreateChatCompletionResponse
{
choices
:
vec!
[
ChatChoice
{
index
:
0
,
#[allow(deprecated)]
message
:
ChatCompletionResponseMessage
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
tool_calls
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
function_call
:
None
,
audio
:
None
,
},
finish_reason
:
Some
(
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
choices
:
vec!
[
ChatChoice
{
index
:
0
,
#[allow(deprecated)]
message
:
ChatCompletionResponseMessage
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
role
:
dynamo_async_openai
::
types
::
Role
::
Assistant
,
tool_calls
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
function_call
:
None
,
audio
:
None
,
},
finish_reason
:
Some
(
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
0
,
id
:
"test"
.into
(),
model
:
"m"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion"
.into
(),
usage
:
None
,
},
nvext
:
None
,
}
}
...
...
lib/llm/src/protocols/openai/responses/stream_converter.rs
View file @
2887cd1c
...
...
@@ -183,7 +183,7 @@ impl ResponseStreamConverter {
let
mut
events
=
Vec
::
new
();
// Capture usage stats from the final chunk (sent when stream_options.include_usage=true)
if
let
Some
(
ref
u
)
=
chunk
.usage
{
if
let
Some
(
ref
u
)
=
chunk
.
inner.
usage
{
self
.usage
=
Some
(
ResponseUsage
{
input_tokens
:
u
.prompt_tokens
,
input_tokens_details
:
InputTokenDetails
{
...
...
@@ -205,7 +205,7 @@ impl ResponseStreamConverter {
});
}
for
choice
in
&
chunk
.choices
{
for
choice
in
&
chunk
.
inner.
choices
{
let
delta
=
&
choice
.delta
;
// Handle text content deltas — extract text from the enum
...
...
@@ -685,35 +685,37 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
delta
:
ChatCompletionStreamResponseDelta
{
content
:
None
,
function_call
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCallChunk
{
index
:
tc_index
,
id
:
id
.map
(
String
::
from
),
r
#
type
:
Some
(
ChatCompletionToolType
::
Function
),
function
:
Some
(
FunctionCallStream
{
name
:
name
.map
(
String
::
from
),
arguments
:
args
.map
(
String
::
from
),
}),
}]),
role
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
0
,
model
:
"test"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
delta
:
ChatCompletionStreamResponseDelta
{
content
:
None
,
function_call
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCallChunk
{
index
:
tc_index
,
id
:
id
.map
(
String
::
from
),
r
#
type
:
Some
(
ChatCompletionToolType
::
Function
),
function
:
Some
(
FunctionCallStream
{
name
:
name
.map
(
String
::
from
),
arguments
:
args
.map
(
String
::
from
),
}),
}]),
role
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
0
,
model
:
"test"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
}
}
...
...
@@ -721,27 +723,29 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
delta
:
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
function_call
:
None
,
tool_calls
:
None
,
role
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
0
,
model
:
"test"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
choices
:
vec!
[
ChatChoiceStream
{
index
:
0
,
delta
:
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
function_call
:
None
,
tool_calls
:
None
,
role
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}],
created
:
0
,
model
:
"test"
.into
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
}
}
...
...
lib/llm/tests/aggregators.rs
View file @
2887cd1c
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
;
use
dynamo_async_openai
::
types
::{
ChatChoiceStream
,
ChatCompletionMessageContent
,
ChatCompletionStreamResponseDelta
,
CreateChatCompletionStreamResponse
,
Role
,
};
use
dynamo_llm
::
protocols
::{
ContentProvider
,
DataStream
,
Annotated
,
ContentProvider
,
DataStream
,
codec
::{
Message
,
SseCodecError
,
create_message_stream
},
openai
::{
ParsingOptions
,
chat_completions
::{
NvCreateChatCompletionResponse
,
aggregator
::
ChatCompletionAggregator
},
chat_completions
::{
NvCreateChatCompletionResponse
,
NvCreateChatCompletionStreamResponse
,
aggregator
::
ChatCompletionAggregator
,
},
completions
::
NvCreateCompletionResponse
,
},
};
...
...
@@ -45,6 +51,7 @@ async fn test_openai_chat_stream() {
assert_eq!
(
get_text
(
result
.inner
.choices
.first
()
.unwrap
()
...
...
@@ -70,6 +77,7 @@ async fn test_openai_chat_edge_case_multi_line_data() {
assert_eq!
(
get_text
(
result
.inner
.choices
.first
()
.unwrap
()
...
...
@@ -95,6 +103,7 @@ async fn test_openai_chat_edge_case_comments_per_response() {
assert_eq!
(
get_text
(
result
.inner
.choices
.first
()
.unwrap
()
...
...
@@ -138,3 +147,113 @@ async fn test_openai_cmpl_stream() {
" This is a question that is often asked by those outside of AI research and development"
);
}
// ===================================
// nvext aggregation regression tests
// ===================================
#[allow(deprecated)]
fn
make_stream_delta
(
content
:
Option
<&
str
>
,
nvext
:
Option
<
serde_json
::
Value
>
,
)
->
Annotated
<
NvCreateChatCompletionStreamResponse
>
{
Annotated
::
from_data
(
NvCreateChatCompletionStreamResponse
{
inner
:
CreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
choices
:
if
let
Some
(
text
)
=
content
{
vec!
[
ChatChoiceStream
{
index
:
0
,
delta
:
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.to_string
())),
function_call
:
None
,
tool_calls
:
None
,
role
:
Some
(
Role
::
Assistant
),
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
}]
}
else
{
vec!
[]
},
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
},
nvext
,
})
}
/// Verify that nvext set on a stream delta survives aggregation into the final response.
#[tokio::test]
async
fn
test_nvext_passthrough_aggregation
()
{
let
nvext_value
=
serde_json
::
json!
({
"custom_field"
:
"test_value"
});
let
deltas
=
vec!
[
make_stream_delta
(
Some
(
"Hello"
),
None
),
make_stream_delta
(
Some
(
" world"
),
Some
(
nvext_value
.clone
())),
make_stream_delta
(
Some
(
"!"
),
None
),
];
let
stream
=
futures
::
stream
::
iter
(
deltas
);
let
result
=
NvCreateChatCompletionResponse
::
from_annotated_stream
(
stream
,
ParsingOptions
::
default
())
.await
.unwrap
();
assert_eq!
(
result
.nvext
,
Some
(
nvext_value
));
assert_eq!
(
get_text
(
result
.inner
.choices
.first
()
.unwrap
()
.message
.content
.as_ref
()
.unwrap
()
),
"Hello world!"
);
}
/// Verify that the last non-None nvext wins when multiple deltas carry nvext.
#[tokio::test]
async
fn
test_nvext_last_value_wins
()
{
let
first_nvext
=
serde_json
::
json!
({
"version"
:
1
});
let
last_nvext
=
serde_json
::
json!
({
"version"
:
2
});
let
deltas
=
vec!
[
make_stream_delta
(
Some
(
"a"
),
Some
(
first_nvext
)),
make_stream_delta
(
Some
(
"b"
),
None
),
make_stream_delta
(
Some
(
"c"
),
Some
(
last_nvext
.clone
())),
];
let
stream
=
futures
::
stream
::
iter
(
deltas
);
let
result
=
NvCreateChatCompletionResponse
::
from_annotated_stream
(
stream
,
ParsingOptions
::
default
())
.await
.unwrap
();
assert_eq!
(
result
.nvext
,
Some
(
last_nvext
));
}
/// Verify that nvext remains None when no delta carries it.
#[tokio::test]
async
fn
test_nvext_none_when_absent
()
{
let
deltas
=
vec!
[
make_stream_delta
(
Some
(
"hello"
),
None
)];
let
stream
=
futures
::
stream
::
iter
(
deltas
);
let
result
=
NvCreateChatCompletionResponse
::
from_annotated_stream
(
stream
,
ParsingOptions
::
default
())
.await
.unwrap
();
assert_eq!
(
result
.nvext
,
None
);
}
lib/llm/tests/logprob_analysis_integration.rs
View file @
2887cd1c
...
...
@@ -397,14 +397,16 @@ fn create_response_with_linear_probs(
};
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
choices
:
vec!
[
choice
],
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
choices
:
vec!
[
choice
],
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
},
nvext
:
None
,
}
}
...
...
@@ -479,14 +481,16 @@ fn create_multi_choice_response(
.collect
();
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
choices
,
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
choices
,
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
service_tier
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
},
nvext
:
None
,
}
}
lib/llm/tests/postprocessor_parsing_stream.rs
View file @
2887cd1c
...
...
@@ -192,7 +192,7 @@ async fn postprocessor_parsing_stream_replays_interval_20_fixture() {
continue
;
};
for
choice
in
&
output_data
.choices
{
for
choice
in
&
output_data
.
inner.
choices
{
if
let
Some
(
reasoning_content
)
=
&
choice
.delta.reasoning_content
{
reasoning
.push_str
(
reasoning_content
);
}
...
...
lib/llm/tests/test_jail.rs
View file @
2887cd1c
This diff is collapsed.
Click to expand it.
lib/llm/tests/test_reasoning_parser.rs
View file @
2887cd1c
...
...
@@ -39,14 +39,16 @@ fn create_mock_response_chunk(
};
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
choices
:
vec!
[
choice
],
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
service_tier
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
choices
:
vec!
[
choice
],
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
};
...
...
@@ -125,7 +127,7 @@ mod tests {
let
mut
all_content
=
String
::
new
();
while
let
Some
(
item
)
=
output_stream
.next
()
.await
{
if
let
Some
(
ref
data
)
=
item
.data
{
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
if
let
Some
(
ref
r
)
=
choice
.delta.reasoning_content
{
all_reasoning
.push_str
(
r
);
}
...
...
@@ -177,15 +179,15 @@ mod tests {
assert_eq!
(
output_chunks
.len
(),
3
);
// Chunk 0: "<think>This"
let
output_choice_0
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice_0
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice_0
,
None
,
Some
(
"This"
));
// Chunk 1: " is reasoning content"
let
output_choice_1
=
&
output_chunks
[
1
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice_1
=
&
output_chunks
[
1
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice_1
,
None
,
Some
(
" is reasoning content"
));
// Chunk 2: "</think> Here's my answer."
let
output_choice_2
=
&
output_chunks
[
2
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice_2
=
&
output_chunks
[
2
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice_2
,
Some
(
" Here's my answer."
),
None
);
}
...
...
@@ -223,15 +225,15 @@ mod tests {
assert_eq!
(
output_chunks
.len
(),
3
);
// Chunk 0: "<think>Only"
let
output_choice_0
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice_0
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice_0
,
None
,
Some
(
"Only"
));
// Chunk 1: " reasoning"
let
output_choice_1
=
&
output_chunks
[
1
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice_1
=
&
output_chunks
[
1
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice_1
,
None
,
Some
(
" reasoning"
));
// Chunk 2: " here</think>"
let
output_choice_2
=
&
output_chunks
[
2
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice_2
=
&
output_chunks
[
2
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice_2
,
None
,
Some
(
" here"
));
}
...
...
@@ -266,7 +268,7 @@ mod tests {
// Verify that only normal content is present
assert_eq!
(
output_chunks
.len
(),
1
);
let
output_choice
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice
,
Some
(
"Just normal text without reasoning tags."
),
...
...
@@ -304,8 +306,8 @@ mod tests {
assert_eq!
(
output_chunks
.len
(),
input_chunks
.len
());
for
(
input
,
output
)
in
input_chunks
.iter
()
.zip
(
output_chunks
.iter
())
{
let
input_choice
=
&
input
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice
=
&
output
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
input_choice
=
&
input
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
let
output_choice
=
&
output
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert_choice
(
output_choice
,
input_choice
.delta.content
.as_ref
()
.map
(
get_text
),
...
...
@@ -345,7 +347,7 @@ mod tests {
// Verify that Mistral-style reasoning is parsed correctly
assert_eq!
(
output_chunks
.len
(),
1
);
let
output_choice
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.choices
[
0
];
let
output_choice
=
&
output_chunks
[
0
]
.data
.as_ref
()
.unwrap
()
.
inner.
choices
[
0
];
assert
!
(
output_choice
.delta.reasoning_content
.is_some
(),
...
...
@@ -422,7 +424,7 @@ mod tests {
for
chunk
in
output_chunks
.iter
()
{
if
let
Some
(
ref
response_data
)
=
chunk
.data
{
for
choice
in
&
response_data
.choices
{
for
choice
in
&
response_data
.
inner.
choices
{
// Collect reasoning content
if
let
Some
(
ref
reasoning
)
=
choice
.delta.reasoning_content
{
all_reasoning
.push_str
(
reasoning
);
...
...
@@ -574,7 +576,7 @@ mod tests {
for
chunk
in
output_chunks
.iter
()
{
if
let
Some
(
ref
response_data
)
=
chunk
.data
{
for
choice
in
&
response_data
.choices
{
for
choice
in
&
response_data
.
inner.
choices
{
// Collect reasoning content
if
let
Some
(
ref
reasoning
)
=
choice
.delta.reasoning_content
{
all_reasoning
.push_str
(
reasoning
);
...
...
@@ -685,7 +687,7 @@ mod tests {
for
chunk
in
output_chunks
.iter
()
{
if
let
Some
(
ref
data
)
=
chunk
.data
{
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
if
let
Some
(
ref
r
)
=
choice
.delta.reasoning_content
{
all_reasoning
.push_str
(
r
);
}
...
...
@@ -782,7 +784,7 @@ mod tests {
for
chunk
in
output_chunks
.iter
()
{
if
let
Some
(
ref
response_data
)
=
chunk
.data
{
for
choice
in
&
response_data
.choices
{
for
choice
in
&
response_data
.
inner.
choices
{
if
let
Some
(
ref
reasoning
)
=
choice
.delta.reasoning_content
{
all_reasoning
.push_str
(
reasoning
);
}
...
...
lib/llm/tests/test_streaming_tool_parsers.rs
View file @
2887cd1c
...
...
@@ -107,14 +107,16 @@ fn load_test_data(file_path: &str) -> TestData {
.expect
(
"Failed to parse choices"
);
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
id
.clone
(),
choices
,
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
service_tier
:
None
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
id
.clone
(),
choices
,
created
:
1234567890
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
};
...
...
@@ -231,7 +233,7 @@ fn aggregate_content_from_chunks(
for
chunk
in
chunks
.iter
()
{
if
let
Some
(
ref
response_data
)
=
chunk
.data
{
for
choice
in
&
response_data
.choices
{
for
choice
in
&
response_data
.
inner.
choices
{
// Collect reasoning content
if
let
Some
(
ref
reasoning
)
=
choice
.delta.reasoning_content
{
reasoning_content
.push_str
(
reasoning
);
...
...
@@ -279,7 +281,7 @@ fn validate_finish_reason(
// Count finish_reason occurrences and track position
for
(
idx
,
chunk
)
in
chunks
.iter
()
.enumerate
()
{
if
let
Some
(
ref
response_data
)
=
chunk
.data
{
for
choice
in
&
response_data
.choices
{
for
choice
in
&
response_data
.
inner.
choices
{
if
let
Some
(
reason
)
=
choice
.finish_reason
{
finish_reason_count
+=
1
;
last_chunk_index
=
Some
(
idx
);
...
...
lib/llm/tests/test_streaming_usage.rs
View file @
2887cd1c
...
...
@@ -241,12 +241,12 @@ async fn test_streaming_without_usage() {
for
(
i
,
chunk
)
in
content_chunks
.iter
()
.enumerate
()
{
if
let
Some
(
response
)
=
&
chunk
.data
{
assert
!
(
response
.usage
.is_none
(),
response
.
inner.
usage
.is_none
(),
"Chunk {} should have usage: None when stream_options not set"
,
i
);
assert
!
(
!
response
.choices
.is_empty
(),
!
response
.
inner.
choices
.is_empty
(),
"Chunk {} should have choices"
,
i
);
...
...
@@ -286,12 +286,12 @@ async fn test_streaming_with_usage_compliance() {
for
(
i
,
chunk
)
in
chunks
.iter
()
.take
(
3
)
.enumerate
()
{
if
let
Some
(
response
)
=
&
chunk
.data
{
assert
!
(
response
.usage
.is_none
(),
response
.
inner.
usage
.is_none
(),
"Content chunk {} should have usage: None"
,
i
);
assert
!
(
!
response
.choices
.is_empty
(),
!
response
.
inner.
choices
.is_empty
(),
"Content chunk {} should have choices"
,
i
);
...
...
@@ -301,15 +301,15 @@ async fn test_streaming_with_usage_compliance() {
// Verify the final chunk is the usage-only chunk
if
let
Some
(
final_response
)
=
&
chunks
[
3
]
.data
{
assert
!
(
final_response
.choices
.is_empty
(),
final_response
.
inner.
choices
.is_empty
(),
"Final usage chunk should have empty choices array"
);
assert
!
(
final_response
.usage
.is_some
(),
final_response
.
inner.
usage
.is_some
(),
"Final usage chunk should have usage statistics"
);
let
usage
=
final_response
.usage
.as_ref
()
.unwrap
();
let
usage
=
final_response
.
inner.
usage
.as_ref
()
.unwrap
();
assert_eq!
(
usage
.completion_tokens
,
3
,
"Should have 3 completion tokens"
...
...
@@ -359,18 +359,18 @@ async fn test_streaming_with_continuous_usage() {
for
(
i
,
chunk
)
in
chunks
.iter
()
.take
(
3
)
.enumerate
()
{
if
let
Some
(
response
)
=
&
chunk
.data
{
assert
!
(
response
.usage
.is_some
(),
response
.
inner.
usage
.is_some
(),
"Content chunk {} should have usage: Some"
,
i
);
assert
!
(
!
response
.choices
.is_empty
(),
!
response
.
inner.
choices
.is_empty
(),
"Content chunk {} should have choices"
,
i
);
// Verify usage counts are properly accumulated for each chunk
let
usage
=
response
.usage
.as_ref
()
.unwrap
();
let
usage
=
response
.
inner.
usage
.as_ref
()
.unwrap
();
assert_eq!
(
usage
.completion_tokens
,
i
as
u32
+
1
,
...
...
@@ -392,15 +392,15 @@ async fn test_streaming_with_continuous_usage() {
// Verify the final chunk is the usage-only chunk
if
let
Some
(
final_response
)
=
&
chunks
[
3
]
.data
{
assert
!
(
final_response
.choices
.is_empty
(),
final_response
.
inner.
choices
.is_empty
(),
"Final usage chunk should have empty choices array"
);
assert
!
(
final_response
.usage
.is_some
(),
final_response
.
inner.
usage
.is_some
(),
"Final usage chunk should have usage statistics"
);
let
usage
=
final_response
.usage
.as_ref
()
.unwrap
();
let
usage
=
final_response
.
inner.
usage
.as_ref
()
.unwrap
();
assert_eq!
(
usage
.completion_tokens
,
3
,
"Should have 3 completion tokens"
...
...
@@ -464,7 +464,7 @@ async fn test_streaming_with_usage_false() {
for
(
i
,
chunk
)
in
content_chunks
.iter
()
.enumerate
()
{
if
let
Some
(
response
)
=
&
chunk
.data
{
assert
!
(
response
.usage
.is_none
(),
response
.
inner.
usage
.is_none
(),
"Chunk {} should have usage: None when include_usage is false"
,
i
);
...
...
@@ -560,7 +560,7 @@ async fn test_nonstreaming_has_usage_field() {
// Aggregate the streaming chunks into a single non-streaming response
// This simulates what the HTTP service does for non-streaming requests
let
result
=
dynamo_
async_openai
::
type
s
::
CreateChatCompletionResponse
::
from_annotated_stream
(
let
result
=
dynamo_
llm
::
protocols
::
openai
::
chat_completion
s
::
Nv
CreateChatCompletionResponse
::
from_annotated_stream
(
transformed_stream
,
ParsingOptions
::
default
(),
)
...
...
@@ -570,12 +570,12 @@ async fn test_nonstreaming_has_usage_field() {
let
response
=
result
.unwrap
();
assert
!
(
response
.usage
.is_some
(),
response
.
inner.
usage
.is_some
(),
"Non-streaming chat completion response MUST have a usage field populated.
\
This is required for OpenAI API compliance."
);
let
usage
=
response
.usage
.unwrap
();
let
usage
=
response
.
inner.
usage
.unwrap
();
// Verify usage contains valid token counts
// In our mock, we generated 3 tokens (from the 3 backend outputs)
...
...
@@ -725,7 +725,11 @@ async fn test_chat_streaming_with_cached_tokens_propagation() {
assert_eq!
(
chunks
.len
(),
4
,
"Should have 3 content + 1 usage chunk"
);
if
let
Some
(
final_resp
)
=
&
chunks
[
3
]
.data
{
let
usage
=
final_resp
.usage
.as_ref
()
.expect
(
"Usage must be present"
);
let
usage
=
final_resp
.inner
.usage
.as_ref
()
.expect
(
"Usage must be present"
);
let
cached
=
usage
.prompt_tokens_details
.as_ref
()
...
...
lib/llm/tests/tool_choice.rs
View file @
2887cd1c
...
...
@@ -157,7 +157,7 @@ async fn test_named_tool_choice_parses_json() {
.expect
(
"choice generation"
);
let
response
=
apply_jail_transformation
(
raw_response
,
tool_choice
)
.await
;
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.finish_reason
,
...
...
@@ -199,7 +199,7 @@ async fn test_required_tool_choice_parses_json_array() {
.expect
(
"choice generation"
);
let
response
=
apply_jail_transformation
(
raw_response
,
tool_choice
)
.await
;
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.finish_reason
,
...
...
@@ -259,7 +259,7 @@ async fn test_tool_choice_parse_failure_returns_as_content() {
.expect
(
"choice generation"
);
let
response
=
apply_jail_transformation
(
raw_response
,
tool_choice
)
.await
;
let
delta
=
&
response
.choices
[
0
]
.delta
;
let
delta
=
&
response
.
inner.
choices
[
0
]
.delta
;
// Jail stream behavior: if parsing fails, return accumulated content as-is
// This matches marker-based FC behavior
...
...
@@ -317,11 +317,11 @@ async fn test_streaming_named_tool_buffers_until_finish() {
let
response
=
&
all_responses
[
0
];
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
);
let
tool_calls
=
response
.choices
[
0
]
.delta.tool_calls
.as_ref
()
.unwrap
();
let
tool_calls
=
response
.
inner.
choices
[
0
]
.delta.tool_calls
.as_ref
()
.unwrap
();
assert_eq!
(
tool_calls
.len
(),
1
);
assert_eq!
(
tool_calls
[
0
]
.function
.as_ref
()
.unwrap
()
.name
.as_deref
(),
...
...
@@ -384,11 +384,11 @@ async fn test_streaming_required_tool_parallel() {
let
response
=
&
all_responses
[
0
];
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
)
);
let
tool_calls
=
response
.choices
[
0
]
.delta.tool_calls
.as_ref
()
.unwrap
();
let
tool_calls
=
response
.
inner.
choices
[
0
]
.delta.tool_calls
.as_ref
()
.unwrap
();
assert_eq!
(
tool_calls
.len
(),
2
);
assert_eq!
(
...
...
@@ -445,8 +445,12 @@ fn test_no_tool_choice_outputs_normal_text() {
.expect
(
"normal text"
);
assert_eq!
(
response
.choices
[
0
]
.delta.content
.as_ref
()
.map
(
get_text
),
response
.inner.choices
[
0
]
.delta
.content
.as_ref
()
.map
(
get_text
),
Some
(
"Hello world"
)
);
assert
!
(
response
.choices
[
0
]
.delta.tool_calls
.is_none
());
assert
!
(
response
.
inner.
choices
[
0
]
.delta.tool_calls
.is_none
());
}
lib/llm/tests/tool_choice_finish_reasons.rs
View file @
2887cd1c
...
...
@@ -116,7 +116,7 @@ async fn test_named_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with Stop
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Length
),
"Length finish reason must be preserved for tool_choice=named"
);
...
...
@@ -139,7 +139,7 @@ fn test_required_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with ToolCalls
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Length
),
"Length finish reason must be preserved for tool_choice=required"
);
...
...
@@ -169,7 +169,7 @@ fn test_named_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
ContentFilter
),
"ContentFilter finish reason must be preserved for tool_choice=named"
);
...
...
@@ -192,7 +192,7 @@ fn test_required_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
ContentFilter
),
"ContentFilter finish reason must be preserved for tool_choice=required"
);
...
...
@@ -222,7 +222,7 @@ fn test_named_tool_choice_normal_stop_becomes_stop() {
// Normal completion: Stop should remain Stop for named tool choice
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
);
}
...
...
@@ -247,7 +247,7 @@ async fn test_required_tool_choice_normal_stop_becomes_tool_calls() {
// Normal completion: Stop should become ToolCalls for required tool choice
assert_eq!
(
response
.choices
[
0
]
.finish_reason
,
response
.
inner.
choices
[
0
]
.finish_reason
,
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
),
);
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment