Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1c9412d2
Unverified
Commit
1c9412d2
authored
Apr 21, 2026
by
Biswa Panda
Committed by
GitHub
Apr 21, 2026
Browse files
feat(lpu): add opaque engine_data field to nvext response with per-request opt-in via extra_fields
parent
3f83c597
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
329 additions
and
11 deletions
+329
-11
lib/llm/src/backend.rs
lib/llm/src/backend.rs
+1
-0
lib/llm/src/migration.rs
lib/llm/src/migration.rs
+1
-0
lib/llm/src/protocols/common/llm_backend.rs
lib/llm/src/protocols/common/llm_backend.rs
+14
-0
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+142
-0
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+133
-0
lib/llm/src/protocols/openai/nvext.rs
lib/llm/src/protocols/openai/nvext.rs
+30
-11
lib/llm/tests/test_streaming_usage.rs
lib/llm/tests/test_streaming_usage.rs
+3
-0
lib/llm/tests/tool_choice.rs
lib/llm/tests/tool_choice.rs
+4
-0
lib/llm/tests/tool_choice_finish_reasons.rs
lib/llm/tests/tool_choice_finish_reasons.rs
+1
-0
No files found.
lib/llm/src/backend.rs
View file @
1c9412d2
...
@@ -300,6 +300,7 @@ impl
...
@@ -300,6 +300,7 @@ impl
index
:
data
.index
,
index
:
data
.index
,
completion_usage
:
data
.completion_usage
,
completion_usage
:
data
.completion_usage
,
disaggregated_params
:
data
.disaggregated_params
,
disaggregated_params
:
data
.disaggregated_params
,
engine_data
:
data
.engine_data
,
})
})
})
})
});
});
...
...
lib/llm/src/migration.rs
View file @
1c9412d2
...
@@ -323,6 +323,7 @@ mod tests {
...
@@ -323,6 +323,7 @@ mod tests {
index
:
None
,
index
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
completion_usage
:
None
,
completion_usage
:
None
,
engine_data
:
None
,
})
})
}
}
...
...
lib/llm/src/protocols/common/llm_backend.rs
View file @
1c9412d2
...
@@ -106,6 +106,11 @@ pub struct BackendOutput {
...
@@ -106,6 +106,11 @@ pub struct BackendOutput {
/// Disaggregated execution parameters (for prefill/decode separation)
/// Disaggregated execution parameters (for prefill/decode separation)
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
disaggregated_params
:
Option
<
serde_json
::
Value
>
,
pub
disaggregated_params
:
Option
<
serde_json
::
Value
>
,
/// Opaque engine data passed through from the backend worker to the response.
/// Dynamo does not inspect this field; it is serialized as-is into `nvext.engine_data`.
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
engine_data
:
Option
<
serde_json
::
Value
>
,
}
}
/// The LLM engine and backnd with manage it's own state, specifically translating how a
/// The LLM engine and backnd with manage it's own state, specifically translating how a
...
@@ -167,6 +172,11 @@ pub struct LLMEngineOutput {
...
@@ -167,6 +172,11 @@ pub struct LLMEngineOutput {
// Token usage information
// Token usage information
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
completion_usage
:
Option
<
CompletionUsage
>
,
pub
completion_usage
:
Option
<
CompletionUsage
>
,
/// Opaque engine data passed through from the backend worker to the response.
/// Dynamo does not inspect this field; it is serialized as-is into `nvext.engine_data`.
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
pub
engine_data
:
Option
<
serde_json
::
Value
>
,
}
}
impl
LLMEngineOutput
{
impl
LLMEngineOutput
{
...
@@ -186,6 +196,7 @@ impl LLMEngineOutput {
...
@@ -186,6 +196,7 @@ impl LLMEngineOutput {
disaggregated_params
:
None
,
disaggregated_params
:
None
,
extra_args
:
None
,
extra_args
:
None
,
completion_usage
:
None
,
completion_usage
:
None
,
engine_data
:
None
,
}
}
}
}
...
@@ -205,6 +216,7 @@ impl LLMEngineOutput {
...
@@ -205,6 +216,7 @@ impl LLMEngineOutput {
disaggregated_params
:
None
,
disaggregated_params
:
None
,
extra_args
:
None
,
extra_args
:
None
,
completion_usage
:
None
,
completion_usage
:
None
,
engine_data
:
None
,
}
}
}
}
...
@@ -224,6 +236,7 @@ impl LLMEngineOutput {
...
@@ -224,6 +236,7 @@ impl LLMEngineOutput {
disaggregated_params
:
None
,
disaggregated_params
:
None
,
extra_args
:
None
,
extra_args
:
None
,
completion_usage
:
None
,
completion_usage
:
None
,
engine_data
:
None
,
}
}
}
}
...
@@ -243,6 +256,7 @@ impl LLMEngineOutput {
...
@@ -243,6 +256,7 @@ impl LLMEngineOutput {
disaggregated_params
:
None
,
disaggregated_params
:
None
,
extra_args
:
None
,
extra_args
:
None
,
completion_usage
:
None
,
completion_usage
:
None
,
engine_data
:
None
,
}
}
}
}
}
}
...
...
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
1c9412d2
...
@@ -418,6 +418,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
...
@@ -418,6 +418,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
self
.tracker
.as_ref
(),
self
.tracker
.as_ref
(),
delta
.disaggregated_params
.as_ref
(),
delta
.disaggregated_params
.as_ref
(),
finish_reason
.is_some
(),
finish_reason
.is_some
(),
delta
.engine_data
,
)
&&
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
)
&&
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
{
{
stream_response
.nvext
=
Some
(
nvext_json
);
stream_response
.nvext
=
Some
(
nvext_json
);
...
@@ -558,6 +559,58 @@ mod tests {
...
@@ -558,6 +559,58 @@ mod tests {
"token_ids"
:
[
11
,
22
,
33
],
"token_ids"
:
[
11
,
22
,
33
],
"routed_experts"
:
{
"layer_0"
:
[
1
,
3
]}
"routed_experts"
:
{
"layer_0"
:
[
1
,
3
]}
})),
})),
engine_data
:
None
,
}
}
fn
create_test_request_with_extra_fields
(
fields
:
Vec
<
String
>
)
->
NvCreateChatCompletionRequest
{
let
messages
=
vec!
[
ChatCompletionRequestMessage
::
User
(
ChatCompletionRequestUserMessage
{
content
:
ChatCompletionRequestUserMessageContent
::
Text
(
"test"
.to_string
()),
name
:
None
,
},
)];
NvCreateChatCompletionRequest
{
inner
:
CreateChatCompletionRequest
{
model
:
"test-model"
.to_string
(),
messages
,
stream
:
Some
(
true
),
stream_options
:
None
,
..
Default
::
default
()
},
common
:
Default
::
default
(),
nvext
:
Some
(
crate
::
protocols
::
openai
::
nvext
::
NvExt
::
builder
()
.extra_fields
(
fields
)
.build
()
.unwrap
(),
),
chat_template_args
:
None
,
media_io_kwargs
:
None
,
unsupported_fields
:
Default
::
default
(),
}
}
fn
make_backend_output_with_engine_data
()
->
crate
::
protocols
::
common
::
llm_backend
::
BackendOutput
{
crate
::
protocols
::
common
::
llm_backend
::
BackendOutput
{
token_ids
:
vec!
[
42
],
tokens
:
vec!
[
Some
(
"hello"
.to_string
())],
text
:
Some
(
"hello"
.to_string
()),
cum_log_probs
:
None
,
log_probs
:
None
,
top_logprobs
:
None
,
finish_reason
:
Some
(
crate
::
protocols
::
common
::
FinishReason
::
Stop
),
stop_reason
:
None
,
index
:
Some
(
0
),
completion_usage
:
None
,
disaggregated_params
:
None
,
engine_data
:
Some
(
serde_json
::
json!
({
"kv_transfer_time_ms"
:
12.3
,
"disaggregated_kv_transfer_time_ms"
:
8.1
,
"prefill_compute_time_ms"
:
45.6
})),
}
}
}
}
...
@@ -653,4 +706,93 @@ mod tests {
...
@@ -653,4 +706,93 @@ mod tests {
assert
!
(
nvext_json
.get
(
"timing"
)
.is_none
());
assert
!
(
nvext_json
.get
(
"timing"
)
.is_none
());
assert
!
(
nvext_json
.get
(
"token_ids"
)
.is_none
());
assert
!
(
nvext_json
.get
(
"token_ids"
)
.is_none
());
}
}
#[test]
fn
test_engine_data_included_when_requested_via_extra_fields
()
{
let
request
=
create_test_request_with_extra_fields
(
vec!
[
"engine_data"
.to_string
()]);
let
mut
generator
=
request
.response_generator
(
"req-engine-1"
.to_string
());
let
backend_output
=
make_backend_output_with_engine_data
();
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
let
nvext
=
response
.nvext
.expect
(
"nvext should be present"
);
let
engine_data
=
nvext
.get
(
"engine_data"
)
.expect
(
"engine_data should be present"
);
assert_eq!
(
engine_data
[
"kv_transfer_time_ms"
],
12.3
);
assert_eq!
(
engine_data
[
"prefill_compute_time_ms"
],
45.6
);
}
#[test]
fn
test_engine_data_excluded_when_not_requested
()
{
let
request
=
create_test_request
();
let
mut
generator
=
request
.response_generator
(
"req-engine-2"
.to_string
());
let
backend_output
=
make_backend_output_with_engine_data
();
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
// nvext may or may not be present (tracker may inject worker_id),
// but engine_data specifically must be absent
if
let
Some
(
nvext
)
=
&
response
.nvext
{
assert
!
(
nvext
.get
(
"engine_data"
)
.is_none
()
||
nvext
.get
(
"engine_data"
)
.unwrap
()
.is_null
(),
"engine_data should not be present when not requested"
);
}
}
#[test]
fn
test_engine_data_excluded_when_other_extra_fields_requested
()
{
let
request
=
create_test_request_with_extra_fields
(
vec!
[
"timing"
.to_string
()]);
let
mut
generator
=
request
.response_generator
(
"req-engine-3"
.to_string
());
let
backend_output
=
make_backend_output_with_engine_data
();
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
if
let
Some
(
nvext
)
=
&
response
.nvext
{
assert
!
(
nvext
.get
(
"engine_data"
)
.is_none
()
||
nvext
.get
(
"engine_data"
)
.unwrap
()
.is_null
(),
"engine_data should not be present when only timing is requested"
);
}
}
#[test]
fn
test_engine_data_none_from_backend_no_nvext_noise
()
{
let
request
=
create_test_request_with_extra_fields
(
vec!
[
"engine_data"
.to_string
()]);
let
mut
generator
=
request
.response_generator
(
"req-engine-4"
.to_string
());
let
backend_output
=
crate
::
protocols
::
common
::
llm_backend
::
BackendOutput
{
token_ids
:
vec!
[
42
],
tokens
:
vec!
[
Some
(
"hello"
.to_string
())],
text
:
Some
(
"hello"
.to_string
()),
cum_log_probs
:
None
,
log_probs
:
None
,
top_logprobs
:
None
,
finish_reason
:
Some
(
crate
::
protocols
::
common
::
FinishReason
::
Stop
),
stop_reason
:
None
,
index
:
Some
(
0
),
completion_usage
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
// engine didn't provide any data
};
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
// engine_data is None from backend, so nvext.engine_data should be absent
if
let
Some
(
nvext
)
=
&
response
.nvext
{
assert
!
(
nvext
.get
(
"engine_data"
)
.is_none
()
||
nvext
.get
(
"engine_data"
)
.unwrap
()
.is_null
(),
"engine_data should not appear when backend provides None"
);
}
}
}
}
lib/llm/src/protocols/openai/completions/delta.rs
View file @
1c9412d2
...
@@ -312,6 +312,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
...
@@ -312,6 +312,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
self
.tracker
.as_ref
(),
self
.tracker
.as_ref
(),
delta
.disaggregated_params
.as_ref
(),
delta
.disaggregated_params
.as_ref
(),
finish_reason
.is_some
(),
finish_reason
.is_some
(),
delta
.engine_data
,
)
&&
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
)
&&
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
{
{
response
.nvext
=
Some
(
nvext_json
);
response
.nvext
=
Some
(
nvext_json
);
...
@@ -405,6 +406,49 @@ mod tests {
...
@@ -405,6 +406,49 @@ mod tests {
"token_ids"
:
[
11
,
22
,
33
],
"token_ids"
:
[
11
,
22
,
33
],
"routed_experts"
:
{
"layer_0"
:
[
1
,
3
]}
"routed_experts"
:
{
"layer_0"
:
[
1
,
3
]}
})),
})),
engine_data
:
None
,
}
}
fn
create_test_request_with_extra_fields
(
fields
:
Vec
<
String
>
)
->
NvCreateCompletionRequest
{
let
inner
=
CreateCompletionRequestArgs
::
default
()
.model
(
"test-model"
)
.prompt
(
Prompt
::
String
(
"test"
.to_string
()))
.build
()
.expect
(
"completion request"
);
NvCreateCompletionRequest
{
inner
,
common
:
Default
::
default
(),
nvext
:
Some
(
crate
::
protocols
::
openai
::
nvext
::
NvExt
::
builder
()
.extra_fields
(
fields
)
.build
()
.unwrap
(),
),
metadata
:
None
,
unsupported_fields
:
Default
::
default
(),
}
}
fn
make_backend_output_with_engine_data
()
->
BackendOutput
{
BackendOutput
{
token_ids
:
vec!
[
42
],
tokens
:
vec!
[
Some
(
"hello"
.to_string
())],
text
:
Some
(
"hello"
.to_string
()),
cum_log_probs
:
None
,
log_probs
:
None
,
top_logprobs
:
None
,
finish_reason
:
Some
(
common
::
FinishReason
::
Stop
),
stop_reason
:
None
,
index
:
Some
(
0
),
completion_usage
:
None
,
disaggregated_params
:
None
,
engine_data
:
Some
(
serde_json
::
json!
({
"kv_transfer_time_ms"
:
12.3
,
"disaggregated_kv_transfer_time_ms"
:
8.1
,
"prefill_compute_time_ms"
:
45.6
})),
}
}
}
}
...
@@ -500,4 +544,93 @@ mod tests {
...
@@ -500,4 +544,93 @@ mod tests {
assert
!
(
nvext_json
.get
(
"timing"
)
.is_none
());
assert
!
(
nvext_json
.get
(
"timing"
)
.is_none
());
assert
!
(
nvext_json
.get
(
"token_ids"
)
.is_none
());
assert
!
(
nvext_json
.get
(
"token_ids"
)
.is_none
());
}
}
#[test]
fn
test_engine_data_included_when_requested_via_extra_fields
()
{
let
request
=
create_test_request_with_extra_fields
(
vec!
[
"engine_data"
.to_string
()]);
let
mut
generator
=
request
.response_generator
(
"req-engine-1"
.to_string
());
let
backend_output
=
make_backend_output_with_engine_data
();
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
let
nvext
=
response
.nvext
.expect
(
"nvext should be present"
);
let
engine_data
=
nvext
.get
(
"engine_data"
)
.expect
(
"engine_data should be present"
);
assert_eq!
(
engine_data
[
"kv_transfer_time_ms"
],
12.3
);
assert_eq!
(
engine_data
[
"prefill_compute_time_ms"
],
45.6
);
}
#[test]
fn
test_engine_data_excluded_when_not_requested
()
{
let
request
=
create_test_request
();
let
mut
generator
=
request
.response_generator
(
"req-engine-2"
.to_string
());
let
backend_output
=
make_backend_output_with_engine_data
();
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
// nvext may or may not be present (tracker may inject worker_id),
// but engine_data specifically must be absent
if
let
Some
(
nvext
)
=
&
response
.nvext
{
assert
!
(
nvext
.get
(
"engine_data"
)
.is_none
()
||
nvext
.get
(
"engine_data"
)
.unwrap
()
.is_null
(),
"engine_data should not be present when not requested"
);
}
}
#[test]
fn
test_engine_data_excluded_when_other_extra_fields_requested
()
{
let
request
=
create_test_request_with_extra_fields
(
vec!
[
"timing"
.to_string
()]);
let
mut
generator
=
request
.response_generator
(
"req-engine-3"
.to_string
());
let
backend_output
=
make_backend_output_with_engine_data
();
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
if
let
Some
(
nvext
)
=
&
response
.nvext
{
assert
!
(
nvext
.get
(
"engine_data"
)
.is_none
()
||
nvext
.get
(
"engine_data"
)
.unwrap
()
.is_null
(),
"engine_data should not be present when only timing is requested"
);
}
}
#[test]
fn
test_engine_data_none_from_backend_no_nvext_noise
()
{
let
request
=
create_test_request_with_extra_fields
(
vec!
[
"engine_data"
.to_string
()]);
let
mut
generator
=
request
.response_generator
(
"req-engine-4"
.to_string
());
let
backend_output
=
BackendOutput
{
token_ids
:
vec!
[
42
],
tokens
:
vec!
[
Some
(
"hello"
.to_string
())],
text
:
Some
(
"hello"
.to_string
()),
cum_log_probs
:
None
,
log_probs
:
None
,
top_logprobs
:
None
,
finish_reason
:
Some
(
common
::
FinishReason
::
Stop
),
stop_reason
:
None
,
index
:
Some
(
0
),
completion_usage
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
// engine didn't provide any data
};
let
response
=
generator
.choice_from_postprocessor
(
backend_output
)
.expect
(
"should produce a response"
);
// engine_data is None from backend, so nvext.engine_data should be absent
if
let
Some
(
nvext
)
=
&
response
.nvext
{
assert
!
(
nvext
.get
(
"engine_data"
)
.is_none
()
||
nvext
.get
(
"engine_data"
)
.unwrap
()
.is_null
(),
"engine_data should not appear when backend provides None"
);
}
}
}
}
lib/llm/src/protocols/openai/nvext.rs
View file @
1c9412d2
...
@@ -114,6 +114,11 @@ pub struct NvExtResponse {
...
@@ -114,6 +114,11 @@ pub struct NvExtResponse {
/// Routed expert capture payload (SGLang-specific)
/// Routed expert capture payload (SGLang-specific)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
routed_experts
:
Option
<
serde_json
::
Value
>
,
pub
routed_experts
:
Option
<
serde_json
::
Value
>
,
/// Opaque engine data passed through from the backend worker.
/// Dynamo does not inspect this; it is forwarded as-is to the client.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
engine_data
:
Option
<
serde_json
::
Value
>
,
}
}
/// Response nvext fields requested for a given request.
/// Response nvext fields requested for a given request.
...
@@ -130,6 +135,7 @@ pub struct NvExtResponseFieldSelection {
...
@@ -130,6 +135,7 @@ pub struct NvExtResponseFieldSelection {
pub
timing
:
bool
,
pub
timing
:
bool
,
pub
token_ids
:
bool
,
pub
token_ids
:
bool
,
pub
routed_experts
:
bool
,
pub
routed_experts
:
bool
,
pub
engine_data
:
bool
,
}
}
impl
NvExtResponseFieldSelection
{
impl
NvExtResponseFieldSelection
{
...
@@ -145,6 +151,7 @@ impl NvExtResponseFieldSelection {
...
@@ -145,6 +151,7 @@ impl NvExtResponseFieldSelection {
"worker_id"
=>
selection
.worker_id
=
true
,
"worker_id"
=>
selection
.worker_id
=
true
,
"timing"
=>
selection
.timing
=
true
,
"timing"
=>
selection
.timing
=
true
,
"routed_experts"
=>
selection
.routed_experts
=
true
,
"routed_experts"
=>
selection
.routed_experts
=
true
,
"engine_data"
=>
selection
.engine_data
=
true
,
_
=>
{}
_
=>
{}
}
}
}
}
...
@@ -176,11 +183,13 @@ impl NvExtResponseFieldSelection {
...
@@ -176,11 +183,13 @@ impl NvExtResponseFieldSelection {
/// - `routed_experts` requires the selection flag **and** a `"routed_experts"` key on
/// - `routed_experts` requires the selection flag **and** a `"routed_experts"` key on
/// `disaggregated_params` (cloned as-is, no validation).
/// `disaggregated_params` (cloned as-is, no validation).
/// - `timing` requires the selection flag, `finish_reason_present == true`, **and** a tracker.
/// - `timing` requires the selection flag, `finish_reason_present == true`, **and** a tracker.
/// - `engine_data` requires the selection flag **and** a non-`None` `engine_data_from_backend`.
pub
fn
build_response_nvext
(
pub
fn
build_response_nvext
(
&
self
,
&
self
,
tracker
:
Option
<&
std
::
sync
::
Arc
<
crate
::
protocols
::
common
::
timing
::
RequestTracker
>>
,
tracker
:
Option
<&
std
::
sync
::
Arc
<
crate
::
protocols
::
common
::
timing
::
RequestTracker
>>
,
disaggregated_params
:
Option
<&
serde_json
::
Value
>
,
disaggregated_params
:
Option
<&
serde_json
::
Value
>
,
finish_reason_present
:
bool
,
finish_reason_present
:
bool
,
engine_data_from_backend
:
Option
<
serde_json
::
Value
>
,
)
->
Option
<
NvExtResponse
>
{
)
->
Option
<
NvExtResponse
>
{
let
worker_id
=
if
self
.worker_id
{
let
worker_id
=
if
self
.worker_id
{
tracker
.and_then
(|
t
|
t
.get_worker_info
())
tracker
.and_then
(|
t
|
t
.get_worker_info
())
...
@@ -210,10 +219,17 @@ impl NvExtResponseFieldSelection {
...
@@ -210,10 +219,17 @@ impl NvExtResponseFieldSelection {
None
None
};
};
let
engine_data
=
if
self
.engine_data
{
engine_data_from_backend
}
else
{
None
};
if
worker_id
.is_none
()
if
worker_id
.is_none
()
&&
token_ids
.is_none
()
&&
token_ids
.is_none
()
&&
routed_experts
.is_none
()
&&
routed_experts
.is_none
()
&&
timing
.is_none
()
&&
timing
.is_none
()
&&
engine_data
.is_none
()
{
{
return
None
;
return
None
;
}
}
...
@@ -223,6 +239,7 @@ impl NvExtResponseFieldSelection {
...
@@ -223,6 +239,7 @@ impl NvExtResponseFieldSelection {
timing
,
timing
,
token_ids
,
token_ids
,
routed_experts
,
routed_experts
,
engine_data
,
})
})
}
}
}
}
...
@@ -273,7 +290,7 @@ pub struct NvExt {
...
@@ -273,7 +290,7 @@ pub struct NvExt {
/// Extra fields to be included in the response's nvext
/// Extra fields to be included in the response's nvext
/// This is a list of field names that should be populated in the response
/// This is a list of field names that should be populated in the response
/// Supported fields include "worker_id", "timing", "routed_experts",
/// Supported fields include "worker_id", "timing", "routed_experts",
"engine_data",
/// which map to fields in NvExtResponse.
/// which map to fields in NvExtResponse.
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
#[serde(default,
skip_serializing_if
=
"Option::is_none"
)]
#[builder(default,
setter(strip_option))]
#[builder(default,
setter(strip_option))]
...
@@ -677,11 +694,11 @@ mod tests {
...
@@ -677,11 +694,11 @@ mod tests {
fn
test_build_response_nvext_all_false_returns_none
()
{
fn
test_build_response_nvext_all_false_returns_none
()
{
let
sel
=
sel_all_false
();
let
sel
=
sel_all_false
();
assert
!
(
assert
!
(
sel
.build_response_nvext
(
None
,
None
,
false
)
.is_none
(),
sel
.build_response_nvext
(
None
,
None
,
false
,
None
)
.is_none
(),
"no fields selected → None"
"no fields selected → None"
);
);
assert
!
(
assert
!
(
sel
.build_response_nvext
(
None
,
None
,
true
)
.is_none
(),
sel
.build_response_nvext
(
None
,
None
,
true
,
None
)
.is_none
(),
"finish_reason alone does not force emission"
"finish_reason alone does not force emission"
);
);
}
}
...
@@ -696,7 +713,7 @@ mod tests {
...
@@ -696,7 +713,7 @@ mod tests {
// finish_reason=false: worker_id still emitted (only timing is finish-gated).
// finish_reason=false: worker_id still emitted (only timing is finish-gated).
let
out
=
sel
let
out
=
sel
.build_response_nvext
(
Some
(
&
tracker
),
None
,
false
)
.build_response_nvext
(
Some
(
&
tracker
),
None
,
false
,
None
)
.expect
(
"worker_id should emit regardless of finish_reason"
);
.expect
(
"worker_id should emit regardless of finish_reason"
);
assert
!
(
out
.worker_id
.is_some
());
assert
!
(
out
.worker_id
.is_some
());
...
@@ -715,7 +732,7 @@ mod tests {
...
@@ -715,7 +732,7 @@ mod tests {
// timing alone + finish_reason=false → nothing to emit, returns None.
// timing alone + finish_reason=false → nothing to emit, returns None.
assert
!
(
assert
!
(
sel
.build_response_nvext
(
Some
(
&
tracker
),
None
,
false
)
sel
.build_response_nvext
(
Some
(
&
tracker
),
None
,
false
,
None
)
.is_none
(),
.is_none
(),
"timing is gated on finish_reason_present"
"timing is gated on finish_reason_present"
);
);
...
@@ -730,7 +747,7 @@ mod tests {
...
@@ -730,7 +747,7 @@ mod tests {
let
tracker
=
tracker_with_prefill_worker
();
let
tracker
=
tracker_with_prefill_worker
();
let
out
=
sel
let
out
=
sel
.build_response_nvext
(
Some
(
&
tracker
),
None
,
true
)
.build_response_nvext
(
Some
(
&
tracker
),
None
,
true
,
None
)
.expect
(
"timing should emit on finish"
);
.expect
(
"timing should emit on finish"
);
assert
!
(
out
.timing
.is_some
());
assert
!
(
out
.timing
.is_some
());
...
@@ -746,7 +763,7 @@ mod tests {
...
@@ -746,7 +763,7 @@ mod tests {
..
Default
::
default
()
..
Default
::
default
()
};
};
// finish=true but no tracker → timing not populated → None.
// finish=true but no tracker → timing not populated → None.
assert
!
(
sel
.build_response_nvext
(
None
,
None
,
true
)
.is_none
());
assert
!
(
sel
.build_response_nvext
(
None
,
None
,
true
,
None
)
.is_none
());
}
}
#[test]
#[test]
...
@@ -758,7 +775,7 @@ mod tests {
...
@@ -758,7 +775,7 @@ mod tests {
let
params
=
disagg_params_full
();
let
params
=
disagg_params_full
();
let
out
=
sel
let
out
=
sel
.build_response_nvext
(
None
,
Some
(
&
params
),
false
)
.build_response_nvext
(
None
,
Some
(
&
params
),
false
,
None
)
.expect
(
"token_ids should emit when present"
);
.expect
(
"token_ids should emit when present"
);
assert_eq!
(
out
.token_ids
,
Some
(
vec!
[
11u32
,
22
,
33
]));
assert_eq!
(
out
.token_ids
,
Some
(
vec!
[
11u32
,
22
,
33
]));
...
@@ -777,7 +794,7 @@ mod tests {
...
@@ -777,7 +794,7 @@ mod tests {
let
params
=
serde_json
::
json!
({
"token_ids"
:
"not-an-array"
});
let
params
=
serde_json
::
json!
({
"token_ids"
:
"not-an-array"
});
assert
!
(
assert
!
(
sel
.build_response_nvext
(
None
,
Some
(
&
params
),
false
)
sel
.build_response_nvext
(
None
,
Some
(
&
params
),
false
,
None
)
.is_none
(),
.is_none
(),
"malformed token_ids silently suppressed; nothing else selected → None"
"malformed token_ids silently suppressed; nothing else selected → None"
);
);
...
@@ -792,7 +809,7 @@ mod tests {
...
@@ -792,7 +809,7 @@ mod tests {
let
params
=
disagg_params_full
();
let
params
=
disagg_params_full
();
let
out
=
sel
let
out
=
sel
.build_response_nvext
(
None
,
Some
(
&
params
),
false
)
.build_response_nvext
(
None
,
Some
(
&
params
),
false
,
None
)
.expect
(
"routed_experts should emit when present"
);
.expect
(
"routed_experts should emit when present"
);
assert_eq!
(
assert_eq!
(
...
@@ -808,12 +825,13 @@ mod tests {
...
@@ -808,12 +825,13 @@ mod tests {
timing
:
true
,
timing
:
true
,
token_ids
:
true
,
token_ids
:
true
,
routed_experts
:
true
,
routed_experts
:
true
,
engine_data
:
false
,
};
};
let
tracker
=
tracker_with_prefill_worker
();
let
tracker
=
tracker_with_prefill_worker
();
let
params
=
disagg_params_full
();
let
params
=
disagg_params_full
();
let
out
=
sel
let
out
=
sel
.build_response_nvext
(
Some
(
&
tracker
),
Some
(
&
params
),
true
)
.build_response_nvext
(
Some
(
&
tracker
),
Some
(
&
params
),
true
,
None
)
.expect
(
"all fields selected and available → Some"
);
.expect
(
"all fields selected and available → Some"
);
assert
!
(
out
.worker_id
.is_some
());
assert
!
(
out
.worker_id
.is_some
());
...
@@ -843,6 +861,7 @@ mod tests {
...
@@ -843,6 +861,7 @@ mod tests {
timing
:
true
,
timing
:
true
,
token_ids
:
false
,
// only enabled via query_instance_id
token_ids
:
false
,
// only enabled via query_instance_id
routed_experts
:
true
,
routed_experts
:
true
,
engine_data
:
false
,
}
}
);
);
}
}
...
...
lib/llm/tests/test_streaming_usage.rs
View file @
1c9412d2
...
@@ -110,6 +110,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
...
@@ -110,6 +110,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
index
:
Some
(
0
),
index
:
Some
(
0
),
completion_usage
:
None
,
completion_usage
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
},
},
BackendOutput
{
BackendOutput
{
token_ids
:
vec!
[
1917
],
token_ids
:
vec!
[
1917
],
...
@@ -123,6 +124,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
...
@@ -123,6 +124,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
index
:
Some
(
0
),
index
:
Some
(
0
),
completion_usage
:
None
,
completion_usage
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
},
},
BackendOutput
{
BackendOutput
{
token_ids
:
vec!
[
0
],
token_ids
:
vec!
[
0
],
...
@@ -145,6 +147,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
...
@@ -145,6 +147,7 @@ fn build_backend_outputs_with_cached_tokens(cached_tokens: Option<u32>) -> Vec<B
completion_tokens_details
:
None
,
completion_tokens_details
:
None
,
}),
}),
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
},
},
]
]
}
}
...
...
lib/llm/tests/tool_choice.rs
View file @
1c9412d2
...
@@ -134,6 +134,7 @@ fn build_backend_output(text: &str) -> BackendOutput {
...
@@ -134,6 +134,7 @@ fn build_backend_output(text: &str) -> BackendOutput {
index
:
Some
(
0
),
index
:
Some
(
0
),
completion_usage
:
None
,
completion_usage
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
}
}
}
}
...
@@ -302,6 +303,7 @@ async fn test_streaming_named_tool_buffers_until_finish() {
...
@@ -302,6 +303,7 @@ async fn test_streaming_named_tool_buffers_until_finish() {
index
:
Some
(
0
),
index
:
Some
(
0
),
completion_usage
:
None
,
completion_usage
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
};
};
let
response
=
generator
let
response
=
generator
...
@@ -369,6 +371,7 @@ async fn test_streaming_required_tool_parallel() {
...
@@ -369,6 +371,7 @@ async fn test_streaming_required_tool_parallel() {
index
:
Some
(
0
),
index
:
Some
(
0
),
completion_usage
:
None
,
completion_usage
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
};
};
let
response
=
generator
let
response
=
generator
...
@@ -438,6 +441,7 @@ fn test_no_tool_choice_outputs_normal_text() {
...
@@ -438,6 +441,7 @@ fn test_no_tool_choice_outputs_normal_text() {
index
:
Some
(
0
),
index
:
Some
(
0
),
completion_usage
:
None
,
completion_usage
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
};
};
let
response
=
generator
let
response
=
generator
...
...
lib/llm/tests/tool_choice_finish_reasons.rs
View file @
1c9412d2
...
@@ -50,6 +50,7 @@ fn build_backend_output_with_finish(text: &str, finish: common::FinishReason) ->
...
@@ -50,6 +50,7 @@ fn build_backend_output_with_finish(text: &str, finish: common::FinishReason) ->
index
:
Some
(
0
),
index
:
Some
(
0
),
completion_usage
:
None
,
completion_usage
:
None
,
disaggregated_params
:
None
,
disaggregated_params
:
None
,
engine_data
:
None
,
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment