Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2887cd1c
Unverified
Commit
2887cd1c
authored
Mar 30, 2026
by
ishandhanani
Committed by
GitHub
Mar 30, 2026
Browse files
refactor(1/3): move `nvext` to `dynamo-llm` and move `anthropic` to `dynamo-async-openai` (#7564)
parent
d6136f4a
Changes
32
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1379 additions
and
1322 deletions
+1379
-1322
lib/async-openai/src/types/anthropic.rs
lib/async-openai/src/types/anthropic.rs
+869
-0
lib/async-openai/src/types/chat.rs
lib/async-openai/src/types/chat.rs
+0
-8
lib/async-openai/src/types/completion.rs
lib/async-openai/src/types/completion.rs
+0
-4
lib/async-openai/src/types/mod.rs
lib/async-openai/src/types/mod.rs
+1
-0
lib/llm/src/audit/stream.rs
lib/llm/src/audit/stream.rs
+95
-81
lib/llm/src/entrypoint/input/batch.rs
lib/llm/src/entrypoint/input/batch.rs
+3
-2
lib/llm/src/entrypoint/input/text.rs
lib/llm/src/entrypoint/input/text.rs
+3
-2
lib/llm/src/http/service/openai.rs
lib/llm/src/http/service/openai.rs
+24
-20
lib/llm/src/perf/logprobs.rs
lib/llm/src/perf/logprobs.rs
+76
-68
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+1
-1
lib/llm/src/preprocessor/speculative_prefill.rs
lib/llm/src/preprocessor/speculative_prefill.rs
+1
-1
lib/llm/src/protocols/anthropic/stream_converter.rs
lib/llm/src/protocols/anthropic/stream_converter.rs
+81
-75
lib/llm/src/protocols/anthropic/types.rs
lib/llm/src/protocols/anthropic/types.rs
+44
-897
lib/llm/src/protocols/openai/chat_completions.rs
lib/llm/src/protocols/openai/chat_completions.rs
+16
-13
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+107
-97
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+25
-21
lib/llm/src/protocols/openai/chat_completions/jail.rs
lib/llm/src/protocols/openai/chat_completions/jail.rs
+19
-17
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+3
-2
lib/llm/src/protocols/openai/completions/aggregator.rs
lib/llm/src/protocols/openai/completions/aggregator.rs
+8
-8
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+3
-5
No files found.
lib/async-openai/src/types/anthropic.rs
0 → 100644
View file @
2887cd1c
This diff is collapsed.
Click to expand it.
lib/async-openai/src/types/chat.rs
View file @
2887cd1c
...
@@ -1182,10 +1182,6 @@ pub struct CreateChatCompletionResponse {
...
@@ -1182,10 +1182,6 @@ pub struct CreateChatCompletionResponse {
/// The object type, which is always `chat.completion`.
/// The object type, which is always `chat.completion`.
pub
object
:
String
,
pub
object
:
String
,
pub
usage
:
Option
<
CompletionUsage
>
,
pub
usage
:
Option
<
CompletionUsage
>
,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
/// Parsed server side events stream until an \[DONE\] is received from server.
/// Parsed server side events stream until an \[DONE\] is received from server.
...
@@ -1281,10 +1277,6 @@ pub struct CreateChatCompletionStreamResponse {
...
@@ -1281,10 +1277,6 @@ pub struct CreateChatCompletionStreamResponse {
/// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request.
/// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request.
/// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.
/// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.
pub
usage
:
Option
<
CompletionUsage
>
,
pub
usage
:
Option
<
CompletionUsage
>
,
/// NVIDIA extension field for response metadata
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
#[cfg(test)]
#[cfg(test)]
...
...
lib/async-openai/src/types/completion.rs
View file @
2887cd1c
...
@@ -224,10 +224,6 @@ pub struct CreateCompletionResponse {
...
@@ -224,10 +224,6 @@ pub struct CreateCompletionResponse {
/// The object type, which is always "text_completion"
/// The object type, which is always "text_completion"
pub
object
:
String
,
pub
object
:
String
,
pub
usage
:
Option
<
CompletionUsage
>
,
pub
usage
:
Option
<
CompletionUsage
>
,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
/// Parsed server side events stream until an \[DONE\] is received from server.
/// Parsed server side events stream until an \[DONE\] is received from server.
...
...
lib/async-openai/src/types/mod.rs
View file @
2887cd1c
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
//! Types used in OpenAI API requests and responses.
//! Types used in OpenAI API requests and responses.
//! These types are created from component schemas in the [OpenAPI spec](https://github.com/openai/openai-openapi)
//! These types are created from component schemas in the [OpenAPI spec](https://github.com/openai/openai-openapi)
pub
mod
anthropic
;
mod
assistant
;
mod
assistant
;
mod
assistant_impls
;
mod
assistant_impls
;
mod
assistant_stream
;
mod
assistant_stream
;
...
...
lib/llm/src/audit/stream.rs
View file @
2887cd1c
...
@@ -90,14 +90,16 @@ where
...
@@ -90,14 +90,16 @@ where
tracing
::
warn!
(
"audit: aggregation future canceled/failed"
);
tracing
::
warn!
(
"audit: aggregation future canceled/failed"
);
// Return minimal response if aggregation failed
// Return minimal response if aggregation failed
NvCreateChatCompletionResponse
{
NvCreateChatCompletionResponse
{
id
:
String
::
new
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
0
,
id
:
String
::
new
(),
usage
:
None
,
created
:
0
,
model
:
String
::
new
(),
usage
:
None
,
object
:
"chat.completion"
.to_string
(),
model
:
String
::
new
(),
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[],
system_fingerprint
:
None
,
service_tier
:
None
,
choices
:
vec!
[],
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
})
})
...
@@ -125,14 +127,16 @@ where
...
@@ -125,14 +127,16 @@ where
Err
(
e
)
=>
{
Err
(
e
)
=>
{
tracing
::
warn!
(
"fold aggregation failed: {e}"
);
tracing
::
warn!
(
"fold aggregation failed: {e}"
);
let
fallback
=
NvCreateChatCompletionResponse
{
let
fallback
=
NvCreateChatCompletionResponse
{
id
:
String
::
new
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
0
,
id
:
String
::
new
(),
usage
:
None
,
created
:
0
,
model
:
String
::
new
(),
usage
:
None
,
object
:
"chat.completion"
.to_string
(),
model
:
String
::
new
(),
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[],
system_fingerprint
:
None
,
service_tier
:
None
,
choices
:
vec!
[],
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
let
_
=
tx
.send
(
fallback
.clone
());
let
_
=
tx
.send
(
fallback
.clone
());
...
@@ -145,14 +149,16 @@ where
...
@@ -145,14 +149,16 @@ where
rx
.await
.unwrap_or_else
(|
_
|
{
rx
.await
.unwrap_or_else
(|
_
|
{
tracing
::
warn!
(
"fold aggregation future canceled"
);
tracing
::
warn!
(
"fold aggregation future canceled"
);
NvCreateChatCompletionResponse
{
NvCreateChatCompletionResponse
{
id
:
String
::
new
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
0
,
id
:
String
::
new
(),
usage
:
None
,
created
:
0
,
model
:
String
::
new
(),
usage
:
None
,
object
:
"chat.completion"
.to_string
(),
model
:
String
::
new
(),
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[],
system_fingerprint
:
None
,
service_tier
:
None
,
choices
:
vec!
[],
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
})
})
...
@@ -171,8 +177,8 @@ pub fn final_response_to_one_chunk_stream(
...
@@ -171,8 +177,8 @@ pub fn final_response_to_one_chunk_stream(
)
->
std
::
pin
::
Pin
<
)
->
std
::
pin
::
Pin
<
Box
<
dyn
futures
::
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
+
Send
>
,
Box
<
dyn
futures
::
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
+
Send
>
,
>
{
>
{
let
mut
choices
:
Vec
<
ChatChoiceStream
>
=
Vec
::
with_capacity
(
resp
.choices
.len
());
let
mut
choices
:
Vec
<
ChatChoiceStream
>
=
Vec
::
with_capacity
(
resp
.
inner.
choices
.len
());
for
(
idx
,
ch
)
in
resp
.choices
.iter
()
.enumerate
()
{
for
(
idx
,
ch
)
in
resp
.
inner.
choices
.iter
()
.enumerate
()
{
// Convert FunctionCall to FunctionCallStream if present
// Convert FunctionCall to FunctionCallStream if present
#[allow(deprecated)]
#[allow(deprecated)]
let
function_call
=
ch
.message.function_call
.as_ref
()
.map
(|
fc
|
{
let
function_call
=
ch
.message.function_call
.as_ref
()
.map
(|
fc
|
{
...
@@ -222,14 +228,16 @@ pub fn final_response_to_one_chunk_stream(
...
@@ -222,14 +228,16 @@ pub fn final_response_to_one_chunk_stream(
}
}
let
chunk
=
NvCreateChatCompletionStreamResponse
{
let
chunk
=
NvCreateChatCompletionStreamResponse
{
id
:
resp
.id
.clone
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
"chat.completion.chunk"
.to_string
(),
id
:
resp
.inner.id
.clone
(),
created
:
resp
.created
,
object
:
"chat.completion.chunk"
.to_string
(),
model
:
resp
.model
.clone
(),
created
:
resp
.inner.created
,
system_fingerprint
:
resp
.system_fingerprint
.clone
(),
model
:
resp
.inner.model
.clone
(),
service_tier
:
resp
.service_tier
.clone
(),
system_fingerprint
:
resp
.inner.system_fingerprint
.clone
(),
choices
,
service_tier
:
resp
.inner.service_tier
.clone
(),
usage
:
resp
.usage
.clone
(),
choices
,
usage
:
resp
.inner.usage
.clone
(),
},
nvext
:
resp
.nvext
.clone
(),
nvext
:
resp
.nvext
.clone
(),
};
};
...
@@ -275,14 +283,16 @@ mod tests {
...
@@ -275,14 +283,16 @@ mod tests {
};
};
let
response
=
NvCreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
choice
],
id
:
"test-id"
.to_string
(),
created
:
1234567890
,
choices
:
vec!
[
choice
],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -314,14 +324,16 @@ mod tests {
...
@@ -314,14 +324,16 @@ mod tests {
};
};
let
response
=
NvCreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
choice
],
id
:
"test-id"
.to_string
(),
created
:
1234567890
,
choices
:
vec!
[
choice
],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
Some
(
"test-fingerprint"
.to_string
()),
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -339,7 +351,7 @@ mod tests {
...
@@ -339,7 +351,7 @@ mod tests {
chunk
chunk
.data
.data
.as_ref
()
.as_ref
()
.and_then
(|
d
|
d
.choices
.first
())
.and_then
(|
d
|
d
.
inner.
choices
.first
())
.and_then
(|
c
|
c
.delta.content
.as_ref
())
.and_then
(|
c
|
c
.delta.content
.as_ref
())
.and_then
(|
content
|
match
content
{
.and_then
(|
content
|
match
content
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.clone
()),
ChatCompletionMessageContent
::
Text
(
text
)
=>
Some
(
text
.clone
()),
...
@@ -396,7 +408,7 @@ mod tests {
...
@@ -396,7 +408,7 @@ mod tests {
assert_eq!
(
results
.len
(),
0
,
"Empty stream should produce no chunks"
);
assert_eq!
(
results
.len
(),
0
,
"Empty stream should produce no chunks"
);
// Verify fallback response (aggregation will fail on empty stream)
// Verify fallback response (aggregation will fail on empty stream)
assert_eq!
(
final_resp
.object
,
"chat.completion"
);
assert_eq!
(
final_resp
.
inner.
object
,
"chat.completion"
);
// Should get fallback response, not panic
// Should get fallback response, not panic
}
}
...
@@ -415,7 +427,7 @@ mod tests {
...
@@ -415,7 +427,7 @@ mod tests {
assert_eq!
(
extract_content
(
&
results
[
0
]),
"Single chunk"
);
assert_eq!
(
extract_content
(
&
results
[
0
]),
"Single chunk"
);
// Verify aggregation
// Verify aggregation
assert_eq!
(
final_resp
.object
,
"chat.completion"
);
assert_eq!
(
final_resp
.
inner.
object
,
"chat.completion"
);
}
}
#[tokio::test]
#[tokio::test]
...
@@ -423,32 +435,34 @@ mod tests {
...
@@ -423,32 +435,34 @@ mod tests {
// Test that metadata (id, event, comment) is preserved through passthrough
// Test that metadata (id, event, comment) is preserved through passthrough
let
chunk_with_metadata
=
Annotated
{
let
chunk_with_metadata
=
Annotated
{
data
:
Some
(
NvCreateChatCompletionStreamResponse
{
data
:
Some
(
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[{
id
:
"test-id"
.to_string
(),
#[allow(deprecated)]
choices
:
vec!
[{
ChatChoiceStream
{
#[allow(deprecated)]
index
:
0
,
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
role
:
Some
(
Role
::
Assistant
),
delta
:
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
role
:
Some
(
Role
::
Assistant
),
"Content"
.to_string
(),
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
)),
"Content"
.to_string
(),
tool_calls
:
None
,
)),
function_call
:
None
,
tool_calls
:
None
,
refusal
:
None
,
function_call
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}
logprobs
:
None
,
}],
}
created
:
1234567890
,
}],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
system_fingerprint
:
None
,
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}),
}),
id
:
Some
(
"correlation-123"
.to_string
()),
id
:
Some
(
"correlation-123"
.to_string
()),
...
@@ -481,7 +495,7 @@ mod tests {
...
@@ -481,7 +495,7 @@ mod tests {
let
(
resp1
,
resp2
)
=
tokio
::
join!
(
future1
,
future2
);
let
(
resp1
,
resp2
)
=
tokio
::
join!
(
future1
,
future2
);
// Both should complete successfully
// Both should complete successfully
assert_eq!
(
resp1
.object
,
"chat.completion"
);
assert_eq!
(
resp1
.
inner.
object
,
"chat.completion"
);
assert_eq!
(
resp2
.object
,
"chat.completion"
);
assert_eq!
(
resp2
.
inner.
object
,
"chat.completion"
);
}
}
}
}
lib/llm/src/entrypoint/input/batch.rs
View file @
2887cd1c
...
@@ -238,8 +238,9 @@ async fn evaluate(
...
@@ -238,8 +238,9 @@ async fn evaluate(
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
(
Some
(
data
),
_
)
=>
{
(
Some
(
data
),
_
)
=>
{
// Normal case
// Normal case
let
choice
=
data
.choices
.first
();
let
Some
(
chat_comp
)
=
data
.inner.choices
.first
()
else
{
let
chat_comp
=
choice
.as_ref
()
.unwrap
();
continue
;
};
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
match
c
{
match
c
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
...
...
lib/llm/src/entrypoint/input/text.rs
View file @
2887cd1c
...
@@ -138,8 +138,9 @@ async fn main_loop(
...
@@ -138,8 +138,9 @@ async fn main_loop(
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
match
(
item
.data
.as_ref
(),
item
.event
.as_deref
())
{
(
Some
(
data
),
_
)
=>
{
(
Some
(
data
),
_
)
=>
{
// Normal case
// Normal case
let
entry
=
data
.choices
.first
();
let
Some
(
chat_comp
)
=
data
.inner.choices
.first
()
else
{
let
chat_comp
=
entry
.as_ref
()
.unwrap
();
continue
;
};
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
if
let
Some
(
c
)
=
&
chat_comp
.delta.content
{
match
c
{
match
c
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
ChatCompletionMessageContent
::
Text
(
text
)
=>
{
...
...
lib/llm/src/http/service/openai.rs
View file @
2887cd1c
...
@@ -991,7 +991,7 @@ fn streaming_tool_dispatch_events(
...
@@ -991,7 +991,7 @@ fn streaming_tool_dispatch_events(
};
};
let
mut
events
=
vec!
[];
let
mut
events
=
vec!
[];
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
let
Some
(
tool_calls
)
=
&
choice
.delta.tool_calls
else
{
let
Some
(
tool_calls
)
=
&
choice
.delta.tool_calls
else
{
continue
;
continue
;
};
};
...
@@ -1034,7 +1034,7 @@ fn accumulate_reasoning_dispatch(
...
@@ -1034,7 +1034,7 @@ fn accumulate_reasoning_dispatch(
};
};
let
mut
events
=
vec!
[];
let
mut
events
=
vec!
[];
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
let
buffer
=
buffers
.entry
(
choice
.index
)
.or_default
();
let
buffer
=
buffers
.entry
(
choice
.index
)
.or_default
();
let
has_reasoning
=
choice
let
has_reasoning
=
choice
.delta
.delta
...
@@ -2892,15 +2892,17 @@ mod tests {
...
@@ -2892,15 +2892,17 @@ mod tests {
// Create a normal data event
// Create a normal data event
let
normal_event
=
Annotated
::
<
NvCreateChatCompletionStreamResponse
>
{
let
normal_event
=
Annotated
::
<
NvCreateChatCompletionStreamResponse
>
{
data
:
Some
(
CreateChatCompletionStreamResponse
{
data
:
Some
(
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
CreateChatCompletionStreamResponse
{
choices
:
vec!
[],
id
:
"test-id"
.to_string
(),
created
:
0
,
choices
:
vec!
[],
model
:
"test-model"
.to_string
(),
created
:
0
,
system_fingerprint
:
None
,
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
service_tier
:
None
,
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}),
}),
id
:
Some
(
"msg-1"
.to_string
()),
id
:
Some
(
"msg-1"
.to_string
()),
...
@@ -3162,15 +3164,17 @@ mod tests {
...
@@ -3162,15 +3164,17 @@ mod tests {
fn
make_stream_response
(
fn
make_stream_response
(
choices
:
Vec
<
ChatChoiceStream
>
,
choices
:
Vec
<
ChatChoiceStream
>
,
)
->
Annotated
<
NvCreateChatCompletionStreamResponse
>
{
)
->
Annotated
<
NvCreateChatCompletionStreamResponse
>
{
let
response
=
CreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test-id"
.to_string
(),
inner
:
CreateChatCompletionStreamResponse
{
choices
,
id
:
"test-id"
.to_string
(),
created
:
0
,
choices
,
model
:
"test-model"
.to_string
(),
created
:
0
,
system_fingerprint
:
None
,
model
:
"test-model"
.to_string
(),
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
service_tier
:
None
,
usage
:
None
,
service_tier
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
Annotated
{
Annotated
{
...
...
lib/llm/src/perf/logprobs.rs
View file @
2887cd1c
...
@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse {
...
@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse {
fn
extract_logprobs_by_choice
(
&
self
)
->
HashMap
<
u32
,
Vec
<
TokenLogProbs
>>
{
fn
extract_logprobs_by_choice
(
&
self
)
->
HashMap
<
u32
,
Vec
<
TokenLogProbs
>>
{
let
mut
result
=
HashMap
::
new
();
let
mut
result
=
HashMap
::
new
();
for
choice
in
&
self
.choices
{
for
choice
in
&
self
.
inner.
choices
{
let
choice_index
=
choice
.index
;
let
choice_index
=
choice
.index
;
let
choice_logprobs
=
choice
let
choice_logprobs
=
choice
...
@@ -949,34 +949,36 @@ mod tests {
...
@@ -949,34 +949,36 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
)
->
NvCreateChatCompletionStreamResponse
{
#[expect(deprecated)]
#[expect(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"test_id"
.to_string
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
Some
(
delta
:
ChatCompletionStreamResponseDelta
{
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
:
Some
(
"test"
.to_string
(),
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"test"
.to_string
(),
),
),
),
)
,
function_call
:
None
,
function
_call
:
None
,
tool
_call
s
:
None
,
tool_calls
:
None
,
role
:
Some
(
Role
::
Assistant
)
,
role
:
Some
(
Role
::
Assistant
)
,
refusal
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
}
,
}
,
finish_reason
:
Some
(
FinishReason
::
Stop
)
,
finish
_reason
:
Some
(
FinishReason
::
Stop
)
,
stop
_reason
:
None
,
stop_reason
:
None
,
logprobs
:
Some
(
ChatChoiceLogprobs
{
logprobs
:
Some
(
ChatChoiceL
ogprobs
{
content
:
Some
(
token_l
ogprobs
),
content
:
Some
(
token_logprobs
)
,
refusal
:
None
,
refusal
:
None
,
})
,
}
)
,
}
]
,
}]
,
created
:
1234567890
,
created
:
1234567890
,
model
:
"test-model"
.to_string
()
,
model
:
"test-model"
.to_string
()
,
service_tier
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
()
,
object
:
"chat.completion.chunk"
.to_string
()
,
usage
:
None
,
usage
:
None
,
}
,
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -1012,14 +1014,16 @@ mod tests {
...
@@ -1012,14 +1014,16 @@ mod tests {
.collect
();
.collect
();
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
,
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
choices
,
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
service_tier
:
None
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -1341,31 +1345,33 @@ mod tests {
...
@@ -1341,31 +1345,33 @@ mod tests {
// Test with choice that has no logprobs
// Test with choice that has no logprobs
#[expect(deprecated)]
#[expect(deprecated)]
let
response
=
NvCreateChatCompletionStreamResponse
{
let
response
=
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"test_id"
.to_string
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
Some
(
delta
:
ChatCompletionStreamResponseDelta
{
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
content
:
Some
(
"test"
.to_string
(),
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
"test"
.to_string
(),
),
),
),
)
,
function_call
:
None
,
function
_call
:
None
,
tool
_call
s
:
None
,
tool_calls
:
None
,
role
:
Some
(
Role
::
Assistant
)
,
role
:
Some
(
Role
::
Assistant
)
,
refusal
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
reasoning_content
:
None
,
}
,
}
,
finish_reason
:
Some
(
FinishReason
::
Stop
)
,
finish
_reason
:
Some
(
FinishReason
::
Stop
)
,
stop
_reason
:
None
,
stop_reason
:
None
,
logprobs
:
None
,
// No logprobs
logprobs
:
None
,
// No logprobs
}],
}]
,
created
:
1234567890
,
created
:
1234567890
,
model
:
"test-model"
.to_string
()
,
model
:
"test-model"
.to_string
()
,
service_tier
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
system_fingerprint
:
None
,
object
:
"chat.completion.chunk"
.to_string
()
,
object
:
"chat.completion.chunk"
.to_string
()
,
usage
:
None
,
usage
:
None
,
}
,
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -1573,14 +1579,16 @@ mod tests {
...
@@ -1573,14 +1579,16 @@ mod tests {
// In practice, this would have real logprobs data
// In practice, this would have real logprobs data
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[],
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
choices
:
vec!
[],
model
:
"test-model"
.to_string
(),
created
:
1234567890
,
service_tier
:
None
,
model
:
"test-model"
.to_string
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.to_string
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
...
lib/llm/src/preprocessor.rs
View file @
2887cd1c
...
@@ -1217,7 +1217,7 @@ impl OpenAIPreprocessor {
...
@@ -1217,7 +1217,7 @@ impl OpenAIPreprocessor {
let
processed_response
=
if
let
Some
(
ref
mut
parser
)
=
state
.reasoning_parser
{
let
processed_response
=
if
let
Some
(
ref
mut
parser
)
=
state
.reasoning_parser
{
response
.map_data
(|
mut
data
|
{
response
.map_data
(|
mut
data
|
{
// Process all choices, not just the first one
// Process all choices, not just the first one
for
choice
in
data
.choices
.iter_mut
()
{
for
choice
in
data
.
inner.
choices
.iter_mut
()
{
// Reasoning parsing only applies to text content
// Reasoning parsing only applies to text content
if
let
Some
(
if
let
Some
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionMessageContent
::
Text
(
...
...
lib/llm/src/preprocessor/speculative_prefill.rs
View file @
2887cd1c
...
@@ -111,7 +111,7 @@ pub fn maybe_wrap_stream(
...
@@ -111,7 +111,7 @@ pub fn maybe_wrap_stream(
let
mut
prefill_tx
=
Some
(
tx
);
let
mut
prefill_tx
=
Some
(
tx
);
Box
::
pin
(
stream
.map
(
move
|
item
|
{
Box
::
pin
(
stream
.map
(
move
|
item
|
{
if
let
Some
(
ref
resp
)
=
item
.data
{
if
let
Some
(
ref
resp
)
=
item
.data
{
for
choice
in
&
resp
.choices
{
for
choice
in
&
resp
.
inner.
choices
{
if
let
Some
(
ChatCompletionMessageContent
::
Text
(
ref
text
))
=
choice
.delta.content
{
if
let
Some
(
ChatCompletionMessageContent
::
Text
(
ref
text
))
=
choice
.delta.content
{
accumulated_text
.push_str
(
text
);
accumulated_text
.push_str
(
text
);
}
}
...
...
lib/llm/src/protocols/anthropic/stream_converter.rs
View file @
2887cd1c
...
@@ -106,7 +106,7 @@ impl AnthropicStreamConverter {
...
@@ -106,7 +106,7 @@ impl AnthropicStreamConverter {
let
mut
events
=
Vec
::
new
();
let
mut
events
=
Vec
::
new
();
// Capture real token usage from engine when available (typically on the final chunk).
// Capture real token usage from engine when available (typically on the final chunk).
if
let
Some
(
usage
)
=
&
chunk
.usage
{
if
let
Some
(
usage
)
=
&
chunk
.
inner.
usage
{
self
.input_token_count
=
usage
.prompt_tokens
;
self
.input_token_count
=
usage
.prompt_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.cached_token_count
=
usage
self
.cached_token_count
=
usage
...
@@ -115,7 +115,7 @@ impl AnthropicStreamConverter {
...
@@ -115,7 +115,7 @@ impl AnthropicStreamConverter {
.and_then
(|
d
|
d
.cached_tokens
);
.and_then
(|
d
|
d
.cached_tokens
);
}
}
for
choice
in
&
chunk
.choices
{
for
choice
in
&
chunk
.
inner.
choices
{
let
delta
=
&
choice
.delta
;
let
delta
=
&
choice
.delta
;
// Track finish reason
// Track finish reason
...
@@ -444,7 +444,7 @@ impl AnthropicStreamConverter {
...
@@ -444,7 +444,7 @@ impl AnthropicStreamConverter {
)
->
Vec
<
TaggedEvent
>
{
)
->
Vec
<
TaggedEvent
>
{
let
mut
events
=
Vec
::
new
();
let
mut
events
=
Vec
::
new
();
if
let
Some
(
usage
)
=
&
chunk
.usage
{
if
let
Some
(
usage
)
=
&
chunk
.
inner.
usage
{
self
.input_token_count
=
usage
.prompt_tokens
;
self
.input_token_count
=
usage
.prompt_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.output_token_count
=
usage
.completion_tokens
;
self
.cached_token_count
=
usage
self
.cached_token_count
=
usage
...
@@ -453,7 +453,7 @@ impl AnthropicStreamConverter {
...
@@ -453,7 +453,7 @@ impl AnthropicStreamConverter {
.and_then
(|
d
|
d
.cached_tokens
);
.and_then
(|
d
|
d
.cached_tokens
);
}
}
for
choice
in
&
chunk
.choices
{
for
choice
in
&
chunk
.
inner.
choices
{
let
delta
=
&
choice
.delta
;
let
delta
=
&
choice
.delta
;
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
if
let
Some
(
ref
fr
)
=
choice
.finish_reason
{
...
@@ -722,27 +722,29 @@ mod tests {
...
@@ -722,27 +722,29 @@ mod tests {
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
fn
text_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"chat-1"
.into
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
delta
:
ChatCompletionStreamResponseDelta
{
function_call
:
None
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
text
.into
())),
tool_calls
:
None
,
function_call
:
None
,
role
:
None
,
tool_calls
:
None
,
refusal
:
None
,
role
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}],
logprobs
:
None
,
created
:
0
,
}],
model
:
"test"
.into
(),
created
:
0
,
service_tier
:
None
,
model
:
"test"
.into
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.into
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -755,35 +757,37 @@ mod tests {
...
@@ -755,35 +757,37 @@ mod tests {
)
->
NvCreateChatCompletionStreamResponse
{
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"chat-1"
.into
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
None
,
delta
:
ChatCompletionStreamResponseDelta
{
function_call
:
None
,
content
:
None
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCallChunk
{
function_call
:
None
,
index
:
tc_index
,
tool_calls
:
Some
(
vec!
[
ChatCompletionMessageToolCallChunk
{
id
:
id
.map
(
String
::
from
),
index
:
tc_index
,
r
#
type
:
Some
(
ChatCompletionToolType
::
Function
),
id
:
id
.map
(
String
::
from
),
function
:
Some
(
FunctionCallStream
{
r
#
type
:
Some
(
ChatCompletionToolType
::
Function
),
name
:
name
.map
(
String
::
from
),
function
:
Some
(
FunctionCallStream
{
arguments
:
args
.map
(
String
::
from
),
name
:
name
.map
(
String
::
from
),
}),
arguments
:
args
.map
(
String
::
from
),
}]),
}),
role
:
None
,
}]),
refusal
:
None
,
role
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}],
logprobs
:
None
,
created
:
0
,
}],
model
:
"test"
.into
(),
created
:
0
,
service_tier
:
None
,
model
:
"test"
.into
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.into
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
@@ -908,27 +912,29 @@ mod tests {
...
@@ -908,27 +912,29 @@ mod tests {
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
fn
reasoning_chunk
(
text
:
&
str
)
->
NvCreateChatCompletionStreamResponse
{
#[allow(deprecated)]
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
"chat-1"
.into
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
choices
:
vec!
[
ChatChoiceStream
{
id
:
"chat-1"
.into
(),
index
:
0
,
choices
:
vec!
[
ChatChoiceStream
{
delta
:
ChatCompletionStreamResponseDelta
{
index
:
0
,
content
:
None
,
delta
:
ChatCompletionStreamResponseDelta
{
function_call
:
None
,
content
:
None
,
tool_calls
:
None
,
function_call
:
None
,
role
:
None
,
tool_calls
:
None
,
refusal
:
None
,
role
:
None
,
reasoning_content
:
Some
(
text
.into
()),
refusal
:
None
,
},
reasoning_content
:
Some
(
text
.into
()),
finish_reason
:
None
,
},
stop_reason
:
None
,
finish_reason
:
None
,
logprobs
:
None
,
stop_reason
:
None
,
}],
logprobs
:
None
,
created
:
0
,
}],
model
:
"test"
.into
(),
created
:
0
,
service_tier
:
None
,
model
:
"test"
.into
(),
system_fingerprint
:
None
,
service_tier
:
None
,
object
:
"chat.completion.chunk"
.into
(),
system_fingerprint
:
None
,
usage
:
None
,
object
:
"chat.completion.chunk"
.into
(),
usage
:
None
,
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
...
lib/llm/src/protocols/anthropic/types.rs
View file @
2887cd1c
This diff is collapsed.
Click to expand it.
lib/llm/src/protocols/openai/chat_completions.rs
View file @
2887cd1c
...
@@ -64,21 +64,24 @@ pub struct NvCreateChatCompletionRequest {
...
@@ -64,21 +64,24 @@ pub struct NvCreateChatCompletionRequest {
}
}
/// A response structure for unary chat completion responses, embedding OpenAI's
/// A response structure for unary chat completion responses, embedding OpenAI's
/// `CreateChatCompletionResponse`.
/// `CreateChatCompletionResponse` with optional NVIDIA extension metadata.
///
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
/// # Fields
pub
struct
NvCreateChatCompletionResponse
{
/// - `inner`: The base OpenAI unary chat completion response, embedded
#[serde(flatten)]
/// using `serde(flatten)`.
pub
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
,
pub
type
NvCreateChatCompletionResponse
=
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
;
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
/// A response structure for streamed chat completions, embedding OpenAI's
/// A response structure for streamed chat completions, embedding OpenAI's
/// `CreateChatCompletionStreamResponse`.
/// `CreateChatCompletionStreamResponse` with optional NVIDIA extension metadata.
///
#[derive(Serialize,
Deserialize,
Debug,
Clone,
PartialEq)]
/// # Fields
pub
struct
NvCreateChatCompletionStreamResponse
{
/// - `inner`: The base OpenAI streaming chat completion response, embedded
#[serde(flatten)]
/// using `serde(flatten)`.
pub
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
,
pub
type
NvCreateChatCompletionStreamResponse
=
#[serde(skip_serializing_if
=
"Option::is_none"
)]
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
;
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
/// providing access to NVIDIA-specific extensions.
/// providing access to NVIDIA-specific extensions.
...
...
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
2887cd1c
...
@@ -136,16 +136,16 @@ impl DeltaAggregator {
...
@@ -136,16 +136,16 @@ impl DeltaAggregator {
if
aggregator
.error
.is_none
()
if
aggregator
.error
.is_none
()
&&
let
Some
(
delta
)
=
delta
.data
&&
let
Some
(
delta
)
=
delta
.data
{
{
aggregator
.id
=
delta
.id
;
aggregator
.id
=
delta
.
inner.
id
;
aggregator
.model
=
delta
.model
;
aggregator
.model
=
delta
.
inner.
model
;
aggregator
.created
=
delta
.created
;
aggregator
.created
=
delta
.
inner.
created
;
aggregator
.service_tier
=
delta
.service_tier
;
aggregator
.service_tier
=
delta
.
inner.
service_tier
;
// Aggregate usage statistics if available.
// Aggregate usage statistics if available.
if
let
Some
(
usage
)
=
delta
.usage
{
if
let
Some
(
usage
)
=
delta
.
inner.
usage
{
aggregator
.usage
=
Some
(
usage
);
aggregator
.usage
=
Some
(
usage
);
}
}
if
let
Some
(
system_fingerprint
)
=
delta
.system_fingerprint
{
if
let
Some
(
system_fingerprint
)
=
delta
.
inner.
system_fingerprint
{
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
}
}
...
@@ -155,7 +155,7 @@ impl DeltaAggregator {
...
@@ -155,7 +155,7 @@ impl DeltaAggregator {
}
}
// Aggregate choices incrementally.
// Aggregate choices incrementally.
for
choice
in
delta
.choices
{
for
choice
in
delta
.
inner.
choices
{
let
state_choice
=
let
state_choice
=
aggregator
aggregator
.choices
.choices
...
@@ -267,14 +267,16 @@ impl DeltaAggregator {
...
@@ -267,14 +267,16 @@ impl DeltaAggregator {
// Construct the final response object.
// Construct the final response object.
let
response
=
NvCreateChatCompletionResponse
{
let
response
=
NvCreateChatCompletionResponse
{
id
:
aggregator
.id
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
created
:
aggregator
.created
,
id
:
aggregator
.id
,
usage
:
aggregator
.usage
,
created
:
aggregator
.created
,
model
:
aggregator
.model
,
usage
:
aggregator
.usage
,
object
:
"chat.completion"
.to_string
(),
model
:
aggregator
.model
,
system_fingerprint
:
aggregator
.system_fingerprint
,
object
:
"chat.completion"
.to_string
(),
choices
,
system_fingerprint
:
aggregator
.system_fingerprint
,
service_tier
:
aggregator
.service_tier
,
choices
,
service_tier
:
aggregator
.service_tier
,
},
nvext
:
aggregator
.nvext
,
nvext
:
aggregator
.nvext
,
};
};
...
@@ -360,7 +362,7 @@ pub trait ChatCompletionAggregator {
...
@@ -360,7 +362,7 @@ pub trait ChatCompletionAggregator {
)
->
Result
<
NvCreateChatCompletionResponse
,
String
>
;
)
->
Result
<
NvCreateChatCompletionResponse
,
String
>
;
}
}
impl
ChatCompletionAggregator
for
dynamo_async_openai
::
types
::
CreateChatCompletionResponse
{
impl
ChatCompletionAggregator
for
Nv
CreateChatCompletionResponse
{
async
fn
from_annotated_stream
(
async
fn
from_annotated_stream
(
stream
:
impl
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
stream
:
impl
Stream
<
Item
=
Annotated
<
NvCreateChatCompletionStreamResponse
>>
,
parsing_options
:
ParsingOptions
,
parsing_options
:
ParsingOptions
,
...
@@ -445,14 +447,16 @@ mod tests {
...
@@ -445,14 +447,16 @@ mod tests {
};
};
let
data
=
NvCreateChatCompletionStreamResponse
{
let
data
=
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
model
:
"meta/llama-3.1-8b-instruct"
.to_string
(),
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
model
:
"meta/llama-3.1-8b-instruct"
.to_string
(),
service_tier
:
None
,
created
:
1234567890
,
usage
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
usage
:
None
,
choices
:
vec!
[
choice
],
system_fingerprint
:
None
,
object
:
"chat.completion"
.to_string
(),
choices
:
vec!
[
choice
],
object
:
"chat.completion"
.to_string
(),
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -479,13 +483,13 @@ mod tests {
...
@@ -479,13 +483,13 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// Verify that the response is empty and has default values
// Verify that the response is empty and has default values
assert_eq!
(
response
.id
,
""
);
assert_eq!
(
response
.
inner.
id
,
""
);
assert_eq!
(
response
.model
,
""
);
assert_eq!
(
response
.
inner.
model
,
""
);
assert_eq!
(
response
.created
,
0
);
assert_eq!
(
response
.
inner.
created
,
0
);
assert
!
(
response
.usage
.is_none
());
assert
!
(
response
.
inner.
usage
.is_none
());
assert
!
(
response
.system_fingerprint
.is_none
());
assert
!
(
response
.
inner.
system_fingerprint
.is_none
());
assert_eq!
(
response
.choices
.len
(),
0
);
assert_eq!
(
response
.
inner.
choices
.len
(),
0
);
assert
!
(
response
.service_tier
.is_none
());
assert
!
(
response
.
inner.
service_tier
.is_none
());
}
}
#[tokio::test]
#[tokio::test]
...
@@ -511,13 +515,13 @@ mod tests {
...
@@ -511,13 +515,13 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// Verify the response fields
// Verify the response fields
assert_eq!
(
response
.id
,
"test_id"
);
assert_eq!
(
response
.
inner.
id
,
"test_id"
);
assert_eq!
(
response
.model
,
"meta/llama-3.1-8b-instruct"
);
assert_eq!
(
response
.
inner.
model
,
"meta/llama-3.1-8b-instruct"
);
assert_eq!
(
response
.created
,
1234567890
);
assert_eq!
(
response
.
inner.
created
,
1234567890
);
assert
!
(
response
.usage
.is_none
());
assert
!
(
response
.
inner.
usage
.is_none
());
assert
!
(
response
.system_fingerprint
.is_none
());
assert
!
(
response
.
inner.
system_fingerprint
.is_none
());
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
assert_eq!
(
choice
.message.content
.as_ref
()
.unwrap
(),
choice
.message.content
.as_ref
()
.unwrap
(),
...
@@ -525,7 +529,7 @@ mod tests {
...
@@ -525,7 +529,7 @@ mod tests {
);
);
assert
!
(
choice
.finish_reason
.is_none
());
assert
!
(
choice
.finish_reason
.is_none
());
assert_eq!
(
choice
.message.role
,
dynamo_async_openai
::
types
::
Role
::
User
);
assert_eq!
(
choice
.message.role
,
dynamo_async_openai
::
types
::
Role
::
User
);
assert
!
(
response
.service_tier
.is_none
());
assert
!
(
response
.
inner.
service_tier
.is_none
());
}
}
#[tokio::test]
#[tokio::test]
...
@@ -562,8 +566,8 @@ mod tests {
...
@@ -562,8 +566,8 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// Verify the response fields
// Verify the response fields
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
assert_eq!
(
choice
.message.content
.as_ref
()
.unwrap
(),
choice
.message.content
.as_ref
()
.unwrap
(),
...
@@ -630,8 +634,8 @@ mod tests {
...
@@ -630,8 +634,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
assert_eq!
(
...
@@ -653,43 +657,49 @@ mod tests {
...
@@ -653,43 +657,49 @@ mod tests {
// Create a delta with multiple choices
// Create a delta with multiple choices
// ALLOW: function_call is deprecated
// ALLOW: function_call is deprecated
let
data
=
NvCreateChatCompletionStreamResponse
{
let
data
=
NvCreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
model
:
"test_model"
.to_string
(),
id
:
"test_id"
.to_string
(),
created
:
1234567890
,
model
:
"test_model"
.to_string
(),
service_tier
:
None
,
created
:
1234567890
,
usage
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
usage
:
None
,
choices
:
vec!
[
system_fingerprint
:
None
,
dynamo_async_openai
::
types
::
ChatChoiceStream
{
choices
:
vec!
[
index
:
0
,
dynamo_async_openai
::
types
::
ChatChoiceStream
{
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
index
:
0
,
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
"Choice 0"
.to_string
())),
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
function_call
:
None
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
tool_calls
:
None
,
"Choice 0"
.to_string
(),
refusal
:
None
,
)),
reasoning_content
:
None
,
function_call
:
None
,
tool_calls
:
None
,
refusal
:
None
,
reasoning_content
:
None
,
},
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
},
},
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
dynamo_async_openai
::
types
::
ChatChoiceStream
{
stop_reason
:
None
,
index
:
1
,
logprobs
:
None
,
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
},
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
dynamo_async_openai
::
types
::
ChatChoiceStream
{
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
index
:
1
,
"Choice 1"
.to_string
(),
delta
:
dynamo_async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
)),
role
:
Some
(
dynamo_async_openai
::
types
::
Role
::
Assistant
),
function_call
:
None
,
content
:
Some
(
ChatCompletionMessageContent
::
Text
(
"Choice 1"
.to_string
())),
tool_calls
:
None
,
function_call
:
None
,
refusal
:
None
,
tool_calls
:
None
,
reasoning_content
:
None
,
refusal
:
None
,
},
reasoning_content
:
None
,
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
stop_reason
:
None
,
logprobs
:
None
,
},
},
finish_reason
:
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
],
stop_reason
:
None
,
object
:
"chat.completion"
.to_string
(),
logprobs
:
None
,
},
},
],
object
:
"chat.completion"
.to_string
(),
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -711,9 +721,9 @@ mod tests {
...
@@ -711,9 +721,9 @@ mod tests {
let
mut
response
=
result
.unwrap
();
let
mut
response
=
result
.unwrap
();
// Verify the response fields
// Verify the response fields
assert_eq!
(
response
.choices
.len
(),
2
);
assert_eq!
(
response
.
inner.
choices
.len
(),
2
);
response
.choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
// Ensure the choices are ordered
response
.
inner.
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
// Ensure the choices are ordered
let
choice0
=
&
response
.choices
[
0
];
let
choice0
=
&
response
.
inner.
choices
[
0
];
assert_eq!
(
choice0
.index
,
0
);
assert_eq!
(
choice0
.index
,
0
);
assert_eq!
(
assert_eq!
(
choice0
.message.content
.as_ref
()
.unwrap
(),
choice0
.message.content
.as_ref
()
.unwrap
(),
...
@@ -728,7 +738,7 @@ mod tests {
...
@@ -728,7 +738,7 @@ mod tests {
dynamo_async_openai
::
types
::
Role
::
Assistant
dynamo_async_openai
::
types
::
Role
::
Assistant
);
);
let
choice1
=
&
response
.choices
[
1
];
let
choice1
=
&
response
.
inner.
choices
[
1
];
assert_eq!
(
choice1
.index
,
1
);
assert_eq!
(
choice1
.index
,
1
);
assert_eq!
(
assert_eq!
(
choice1
.message.content
.as_ref
()
.unwrap
(),
choice1
.message.content
.as_ref
()
.unwrap
(),
...
@@ -773,8 +783,8 @@ mod tests {
...
@@ -773,8 +783,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls are present
// Verify tool calls are present
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -816,8 +826,8 @@ mod tests {
...
@@ -816,8 +826,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls are present
// Verify tool calls are present
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -859,8 +869,8 @@ mod tests {
...
@@ -859,8 +869,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls are present
// Verify tool calls are present
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -900,8 +910,8 @@ mod tests {
...
@@ -900,8 +910,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify no tool calls are present
// Verify no tool calls are present
assert
!
(
choice
.message.tool_calls
.is_none
());
assert
!
(
choice
.message.tool_calls
.is_none
());
...
@@ -928,7 +938,7 @@ mod tests {
...
@@ -928,7 +938,7 @@ mod tests {
// Manually set empty tool calls array
// Manually set empty tool calls array
if
let
Some
(
ref
mut
data
)
=
annotated_delta
.data
{
if
let
Some
(
ref
mut
data
)
=
annotated_delta
.data
{
data
.choices
[
0
]
.delta.tool_calls
=
Some
(
vec!
[]);
// Empty tool calls array
data
.
inner.
choices
[
0
]
.delta.tool_calls
=
Some
(
vec!
[]);
// Empty tool calls array
}
}
let
data
=
annotated_delta
.data
.unwrap
();
let
data
=
annotated_delta
.data
.unwrap
();
...
@@ -945,8 +955,8 @@ mod tests {
...
@@ -945,8 +955,8 @@ mod tests {
assert
!
(
result
.is_ok
());
assert
!
(
result
.is_ok
());
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// Verify tool calls array is empty
// Verify tool calls array is empty
assert
!
(
choice
.message.tool_calls
.is_none
());
assert
!
(
choice
.message.tool_calls
.is_none
());
...
@@ -992,8 +1002,8 @@ mod tests {
...
@@ -992,8 +1002,8 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// There should be one choice
// There should be one choice
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// The tool_calls field should be present and parsed
// The tool_calls field should be present and parsed
assert
!
(
choice
.message.tool_calls
.is_some
());
assert
!
(
choice
.message.tool_calls
.is_some
());
...
@@ -1050,8 +1060,8 @@ mod tests {
...
@@ -1050,8 +1060,8 @@ mod tests {
let
response
=
result
.unwrap
();
let
response
=
result
.unwrap
();
// There should be one choice
// There should be one choice
assert_eq!
(
response
.choices
.len
(),
1
);
assert_eq!
(
response
.
inner.
choices
.len
(),
1
);
let
choice
=
&
response
.choices
[
0
];
let
choice
=
&
response
.
inner.
choices
[
0
];
// The finish_reason should be ToolCalls, not Stop, because tool calls are present
// The finish_reason should be ToolCalls, not Stop, because tool calls are present
assert_eq!
(
assert_eq!
(
...
...
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
2887cd1c
...
@@ -278,19 +278,21 @@ impl DeltaGenerator {
...
@@ -278,19 +278,21 @@ impl DeltaGenerator {
// According to OpenAI spec: when stream_options.include_usage is true,
// According to OpenAI spec: when stream_options.include_usage is true,
// all intermediate chunks should have usage: null
// all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices
// The final usage chunk will be sent separately with empty choices
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
self
.object
.clone
(),
id
:
self
.id
.clone
(),
created
:
self
.created
,
object
:
self
.object
.clone
(),
model
:
self
.model
.clone
(),
created
:
self
.created
,
system_fingerprint
:
self
.system_fingerprint
.clone
(),
model
:
self
.model
.clone
(),
choices
,
system_fingerprint
:
self
.system_fingerprint
.clone
(),
usage
:
if
self
.options.enable_usage
&&
self
.options.continuous_usage_stats
{
choices
,
Some
(
self
.get_usage
())
usage
:
if
self
.options.enable_usage
&&
self
.options.continuous_usage_stats
{
}
else
{
Some
(
self
.get_usage
())
None
}
else
{
None
},
service_tier
:
self
.service_tier
.clone
(),
},
},
service_tier
:
self
.service_tier
.clone
(),
nvext
:
None
,
// Will be populated by router layer if needed
nvext
:
None
,
// Will be populated by router layer if needed
}
}
}
}
...
@@ -303,15 +305,17 @@ impl DeltaGenerator {
...
@@ -303,15 +305,17 @@ impl DeltaGenerator {
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateChatCompletionStreamResponse
{
pub
fn
create_usage_chunk
(
&
self
)
->
NvCreateChatCompletionStreamResponse
{
let
usage
=
self
.get_usage
();
let
usage
=
self
.get_usage
();
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
NvCreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
self
.object
.clone
(),
id
:
self
.id
.clone
(),
created
:
self
.created
,
object
:
self
.object
.clone
(),
model
:
self
.model
.clone
(),
created
:
self
.created
,
system_fingerprint
:
self
.system_fingerprint
.clone
(),
model
:
self
.model
.clone
(),
choices
:
vec!
[],
// Empty choices for usage-only chunk
system_fingerprint
:
self
.system_fingerprint
.clone
(),
usage
:
Some
(
usage
),
choices
:
vec!
[],
// Empty choices for usage-only chunk
service_tier
:
self
.service_tier
.clone
(),
usage
:
Some
(
usage
),
service_tier
:
self
.service_tier
.clone
(),
},
nvext
:
None
,
nvext
:
None
,
}
}
}
}
...
...
lib/llm/src/protocols/openai/chat_completions/jail.rs
View file @
2887cd1c
...
@@ -525,13 +525,13 @@ impl JailedStream {
...
@@ -525,13 +525,13 @@ impl JailedStream {
// Process each item in the stream
// Process each item in the stream
while
let
Some
(
response
)
=
stream
.next
()
.await
{
while
let
Some
(
response
)
=
stream
.next
()
.await
{
if
let
Some
(
chat_response
)
=
response
.data
.as_ref
()
{
if
let
Some
(
chat_response
)
=
response
.data
.as_ref
()
{
last_stream_id
.clone_from
(
&
chat_response
.id
);
last_stream_id
.clone_from
(
&
chat_response
.
inner.
id
);
last_stream_model
.clone_from
(
&
chat_response
.model
);
last_stream_model
.clone_from
(
&
chat_response
.
inner.
model
);
last_stream_created
=
chat_response
.created
;
last_stream_created
=
chat_response
.
inner.
created
;
let
mut
all_emissions
=
Vec
::
new
();
let
mut
all_emissions
=
Vec
::
new
();
if
chat_response
.choices
.is_empty
()
{
if
chat_response
.
inner.
choices
.is_empty
()
{
// No choices processed (e.g., usage-only chunk)
// No choices processed (e.g., usage-only chunk)
// Pass through as-is to preserve usage and other metadata
// Pass through as-is to preserve usage and other metadata
yield
response
;
yield
response
;
...
@@ -539,7 +539,7 @@ impl JailedStream {
...
@@ -539,7 +539,7 @@ impl JailedStream {
}
}
// Process each choice independently using the new architecture
// Process each choice independently using the new architecture
for
choice
in
&
chat_response
.choices
{
for
choice
in
&
chat_response
.
inner.
choices
{
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
if
let
Some
(
ref
content
)
=
choice
.delta.content
{
// Jailing only applies to text content
// Jailing only applies to text content
let
text_content
=
match
content
{
let
text_content
=
match
content
{
...
@@ -676,14 +676,16 @@ impl JailedStream {
...
@@ -676,14 +676,16 @@ impl JailedStream {
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
tracing
::
debug!
(
"Stream ended while jailed, releasing accumulated content"
);
// Create a finalization response carrying forward real stream metadata
// Create a finalization response carrying forward real stream metadata
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
let
dummy_response
=
NvCreateChatCompletionStreamResponse
{
id
:
last_stream_id
,
inner
:
dynamo_async_openai
::
types
::
CreateChatCompletionStreamResponse
{
object
:
"chat.completion.chunk"
.to_string
(),
id
:
last_stream_id
,
created
:
last_stream_created
,
object
:
"chat.completion.chunk"
.to_string
(),
model
:
last_stream_model
,
created
:
last_stream_created
,
choices
:
Vec
::
new
(),
model
:
last_stream_model
,
usage
:
None
,
choices
:
Vec
::
new
(),
service_tier
:
None
,
usage
:
None
,
system_fingerprint
:
None
,
service_tier
:
None
,
system_fingerprint
:
None
,
},
nvext
:
None
,
nvext
:
None
,
};
};
...
@@ -713,7 +715,7 @@ impl JailedStream {
...
@@ -713,7 +715,7 @@ impl JailedStream {
EmissionMode
::
Packed
=>
{
EmissionMode
::
Packed
=>
{
// Pack all choices into a single response
// Pack all choices into a single response
let
mut
response
=
base_response
.clone
();
let
mut
response
=
base_response
.clone
();
response
.choices
=
emissions
.into_iter
()
.map
(|
e
|
e
.into_choice
())
.collect
();
response
.
inner.
choices
=
emissions
.into_iter
()
.map
(|
e
|
e
.into_choice
())
.collect
();
vec!
[
Annotated
{
vec!
[
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
@@ -729,7 +731,7 @@ impl JailedStream {
...
@@ -729,7 +731,7 @@ impl JailedStream {
.into_iter
()
.into_iter
()
.map
(|
emission
|
{
.map
(|
emission
|
{
let
mut
response
=
base_response
.clone
();
let
mut
response
=
base_response
.clone
();
response
.choices
=
vec!
[
emission
.into_choice
()];
response
.
inner.
choices
=
vec!
[
emission
.into_choice
()];
Annotated
{
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
@@ -1013,7 +1015,7 @@ impl JailedStream {
...
@@ -1013,7 +1015,7 @@ impl JailedStream {
while
let
Some
(
mut
response
)
=
input_stream
.next
()
.await
{
while
let
Some
(
mut
response
)
=
input_stream
.next
()
.await
{
// Track if any choice emitted tool calls
// Track if any choice emitted tool calls
if
let
Some
(
ref
data
)
=
response
.data
{
if
let
Some
(
ref
data
)
=
response
.data
{
for
choice
in
&
data
.choices
{
for
choice
in
&
data
.
inner.
choices
{
if
choice
.delta.tool_calls
.is_some
()
{
if
choice
.delta.tool_calls
.is_some
()
{
has_tool_calls_per_choice
.insert
(
choice
.index
,
true
);
has_tool_calls_per_choice
.insert
(
choice
.index
,
true
);
}
}
...
@@ -1022,7 +1024,7 @@ impl JailedStream {
...
@@ -1022,7 +1024,7 @@ impl JailedStream {
// Fix finish_reason based on jail mode and whether tool calls were emitted
// Fix finish_reason based on jail mode and whether tool calls were emitted
if
let
Some
(
ref
mut
data
)
=
response
.data
{
if
let
Some
(
ref
mut
data
)
=
response
.data
{
for
choice
in
&
mut
data
.choices
{
for
choice
in
&
mut
data
.
inner.
choices
{
if
let
Some
(
finish
)
=
choice
.finish_reason
{
if
let
Some
(
finish
)
=
choice
.finish_reason
{
// Only modify Stop finish reason, preserve Length/ContentFilter
// Only modify Stop finish reason, preserve Length/ContentFilter
if
finish
==
FinishReason
::
Stop
{
if
finish
==
FinishReason
::
Stop
{
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
2887cd1c
...
@@ -48,6 +48,8 @@ pub struct NvCreateCompletionRequest {
...
@@ -48,6 +48,8 @@ pub struct NvCreateCompletionRequest {
pub
struct
NvCreateCompletionResponse
{
pub
struct
NvCreateCompletionResponse
{
#[serde(flatten)]
#[serde(flatten)]
pub
inner
:
dynamo_async_openai
::
types
::
CreateCompletionResponse
,
pub
inner
:
dynamo_async_openai
::
types
::
CreateCompletionResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
serde_json
::
Value
>
,
}
}
impl
ContentProvider
for
dynamo_async_openai
::
types
::
Choice
{
impl
ContentProvider
for
dynamo_async_openai
::
types
::
Choice
{
...
@@ -296,9 +298,8 @@ impl ResponseFactory {
...
@@ -296,9 +298,8 @@ impl ResponseFactory {
choices
:
vec!
[
choice
],
choices
:
vec!
[
choice
],
system_fingerprint
:
self
.system_fingerprint
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
usage
,
usage
,
nvext
:
None
,
// Will be populated by router layer if needed
};
};
NvCreateCompletionResponse
{
inner
}
NvCreateCompletionResponse
{
inner
,
nvext
:
None
}
}
}
}
}
...
...
lib/llm/src/protocols/openai/completions/aggregator.rs
View file @
2887cd1c
...
@@ -86,8 +86,8 @@ impl DeltaAggregator {
...
@@ -86,8 +86,8 @@ impl DeltaAggregator {
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
aggregator
.system_fingerprint
=
Some
(
system_fingerprint
);
}
}
// Aggregate nvext field (take the last non-None value)
// Aggregate nvext field (take the last non-None value)
if
delta
.
inner.
nvext
.is_some
()
{
if
delta
.nvext
.is_some
()
{
aggregator
.nvext
=
delta
.
inner.
nvext
;
aggregator
.nvext
=
delta
.nvext
;
}
}
// handle the choices
// handle the choices
...
@@ -168,10 +168,12 @@ impl DeltaAggregator {
...
@@ -168,10 +168,12 @@ impl DeltaAggregator {
object
:
"text_completion"
.to_string
(),
object
:
"text_completion"
.to_string
(),
system_fingerprint
:
aggregator
.system_fingerprint
,
system_fingerprint
:
aggregator
.system_fingerprint
,
choices
,
choices
,
nvext
:
aggregator
.nvext
,
};
};
let
response
=
NvCreateCompletionResponse
{
inner
};
let
response
=
NvCreateCompletionResponse
{
inner
,
nvext
:
aggregator
.nvext
,
};
Ok
(
response
)
Ok
(
response
)
}
}
...
@@ -256,10 +258,9 @@ mod tests {
...
@@ -256,10 +258,9 @@ mod tests {
logprobs
,
logprobs
,
}],
}],
object
:
"text_completion"
.to_string
(),
object
:
"text_completion"
.to_string
(),
nvext
:
None
,
};
};
let
response
=
NvCreateCompletionResponse
{
inner
};
let
response
=
NvCreateCompletionResponse
{
inner
,
nvext
:
None
};
Annotated
{
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
@@ -387,10 +388,9 @@ mod tests {
...
@@ -387,10 +388,9 @@ mod tests {
},
},
],
],
object
:
"text_completion"
.to_string
(),
object
:
"text_completion"
.to_string
(),
nvext
:
None
,
};
};
let
response
=
NvCreateCompletionResponse
{
inner
};
let
response
=
NvCreateCompletionResponse
{
inner
,
nvext
:
None
};
let
annotated_delta
=
Annotated
{
let
annotated_delta
=
Annotated
{
data
:
Some
(
response
),
data
:
Some
(
response
),
...
...
lib/llm/src/protocols/openai/completions/delta.rs
View file @
2887cd1c
...
@@ -218,10 +218,9 @@ impl DeltaGenerator {
...
@@ -218,10 +218,9 @@ impl DeltaGenerator {
}
else
{
}
else
{
None
None
},
},
nvext
:
None
,
// Will be populated by router layer if needed
};
};
NvCreateCompletionResponse
{
inner
}
NvCreateCompletionResponse
{
inner
,
nvext
:
None
}
}
}
/// Creates a final usage-only chunk for OpenAI compliance.
/// Creates a final usage-only chunk for OpenAI compliance.
...
@@ -240,10 +239,9 @@ impl DeltaGenerator {
...
@@ -240,10 +239,9 @@ impl DeltaGenerator {
system_fingerprint
:
self
.system_fingerprint
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
choices
:
vec!
[],
// Empty choices for usage-only chunk
choices
:
vec!
[],
// Empty choices for usage-only chunk
usage
:
Some
(
usage
),
usage
:
Some
(
usage
),
nvext
:
None
,
// Will be populated by router layer if needed
};
};
NvCreateCompletionResponse
{
inner
}
NvCreateCompletionResponse
{
inner
,
nvext
:
None
}
}
}
/// Check if usage tracking is enabled
/// Check if usage tracking is enabled
...
@@ -343,7 +341,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
...
@@ -343,7 +341,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
};
};
if
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
{
if
let
Ok
(
nvext_json
)
=
serde_json
::
to_value
(
&
nvext_response
)
{
response
.
inner.
nvext
=
Some
(
nvext_json
);
response
.nvext
=
Some
(
nvext_json
);
if
let
Some
(
ref
info
)
=
worker_id_info
{
if
let
Some
(
ref
info
)
=
worker_id_info
{
tracing
::
debug!
(
tracing
::
debug!
(
"Injected worker_id into completions nvext: prefill={:?}, decode={:?}"
,
"Injected worker_id into completions nvext: prefill={:?}, decode={:?}"
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment