Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
199b9a30
Unverified
Commit
199b9a30
authored
Aug 19, 2025
by
nachiketb-nvidia
Committed by
GitHub
Aug 19, 2025
Browse files
chore: Bring async-openai into repo as request starter (#2520)
Co-authored-by:
Graham King
<
grahamk@nvidia.com
>
parent
26d9f159
Changes
114
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
246 additions
and
216 deletions
+246
-216
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+61
-43
lib/llm/src/protocols/openai/chat_completions/delta.rs
lib/llm/src/protocols/openai/chat_completions/delta.rs
+28
-22
lib/llm/src/protocols/openai/completions.rs
lib/llm/src/protocols/openai/completions.rs
+14
-14
lib/llm/src/protocols/openai/completions/aggregator.rs
lib/llm/src/protocols/openai/completions/aggregator.rs
+26
-26
lib/llm/src/protocols/openai/completions/delta.rs
lib/llm/src/protocols/openai/completions/delta.rs
+11
-11
lib/llm/src/protocols/openai/embeddings.rs
lib/llm/src/protocols/openai/embeddings.rs
+4
-4
lib/llm/src/protocols/openai/embeddings/aggregator.rs
lib/llm/src/protocols/openai/embeddings/aggregator.rs
+6
-6
lib/llm/src/protocols/openai/responses.rs
lib/llm/src/protocols/openai/responses.rs
+10
-10
lib/llm/src/protocols/openai/validate.rs
lib/llm/src/protocols/openai/validate.rs
+12
-12
lib/llm/tests/http-service.rs
lib/llm/tests/http-service.rs
+56
-50
lib/llm/tests/logprob_analysis_integration.rs
lib/llm/tests/logprob_analysis_integration.rs
+1
-1
lib/llm/tests/openai_completions.rs
lib/llm/tests/openai_completions.rs
+1
-1
lib/llm/tests/preprocessor.rs
lib/llm/tests/preprocessor.rs
+12
-12
lib/llm/tests/test_common_ext.rs
lib/llm/tests/test_common_ext.rs
+4
-4
No files found.
lib/llm/src/protocols/openai/chat_completions/aggregator.rs
View file @
199b9a30
...
...
@@ -35,7 +35,7 @@ pub struct DeltaAggregator {
/// Timestamp (Unix epoch) indicating when the response was created.
created
:
u32
,
/// Optional usage statistics for the completion request.
usage
:
Option
<
async_openai
::
types
::
CompletionUsage
>
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
/// Optional system fingerprint for version tracking.
system_fingerprint
:
Option
<
String
>
,
/// Map of incremental response choices, keyed by index.
...
...
@@ -43,7 +43,7 @@ pub struct DeltaAggregator {
/// Optional error message if an error occurs during aggregation.
error
:
Option
<
String
>
,
/// Optional service tier information for the response.
service_tier
:
Option
<
async_openai
::
types
::
ServiceTierResponse
>
,
service_tier
:
Option
<
dynamo_
async_openai
::
types
::
ServiceTierResponse
>
,
}
/// Represents the accumulated state of a single chat choice during streaming aggregation.
...
...
@@ -53,13 +53,13 @@ struct DeltaChoice {
/// The accumulated text content for the choice.
text
:
String
,
/// The role associated with this message (e.g., `system`, `user`, `assistant`).
role
:
Option
<
async_openai
::
types
::
Role
>
,
role
:
Option
<
dynamo_
async_openai
::
types
::
Role
>
,
/// The reason the completion was finished (if applicable).
finish_reason
:
Option
<
async_openai
::
types
::
FinishReason
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
/// Optional log probabilities for the chat choice.
logprobs
:
Option
<
async_openai
::
types
::
ChatChoiceLogprobs
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
,
// Optional tool calls for the chat choice.
tool_calls
:
Option
<
Vec
<
async_openai
::
types
::
ChatCompletionMessageToolCall
>>
,
tool_calls
:
Option
<
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionMessageToolCall
>>
,
}
impl
Default
for
DeltaAggregator
{
...
...
@@ -182,7 +182,8 @@ impl DeltaAggregator {
}
choice
.tool_calls
=
Some
(
tool_calls
);
choice
.text
.clear
();
choice
.finish_reason
=
Some
(
async_openai
::
types
::
FinishReason
::
ToolCalls
);
choice
.finish_reason
=
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
);
}
}
}
...
...
@@ -191,13 +192,13 @@ impl DeltaAggregator {
let
mut
choices
:
Vec
<
_
>
=
aggregator
.choices
.into_values
()
.map
(
async_openai
::
types
::
ChatChoice
::
from
)
.map
(
dynamo_
async_openai
::
types
::
ChatChoice
::
from
)
.collect
();
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
// Construct the final response object.
let
inner
=
async_openai
::
types
::
CreateChatCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
id
:
aggregator
.id
,
created
:
aggregator
.created
,
usage
:
aggregator
.usage
,
...
...
@@ -215,14 +216,14 @@ impl DeltaAggregator {
}
#[allow(deprecated)]
impl
From
<
DeltaChoice
>
for
async_openai
::
types
::
ChatChoice
{
/// Converts a [`DeltaChoice`] into an [`async_openai::types::ChatChoice`].
impl
From
<
DeltaChoice
>
for
dynamo_
async_openai
::
types
::
ChatChoice
{
/// Converts a [`DeltaChoice`] into an [`
dynamo_
async_openai::types::ChatChoice`].
///
/// # Note
/// The `function_call` field is deprecated.
fn
from
(
delta
:
DeltaChoice
)
->
Self
{
async_openai
::
types
::
ChatChoice
{
message
:
async_openai
::
types
::
ChatCompletionResponseMessage
{
dynamo_
async_openai
::
types
::
ChatChoice
{
message
:
dynamo_
async_openai
::
types
::
ChatCompletionResponseMessage
{
role
:
delta
.role
.expect
(
"delta should have a Role"
),
content
:
if
delta
.tool_calls
.is_some
()
{
None
...
...
@@ -282,25 +283,25 @@ mod tests {
fn
create_test_delta
(
index
:
u32
,
text
:
&
str
,
role
:
Option
<
async_openai
::
types
::
Role
>
,
finish_reason
:
Option
<
async_openai
::
types
::
FinishReason
>
,
role
:
Option
<
dynamo_
async_openai
::
types
::
Role
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
)
->
Annotated
<
NvCreateChatCompletionStreamResponse
>
{
// ALLOW: function_call is deprecated
let
delta
=
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
let
delta
=
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
Some
(
text
.to_string
()),
function_call
:
None
,
tool_calls
:
None
,
role
,
refusal
:
None
,
};
let
choice
=
async_openai
::
types
::
ChatChoiceStream
{
let
choice
=
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
index
,
delta
,
finish_reason
,
logprobs
:
None
,
};
let
inner
=
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b-instruct"
.to_string
(),
created
:
1234567890
,
...
...
@@ -347,8 +348,12 @@ mod tests {
#[tokio::test]
async
fn
test_single_delta
()
{
// Create a sample delta
let
annotated_delta
=
create_test_delta
(
0
,
"Hello,"
,
Some
(
async_openai
::
types
::
Role
::
User
),
None
);
let
annotated_delta
=
create_test_delta
(
0
,
"Hello,"
,
Some
(
dynamo_async_openai
::
types
::
Role
::
User
),
None
,
);
// Create a stream
let
stream
=
Box
::
pin
(
stream
::
iter
(
vec!
[
annotated_delta
]));
...
...
@@ -371,7 +376,7 @@ mod tests {
assert_eq!
(
choice
.index
,
0
);
assert_eq!
(
choice
.message.content
.as_ref
()
.unwrap
(),
"Hello,"
);
assert
!
(
choice
.finish_reason
.is_none
());
assert_eq!
(
choice
.message.role
,
async_openai
::
types
::
Role
::
User
);
assert_eq!
(
choice
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
User
);
assert
!
(
response
.inner.service_tier
.is_none
());
}
...
...
@@ -380,13 +385,17 @@ mod tests {
// Create multiple deltas with the same choice index
// One will have a MessageRole and no FinishReason,
// the other will have a FinishReason and no MessageRole
let
annotated_delta1
=
create_test_delta
(
0
,
"Hello,"
,
Some
(
async_openai
::
types
::
Role
::
User
),
None
);
let
annotated_delta1
=
create_test_delta
(
0
,
"Hello,"
,
Some
(
dynamo_async_openai
::
types
::
Role
::
User
),
None
,
);
let
annotated_delta2
=
create_test_delta
(
0
,
" world!"
,
None
,
Some
(
async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
);
// Create a stream
...
...
@@ -407,9 +416,9 @@ mod tests {
assert_eq!
(
choice
.message.content
.as_ref
()
.unwrap
(),
"Hello, world!"
);
assert_eq!
(
choice
.finish_reason
,
Some
(
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
)
);
assert_eq!
(
choice
.message.role
,
async_openai
::
types
::
Role
::
User
);
assert_eq!
(
choice
.message.role
,
dynamo_
async_openai
::
types
::
Role
::
User
);
}
#[allow(deprecated)]
...
...
@@ -417,7 +426,7 @@ mod tests {
async
fn
test_multiple_choices
()
{
// Create a delta with multiple choices
// ALLOW: function_call is deprecated
let
delta
=
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
let
delta
=
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
"test_id"
.to_string
(),
model
:
"test_model"
.to_string
(),
created
:
1234567890
,
...
...
@@ -425,28 +434,28 @@ mod tests {
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
async_openai
::
types
::
ChatChoiceStream
{
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
index
:
0
,
delta
:
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
async_openai
::
types
::
Role
::
Assistant
),
delta
:
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
content
:
Some
(
"Choice 0"
.to_string
()),
function_call
:
None
,
tool_calls
:
None
,
refusal
:
None
,
},
finish_reason
:
Some
(
async_openai
::
types
::
FinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
logprobs
:
None
,
},
async_openai
::
types
::
ChatChoiceStream
{
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
index
:
1
,
delta
:
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
async_openai
::
types
::
Role
::
Assistant
),
delta
:
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
role
:
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
content
:
Some
(
"Choice 1"
.to_string
()),
function_call
:
None
,
tool_calls
:
None
,
refusal
:
None
,
},
finish_reason
:
Some
(
async_openai
::
types
::
FinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
Stop
),
logprobs
:
None
,
},
],
...
...
@@ -479,18 +488,24 @@ mod tests {
assert_eq!
(
choice0
.message.content
.as_ref
()
.unwrap
(),
"Choice 0"
);
assert_eq!
(
choice0
.finish_reason
,
Some
(
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
);
assert_eq!
(
choice0
.message.role
,
dynamo_async_openai
::
types
::
Role
::
Assistant
);
assert_eq!
(
choice0
.message.role
,
async_openai
::
types
::
Role
::
Assistant
);
let
choice1
=
&
response
.inner.choices
[
1
];
assert_eq!
(
choice1
.index
,
1
);
assert_eq!
(
choice1
.message.content
.as_ref
()
.unwrap
(),
"Choice 1"
);
assert_eq!
(
choice1
.finish_reason
,
Some
(
async_openai
::
types
::
FinishReason
::
Stop
)
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
);
assert_eq!
(
choice1
.message.role
,
dynamo_async_openai
::
types
::
Role
::
Assistant
);
assert_eq!
(
choice1
.message.role
,
async_openai
::
types
::
Role
::
Assistant
);
}
#[tokio::test]
...
...
@@ -502,8 +517,8 @@ mod tests {
let
annotated_delta
=
create_test_delta
(
0
,
tool_call_json
,
Some
(
async_openai
::
types
::
Role
::
Assistant
),
Some
(
async_openai
::
types
::
FinishReason
::
ToolCalls
),
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
),
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ToolCalls
),
);
let
delta
=
annotated_delta
.data
.unwrap
()
.inner
;
...
...
@@ -547,8 +562,11 @@ mod tests {
// The finish_reason should be ToolCalls
assert_eq!
(
choice
.finish_reason
,
Some
(
async_openai
::
types
::
FinishReason
::
ToolCalls
)
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
ToolCalls
)
);
assert_eq!
(
choice
.message.role
,
dynamo_async_openai
::
types
::
Role
::
Assistant
);
assert_eq!
(
choice
.message.role
,
async_openai
::
types
::
Role
::
Assistant
);
}
}
lib/llm/src/protocols/openai/chat_completions/delta.rs
View file @
199b9a30
...
...
@@ -59,9 +59,9 @@ pub struct DeltaGenerator {
/// Optional system fingerprint for version tracking.
system_fingerprint
:
Option
<
String
>
,
/// Optional service tier information for the response.
service_tier
:
Option
<
async_openai
::
types
::
ServiceTierResponse
>
,
service_tier
:
Option
<
dynamo_
async_openai
::
types
::
ServiceTierResponse
>
,
/// Tracks token usage for the completion request.
usage
:
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
/// Counter tracking the number of messages issued.
msg_counter
:
u64
,
/// Configuration options for response generation.
...
...
@@ -87,7 +87,7 @@ impl DeltaGenerator {
// but this will not be an issue until 2106.
let
now
:
u32
=
now
.try_into
()
.expect
(
"timestamp exceeds u32::MAX"
);
let
usage
=
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
prompt_tokens
:
0
,
completion_tokens
:
0
,
total_tokens
:
0
,
...
...
@@ -122,7 +122,7 @@ impl DeltaGenerator {
token_ids
:
Vec
<
TokenIdType
>
,
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
async_openai
::
types
::
ChatChoiceLogprobs
>
{
)
->
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
}
...
...
@@ -150,22 +150,22 @@ impl DeltaGenerator {
let
top_t
=
top_lp
.token
.clone
()
.unwrap_or_default
();
let
top_tid
=
top_lp
.token_id
;
found_selected_token
=
found_selected_token
||
top_tid
==
*
tid
;
async_openai
::
types
::
TopLogprobs
{
dynamo_
async_openai
::
types
::
TopLogprobs
{
token
:
top_t
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
:
None
,
}
})
.collect
::
<
Vec
<
async_openai
::
types
::
TopLogprobs
>>
();
.collect
::
<
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>>
();
if
!
found_selected_token
{
// If the selected token is not in the top logprobs, add it
converted_top_lps
.push
(
async_openai
::
types
::
TopLogprobs
{
converted_top_lps
.push
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
None
,
});
}
async_openai
::
types
::
ChatCompletionTokenLogprob
{
dynamo_
async_openai
::
types
::
ChatCompletionTokenLogprob
{
token
:
t
.clone
(),
logprob
:
lp
,
bytes
:
None
,
...
...
@@ -175,7 +175,7 @@ impl DeltaGenerator {
.collect
()
});
Some
(
async_openai
::
types
::
ChatChoiceLogprobs
{
Some
(
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
{
content
,
refusal
:
None
,
})
...
...
@@ -190,28 +190,28 @@ impl DeltaGenerator {
/// * `logprobs` - Optional log probabilities of the generated tokens.
///
/// # Returns
/// * An [`async_openai::types::CreateChatCompletionStreamResponse`] instance representing the choice.
/// * An [`
dynamo_
async_openai::types::CreateChatCompletionStreamResponse`] instance representing the choice.
#[allow(deprecated)]
pub
fn
create_choice
(
&
self
,
index
:
u32
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
async_openai
::
types
::
FinishReason
>
,
logprobs
:
Option
<
async_openai
::
types
::
ChatChoiceLogprobs
>
,
)
->
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
let
delta
=
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
FinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
ChatChoiceLogprobs
>
,
)
->
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
let
delta
=
dynamo_
async_openai
::
types
::
ChatCompletionStreamResponseDelta
{
content
:
text
,
function_call
:
None
,
tool_calls
:
None
,
role
:
if
self
.msg_counter
==
0
{
Some
(
async_openai
::
types
::
Role
::
Assistant
)
Some
(
dynamo_
async_openai
::
types
::
Role
::
Assistant
)
}
else
{
None
},
refusal
:
None
,
};
let
choice
=
async_openai
::
types
::
ChatChoiceStream
{
let
choice
=
dynamo_
async_openai
::
types
::
ChatChoiceStream
{
index
,
delta
,
finish_reason
,
...
...
@@ -225,7 +225,7 @@ impl DeltaGenerator {
usage
.total_tokens
=
usage
.prompt_tokens
+
usage
.completion_tokens
;
}
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
dynamo_
async_openai
::
types
::
CreateChatCompletionStreamResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -281,12 +281,18 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
// Map backend finish reasons to OpenAI's finish reasons.
let
finish_reason
=
match
delta
.finish_reason
{
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Stop
)
=>
Some
(
async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Length
)
=>
Some
(
async_openai
::
types
::
FinishReason
::
Length
),
Some
(
common
::
FinishReason
::
Cancelled
)
=>
Some
(
async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
EoS
)
=>
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
),
Some
(
common
::
FinishReason
::
Stop
)
=>
{
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
}
Some
(
common
::
FinishReason
::
Length
)
=>
{
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Length
)
}
Some
(
common
::
FinishReason
::
Cancelled
)
=>
{
Some
(
dynamo_async_openai
::
types
::
FinishReason
::
Stop
)
}
Some
(
common
::
FinishReason
::
ContentFilter
)
=>
{
Some
(
async_openai
::
types
::
FinishReason
::
ContentFilter
)
Some
(
dynamo_
async_openai
::
types
::
FinishReason
::
ContentFilter
)
}
Some
(
common
::
FinishReason
::
Error
(
err_msg
))
=>
{
return
Err
(
anyhow
::
anyhow!
(
err_msg
));
...
...
lib/llm/src/protocols/openai/completions.rs
View file @
199b9a30
...
...
@@ -37,7 +37,7 @@ pub use delta::DeltaGenerator;
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionRequest
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
CreateCompletionRequest
,
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionRequest
,
#[serde(flatten)]
pub
common
:
CommonExt
,
...
...
@@ -49,25 +49,25 @@ pub struct NvCreateCompletionRequest {
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateCompletionResponse
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
CreateCompletionResponse
,
pub
inner
:
dynamo_
async_openai
::
types
::
CreateCompletionResponse
,
}
impl
ContentProvider
for
async_openai
::
types
::
Choice
{
impl
ContentProvider
for
dynamo_
async_openai
::
types
::
Choice
{
fn
content
(
&
self
)
->
String
{
self
.text
.clone
()
}
}
pub
fn
prompt_to_string
(
prompt
:
&
async_openai
::
types
::
Prompt
)
->
String
{
pub
fn
prompt_to_string
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
String
{
match
prompt
{
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
s
.clone
(),
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
arr
.join
(
" "
),
// Join strings with spaces
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
arr
.iter
()
.map
(|
&
num
|
num
.to_string
())
.collect
::
<
Vec
<
_
>>
()
.join
(
" "
),
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
arr
.iter
()
.map
(|
inner
|
{
inner
...
...
@@ -226,10 +226,10 @@ impl ResponseFactory {
pub
fn
make_response
(
&
self
,
choice
:
async_openai
::
types
::
Choice
,
usage
:
Option
<
async_openai
::
types
::
CompletionUsage
>
,
choice
:
dynamo_
async_openai
::
types
::
Choice
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
)
->
NvCreateCompletionResponse
{
let
inner
=
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
...
...
@@ -300,7 +300,7 @@ impl TryFrom<NvCreateCompletionRequest> for common::CompletionRequest {
}
}
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
async_openai
::
types
::
Choice
{
impl
TryFrom
<
common
::
StreamingCompletionResponse
>
for
dynamo_
async_openai
::
types
::
Choice
{
type
Error
=
anyhow
::
Error
;
fn
try_from
(
response
:
common
::
StreamingCompletionResponse
)
->
Result
<
Self
,
Self
::
Error
>
{
...
...
@@ -321,10 +321,10 @@ impl TryFrom<common::StreamingCompletionResponse> for async_openai::types::Choic
// TODO handle aggregating logprobs
let
logprobs
=
None
;
let
finish_reason
:
Option
<
async_openai
::
types
::
CompletionFinishReason
>
=
let
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
=
response
.delta.finish_reason
.map
(
Into
::
into
);
let
choice
=
async_openai
::
types
::
Choice
{
let
choice
=
dynamo_
async_openai
::
types
::
Choice
{
text
,
index
,
logprobs
,
...
...
lib/llm/src/protocols/openai/completions/aggregator.rs
View file @
199b9a30
...
...
@@ -30,7 +30,7 @@ pub struct DeltaAggregator {
id
:
String
,
model
:
String
,
created
:
u32
,
usage
:
Option
<
async_openai
::
types
::
CompletionUsage
>
,
usage
:
Option
<
dynamo_
async_openai
::
types
::
CompletionUsage
>
,
system_fingerprint
:
Option
<
String
>
,
choices
:
HashMap
<
u32
,
DeltaChoice
>
,
error
:
Option
<
String
>
,
...
...
@@ -40,7 +40,7 @@ struct DeltaChoice {
index
:
u32
,
text
:
String
,
finish_reason
:
Option
<
FinishReason
>
,
logprobs
:
Option
<
async_openai
::
types
::
Logprobs
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
}
impl
Default
for
DeltaAggregator
{
...
...
@@ -112,15 +112,15 @@ impl DeltaAggregator {
// Handle CompletionFinishReason -> FinishReason conversation
state_choice
.finish_reason
=
match
choice
.finish_reason
{
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
=>
{
Some
(
FinishReason
::
Stop
)
}
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
=>
{
Some
(
FinishReason
::
Length
)
}
Some
(
async_openai
::
types
::
CompletionFinishReason
::
ContentFilter
)
=>
{
Some
(
FinishReason
::
ContentFilter
)
}
Some
(
dynamo_async_openai
::
types
::
Completion
FinishReason
::
ContentFilter
,
)
=>
Some
(
FinishReason
::
ContentFilter
),
None
=>
None
,
};
}
...
...
@@ -140,12 +140,12 @@ impl DeltaAggregator {
let
mut
choices
:
Vec
<
_
>
=
aggregator
.choices
.into_values
()
.map
(
async_openai
::
types
::
Choice
::
from
)
.map
(
dynamo_
async_openai
::
types
::
Choice
::
from
)
.collect
();
choices
.sort_by
(|
a
,
b
|
a
.index
.cmp
(
&
b
.index
));
let
inner
=
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
id
:
aggregator
.id
,
created
:
aggregator
.created
,
usage
:
aggregator
.usage
,
...
...
@@ -161,11 +161,11 @@ impl DeltaAggregator {
}
}
impl
From
<
DeltaChoice
>
for
async_openai
::
types
::
Choice
{
impl
From
<
DeltaChoice
>
for
dynamo_
async_openai
::
types
::
Choice
{
fn
from
(
delta
:
DeltaChoice
)
->
Self
{
let
finish_reason
=
delta
.finish_reason
.map
(
Into
::
into
);
async_openai
::
types
::
Choice
{
dynamo_
async_openai
::
types
::
Choice
{
index
:
delta
.index
,
text
:
delta
.text
,
finish_reason
,
...
...
@@ -210,13 +210,13 @@ mod tests {
.and_then
(|
s
|
FinishReason
::
from_str
(
s
)
.ok
())
.map
(
Into
::
into
);
let
inner
=
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
index
,
text
:
text
.to_string
(),
finish_reason
,
...
...
@@ -283,11 +283,11 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello,"
.to_string
());
assert_eq!
(
choice
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
);
assert_eq!
(
choice
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Length
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Length
)
);
assert
!
(
choice
.logprobs
.is_none
());
}
...
...
@@ -318,34 +318,34 @@ mod tests {
assert_eq!
(
choice
.text
,
"Hello, world!"
.to_string
());
assert_eq!
(
choice
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
);
}
#[tokio::test]
async
fn
test_multiple_choices
()
{
// Create a delta with multiple choices
let
inner
=
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
id
:
"test_id"
.to_string
(),
model
:
"meta/llama-3.1-8b"
.to_string
(),
created
:
1234567890
,
usage
:
None
,
system_fingerprint
:
None
,
choices
:
vec!
[
async_openai
::
types
::
Choice
{
dynamo_
async_openai
::
types
::
Choice
{
index
:
0
,
text
:
"Choice 0"
.to_string
(),
finish_reason
:
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
},
async_openai
::
types
::
Choice
{
dynamo_
async_openai
::
types
::
Choice
{
index
:
1
,
text
:
"Choice 1"
.to_string
(),
finish_reason
:
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
finish_reason
:
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
),
logprobs
:
None
,
},
],
...
...
@@ -379,11 +379,11 @@ mod tests {
assert_eq!
(
choice0
.text
,
"Choice 0"
.to_string
());
assert_eq!
(
choice0
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice0
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
);
let
choice1
=
&
response
.inner.choices
[
1
];
...
...
@@ -391,11 +391,11 @@ mod tests {
assert_eq!
(
choice1
.text
,
"Choice 1"
.to_string
());
assert_eq!
(
choice1
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
);
assert_eq!
(
choice1
.finish_reason
,
Some
(
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
Some
(
dynamo_
async_openai
::
types
::
CompletionFinishReason
::
Stop
)
);
}
}
lib/llm/src/protocols/openai/completions/delta.rs
View file @
199b9a30
...
...
@@ -42,7 +42,7 @@ pub struct DeltaGenerator {
created
:
u32
,
model
:
String
,
system_fingerprint
:
Option
<
String
>
,
usage
:
async_openai
::
types
::
CompletionUsage
,
usage
:
dynamo_
async_openai
::
types
::
CompletionUsage
,
options
:
DeltaGeneratorOptions
,
}
...
...
@@ -59,7 +59,7 @@ impl DeltaGenerator {
// Previously, our home-rolled CompletionUsage impl'd Default
// PR !387 - https://github.com/64bit/async-openai/pull/387
let
usage
=
async_openai
::
types
::
CompletionUsage
{
let
usage
=
dynamo_
async_openai
::
types
::
CompletionUsage
{
completion_tokens
:
0
,
prompt_tokens
:
0
,
total_tokens
:
0
,
...
...
@@ -88,7 +88,7 @@ impl DeltaGenerator {
token_ids
:
Vec
<
TokenIdType
>
,
logprobs
:
Option
<
common
::
llm_backend
::
LogProbs
>
,
top_logprobs
:
Option
<
common
::
llm_backend
::
TopLogprobs
>
,
)
->
Option
<
async_openai
::
types
::
Logprobs
>
{
)
->
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
{
if
!
self
.options.enable_logprobs
||
logprobs
.is_none
()
{
return
None
;
}
...
...
@@ -116,16 +116,16 @@ impl DeltaGenerator {
let
top_t
=
top_lp
.token
.clone
()
.unwrap_or_default
();
let
top_tid
=
top_lp
.token_id
;
found_selected_token
=
found_selected_token
||
top_tid
==
*
tid
;
async_openai
::
types
::
TopLogprobs
{
dynamo_
async_openai
::
types
::
TopLogprobs
{
token
:
top_t
,
logprob
:
top_lp
.logprob
as
f32
,
bytes
:
None
,
}
})
.collect
::
<
Vec
<
async_openai
::
types
::
TopLogprobs
>>
();
.collect
::
<
Vec
<
dynamo_
async_openai
::
types
::
TopLogprobs
>>
();
if
!
found_selected_token
{
// If the selected token is not in the top logprobs, add it
converted_top_lps
.push
(
async_openai
::
types
::
TopLogprobs
{
converted_top_lps
.push
(
dynamo_
async_openai
::
types
::
TopLogprobs
{
token
:
t
.clone
(),
logprob
:
*
lp
,
bytes
:
None
,
...
...
@@ -136,7 +136,7 @@ impl DeltaGenerator {
.collect
()
});
Some
(
async_openai
::
types
::
Logprobs
{
Some
(
dynamo_
async_openai
::
types
::
Logprobs
{
tokens
:
toks
.iter
()
.map
(|(
t
,
_
)|
t
.clone
())
.collect
(),
token_logprobs
:
tok_lps
.into_iter
()
.map
(
Some
)
.collect
(),
text_offset
:
vec!
[],
...
...
@@ -148,8 +148,8 @@ impl DeltaGenerator {
&
self
,
index
:
u32
,
text
:
Option
<
String
>
,
finish_reason
:
Option
<
async_openai
::
types
::
CompletionFinishReason
>
,
logprobs
:
Option
<
async_openai
::
types
::
Logprobs
>
,
finish_reason
:
Option
<
dynamo_
async_openai
::
types
::
CompletionFinishReason
>
,
logprobs
:
Option
<
dynamo_
async_openai
::
types
::
Logprobs
>
,
)
->
NvCreateCompletionResponse
{
// todo - update for tool calling
...
...
@@ -158,13 +158,13 @@ impl DeltaGenerator {
usage
.total_tokens
=
usage
.prompt_tokens
+
usage
.completion_tokens
;
}
let
inner
=
async_openai
::
types
::
CreateCompletionResponse
{
let
inner
=
dynamo_
async_openai
::
types
::
CreateCompletionResponse
{
id
:
self
.id
.clone
(),
object
:
self
.object
.clone
(),
created
:
self
.created
,
model
:
self
.model
.clone
(),
system_fingerprint
:
self
.system_fingerprint
.clone
(),
choices
:
vec!
[
async_openai
::
types
::
Choice
{
choices
:
vec!
[
dynamo_
async_openai
::
types
::
Choice
{
text
:
text
.unwrap_or_default
(),
index
,
finish_reason
,
...
...
lib/llm/src/protocols/openai/embeddings.rs
View file @
199b9a30
...
...
@@ -26,7 +26,7 @@ pub use nvext::{NvExt, NvExtProvider};
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingRequest
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
CreateEmbeddingRequest
,
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingRequest
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -41,17 +41,17 @@ pub struct NvCreateEmbeddingRequest {
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateEmbeddingResponse
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
CreateEmbeddingResponse
,
pub
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
,
}
impl
NvCreateEmbeddingResponse
{
pub
fn
empty
()
->
Self
{
Self
{
inner
:
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
model
:
"embedding"
.to_string
(),
data
:
vec!
[],
usage
:
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
prompt_tokens
:
0
,
total_tokens
:
0
,
},
...
...
lib/llm/src/protocols/openai/embeddings/aggregator.rs
View file @
199b9a30
...
...
@@ -145,16 +145,16 @@ mod tests {
use
futures
::
stream
;
fn
create_test_embedding_response
(
embeddings
:
Vec
<
async_openai
::
types
::
Embedding
>
,
embeddings
:
Vec
<
dynamo_
async_openai
::
types
::
Embedding
>
,
prompt_tokens
:
u32
,
total_tokens
:
u32
,
)
->
Annotated
<
NvCreateEmbeddingResponse
>
{
let
response
=
NvCreateEmbeddingResponse
{
inner
:
async_openai
::
types
::
CreateEmbeddingResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateEmbeddingResponse
{
object
:
"list"
.to_string
(),
model
:
"test-model"
.to_string
(),
data
:
embeddings
,
usage
:
async_openai
::
types
::
EmbeddingUsage
{
usage
:
dynamo_
async_openai
::
types
::
EmbeddingUsage
{
prompt_tokens
,
total_tokens
,
},
...
...
@@ -178,7 +178,7 @@ mod tests {
#[tokio::test]
async
fn
test_single_embedding
()
{
let
embedding
=
async_openai
::
types
::
Embedding
{
let
embedding
=
dynamo_
async_openai
::
types
::
Embedding
{
index
:
0
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
...
...
@@ -200,13 +200,13 @@ mod tests {
#[tokio::test]
async
fn
test_multiple_embeddings
()
{
let
embedding1
=
async_openai
::
types
::
Embedding
{
let
embedding1
=
dynamo_
async_openai
::
types
::
Embedding
{
index
:
0
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.1
,
0.2
,
0.3
],
};
let
embedding2
=
async_openai
::
types
::
Embedding
{
let
embedding2
=
dynamo_
async_openai
::
types
::
Embedding
{
index
:
1
,
object
:
"embedding"
.to_string
(),
embedding
:
vec!
[
0.4
,
0.5
,
0.6
],
...
...
lib/llm/src/protocols/openai/responses.rs
View file @
199b9a30
...
...
@@ -13,11 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
async_openai
::
types
::
responses
::{
use
dynamo_
async_openai
::
types
::
responses
::{
Content
,
Input
,
OutputContent
,
OutputMessage
,
OutputStatus
,
OutputText
,
Response
,
Role
as
ResponseRole
,
Status
,
};
use
async_openai
::
types
::{
use
dynamo_
async_openai
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessage
,
ChatCompletionRequestUserMessageContent
,
CreateChatCompletionRequest
,
};
...
...
@@ -33,7 +33,7 @@ use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvCreateResponse
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
responses
::
CreateResponse
,
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
CreateResponse
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
nvext
:
Option
<
NvExt
>
,
...
...
@@ -42,7 +42,7 @@ pub struct NvCreateResponse {
#[derive(Serialize,
Deserialize,
Validate,
Debug,
Clone)]
pub
struct
NvResponse
{
#[serde(flatten)]
pub
inner
:
async_openai
::
types
::
responses
::
Response
,
pub
inner
:
dynamo_
async_openai
::
types
::
responses
::
Response
,
}
/// Implements `NvExtProvider` for `NvCreateResponse`,
...
...
@@ -256,8 +256,8 @@ impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
#[cfg(test)]
mod
tests
{
use
async_openai
::
types
::
responses
::{
CreateResponse
,
Input
};
use
async_openai
::
types
::{
use
dynamo_
async_openai
::
types
::
responses
::{
CreateResponse
,
Input
};
use
dynamo_
async_openai
::
types
::{
ChatCompletionRequestMessage
,
ChatCompletionRequestUserMessageContent
,
};
...
...
@@ -341,15 +341,15 @@ mod tests {
fn
test_into_nvresponse_from_chat_response
()
{
let
now
=
1_726_000_000
;
let
chat_resp
=
NvCreateChatCompletionResponse
{
inner
:
async_openai
::
types
::
CreateChatCompletionResponse
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionResponse
{
id
:
"chatcmpl-xyz"
.into
(),
choices
:
vec!
[
async_openai
::
types
::
ChatChoice
{
choices
:
vec!
[
dynamo_
async_openai
::
types
::
ChatChoice
{
index
:
0
,
message
:
async_openai
::
types
::
ChatCompletionResponseMessage
{
message
:
dynamo_
async_openai
::
types
::
ChatCompletionResponseMessage
{
content
:
Some
(
"This is a reply"
.into
()),
refusal
:
None
,
tool_calls
:
None
,
role
:
async_openai
::
types
::
Role
::
Assistant
,
role
:
dynamo_
async_openai
::
types
::
Role
::
Assistant
,
function_call
:
None
,
audio
:
None
,
},
...
...
lib/llm/src/protocols/openai/validate.rs
View file @
199b9a30
...
...
@@ -224,15 +224,15 @@ pub fn validate_user(user: Option<&str>) -> Result<(), anyhow::Error> {
}
/// Validates stop sequences
pub
fn
validate_stop
(
stop
:
&
Option
<
async_openai
::
types
::
Stop
>
)
->
Result
<
(),
anyhow
::
Error
>
{
pub
fn
validate_stop
(
stop
:
&
Option
<
dynamo_
async_openai
::
types
::
Stop
>
)
->
Result
<
(),
anyhow
::
Error
>
{
if
let
Some
(
stop_value
)
=
stop
{
match
stop_value
{
async_openai
::
types
::
Stop
::
String
(
s
)
=>
{
dynamo_
async_openai
::
types
::
Stop
::
String
(
s
)
=>
{
if
s
.is_empty
()
{
anyhow
::
bail!
(
"Stop sequence cannot be empty"
);
}
}
async_openai
::
types
::
Stop
::
StringArray
(
sequences
)
=>
{
dynamo_
async_openai
::
types
::
Stop
::
StringArray
(
sequences
)
=>
{
if
sequences
.is_empty
()
{
anyhow
::
bail!
(
"Stop sequences array cannot be empty"
);
}
...
...
@@ -260,7 +260,7 @@ pub fn validate_stop(stop: &Option<async_openai::types::Stop>) -> Result<(), any
/// Validates messages array
pub
fn
validate_messages
(
messages
:
&
[
async_openai
::
types
::
ChatCompletionRequestMessage
],
messages
:
&
[
dynamo_
async_openai
::
types
::
ChatCompletionRequestMessage
],
)
->
Result
<
(),
anyhow
::
Error
>
{
if
messages
.is_empty
()
{
anyhow
::
bail!
(
"Messages array cannot be empty"
);
...
...
@@ -284,7 +284,7 @@ pub fn validate_top_logprobs(top_logprobs: Option<u8>) -> Result<(), anyhow::Err
/// Validates tools array
pub
fn
validate_tools
(
tools
:
&
Option
<&
[
async_openai
::
types
::
ChatCompletionTool
]
>
,
tools
:
&
Option
<&
[
dynamo_
async_openai
::
types
::
ChatCompletionTool
]
>
,
)
->
Result
<
(),
anyhow
::
Error
>
{
let
tools
=
match
tools
{
Some
(
val
)
=>
val
,
...
...
@@ -356,7 +356,7 @@ pub fn validate_metadata(metadata: &Option<serde_json::Value>) -> Result<(), any
/// Validates reasoning effort parameter
pub
fn
validate_reasoning_effort
(
_
reasoning_effort
:
&
Option
<
async_openai
::
types
::
ReasoningEffort
>
,
_
reasoning_effort
:
&
Option
<
dynamo_
async_openai
::
types
::
ReasoningEffort
>
,
)
->
Result
<
(),
anyhow
::
Error
>
{
// TODO ADD HERE
// ReasoningEffort is an enum, so if it exists, it's valid by definition
...
...
@@ -366,7 +366,7 @@ pub fn validate_reasoning_effort(
/// Validates service tier parameter
pub
fn
validate_service_tier
(
_
service_tier
:
&
Option
<
async_openai
::
types
::
ServiceTier
>
,
_
service_tier
:
&
Option
<
dynamo_
async_openai
::
types
::
ServiceTier
>
,
)
->
Result
<
(),
anyhow
::
Error
>
{
// TODO ADD HERE
// ServiceTier is an enum, so if it exists, it's valid by definition
...
...
@@ -379,14 +379,14 @@ pub fn validate_service_tier(
//
/// Validates prompt
pub
fn
validate_prompt
(
prompt
:
&
async_openai
::
types
::
Prompt
)
->
Result
<
(),
anyhow
::
Error
>
{
pub
fn
validate_prompt
(
prompt
:
&
dynamo_
async_openai
::
types
::
Prompt
)
->
Result
<
(),
anyhow
::
Error
>
{
match
prompt
{
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
String
(
s
)
=>
{
if
s
.is_empty
()
{
anyhow
::
bail!
(
"Prompt string cannot be empty"
);
}
}
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
StringArray
(
arr
)
=>
{
if
arr
.is_empty
()
{
anyhow
::
bail!
(
"Prompt string array cannot be empty"
);
}
...
...
@@ -396,7 +396,7 @@ pub fn validate_prompt(prompt: &async_openai::types::Prompt) -> Result<(), anyho
}
}
}
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
IntegerArray
(
arr
)
=>
{
if
arr
.is_empty
()
{
anyhow
::
bail!
(
"Prompt integer array cannot be empty"
);
}
...
...
@@ -411,7 +411,7 @@ pub fn validate_prompt(prompt: &async_openai::types::Prompt) -> Result<(), anyho
}
}
}
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
dynamo_
async_openai
::
types
::
Prompt
::
ArrayOfIntegerArray
(
arr
)
=>
{
if
arr
.is_empty
()
{
anyhow
::
bail!
(
"Prompt array of integer arrays cannot be empty"
);
}
...
...
lib/llm/tests/http-service.rs
View file @
199b9a30
...
...
@@ -14,8 +14,8 @@
// limitations under the License.
use
anyhow
::
Error
;
use
async_openai
::
config
::
OpenAIConfig
;
use
async_stream
::
stream
;
use
dynamo_async_openai
::
config
::
OpenAIConfig
;
use
dynamo_llm
::
http
::{
client
::{
GenericBYOTClient
,
HttpClientConfig
,
HttpRequestContext
,
NvCustomClient
,
PureOpenAIClient
,
...
...
@@ -311,16 +311,16 @@ async fn test_http_service() {
let
client
=
reqwest
::
Client
::
new
();
let
message
=
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
let
message
=
dynamo_
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"hi"
.to_string
(),
),
name
:
None
,
},
);
let
mut
request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
mut
request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"foo"
)
.messages
(
vec!
[
message
])
.build
()
...
...
@@ -483,7 +483,7 @@ async fn test_http_service() {
// ==== ChatCompletions / Unary / Error ====
// ==== Completions / Unary / Error ====
let
mut
request
=
async_openai
::
types
::
CreateCompletionRequestArgs
::
default
()
let
mut
request
=
dynamo_
async_openai
::
types
::
CreateCompletionRequestArgs
::
default
()
.model
(
"bar"
)
.prompt
(
"hi"
)
.build
()
...
...
@@ -642,12 +642,13 @@ async fn test_pure_openai_client() {
wait_for_service_ready
(
port
)
.await
;
// Test successful streaming request
let
request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"foo"
)
.messages
(
vec!
[
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"Hi"
.to_string
(),
),
name
:
None
,
...
...
@@ -674,12 +675,13 @@ async fn test_pure_openai_client() {
assert
!
(
count
>
0
,
"Should receive at least one response"
);
// Test error case with invalid model
let
request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"bar"
)
// This model will fail
.messages
(
vec!
[
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"Hi"
.to_string
(),
),
name
:
None
,
...
...
@@ -707,12 +709,13 @@ async fn test_pure_openai_client() {
// Test context management
let
ctx
=
HttpRequestContext
::
new
();
let
request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"foo"
)
.messages
(
vec!
[
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"Hi"
.to_string
(),
),
name
:
None
,
...
...
@@ -751,12 +754,13 @@ async fn test_nv_custom_client() {
wait_for_service_ready
(
port
)
.await
;
// Test successful streaming request
let
inner_request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
inner_request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"foo"
)
.messages
(
vec!
[
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"Hi"
.to_string
(),
),
name
:
None
,
...
...
@@ -789,12 +793,13 @@ async fn test_nv_custom_client() {
assert
!
(
count
>
0
,
"Should receive at least one response"
);
// Test error case with invalid model
let
inner_request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
inner_request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"bar"
)
// This model will fail
.messages
(
vec!
[
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"Hi"
.to_string
(),
),
name
:
None
,
...
...
@@ -828,12 +833,13 @@ async fn test_nv_custom_client() {
// Test context management
let
ctx
=
HttpRequestContext
::
new
();
let
inner_request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
inner_request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"foo"
)
.messages
(
vec!
[
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
dynamo_async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"Hi"
.to_string
(),
),
name
:
None
,
...
...
@@ -987,16 +993,16 @@ async fn test_client_disconnect_cancellation_unary() {
let
client
=
reqwest
::
Client
::
new
();
let
message
=
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
let
message
=
dynamo_
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"This will take a long time"
.to_string
(),
),
name
:
None
,
},
);
let
request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"slow-model"
)
.messages
(
vec!
[
message
])
.stream
(
false
)
// Test unary response
...
...
@@ -1078,16 +1084,16 @@ async fn test_client_disconnect_cancellation_streaming() {
let
client
=
reqwest
::
Client
::
new
();
let
message
=
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
let
message
=
dynamo_
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"This will stream for a long time"
.to_string
(),
),
name
:
None
,
},
);
let
request
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
let
request
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
()
.model
(
"slow-stream-model"
)
.messages
(
vec!
[
message
])
.stream
(
true
)
// Test streaming response
...
...
lib/llm/tests/logprob_analysis_integration.rs
View file @
199b9a30
...
...
@@ -10,7 +10,7 @@ use dynamo_llm::perf::logprobs::analyze_logprob_sensitivity;
use
dynamo_llm
::
perf
::{
RecordedStream
,
TimestampedResponse
};
use
dynamo_llm
::
protocols
::
openai
::
chat_completions
::
NvCreateChatCompletionStreamResponse
;
use
async_openai
::
types
::{
use
dynamo_
async_openai
::
types
::{
ChatChoiceLogprobs
,
ChatChoiceStream
,
ChatCompletionStreamResponseDelta
,
ChatCompletionTokenLogprob
,
CreateChatCompletionStreamResponse
,
FinishReason
,
Role
,
TopLogprobs
,
...
...
lib/llm/tests/openai_completions.rs
View file @
199b9a30
...
...
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
async_openai
::
types
::
CreateCompletionRequestArgs
;
use
dynamo_
async_openai
::
types
::
CreateCompletionRequestArgs
;
use
dynamo_llm
::
protocols
::
openai
::{
completions
::
NvCreateCompletionRequest
,
validate
};
use
serde
::{
Deserialize
,
Serialize
};
...
...
lib/llm/tests/preprocessor.rs
View file @
199b9a30
...
...
@@ -231,31 +231,31 @@ const TOOLS: &str = r#"
"#
;
// Notes:
// protocols::openai::chat_completions::ChatCompletionMessage -> async_openai::types::ChatCompletionRequestMessage
// protocols::openai::chat_completions::Tool -> async_openai::types::ChatCompletionTool
// protocols::openai::chat_completions::ToolChoiceType -> async_openai::types::ChatCompletionToolChoiceOption
// protocols::openai::chat_completions::ChatCompletionMessage ->
dynamo_
async_openai::types::ChatCompletionRequestMessage
// protocols::openai::chat_completions::Tool ->
dynamo_
async_openai::types::ChatCompletionTool
// protocols::openai::chat_completions::ToolChoiceType ->
dynamo_
async_openai::types::ChatCompletionToolChoiceOption
#[derive(Serialize,
Deserialize)]
struct
Request
{
messages
:
Vec
<
async_openai
::
types
::
ChatCompletionRequestMessage
>
,
tools
:
Option
<
Vec
<
async_openai
::
types
::
ChatCompletionTool
>>
,
tool_choice
:
Option
<
async_openai
::
types
::
ChatCompletionToolChoiceOption
>
,
messages
:
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionRequestMessage
>
,
tools
:
Option
<
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionTool
>>
,
tool_choice
:
Option
<
dynamo_
async_openai
::
types
::
ChatCompletionToolChoiceOption
>
,
}
impl
Request
{
fn
from
(
messages
:
&
str
,
tools
:
Option
<&
str
>
,
tool_choice
:
Option
<
async_openai
::
types
::
ChatCompletionToolChoiceOption
>
,
tool_choice
:
Option
<
dynamo_
async_openai
::
types
::
ChatCompletionToolChoiceOption
>
,
model
:
String
,
)
->
NvCreateChatCompletionRequest
{
let
messages
:
Vec
<
async_openai
::
types
::
ChatCompletionRequestMessage
>
=
let
messages
:
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionRequestMessage
>
=
serde_json
::
from_str
(
messages
)
.unwrap
();
let
tools
:
Option
<
Vec
<
async_openai
::
types
::
ChatCompletionTool
>>
=
let
tools
:
Option
<
Vec
<
dynamo_
async_openai
::
types
::
ChatCompletionTool
>>
=
tools
.map
(|
x
|
serde_json
::
from_str
(
x
)
.unwrap
());
//let tools = tools.unwrap();
//let tool_choice = tool_choice.unwrap();
let
mut
inner
=
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
();
let
mut
inner
=
dynamo_
async_openai
::
types
::
CreateChatCompletionRequestArgs
::
default
();
inner
.model
(
model
);
inner
.messages
(
messages
);
if
let
Some
(
tools
)
=
tools
{
...
...
@@ -326,7 +326,7 @@ async fn test_single_turn_with_tools() {
let
request
=
Request
::
from
(
SINGLE_CHAT_MESSAGE
,
Some
(
TOOLS
),
Some
(
async_openai
::
types
::
ChatCompletionToolChoiceOption
::
Auto
),
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionToolChoiceOption
::
Auto
),
mdc
.slug
()
.to_string
(),
);
let
formatted_prompt
=
formatter
.render
(
&
request
)
.unwrap
();
...
...
@@ -433,7 +433,7 @@ async fn test_multi_turn_with_system_with_tools() {
let
request
=
Request
::
from
(
THREE_TURN_CHAT_MESSAGE_WITH_SYSTEM
,
Some
(
TOOLS
),
Some
(
async_openai
::
types
::
ChatCompletionToolChoiceOption
::
Auto
),
Some
(
dynamo_
async_openai
::
types
::
ChatCompletionToolChoiceOption
::
Auto
),
mdc
.slug
()
.to_string
(),
);
let
formatted_prompt
=
formatter
.render
(
&
request
)
.unwrap
();
...
...
lib/llm/tests/test_common_ext.rs
View file @
199b9a30
...
...
@@ -226,11 +226,11 @@ fn test_completions_common_overrides_nvext() {
fn
test_serialization_preserves_structure
()
{
// Test that serialization preserves the flattened structure
let
request
=
NvCreateChatCompletionRequest
{
inner
:
async_openai
::
types
::
CreateChatCompletionRequest
{
inner
:
dynamo_
async_openai
::
types
::
CreateChatCompletionRequest
{
model
:
"test-model"
.to_string
(),
messages
:
vec!
[
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
messages
:
vec!
[
dynamo_
async_openai
::
types
::
ChatCompletionRequestMessage
::
User
(
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessage
{
content
:
dynamo_
async_openai
::
types
::
ChatCompletionRequestUserMessageContent
::
Text
(
"Hello"
.to_string
(),
),
..
Default
::
default
()
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment