Unverified Commit 199b9a30 authored by nachiketb-nvidia's avatar nachiketb-nvidia Committed by GitHub
Browse files

chore: Bring async-openai into repo as request starter (#2520)


Co-authored-by: default avatarGraham King <grahamk@nvidia.com>
parent 26d9f159
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0
#![allow(dead_code)]
//! The purpose of this test to make sure that all _byot methods compiles with custom types.
use std::pin::Pin;
use dynamo_async_openai::{error::OpenAIError, Client};
use futures::Stream;
use serde_json::{json, Value};
impl dynamo_async_openai::traits::AsyncTryFrom<MyJson> for reqwest::multipart::Form {
type Error = OpenAIError;
async fn try_from(_value: MyJson) -> Result<Self, Self::Error> {
Ok(reqwest::multipart::Form::new())
}
}
#[derive(Clone)]
pub struct MyJson(Value);
type MyStreamingType = Pin<Box<dyn Stream<Item = Result<Value, OpenAIError>> + Send>>;
#[tokio::test]
async fn test_byot_files() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.files().create_byot(MyJson(json!({}))).await;
let _r: Result<Value, OpenAIError> = client.files().list_byot([("limit", "2")]).await;
let _r: Result<Value, OpenAIError> = client.files().retrieve_byot("file_id").await;
let _r: Result<Value, OpenAIError> = client.files().delete_byot("file_id").await;
}
#[tokio::test]
async fn test_byot_assistants() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.assistants().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.assistants().retrieve_byot("aid").await;
let _r: Result<Value, OpenAIError> = client.assistants().update_byot("aid", json!({})).await;
let _r: Result<Value, OpenAIError> = client.assistants().list_byot([("limit", 2)]).await;
}
#[tokio::test]
async fn test_byot_models() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.models().list_byot().await;
let _r: Result<Value, OpenAIError> = client.models().retrieve_byot("").await;
let _r: Result<Value, OpenAIError> = client.models().delete_byot(String::new()).await;
}
#[tokio::test]
async fn test_byot_moderations() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.moderations().create_byot(json!({})).await;
}
#[tokio::test]
async fn test_byot_images() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.images().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.images().create_edit_byot(MyJson(json!({}))).await;
let _r: Result<Value, OpenAIError> = client
.images()
.create_variation_byot(MyJson(json!({})))
.await;
}
#[tokio::test]
async fn test_byot_chat() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.chat().create_byot(json!({})).await;
let _r: Result<MyStreamingType, OpenAIError> =
client.chat().create_stream_byot(json!({})).await;
}
#[tokio::test]
async fn test_byot_completions() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.completions().create_byot(json!({})).await;
let _r: Result<MyStreamingType, OpenAIError> =
client.completions().create_stream_byot(json!({})).await;
}
#[tokio::test]
async fn test_byot_audio() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.audio().transcribe_byot(MyJson(json!({}))).await;
let _r: Result<Value, OpenAIError> = client
.audio()
.transcribe_verbose_json_byot(MyJson(json!({})))
.await;
let _r: Result<Value, OpenAIError> = client.audio().translate_byot(MyJson(json!({}))).await;
let _r: Result<Value, OpenAIError> = client
.audio()
.translate_verbose_json_byot(MyJson(json!({})))
.await;
}
#[tokio::test]
async fn test_byot_embeddings() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.embeddings().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.embeddings().create_base64_byot(json!({})).await;
}
#[tokio::test]
async fn test_byot_fine_tunning() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.fine_tuning().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client
.fine_tuning()
.list_paginated_byot([("limit", "2")])
.await;
let _r: Result<Value, OpenAIError> = client
.fine_tuning()
.retrieve_byot("fine_tunning_job_id")
.await;
let _r: Result<Value, OpenAIError> =
client.fine_tuning().cancel_byot("fine_tuning_job_id").await;
let _r: Result<Value, OpenAIError> = client
.fine_tuning()
.list_events_byot("fine_tuning_job_id", [("limit", "2")])
.await;
let _r: Result<Value, OpenAIError> = client
.fine_tuning()
.list_checkpoints_byot("fine_tuning_job_id", [("limit", "2")])
.await;
}
#[derive(Clone, serde::Deserialize)]
pub struct MyThreadJson(Value);
impl TryFrom<eventsource_stream::Event> for MyThreadJson {
type Error = OpenAIError;
fn try_from(_value: eventsource_stream::Event) -> Result<Self, Self::Error> {
Ok(MyThreadJson(json!({})))
}
}
type MyThreadStreamingType = Pin<Box<dyn Stream<Item = Result<MyThreadJson, OpenAIError>> + Send>>;
#[tokio::test]
async fn test_byot_threads() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.threads().create_and_run_byot(json!({})).await;
let _r: Result<MyThreadStreamingType, OpenAIError> =
client.threads().create_and_run_stream_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.threads().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.threads().retrieve_byot("thread_id").await;
let _r: Result<Value, OpenAIError> = client.threads().update_byot("thread_id", json!({})).await;
let _r: Result<Value, OpenAIError> = client.threads().delete_byot("thread_id").await;
}
#[tokio::test]
async fn test_byot_messages() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.threads()
.messages("thread_id")
.create_byot(json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.messages("thread_id")
.retrieve_byot("message_id")
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.messages("thread_id")
.update_byot("message_id", json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.messages("thread_id")
.list_byot([("limit", "2")])
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.messages("thread_id")
.delete_byot("message_id")
.await;
}
#[tokio::test]
async fn test_byot_runs() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.create_byot(json!({}))
.await;
let _r: Result<MyThreadStreamingType, OpenAIError> = client
.threads()
.runs("thread_id")
.create_stream_byot(json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.retrieve_byot("run_id")
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.update_byot("run_id", json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.list_byot([("limit", "2")])
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.submit_tool_outputs_byot("run_id", json!({}))
.await;
let _r: Result<MyThreadStreamingType, OpenAIError> = client
.threads()
.runs("thread_id")
.submit_tool_outputs_stream_byot("run_id", json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.cancel_byot("run_id")
.await;
}
#[tokio::test]
async fn test_byot_run_steps() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.steps("run_id")
.retrieve_byot("step_id")
.await;
let _r: Result<Value, OpenAIError> = client
.threads()
.runs("thread_id")
.steps("run_id")
.list_byot([("limit", "2")])
.await;
}
#[tokio::test]
async fn test_byot_vector_store_files() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.files("vector_store_id")
.create_byot(json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.files("vector_store_id")
.retrieve_byot("file_id")
.await;
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.files("vector_store_id")
.delete_byot("file_id")
.await;
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.files("vector_store_id")
.list_byot([("limit", "2")])
.await;
}
#[tokio::test]
async fn test_byot_vector_store_file_batches() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.file_batches("vector_store_id")
.create_byot(json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.file_batches("vector_store_id")
.retrieve_byot("file_id")
.await;
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.file_batches("vector_store_id")
.cancel_byot("file_id")
.await;
let _r: Result<Value, OpenAIError> = client
.vector_stores()
.file_batches("vector_store_id")
.list_byot("batch_id", [("limit", "2")])
.await;
}
#[tokio::test]
async fn test_byot_batches() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.batches().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.batches().list_byot([("limit", "2")]).await;
let _r: Result<Value, OpenAIError> = client.batches().retrieve_byot("batch_id").await;
let _r: Result<Value, OpenAIError> = client.batches().cancel_byot("batch_id").await;
}
#[tokio::test]
async fn test_byot_audit_logs() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.audit_logs().get_byot([("limit", "2")]).await;
}
#[tokio::test]
async fn test_byot_invites() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.invites().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.invites().retrieve_byot("invite_id").await;
let _r: Result<Value, OpenAIError> = client.invites().delete_byot("invite_id").await;
let _r: Result<Value, OpenAIError> = client.invites().list_byot([("limit", "2")]).await;
}
#[tokio::test]
async fn test_byot_projects() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.projects().list_byot([("limit", "2")]).await;
let _r: Result<Value, OpenAIError> = client.projects().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client.projects().retrieve_byot("project_id").await;
let _r: Result<Value, OpenAIError> =
client.projects().modify_byot("project_id", json!({})).await;
let _r: Result<Value, OpenAIError> = client.projects().archive_byot("project_id").await;
}
#[tokio::test]
async fn test_byot_project_api_keys() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.projects()
.api_keys("project_id")
.list_byot([("query", "2")])
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.api_keys("project_id")
.retrieve_byot("api_key")
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.api_keys("project_id")
.delete_byot("api_key")
.await;
}
#[tokio::test]
async fn test_byot_project_service_accounts() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.projects()
.service_accounts("project_id")
.create_byot(json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.service_accounts("project_id")
.delete_byot("service_account_id")
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.service_accounts("project_id")
.retrieve_byot("service_account_id")
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.service_accounts("project_id")
.list_byot([("limit", "2")])
.await;
}
#[tokio::test]
async fn test_byot_project_users() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client
.projects()
.users("project_id")
.create_byot(json!({}))
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.users("project_id")
.delete_byot("user_id")
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.users("project_id")
.list_byot([("limit", "2")])
.await;
let _r: Result<Value, OpenAIError> = client
.projects()
.users("project_id")
.retrieve_byot("user_id")
.await;
}
#[tokio::test]
async fn test_byot_uploads() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.uploads().create_byot(json!({})).await;
let _r: Result<Value, OpenAIError> = client
.uploads()
.add_part_byot("upload_id", MyJson(json!({})))
.await;
let _r: Result<Value, OpenAIError> =
client.uploads().complete_byot("upload_id", json!({})).await;
let _r: Result<Value, OpenAIError> = client.uploads().cancel_byot("upload_id").await;
}
#[tokio::test]
async fn test_byot_users() {
let client = Client::new();
let _r: Result<Value, OpenAIError> = client.users().list_byot([("limit", "2")]).await;
let _r: Result<Value, OpenAIError> = client.users().modify_byot("user_id", json!({})).await;
let _r: Result<Value, OpenAIError> = client.users().retrieve_byot("user_id").await;
let _r: Result<Value, OpenAIError> = client.users().delete_byot("user_id").await;
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0
//! This test is primarily to make sure that macros_rules for From traits are correct.
use dynamo_async_openai::types::Prompt;
fn prompt_input<T>(input: T) -> Prompt
where
Prompt: From<T>,
{
input.into()
}
#[test]
fn create_prompt_input() {
let prompt = "This is &str prompt";
let _ = prompt_input(prompt);
let prompt = "This is String".to_string();
let _ = prompt_input(&prompt);
let _ = prompt_input(prompt);
let prompt = vec!["This is first", "This is second"];
let _ = prompt_input(&prompt);
let _ = prompt_input(prompt);
let prompt = vec!["First string".to_string(), "Second string".to_string()];
let _ = prompt_input(&prompt);
let _ = prompt_input(prompt);
let first = "First".to_string();
let second = "Second".to_string();
let prompt = vec![&first, &second];
let _ = prompt_input(&prompt);
let _ = prompt_input(prompt);
let prompt = ["first", "second"];
let _ = prompt_input(&prompt);
let _ = prompt_input(prompt);
let prompt = ["first".to_string(), "second".to_string()];
let _ = prompt_input(&prompt);
let _ = prompt_input(prompt);
let first = "First".to_string();
let second = "Second".to_string();
let prompt = [&first, &second];
let _ = prompt_input(&prompt);
let _ = prompt_input(prompt);
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0
//! This test is primarily to make sure that macros_rules for From traits are correct.
use dynamo_async_openai::types::EmbeddingInput;
fn embedding_input<T>(input: T) -> EmbeddingInput
where
EmbeddingInput: From<T>,
{
input.into()
}
#[test]
fn create_embedding_input() {
let input = [1, 2, 3];
let _ = embedding_input(&input);
let _ = embedding_input(input);
let input = [[1, 2, 3], [4, 5, 6], [7, 8, 9]];
let _ = embedding_input(&input);
let _ = embedding_input(input);
let (s1, s2, s3) = ([1, 2, 3], [4, 5, 6], [7, 8, 9]);
let input = [&s1, &s2, &s3];
let _ = embedding_input(&input);
let _ = embedding_input(input);
let input = vec![1, 2, 3];
let _ = embedding_input(&input);
let _ = embedding_input(input);
let input = vec![[1, 2, 3], [4, 5, 6], [7, 8, 9]];
let _ = embedding_input(&input);
let _ = embedding_input(input);
let input = vec![vec![1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11, 12]];
let _ = embedding_input(&input);
let _ = embedding_input(input);
let input = [vec![1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11, 12]];
let _ = embedding_input(&input);
let _ = embedding_input(input);
let (v1, v2, v3) = (vec![1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11, 12]);
let input = [&v1, &v2, &v3];
let _ = embedding_input(&input);
let _ = embedding_input(input);
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0
use dynamo_async_openai::types::{
ChatCompletionRequestSystemMessageArgs, ChatCompletionRequestUserMessageArgs,
CreateChatCompletionRequest, CreateChatCompletionRequestArgs,
};
#[tokio::test]
async fn chat_types_serde() {
let request: CreateChatCompletionRequest = CreateChatCompletionRequestArgs::default()
.messages([
ChatCompletionRequestSystemMessageArgs::default()
.content("your are a calculator")
.build()
.unwrap()
.into(),
ChatCompletionRequestUserMessageArgs::default()
.content("what is the result of 1+1")
.build()
.unwrap()
.into(),
])
.build()
.unwrap();
// serialize the request
let serialized = serde_json::to_string(&request).unwrap();
// deserialize the request
let deserialized: CreateChatCompletionRequest = serde_json::from_str(&serialized).unwrap();
assert_eq!(request, deserialized);
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0
use dynamo_async_openai::types::CreateTranslationRequestArgs;
use dynamo_async_openai::{types::CreateTranscriptionRequestArgs, Client};
use tokio_test::assert_err;
#[tokio::test]
async fn transcribe_test() {
let client = Client::new();
let request = CreateTranscriptionRequestArgs::default().build().unwrap();
let response = client.audio().transcribe(request).await;
assert_err!(response); // FileReadError("cannot extract file name from ")
}
#[tokio::test]
async fn transcribe_sendable_test() {
let client = Client::new();
// https://github.com/64bit/async-openai/issues/140
let transcribe = tokio::spawn(async move {
let request = CreateTranscriptionRequestArgs::default().build().unwrap();
client.audio().transcribe(request).await
});
let response = transcribe.await.unwrap();
assert_err!(response); // FileReadError("cannot extract file name from ")
}
#[tokio::test]
async fn translate_test() {
let client = Client::new();
let request = CreateTranslationRequestArgs::default().build().unwrap();
let response = client.audio().translate(request).await;
assert_err!(response); // FileReadError("cannot extract file name from ")
}
#[tokio::test]
async fn translate_sendable_test() {
let client = Client::new();
// https://github.com/64bit/async-openai/issues/140
let translate = tokio::spawn(async move {
let request = CreateTranslationRequestArgs::default().build().unwrap();
client.audio().translate(request).await
});
let response = translate.await.unwrap();
assert_err!(response); // FileReadError("cannot extract file name from ")
}
......@@ -42,7 +42,7 @@ dynamo-llm = { path = "../../llm" }
dynamo-runtime = { path = "../../runtime" }
anyhow = { version = "1" }
async-openai = { version = "0.29.0" }
dynamo-async-openai = { path = "../../async-openai" }
async-stream = { version = "0.3" }
async-trait = { version = "0.1" }
derive-getters = "0.5"
......
......@@ -34,7 +34,7 @@ dynamo-runtime = { workspace = true }
dynamo-llm = { workspace = true }
anyhow = { workspace = true }
async-openai = { workspace = true }
dynamo-async-openai = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
either = { workspace = true }
......
......@@ -4,9 +4,9 @@
use std::collections::HashMap;
use std::{num::NonZero, sync::Arc};
use async_openai::types::FinishReason;
use async_stream::stream;
use async_trait::async_trait;
use dynamo_async_openai::types::FinishReason;
use either::Either;
use indexmap::IndexMap;
use mistralrs::{
......@@ -277,10 +277,10 @@ impl
let mut messages = vec![];
for m in request.inner.messages {
let async_openai::types::ChatCompletionRequestMessage::User(inner_m) = m else {
let dynamo_async_openai::types::ChatCompletionRequestMessage::User(inner_m) = m else {
continue;
};
let async_openai::types::ChatCompletionRequestUserMessageContent::Text(content) =
let dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(content) =
inner_m.content
else {
anyhow::bail!("Only Text type chat completion supported");
......@@ -396,13 +396,13 @@ impl
//tracing::trace!("from_assistant: {from_assistant}");
#[allow(deprecated)]
let inner = async_openai::types::CreateChatCompletionStreamResponse{
let inner = dynamo_async_openai::types::CreateChatCompletionStreamResponse{
id: c.id,
choices: vec![async_openai::types::ChatChoiceStream{
choices: vec![dynamo_async_openai::types::ChatChoiceStream{
index: 0,
delta: async_openai::types::ChatCompletionStreamResponseDelta{
delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta{
//role: c.choices[0].delta.role,
role: Some(async_openai::types::Role::Assistant),
role: Some(dynamo_async_openai::types::Role::Assistant),
content: Some(from_assistant),
tool_calls: None,
refusal: None,
......@@ -441,10 +441,10 @@ impl
}
/// openai stop tokens to mistralrs stop tokens
fn to_stop_tokens(t: async_openai::types::Stop) -> StopTokens {
fn to_stop_tokens(t: dynamo_async_openai::types::Stop) -> StopTokens {
match t {
async_openai::types::Stop::String(s) => StopTokens::Seqs(vec![s]),
async_openai::types::Stop::StringArray(v) => StopTokens::Seqs(v),
dynamo_async_openai::types::Stop::String(s) => StopTokens::Seqs(vec![s]),
dynamo_async_openai::types::Stop::StringArray(v) => StopTokens::Seqs(v),
}
}
......
......@@ -47,7 +47,7 @@ dynamo-runtime = { workspace = true }
# workspace
anyhow = { workspace = true }
async-openai = { workspace = true }
dynamo-async-openai = { workspace = true }
async-stream = { workspace = true }
async-trait = { workspace = true }
async-nats = { workspace = true }
......
......@@ -188,11 +188,11 @@ impl
let req = request.inner.messages.into_iter().next_back().unwrap();
let prompt = match req {
async_openai::types::ChatCompletionRequestMessage::User(user_msg) => {
dynamo_async_openai::types::ChatCompletionRequestMessage::User(user_msg) => {
match user_msg.content {
async_openai::types::ChatCompletionRequestUserMessageContent::Text(prompt) => {
prompt
}
dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
prompt,
) => prompt,
_ => anyhow::bail!("Invalid request content field, expected Content::Text"),
}
}
......@@ -212,7 +212,7 @@ impl
id += 1;
}
let inner = deltas.create_choice(0, None, Some(async_openai::types::FinishReason::Stop), None);
let inner = deltas.create_choice(0, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
let response = NvCreateChatCompletionStreamResponse {
inner,
};
......@@ -247,7 +247,7 @@ impl
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
id += 1;
}
let response = deltas.create_choice(0, None, Some(async_openai::types::CompletionFinishReason::Stop), None);
let response = deltas.create_choice(0, None, Some(dynamo_async_openai::types::CompletionFinishReason::Stop), None);
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
};
......
......@@ -7,7 +7,7 @@ use crate::types::openai::chat_completions::{
NvCreateChatCompletionRequest, OpenAIChatCompletionsStreamingEngine,
};
use anyhow::Context as _;
use async_openai::types::FinishReason;
use dynamo_async_openai::types::FinishReason;
use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime};
use futures::StreamExt;
use serde::{Deserialize, Serialize};
......@@ -199,15 +199,15 @@ async fn evaluate(
entry: &mut Entry,
template: Option<Arc<RequestTemplate>>,
) -> anyhow::Result<String> {
let user_message = async_openai::types::ChatCompletionRequestMessage::User(
async_openai::types::ChatCompletionRequestUserMessage {
content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
let user_message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
dynamo_async_openai::types::ChatCompletionRequestUserMessage {
content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
entry.text.clone(),
),
name: None,
},
);
let inner = async_openai::types::CreateChatCompletionRequestArgs::default()
let inner = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
.messages(vec![user_message])
.model(
template
......
......@@ -81,15 +81,17 @@ async fn main_loop(
};
// Construct messages
let user_message = async_openai::types::ChatCompletionRequestMessage::User(
async_openai::types::ChatCompletionRequestUserMessage {
content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(prompt),
let user_message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
dynamo_async_openai::types::ChatCompletionRequestUserMessage {
content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
prompt,
),
name: None,
},
);
messages.push(user_message);
// Request
let inner = async_openai::types::CreateChatCompletionRequestArgs::default()
let inner = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
.messages(messages.clone())
.model(
template
......@@ -170,12 +172,12 @@ async fn main_loop(
println!();
let assistant_content =
async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(
dynamo_async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(
assistant_message,
);
let assistant_message = async_openai::types::ChatCompletionRequestMessage::Assistant(
async_openai::types::ChatCompletionRequestAssistantMessage {
let assistant_message = dynamo_async_openai::types::ChatCompletionRequestMessage::Assistant(
dynamo_async_openai::types::ChatCompletionRequestAssistantMessage {
content: Some(assistant_content),
..Default::default()
},
......
......@@ -12,9 +12,9 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Instant;
use async_openai::{config::OpenAIConfig, error::OpenAIError, Client};
use async_trait::async_trait;
use derive_getters::Dissolve;
use dynamo_async_openai::{config::OpenAIConfig, error::OpenAIError, Client};
use futures::Stream;
use serde_json::Value;
use tokio_util::sync::CancellationToken;
......@@ -234,7 +234,7 @@ pub type ByotResponseStream = DataStream<Result<Value, OpenAIError>>;
/// Type alias for pure OpenAI chat response stream
pub type OpenAIChatResponseStream =
DataStream<Result<async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>>;
DataStream<Result<dynamo_async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>>;
/// A wrapped HTTP response stream that combines a stream with its context
/// This provides a unified interface for HTTP client responses
......@@ -330,7 +330,7 @@ pub type ByotHttpResponseStream = HttpResponseStream<Result<Value, OpenAIError>>
/// Type alias for HttpResponseStream with pure OpenAI responses
pub type OpenAIHttpResponseStream = HttpResponseStream<
Result<async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>,
Result<dynamo_async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>,
>;
/// Pure OpenAI client using standard async-openai types
......@@ -350,7 +350,7 @@ impl PureOpenAIClient {
/// Uses a client-managed context
pub async fn chat_stream(
&self,
request: async_openai::types::CreateChatCompletionRequest,
request: dynamo_async_openai::types::CreateChatCompletionRequest,
) -> Result<OpenAIHttpResponseStream, HttpClientError> {
let ctx = self.base.create_context();
self.chat_stream_with_context(request, ctx).await
......@@ -359,7 +359,7 @@ impl PureOpenAIClient {
/// Create streaming chat completions with a custom context
pub async fn chat_stream_with_context(
&self,
request: async_openai::types::CreateChatCompletionRequest,
request: dynamo_async_openai::types::CreateChatCompletionRequest,
context: HttpRequestContext,
) -> Result<OpenAIHttpResponseStream, HttpClientError> {
let ctx_arc: Arc<dyn AsyncEngineContext> = Arc::new(context.clone());
......
......@@ -763,7 +763,7 @@ pub fn validate_response_input_is_text_only(
request: &NvCreateResponse,
) -> Option<impl IntoResponse> {
match &request.inner.input {
async_openai::types::responses::Input::Text(_) => None,
dynamo_async_openai::types::responses::Input::Text(_) => None,
_ => Some(ErrorMessage::not_implemented_error("Only `Input::Text` is supported. Structured, multimedia, or custom input types are not yet implemented.")),
}
}
......@@ -1069,12 +1069,12 @@ pub fn responses_router(
mod tests {
use std::collections::HashMap;
use async_openai::types::responses::{
use dynamo_async_openai::types::responses::{
CreateResponse, Input, InputContent, InputItem, InputMessage, PromptConfig,
Role as ResponseRole, ServiceTier, TextConfig, TextResponseFormat, ToolChoice,
ToolChoiceMode, Truncation,
};
use async_openai::types::{
use dynamo_async_openai::types::{
ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
};
......
......@@ -572,7 +572,7 @@ mod tests {
use crate::protocols::codec::create_message_stream;
use crate::protocols::convert_sse_stream;
use approx::assert_abs_diff_eq;
use async_openai::types::{
use dynamo_async_openai::types::{
ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
ChatCompletionTokenLogprob, CreateChatCompletionStreamResponse, FinishReason, Role,
TopLogprobs,
......@@ -1556,7 +1556,7 @@ mod tests {
fn create_mock_response() -> NvCreateChatCompletionStreamResponse {
// Create a mock response for testing
// In practice, this would have real logprobs data
use async_openai::types::CreateChatCompletionStreamResponse;
use dynamo_async_openai::types::CreateChatCompletionStreamResponse;
let inner = CreateChatCompletionStreamResponse {
id: "test_id".to_string(),
......
......@@ -14,7 +14,7 @@ pub use super::parsers::{detect_and_parse_tool_call, ToolCallConfig};
pub fn try_tool_call_parse_aggregate(
message: &str,
parser_str: Option<&str>,
) -> anyhow::Result<Vec<async_openai::types::ChatCompletionMessageToolCall>> {
) -> anyhow::Result<Vec<dynamo_async_openai::types::ChatCompletionMessageToolCall>> {
let parsed = detect_and_parse_tool_call(message, parser_str)?;
if parsed.is_empty() {
return Ok(vec![]);
......@@ -22,10 +22,10 @@ pub fn try_tool_call_parse_aggregate(
Ok(parsed
.into_iter()
.map(
|parsed| async_openai::types::ChatCompletionMessageToolCall {
|parsed| dynamo_async_openai::types::ChatCompletionMessageToolCall {
id: parsed.id,
r#type: async_openai::types::ChatCompletionToolType::Function,
function: async_openai::types::FunctionCall {
r#type: dynamo_async_openai::types::ChatCompletionToolType::Function,
function: dynamo_async_openai::types::FunctionCall {
name: parsed.function.name,
arguments: parsed.function.arguments,
},
......@@ -40,7 +40,7 @@ pub fn try_tool_call_parse_aggregate(
pub fn try_tool_call_parse_stream(
message: &str,
parser_str: Option<&str>,
) -> anyhow::Result<Vec<async_openai::types::ChatCompletionMessageToolCallChunk>> {
) -> anyhow::Result<Vec<dynamo_async_openai::types::ChatCompletionMessageToolCallChunk>> {
let parsed = detect_and_parse_tool_call(message, parser_str)?;
if parsed.is_empty() {
return Ok(vec![]);
......@@ -49,11 +49,11 @@ pub fn try_tool_call_parse_stream(
.into_iter()
.enumerate()
.map(
|(idx, parsed)| async_openai::types::ChatCompletionMessageToolCallChunk {
|(idx, parsed)| dynamo_async_openai::types::ChatCompletionMessageToolCallChunk {
index: idx as u32,
id: Some(parsed.id),
r#type: Some(async_openai::types::ChatCompletionToolType::Function),
function: Some(async_openai::types::FunctionCallStream {
r#type: Some(dynamo_async_openai::types::ChatCompletionToolType::Function),
function: Some(dynamo_async_openai::types::FunctionCallStream {
name: Some(parsed.function.name),
arguments: Some(parsed.function.arguments),
}),
......
......@@ -15,7 +15,7 @@ pub mod prompt;
pub mod tools;
use anyhow::Result;
use async_openai::types::EncodingFormat;
use dynamo_async_openai::types::EncodingFormat;
use futures::stream::{self, StreamExt};
use prompt::OAIPromptFormatter;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
......@@ -276,11 +276,11 @@ impl OpenAIPreprocessor {
let mut builder = PreprocessedEmbeddingRequest::builder();
let all_token_ids = match &request.inner.input {
async_openai::types::EmbeddingInput::String(s) => {
dynamo_async_openai::types::EmbeddingInput::String(s) => {
let encoding = self.tokenizer.encode(s)?;
vec![encoding.token_ids().to_vec()]
}
async_openai::types::EmbeddingInput::StringArray(arr) => {
dynamo_async_openai::types::EmbeddingInput::StringArray(arr) => {
let input_strs: Vec<String> = arr.to_vec();
let encodings = tokio::task::spawn_blocking({
let tokenizer = self.tokenizer.clone();
......@@ -296,8 +296,10 @@ impl OpenAIPreprocessor {
.collect();
token_arrays
}
async_openai::types::EmbeddingInput::IntegerArray(token_ids) => vec![token_ids.clone()],
async_openai::types::EmbeddingInput::ArrayOfIntegerArray(token_arrays) => {
dynamo_async_openai::types::EmbeddingInput::IntegerArray(token_ids) => {
vec![token_ids.clone()]
}
dynamo_async_openai::types::EmbeddingInput::ArrayOfIntegerArray(token_arrays) => {
token_arrays.clone()
}
};
......@@ -437,11 +439,11 @@ impl OpenAIPreprocessor {
let transformed_stream = stream.map(move |output| {
output.map_data(|engine_output| {
// Convert engine output to OpenAI response format
let embeddings: Vec<async_openai::types::Embedding> = engine_output
let embeddings: Vec<dynamo_async_openai::types::Embedding> = engine_output
.embeddings
.into_iter()
.enumerate()
.map(|(index, embedding)| async_openai::types::Embedding {
.map(|(index, embedding)| dynamo_async_openai::types::Embedding {
index: index as u32,
object: "embedding".to_string(),
embedding: embedding.into_iter().map(|f| f as f32).collect(),
......@@ -449,11 +451,11 @@ impl OpenAIPreprocessor {
.collect();
let response = NvCreateEmbeddingResponse {
inner: async_openai::types::CreateEmbeddingResponse {
inner: dynamo_async_openai::types::CreateEmbeddingResponse {
object: "list".to_string(),
model: original_request.inner.model.clone(),
data: embeddings,
usage: async_openai::types::EmbeddingUsage {
usage: dynamo_async_openai::types::EmbeddingUsage {
prompt_tokens: engine_output.prompt_tokens,
total_tokens: engine_output.total_tokens,
},
......
......@@ -53,7 +53,7 @@ impl OAIChatLikeRequest for NvCreateChatCompletionRequest {
if let Some(last) = self.inner.messages.last() {
matches!(
last,
async_openai::types::ChatCompletionRequestMessage::User(_)
dynamo_async_openai::types::ChatCompletionRequestMessage::User(_)
)
} else {
true
......@@ -70,9 +70,9 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
self.inner.model.clone()
}
fn messages(&self) -> minijinja::value::Value {
let message = async_openai::types::ChatCompletionRequestMessage::User(
async_openai::types::ChatCompletionRequestUserMessage {
content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
let message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
dynamo_async_openai::types::ChatCompletionRequestUserMessage {
content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
crate::protocols::openai::completions::prompt_to_string(&self.inner.prompt),
),
name: None,
......@@ -88,16 +88,16 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
fn prompt_input_type(&self) -> PromptInput {
match &self.inner.prompt {
async_openai::types::Prompt::IntegerArray(_) => {
dynamo_async_openai::types::Prompt::IntegerArray(_) => {
PromptInput::Tokens(TokenInput::Single(vec![]))
}
async_openai::types::Prompt::ArrayOfIntegerArray(_) => {
dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(_) => {
PromptInput::Tokens(TokenInput::Batch(vec![]))
}
async_openai::types::Prompt::String(_) => {
dynamo_async_openai::types::Prompt::String(_) => {
PromptInput::Text(TextInput::Single(String::new()))
}
async_openai::types::Prompt::StringArray(_) => {
dynamo_async_openai::types::Prompt::StringArray(_) => {
PromptInput::Text(TextInput::Batch(vec![]))
}
}
......@@ -105,10 +105,10 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
fn extract_tokens(&self) -> Option<TokenInput> {
match &self.inner.prompt {
async_openai::types::Prompt::IntegerArray(tokens) => {
dynamo_async_openai::types::Prompt::IntegerArray(tokens) => {
Some(TokenInput::Single(tokens.clone()))
}
async_openai::types::Prompt::ArrayOfIntegerArray(arrays) => {
dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(arrays) => {
Some(TokenInput::Batch(arrays.clone()))
}
_ => None,
......@@ -117,8 +117,10 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
fn extract_text(&self) -> Option<TextInput> {
match &self.inner.prompt {
async_openai::types::Prompt::String(text) => Some(TextInput::Single(text.to_string())),
async_openai::types::Prompt::StringArray(texts) => {
dynamo_async_openai::types::Prompt::String(text) => {
Some(TextInput::Single(text.to_string()))
}
dynamo_async_openai::types::Prompt::StringArray(texts) => {
Some(TextInput::Batch(texts.to_vec()))
}
_ => None,
......
......@@ -98,27 +98,27 @@ impl std::str::FromStr for FinishReason {
}
}
impl From<FinishReason> for async_openai::types::CompletionFinishReason {
impl From<FinishReason> for dynamo_async_openai::types::CompletionFinishReason {
fn from(reason: FinishReason) -> Self {
match reason {
FinishReason::EoS | FinishReason::Stop | FinishReason::Cancelled => {
async_openai::types::CompletionFinishReason::Stop
dynamo_async_openai::types::CompletionFinishReason::Stop
}
FinishReason::ContentFilter => {
async_openai::types::CompletionFinishReason::ContentFilter
dynamo_async_openai::types::CompletionFinishReason::ContentFilter
}
FinishReason::Length => async_openai::types::CompletionFinishReason::Length,
FinishReason::Error(_) => async_openai::types::CompletionFinishReason::Stop,
FinishReason::Length => dynamo_async_openai::types::CompletionFinishReason::Length,
FinishReason::Error(_) => dynamo_async_openai::types::CompletionFinishReason::Stop,
}
}
}
impl From<async_openai::types::CompletionFinishReason> for FinishReason {
fn from(reason: async_openai::types::CompletionFinishReason) -> Self {
impl From<dynamo_async_openai::types::CompletionFinishReason> for FinishReason {
fn from(reason: dynamo_async_openai::types::CompletionFinishReason) -> Self {
match reason {
async_openai::types::CompletionFinishReason::Stop => FinishReason::Stop,
async_openai::types::CompletionFinishReason::Length => FinishReason::Length,
async_openai::types::CompletionFinishReason::ContentFilter => {
dynamo_async_openai::types::CompletionFinishReason::Stop => FinishReason::Stop,
dynamo_async_openai::types::CompletionFinishReason::Length => FinishReason::Length,
dynamo_async_openai::types::CompletionFinishReason::ContentFilter => {
FinishReason::ContentFilter
}
}
......
......@@ -44,7 +44,7 @@ pub use delta::DeltaGenerator;
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateChatCompletionRequest {
#[serde(flatten)]
pub inner: async_openai::types::CreateChatCompletionRequest,
pub inner: dynamo_async_openai::types::CreateChatCompletionRequest,
#[serde(flatten, default)]
pub common: CommonExt,
......@@ -62,7 +62,7 @@ pub struct NvCreateChatCompletionRequest {
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateChatCompletionResponse {
#[serde(flatten)]
pub inner: async_openai::types::CreateChatCompletionResponse,
pub inner: dynamo_async_openai::types::CreateChatCompletionResponse,
}
/// A response structure for streamed chat completions, embedding OpenAI's
......@@ -74,7 +74,7 @@ pub struct NvCreateChatCompletionResponse {
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateChatCompletionStreamResponse {
#[serde(flatten)]
pub inner: async_openai::types::CreateChatCompletionStreamResponse,
pub inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse,
}
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
......@@ -217,8 +217,8 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
/// * `None` if no stop conditions are defined.
fn get_stop(&self) -> Option<Vec<String>> {
self.inner.stop.as_ref().map(|stop| match stop {
async_openai::types::Stop::String(s) => vec![s.clone()],
async_openai::types::Stop::StringArray(arr) => arr.clone(),
dynamo_async_openai::types::Stop::String(s) => vec![s.clone()],
dynamo_async_openai::types::Stop::StringArray(arr) => arr.clone(),
})
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment