chore: Bring async-openai into repo as request starter (#2520)

Co-authored-by: Graham King <grahamk@nvidia.com>

chore: Bring async-openai into repo as request starter (#2520)
Co-authored-by: Graham King <grahamk@nvidia.com>
199b9a30 · nachiketb-nvidia · GitHub · 26d9f159 · 199b9a30 · 199b9a30
Unverified Commit 199b9a30 authored Aug 19, 2025 by nachiketb-nvidia Committed by GitHub Aug 19, 2025
20 changed files
--- a/lib/async-openai/tests/bring-your-own-type.rs
+++ b/lib/async-openai/tests/bring-your-own-type.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+#![allow(dead_code)]
+//! The purpose of this test to make sure that all _byot methods compiles with custom types.
+use std::pin::Pin;
+use dynamo_async_openai::{error::OpenAIError, Client};
+use futures::Stream;
+use serde_json::{json, Value};
+impl dynamo_async_openai::traits::AsyncTryFrom<MyJson> for reqwest::multipart::Form {
+    type Error = OpenAIError;
+    async fn try_from(_value: MyJson) -> Result<Self, Self::Error> {
+        Ok(reqwest::multipart::Form::new())
+    }
+}
+#[derive(Clone)]
+pub struct MyJson(Value);
+type MyStreamingType = Pin<Box<dyn Stream<Item = Result<Value, OpenAIError>> + Send>>;
+#[tokio::test]
+async fn test_byot_files() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.files().create_byot(MyJson(json!({}))).await;
+    let _r: Result<Value, OpenAIError> = client.files().list_byot([("limit", "2")]).await;
+    let _r: Result<Value, OpenAIError> = client.files().retrieve_byot("file_id").await;
+    let _r: Result<Value, OpenAIError> = client.files().delete_byot("file_id").await;
+}
+#[tokio::test]
+async fn test_byot_assistants() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.assistants().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.assistants().retrieve_byot("aid").await;
+    let _r: Result<Value, OpenAIError> = client.assistants().update_byot("aid", json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.assistants().list_byot([("limit", 2)]).await;
+}
+#[tokio::test]
+async fn test_byot_models() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.models().list_byot().await;
+    let _r: Result<Value, OpenAIError> = client.models().retrieve_byot("").await;
+    let _r: Result<Value, OpenAIError> = client.models().delete_byot(String::new()).await;
+}
+#[tokio::test]
+async fn test_byot_moderations() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.moderations().create_byot(json!({})).await;
+}
+#[tokio::test]
+async fn test_byot_images() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.images().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.images().create_edit_byot(MyJson(json!({}))).await;
+    let _r: Result<Value, OpenAIError> = client
+        .images()
+        .create_variation_byot(MyJson(json!({})))
+        .await;
+}
+#[tokio::test]
+async fn test_byot_chat() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.chat().create_byot(json!({})).await;
+    let _r: Result<MyStreamingType, OpenAIError> =
+        client.chat().create_stream_byot(json!({})).await;
+}
+#[tokio::test]
+async fn test_byot_completions() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.completions().create_byot(json!({})).await;
+    let _r: Result<MyStreamingType, OpenAIError> =
+        client.completions().create_stream_byot(json!({})).await;
+}
+#[tokio::test]
+async fn test_byot_audio() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.audio().transcribe_byot(MyJson(json!({}))).await;
+    let _r: Result<Value, OpenAIError> = client
+        .audio()
+        .transcribe_verbose_json_byot(MyJson(json!({})))
+        .await;
+    let _r: Result<Value, OpenAIError> = client.audio().translate_byot(MyJson(json!({}))).await;
+    let _r: Result<Value, OpenAIError> = client
+        .audio()
+        .translate_verbose_json_byot(MyJson(json!({})))
+        .await;
+}
+#[tokio::test]
+async fn test_byot_embeddings() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.embeddings().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.embeddings().create_base64_byot(json!({})).await;
+}
+#[tokio::test]
+async fn test_byot_fine_tunning() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.fine_tuning().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client
+        .fine_tuning()
+        .list_paginated_byot([("limit", "2")])
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .fine_tuning()
+        .retrieve_byot("fine_tunning_job_id")
+        .await;
+    let _r: Result<Value, OpenAIError> =
+        client.fine_tuning().cancel_byot("fine_tuning_job_id").await;
+    let _r: Result<Value, OpenAIError> = client
+        .fine_tuning()
+        .list_events_byot("fine_tuning_job_id", [("limit", "2")])
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .fine_tuning()
+        .list_checkpoints_byot("fine_tuning_job_id", [("limit", "2")])
+        .await;
+}
+#[derive(Clone, serde::Deserialize)]
+pub struct MyThreadJson(Value);
+impl TryFrom<eventsource_stream::Event> for MyThreadJson {
+    type Error = OpenAIError;
+    fn try_from(_value: eventsource_stream::Event) -> Result<Self, Self::Error> {
+        Ok(MyThreadJson(json!({})))
+    }
+}
+type MyThreadStreamingType = Pin<Box<dyn Stream<Item = Result<MyThreadJson, OpenAIError>> + Send>>;
+#[tokio::test]
+async fn test_byot_threads() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.threads().create_and_run_byot(json!({})).await;
+    let _r: Result<MyThreadStreamingType, OpenAIError> =
+        client.threads().create_and_run_stream_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.threads().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.threads().retrieve_byot("thread_id").await;
+    let _r: Result<Value, OpenAIError> = client.threads().update_byot("thread_id", json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.threads().delete_byot("thread_id").await;
+}
+#[tokio::test]
+async fn test_byot_messages() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .messages("thread_id")
+        .create_byot(json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .messages("thread_id")
+        .retrieve_byot("message_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .messages("thread_id")
+        .update_byot("message_id", json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .messages("thread_id")
+        .list_byot([("limit", "2")])
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .messages("thread_id")
+        .delete_byot("message_id")
+        .await;
+}
+#[tokio::test]
+async fn test_byot_runs() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .create_byot(json!({}))
+        .await;
+    let _r: Result<MyThreadStreamingType, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .create_stream_byot(json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .retrieve_byot("run_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .update_byot("run_id", json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .list_byot([("limit", "2")])
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .submit_tool_outputs_byot("run_id", json!({}))
+        .await;
+    let _r: Result<MyThreadStreamingType, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .submit_tool_outputs_stream_byot("run_id", json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .cancel_byot("run_id")
+        .await;
+}
+#[tokio::test]
+async fn test_byot_run_steps() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .steps("run_id")
+        .retrieve_byot("step_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .threads()
+        .runs("thread_id")
+        .steps("run_id")
+        .list_byot([("limit", "2")])
+        .await;
+}
+#[tokio::test]
+async fn test_byot_vector_store_files() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .files("vector_store_id")
+        .create_byot(json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .files("vector_store_id")
+        .retrieve_byot("file_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .files("vector_store_id")
+        .delete_byot("file_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .files("vector_store_id")
+        .list_byot([("limit", "2")])
+        .await;
+}
+#[tokio::test]
+async fn test_byot_vector_store_file_batches() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .file_batches("vector_store_id")
+        .create_byot(json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .file_batches("vector_store_id")
+        .retrieve_byot("file_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .file_batches("vector_store_id")
+        .cancel_byot("file_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .vector_stores()
+        .file_batches("vector_store_id")
+        .list_byot("batch_id", [("limit", "2")])
+        .await;
+}
+#[tokio::test]
+async fn test_byot_batches() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.batches().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.batches().list_byot([("limit", "2")]).await;
+    let _r: Result<Value, OpenAIError> = client.batches().retrieve_byot("batch_id").await;
+    let _r: Result<Value, OpenAIError> = client.batches().cancel_byot("batch_id").await;
+}
+#[tokio::test]
+async fn test_byot_audit_logs() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.audit_logs().get_byot([("limit", "2")]).await;
+}
+#[tokio::test]
+async fn test_byot_invites() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.invites().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.invites().retrieve_byot("invite_id").await;
+    let _r: Result<Value, OpenAIError> = client.invites().delete_byot("invite_id").await;
+    let _r: Result<Value, OpenAIError> = client.invites().list_byot([("limit", "2")]).await;
+}
+#[tokio::test]
+async fn test_byot_projects() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.projects().list_byot([("limit", "2")]).await;
+    let _r: Result<Value, OpenAIError> = client.projects().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.projects().retrieve_byot("project_id").await;
+    let _r: Result<Value, OpenAIError> =
+        client.projects().modify_byot("project_id", json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.projects().archive_byot("project_id").await;
+}
+#[tokio::test]
+async fn test_byot_project_api_keys() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .api_keys("project_id")
+        .list_byot([("query", "2")])
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .api_keys("project_id")
+        .retrieve_byot("api_key")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .api_keys("project_id")
+        .delete_byot("api_key")
+        .await;
+}
+#[tokio::test]
+async fn test_byot_project_service_accounts() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .service_accounts("project_id")
+        .create_byot(json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .service_accounts("project_id")
+        .delete_byot("service_account_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .service_accounts("project_id")
+        .retrieve_byot("service_account_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .service_accounts("project_id")
+        .list_byot([("limit", "2")])
+        .await;
+}
+#[tokio::test]
+async fn test_byot_project_users() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .users("project_id")
+        .create_byot(json!({}))
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .users("project_id")
+        .delete_byot("user_id")
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .users("project_id")
+        .list_byot([("limit", "2")])
+        .await;
+    let _r: Result<Value, OpenAIError> = client
+        .projects()
+        .users("project_id")
+        .retrieve_byot("user_id")
+        .await;
+}
+#[tokio::test]
+async fn test_byot_uploads() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.uploads().create_byot(json!({})).await;
+    let _r: Result<Value, OpenAIError> = client
+        .uploads()
+        .add_part_byot("upload_id", MyJson(json!({})))
+        .await;
+    let _r: Result<Value, OpenAIError> =
+        client.uploads().complete_byot("upload_id", json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.uploads().cancel_byot("upload_id").await;
+}
+#[tokio::test]
+async fn test_byot_users() {
+    let client = Client::new();
+    let _r: Result<Value, OpenAIError> = client.users().list_byot([("limit", "2")]).await;
+    let _r: Result<Value, OpenAIError> = client.users().modify_byot("user_id", json!({})).await;
+    let _r: Result<Value, OpenAIError> = client.users().retrieve_byot("user_id").await;
+    let _r: Result<Value, OpenAIError> = client.users().delete_byot("user_id").await;
+}
--- a/lib/async-openai/tests/completion.rs
+++ b/lib/async-openai/tests/completion.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+//! This test is primarily to make sure that macros_rules for From traits are correct.
+use dynamo_async_openai::types::Prompt;
+fn prompt_input<T>(input: T) -> Prompt
+where
+    Prompt: From<T>,
+{
+    input.into()
+}
+#[test]
+fn create_prompt_input() {
+    let prompt = "This is &str prompt";
+    let _ = prompt_input(prompt);
+    let prompt = "This is String".to_string();
+    let _ = prompt_input(&prompt);
+    let _ = prompt_input(prompt);
+    let prompt = vec!["This is first", "This is second"];
+    let _ = prompt_input(&prompt);
+    let _ = prompt_input(prompt);
+    let prompt = vec!["First string".to_string(), "Second string".to_string()];
+    let _ = prompt_input(&prompt);
+    let _ = prompt_input(prompt);
+    let first = "First".to_string();
+    let second = "Second".to_string();
+    let prompt = vec![&first, &second];
+    let _ = prompt_input(&prompt);
+    let _ = prompt_input(prompt);
+    let prompt = ["first", "second"];
+    let _ = prompt_input(&prompt);
+    let _ = prompt_input(prompt);
+    let prompt = ["first".to_string(), "second".to_string()];
+    let _ = prompt_input(&prompt);
+    let _ = prompt_input(prompt);
+    let first = "First".to_string();
+    let second = "Second".to_string();
+    let prompt = [&first, &second];
+    let _ = prompt_input(&prompt);
+    let _ = prompt_input(prompt);
+}
--- a/lib/async-openai/tests/embeddings.rs
+++ b/lib/async-openai/tests/embeddings.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+//! This test is primarily to make sure that macros_rules for From traits are correct.
+use dynamo_async_openai::types::EmbeddingInput;
+fn embedding_input<T>(input: T) -> EmbeddingInput
+where
+    EmbeddingInput: From<T>,
+{
+    input.into()
+}
+#[test]
+fn create_embedding_input() {
+    let input = [1, 2, 3];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+    let input = [[1, 2, 3], [4, 5, 6], [7, 8, 9]];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+    let (s1, s2, s3) = ([1, 2, 3], [4, 5, 6], [7, 8, 9]);
+    let input = [&s1, &s2, &s3];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+    let input = vec![1, 2, 3];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+    let input = vec![[1, 2, 3], [4, 5, 6], [7, 8, 9]];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+    let input = vec![vec![1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11, 12]];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+    let input = [vec![1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11, 12]];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+    let (v1, v2, v3) = (vec![1, 2, 3], vec![4, 5, 6, 7], vec![8, 9, 10, 11, 12]);
+    let input = [&v1, &v2, &v3];
+    let _ = embedding_input(&input);
+    let _ = embedding_input(input);
+}
--- a/lib/async-openai/tests/ser_de.rs
+++ b/lib/async-openai/tests/ser_de.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use dynamo_async_openai::types::{
+    ChatCompletionRequestSystemMessageArgs, ChatCompletionRequestUserMessageArgs,
+    CreateChatCompletionRequest, CreateChatCompletionRequestArgs,
+};
+#[tokio::test]
+async fn chat_types_serde() {
+    let request: CreateChatCompletionRequest = CreateChatCompletionRequestArgs::default()
+        .messages([
+            ChatCompletionRequestSystemMessageArgs::default()
+                .content("your are a calculator")
+                .build()
+                .unwrap()
+                .into(),
+            ChatCompletionRequestUserMessageArgs::default()
+                .content("what is the result of 1+1")
+                .build()
+                .unwrap()
+                .into(),
+        ])
+        .build()
+        .unwrap();
+    // serialize the request
+    let serialized = serde_json::to_string(&request).unwrap();
+    // deserialize the request
+    let deserialized: CreateChatCompletionRequest = serde_json::from_str(&serialized).unwrap();
+    assert_eq!(request, deserialized);
+}
--- a/lib/async-openai/tests/whisper.rs
+++ b/lib/async-openai/tests/whisper.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use dynamo_async_openai::types::CreateTranslationRequestArgs;
+use dynamo_async_openai::{types::CreateTranscriptionRequestArgs, Client};
+use tokio_test::assert_err;
+#[tokio::test]
+async fn transcribe_test() {
+    let client = Client::new();
+    let request = CreateTranscriptionRequestArgs::default().build().unwrap();
+    let response = client.audio().transcribe(request).await;
+    assert_err!(response); // FileReadError("cannot extract file name from ")
+}
+#[tokio::test]
+async fn transcribe_sendable_test() {
+    let client = Client::new();
+    // https://github.com/64bit/async-openai/issues/140
+    let transcribe = tokio::spawn(async move {
+        let request = CreateTranscriptionRequestArgs::default().build().unwrap();
+        client.audio().transcribe(request).await
+    });
+    let response = transcribe.await.unwrap();
+    assert_err!(response); // FileReadError("cannot extract file name from ")
+}
+#[tokio::test]
+async fn translate_test() {
+    let client = Client::new();
+    let request = CreateTranslationRequestArgs::default().build().unwrap();
+    let response = client.audio().translate(request).await;
+    assert_err!(response); // FileReadError("cannot extract file name from ")
+}
+#[tokio::test]
+async fn translate_sendable_test() {
+    let client = Client::new();
+    // https://github.com/64bit/async-openai/issues/140
+    let translate = tokio::spawn(async move {
+        let request = CreateTranslationRequestArgs::default().build().unwrap();
+        client.audio().translate(request).await
+    });
+    let response = translate.await.unwrap();
+    assert_err!(response); // FileReadError("cannot extract file name from ")
+}
--- a/lib/bindings/python/Cargo.toml
+++ b/lib/bindings/python/Cargo.toml
@@ -42,7 +42,7 @@ dynamo-llm = { path = "../../llm" }
 dynamo-runtime = { path = "../../runtime" }
 anyhow = { version = "1" }
-async-openai = { version = "0.29.0" }
+dynamo-async-openai = { path = "../../async-openai" }
 async-stream = { version = "0.3" }
 async-trait = { version = "0.1" }
 derive-getters = "0.5"

--- a/lib/engines/mistralrs/Cargo.toml
+++ b/lib/engines/mistralrs/Cargo.toml
@@ -34,7 +34,7 @@ dynamo-runtime = { workspace = true }
 dynamo-llm = { workspace = true }
 anyhow = { workspace = true }
-async-openai = { workspace = true }
+dynamo-async-openai = { workspace = true }
 async-stream = { workspace = true }
 async-trait = { workspace = true }
 either = { workspace = true }

--- a/lib/engines/mistralrs/src/lib.rs
+++ b/lib/engines/mistralrs/src/lib.rs
@@ -4,9 +4,9 @@
 use std::collections::HashMap;
 use std::{num::NonZero, sync::Arc};
-use async_openai::types::FinishReason;
 use async_stream::stream;
 use async_trait::async_trait;
+use dynamo_async_openai::types::FinishReason;
 use either::Either;
 use indexmap::IndexMap;
 use mistralrs::{
@@ -277,10 +277,10 @@ impl
        let mut messages = vec![];
        for m in request.inner.messages {
-            let async_openai::types::ChatCompletionRequestMessage::User(inner_m) = m else {
+            let dynamo_async_openai::types::ChatCompletionRequestMessage::User(inner_m) = m else {
                continue;
            };
-            let async_openai::types::ChatCompletionRequestUserMessageContent::Text(content) =
+            let dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(content) =
                inner_m.content
            else {
                anyhow::bail!("Only Text type chat completion supported");
@@ -396,13 +396,13 @@ impl
                        //tracing::trace!("from_assistant: {from_assistant}");
                        #[allow(deprecated)]
-                        let inner = async_openai::types::CreateChatCompletionStreamResponse{
+                        let inner = dynamo_async_openai::types::CreateChatCompletionStreamResponse{
                            id: c.id,
-                            choices: vec![async_openai::types::ChatChoiceStream{
+                            choices: vec![dynamo_async_openai::types::ChatChoiceStream{
                                index: 0,
-                                delta: async_openai::types::ChatCompletionStreamResponseDelta{
+                                delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta{
                                    //role: c.choices[0].delta.role,
-                                    role: Some(async_openai::types::Role::Assistant),
+                                    role: Some(dynamo_async_openai::types::Role::Assistant),
                                    content: Some(from_assistant),
                                    tool_calls: None,
                                    refusal: None,
@@ -441,10 +441,10 @@ impl
 }
 /// openai stop tokens to mistralrs stop tokens
-fn to_stop_tokens(t: async_openai::types::Stop) -> StopTokens {
+fn to_stop_tokens(t: dynamo_async_openai::types::Stop) -> StopTokens {
    match t {
-        async_openai::types::Stop::String(s) => StopTokens::Seqs(vec![s]),
+        dynamo_async_openai::types::Stop::String(s) => StopTokens::Seqs(vec![s]),
-        async_openai::types::Stop::StringArray(v) => StopTokens::Seqs(v),
+        dynamo_async_openai::types::Stop::StringArray(v) => StopTokens::Seqs(v),
    }
 }

--- a/lib/llm/Cargo.toml
+++ b/lib/llm/Cargo.toml
@@ -47,7 +47,7 @@ dynamo-runtime = { workspace = true }
 # workspace
 anyhow = { workspace = true }
-async-openai = { workspace = true }
+dynamo-async-openai = { workspace = true }
 async-stream = { workspace = true }
 async-trait = { workspace = true }
 async-nats = { workspace = true }

--- a/lib/llm/src/engines.rs
+++ b/lib/llm/src/engines.rs
@@ -188,11 +188,11 @@ impl
        let req = request.inner.messages.into_iter().next_back().unwrap();
        let prompt = match req {
-            async_openai::types::ChatCompletionRequestMessage::User(user_msg) => {
+            dynamo_async_openai::types::ChatCompletionRequestMessage::User(user_msg) => {
                match user_msg.content {
-                    async_openai::types::ChatCompletionRequestUserMessageContent::Text(prompt) => {
+                    dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
-                        prompt
+                        prompt,
-                    }
+                    ) => prompt,
                    _ => anyhow::bail!("Invalid request content field, expected Content::Text"),
                }
            }
@@ -212,7 +212,7 @@ impl
                id += 1;
            }
-            let inner = deltas.create_choice(0, None, Some(async_openai::types::FinishReason::Stop), None);
+            let inner = deltas.create_choice(0, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
            let response = NvCreateChatCompletionStreamResponse {
                inner,
            };
@@ -247,7 +247,7 @@ impl
                yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
                id += 1;
            }
-            let response = deltas.create_choice(0, None, Some(async_openai::types::CompletionFinishReason::Stop), None);
+            let response = deltas.create_choice(0, None, Some(dynamo_async_openai::types::CompletionFinishReason::Stop), None);
            yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
        };

--- a/lib/llm/src/entrypoint/input/batch.rs
+++ b/lib/llm/src/entrypoint/input/batch.rs
@@ -7,7 +7,7 @@ use crate::types::openai::chat_completions::{
    NvCreateChatCompletionRequest, OpenAIChatCompletionsStreamingEngine,
 };
 use anyhow::Context as _;
-use async_openai::types::FinishReason;
+use dynamo_async_openai::types::FinishReason;
 use dynamo_runtime::{pipeline::Context, runtime::CancellationToken, Runtime};
 use futures::StreamExt;
 use serde::{Deserialize, Serialize};
@@ -199,15 +199,15 @@ async fn evaluate(
    entry: &mut Entry,
    template: Option<Arc<RequestTemplate>>,
 ) -> anyhow::Result<String> {
-    let user_message = async_openai::types::ChatCompletionRequestMessage::User(
+    let user_message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
-        async_openai::types::ChatCompletionRequestUserMessage {
+        dynamo_async_openai::types::ChatCompletionRequestUserMessage {
-            content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                entry.text.clone(),
            ),
            name: None,
        },
    );
-    let inner = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let inner = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .messages(vec![user_message])
        .model(
            template

--- a/lib/llm/src/entrypoint/input/text.rs
+++ b/lib/llm/src/entrypoint/input/text.rs
@@ -81,15 +81,17 @@ async fn main_loop(
        };
        // Construct messages
-        let user_message = async_openai::types::ChatCompletionRequestMessage::User(
+        let user_message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
-            async_openai::types::ChatCompletionRequestUserMessage {
+            dynamo_async_openai::types::ChatCompletionRequestUserMessage {
-                content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(prompt),
+                content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+                    prompt,
+                ),
                name: None,
            },
        );
        messages.push(user_message);
        // Request
-        let inner = async_openai::types::CreateChatCompletionRequestArgs::default()
+        let inner = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
            .messages(messages.clone())
            .model(
                template
@@ -170,12 +172,12 @@ async fn main_loop(
        println!();
        let assistant_content =
-            async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(
+            dynamo_async_openai::types::ChatCompletionRequestAssistantMessageContent::Text(
                assistant_message,
            );
-        let assistant_message = async_openai::types::ChatCompletionRequestMessage::Assistant(
+        let assistant_message = dynamo_async_openai::types::ChatCompletionRequestMessage::Assistant(
-            async_openai::types::ChatCompletionRequestAssistantMessage {
+            dynamo_async_openai::types::ChatCompletionRequestAssistantMessage {
                content: Some(assistant_content),
                ..Default::default()
            },

--- a/lib/llm/src/http/client.rs
+++ b/lib/llm/src/http/client.rs
@@ -12,9 +12,9 @@ use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::time::Instant;
-use async_openai::{config::OpenAIConfig, error::OpenAIError, Client};
 use async_trait::async_trait;
 use derive_getters::Dissolve;
+use dynamo_async_openai::{config::OpenAIConfig, error::OpenAIError, Client};
 use futures::Stream;
 use serde_json::Value;
 use tokio_util::sync::CancellationToken;
@@ -234,7 +234,7 @@ pub type ByotResponseStream = DataStream<Result<Value, OpenAIError>>;
 /// Type alias for pure OpenAI chat response stream
 pub type OpenAIChatResponseStream =
-    DataStream<Result<async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>>;
+    DataStream<Result<dynamo_async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>>;
 /// A wrapped HTTP response stream that combines a stream with its context
 /// This provides a unified interface for HTTP client responses
@@ -330,7 +330,7 @@ pub type ByotHttpResponseStream = HttpResponseStream<Result<Value, OpenAIError>>
 /// Type alias for HttpResponseStream with pure OpenAI responses
 pub type OpenAIHttpResponseStream = HttpResponseStream<
-    Result<async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>,
+    Result<dynamo_async_openai::types::CreateChatCompletionStreamResponse, OpenAIError>,
 >;
 /// Pure OpenAI client using standard async-openai types
@@ -350,7 +350,7 @@ impl PureOpenAIClient {
    /// Uses a client-managed context
    pub async fn chat_stream(
        &self,
-        request: async_openai::types::CreateChatCompletionRequest,
+        request: dynamo_async_openai::types::CreateChatCompletionRequest,
    ) -> Result<OpenAIHttpResponseStream, HttpClientError> {
        let ctx = self.base.create_context();
        self.chat_stream_with_context(request, ctx).await
@@ -359,7 +359,7 @@ impl PureOpenAIClient {
    /// Create streaming chat completions with a custom context
    pub async fn chat_stream_with_context(
        &self,
-        request: async_openai::types::CreateChatCompletionRequest,
+        request: dynamo_async_openai::types::CreateChatCompletionRequest,
        context: HttpRequestContext,
    ) -> Result<OpenAIHttpResponseStream, HttpClientError> {
        let ctx_arc: Arc<dyn AsyncEngineContext> = Arc::new(context.clone());

--- a/lib/llm/src/http/service/openai.rs
+++ b/lib/llm/src/http/service/openai.rs
@@ -763,7 +763,7 @@ pub fn validate_response_input_is_text_only(
    request: &NvCreateResponse,
 ) -> Option<impl IntoResponse> {
    match &request.inner.input {
-        async_openai::types::responses::Input::Text(_) => None,
+        dynamo_async_openai::types::responses::Input::Text(_) => None,
        _ => Some(ErrorMessage::not_implemented_error("Only `Input::Text` is supported. Structured, multimedia, or custom input types are not yet implemented.")),
    }
 }
@@ -1069,12 +1069,12 @@ pub fn responses_router(
 mod tests {
    use std::collections::HashMap;
-    use async_openai::types::responses::{
+    use dynamo_async_openai::types::responses::{
        CreateResponse, Input, InputContent, InputItem, InputMessage, PromptConfig,
        Role as ResponseRole, ServiceTier, TextConfig, TextResponseFormat, ToolChoice,
        ToolChoiceMode, Truncation,
    };
-    use async_openai::types::{
+    use dynamo_async_openai::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
        ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
    };

--- a/lib/llm/src/perf/logprobs.rs
+++ b/lib/llm/src/perf/logprobs.rs
@@ -572,7 +572,7 @@ mod tests {
    use crate::protocols::codec::create_message_stream;
    use crate::protocols::convert_sse_stream;
    use approx::assert_abs_diff_eq;
-    use async_openai::types::{
+    use dynamo_async_openai::types::{
        ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
        ChatCompletionTokenLogprob, CreateChatCompletionStreamResponse, FinishReason, Role,
        TopLogprobs,
@@ -1556,7 +1556,7 @@ mod tests {
    fn create_mock_response() -> NvCreateChatCompletionStreamResponse {
        // Create a mock response for testing
        // In practice, this would have real logprobs data
-        use async_openai::types::CreateChatCompletionStreamResponse;
+        use dynamo_async_openai::types::CreateChatCompletionStreamResponse;
        let inner = CreateChatCompletionStreamResponse {
            id: "test_id".to_string(),

--- a/lib/llm/src/postprocessor/tool_calling/tools.rs
+++ b/lib/llm/src/postprocessor/tool_calling/tools.rs
@@ -14,7 +14,7 @@ pub use super::parsers::{detect_and_parse_tool_call, ToolCallConfig};
 pub fn try_tool_call_parse_aggregate(
    message: &str,
    parser_str: Option<&str>,
-) -> anyhow::Result<Vec<async_openai::types::ChatCompletionMessageToolCall>> {
+) -> anyhow::Result<Vec<dynamo_async_openai::types::ChatCompletionMessageToolCall>> {
    let parsed = detect_and_parse_tool_call(message, parser_str)?;
    if parsed.is_empty() {
        return Ok(vec![]);
@@ -22,10 +22,10 @@ pub fn try_tool_call_parse_aggregate(
    Ok(parsed
        .into_iter()
        .map(
-            |parsed| async_openai::types::ChatCompletionMessageToolCall {
+            |parsed| dynamo_async_openai::types::ChatCompletionMessageToolCall {
                id: parsed.id,
-                r#type: async_openai::types::ChatCompletionToolType::Function,
+                r#type: dynamo_async_openai::types::ChatCompletionToolType::Function,
-                function: async_openai::types::FunctionCall {
+                function: dynamo_async_openai::types::FunctionCall {
                    name: parsed.function.name,
                    arguments: parsed.function.arguments,
                },
@@ -40,7 +40,7 @@ pub fn try_tool_call_parse_aggregate(
 pub fn try_tool_call_parse_stream(
    message: &str,
    parser_str: Option<&str>,
-) -> anyhow::Result<Vec<async_openai::types::ChatCompletionMessageToolCallChunk>> {
+) -> anyhow::Result<Vec<dynamo_async_openai::types::ChatCompletionMessageToolCallChunk>> {
    let parsed = detect_and_parse_tool_call(message, parser_str)?;
    if parsed.is_empty() {
        return Ok(vec![]);
@@ -49,11 +49,11 @@ pub fn try_tool_call_parse_stream(
        .into_iter()
        .enumerate()
        .map(
-            |(idx, parsed)| async_openai::types::ChatCompletionMessageToolCallChunk {
+            |(idx, parsed)| dynamo_async_openai::types::ChatCompletionMessageToolCallChunk {
                index: idx as u32,
                id: Some(parsed.id),
-                r#type: Some(async_openai::types::ChatCompletionToolType::Function),
+                r#type: Some(dynamo_async_openai::types::ChatCompletionToolType::Function),
-                function: Some(async_openai::types::FunctionCallStream {
+                function: Some(dynamo_async_openai::types::FunctionCallStream {
                    name: Some(parsed.function.name),
                    arguments: Some(parsed.function.arguments),
                }),

--- a/lib/llm/src/preprocessor.rs
+++ b/lib/llm/src/preprocessor.rs
@@ -15,7 +15,7 @@ pub mod prompt;
 pub mod tools;
 use anyhow::Result;
-use async_openai::types::EncodingFormat;
+use dynamo_async_openai::types::EncodingFormat;
 use futures::stream::{self, StreamExt};
 use prompt::OAIPromptFormatter;
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
@@ -276,11 +276,11 @@ impl OpenAIPreprocessor {
        let mut builder = PreprocessedEmbeddingRequest::builder();
        let all_token_ids = match &request.inner.input {
-            async_openai::types::EmbeddingInput::String(s) => {
+            dynamo_async_openai::types::EmbeddingInput::String(s) => {
                let encoding = self.tokenizer.encode(s)?;
                vec![encoding.token_ids().to_vec()]
            }
-            async_openai::types::EmbeddingInput::StringArray(arr) => {
+            dynamo_async_openai::types::EmbeddingInput::StringArray(arr) => {
                let input_strs: Vec<String> = arr.to_vec();
                let encodings = tokio::task::spawn_blocking({
                    let tokenizer = self.tokenizer.clone();
@@ -296,8 +296,10 @@ impl OpenAIPreprocessor {
                    .collect();
                token_arrays
            }
-            async_openai::types::EmbeddingInput::IntegerArray(token_ids) => vec![token_ids.clone()],
+            dynamo_async_openai::types::EmbeddingInput::IntegerArray(token_ids) => {
-            async_openai::types::EmbeddingInput::ArrayOfIntegerArray(token_arrays) => {
+                vec![token_ids.clone()]
+            }
+            dynamo_async_openai::types::EmbeddingInput::ArrayOfIntegerArray(token_arrays) => {
                token_arrays.clone()
            }
        };
@@ -437,11 +439,11 @@ impl OpenAIPreprocessor {
        let transformed_stream = stream.map(move |output| {
            output.map_data(|engine_output| {
                // Convert engine output to OpenAI response format
-                let embeddings: Vec<async_openai::types::Embedding> = engine_output
+                let embeddings: Vec<dynamo_async_openai::types::Embedding> = engine_output
                    .embeddings
                    .into_iter()
                    .enumerate()
-                    .map(|(index, embedding)| async_openai::types::Embedding {
+                    .map(|(index, embedding)| dynamo_async_openai::types::Embedding {
                        index: index as u32,
                        object: "embedding".to_string(),
                        embedding: embedding.into_iter().map(|f| f as f32).collect(),
@@ -449,11 +451,11 @@ impl OpenAIPreprocessor {
                    .collect();
                let response = NvCreateEmbeddingResponse {
-                    inner: async_openai::types::CreateEmbeddingResponse {
+                    inner: dynamo_async_openai::types::CreateEmbeddingResponse {
                        object: "list".to_string(),
                        model: original_request.inner.model.clone(),
                        data: embeddings,
-                        usage: async_openai::types::EmbeddingUsage {
+                        usage: dynamo_async_openai::types::EmbeddingUsage {
                            prompt_tokens: engine_output.prompt_tokens,
                            total_tokens: engine_output.total_tokens,
                        },

--- a/lib/llm/src/preprocessor/prompt/template/oai.rs
+++ b/lib/llm/src/preprocessor/prompt/template/oai.rs
@@ -53,7 +53,7 @@ impl OAIChatLikeRequest for NvCreateChatCompletionRequest {
        if let Some(last) = self.inner.messages.last() {
            matches!(
                last,
-                async_openai::types::ChatCompletionRequestMessage::User(_)
+                dynamo_async_openai::types::ChatCompletionRequestMessage::User(_)
            )
        } else {
            true
@@ -70,9 +70,9 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
        self.inner.model.clone()
    }
    fn messages(&self) -> minijinja::value::Value {
-        let message = async_openai::types::ChatCompletionRequestMessage::User(
+        let message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
-            async_openai::types::ChatCompletionRequestUserMessage {
+            dynamo_async_openai::types::ChatCompletionRequestUserMessage {
-                content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+                content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                    crate::protocols::openai::completions::prompt_to_string(&self.inner.prompt),
                ),
                name: None,
@@ -88,16 +88,16 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
    fn prompt_input_type(&self) -> PromptInput {
        match &self.inner.prompt {
-            async_openai::types::Prompt::IntegerArray(_) => {
+            dynamo_async_openai::types::Prompt::IntegerArray(_) => {
                PromptInput::Tokens(TokenInput::Single(vec![]))
            }
-            async_openai::types::Prompt::ArrayOfIntegerArray(_) => {
+            dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(_) => {
                PromptInput::Tokens(TokenInput::Batch(vec![]))
            }
-            async_openai::types::Prompt::String(_) => {
+            dynamo_async_openai::types::Prompt::String(_) => {
                PromptInput::Text(TextInput::Single(String::new()))
            }
-            async_openai::types::Prompt::StringArray(_) => {
+            dynamo_async_openai::types::Prompt::StringArray(_) => {
                PromptInput::Text(TextInput::Batch(vec![]))
            }
        }
@@ -105,10 +105,10 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
    fn extract_tokens(&self) -> Option<TokenInput> {
        match &self.inner.prompt {
-            async_openai::types::Prompt::IntegerArray(tokens) => {
+            dynamo_async_openai::types::Prompt::IntegerArray(tokens) => {
                Some(TokenInput::Single(tokens.clone()))
            }
-            async_openai::types::Prompt::ArrayOfIntegerArray(arrays) => {
+            dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(arrays) => {
                Some(TokenInput::Batch(arrays.clone()))
            }
            _ => None,
@@ -117,8 +117,10 @@ impl OAIChatLikeRequest for NvCreateCompletionRequest {
    fn extract_text(&self) -> Option<TextInput> {
        match &self.inner.prompt {
-            async_openai::types::Prompt::String(text) => Some(TextInput::Single(text.to_string())),
+            dynamo_async_openai::types::Prompt::String(text) => {
-            async_openai::types::Prompt::StringArray(texts) => {
+                Some(TextInput::Single(text.to_string()))
+            }
+            dynamo_async_openai::types::Prompt::StringArray(texts) => {
                Some(TextInput::Batch(texts.to_vec()))
            }
            _ => None,

--- a/lib/llm/src/protocols/common.rs
+++ b/lib/llm/src/protocols/common.rs
@@ -98,27 +98,27 @@ impl std::str::FromStr for FinishReason {
    }
 }
-impl From<FinishReason> for async_openai::types::CompletionFinishReason {
+impl From<FinishReason> for dynamo_async_openai::types::CompletionFinishReason {
    fn from(reason: FinishReason) -> Self {
        match reason {
            FinishReason::EoS | FinishReason::Stop | FinishReason::Cancelled => {
-                async_openai::types::CompletionFinishReason::Stop
+                dynamo_async_openai::types::CompletionFinishReason::Stop
            }
            FinishReason::ContentFilter => {
-                async_openai::types::CompletionFinishReason::ContentFilter
+                dynamo_async_openai::types::CompletionFinishReason::ContentFilter
            }
-            FinishReason::Length => async_openai::types::CompletionFinishReason::Length,
+            FinishReason::Length => dynamo_async_openai::types::CompletionFinishReason::Length,
-            FinishReason::Error(_) => async_openai::types::CompletionFinishReason::Stop,
+            FinishReason::Error(_) => dynamo_async_openai::types::CompletionFinishReason::Stop,
        }
    }
 }
-impl From<async_openai::types::CompletionFinishReason> for FinishReason {
+impl From<dynamo_async_openai::types::CompletionFinishReason> for FinishReason {
-    fn from(reason: async_openai::types::CompletionFinishReason) -> Self {
+    fn from(reason: dynamo_async_openai::types::CompletionFinishReason) -> Self {
        match reason {
-            async_openai::types::CompletionFinishReason::Stop => FinishReason::Stop,
+            dynamo_async_openai::types::CompletionFinishReason::Stop => FinishReason::Stop,
-            async_openai::types::CompletionFinishReason::Length => FinishReason::Length,
+            dynamo_async_openai::types::CompletionFinishReason::Length => FinishReason::Length,
-            async_openai::types::CompletionFinishReason::ContentFilter => {
+            dynamo_async_openai::types::CompletionFinishReason::ContentFilter => {
                FinishReason::ContentFilter
            }
        }

--- a/lib/llm/src/protocols/openai/chat_completions.rs
+++ b/lib/llm/src/protocols/openai/chat_completions.rs
@@ -44,7 +44,7 @@ pub use delta::DeltaGenerator;
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateChatCompletionRequest {
    #[serde(flatten)]
-    pub inner: async_openai::types::CreateChatCompletionRequest,
+    pub inner: dynamo_async_openai::types::CreateChatCompletionRequest,
    #[serde(flatten, default)]
    pub common: CommonExt,
@@ -62,7 +62,7 @@ pub struct NvCreateChatCompletionRequest {
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateChatCompletionResponse {
    #[serde(flatten)]
-    pub inner: async_openai::types::CreateChatCompletionResponse,
+    pub inner: dynamo_async_openai::types::CreateChatCompletionResponse,
 }
 /// A response structure for streamed chat completions, embedding OpenAI's
@@ -74,7 +74,7 @@ pub struct NvCreateChatCompletionResponse {
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateChatCompletionStreamResponse {
    #[serde(flatten)]
-    pub inner: async_openai::types::CreateChatCompletionStreamResponse,
+    pub inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse,
 }
 /// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
@@ -217,8 +217,8 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
    /// * `None` if no stop conditions are defined.
    fn get_stop(&self) -> Option<Vec<String>> {
        self.inner.stop.as_ref().map(|stop| match stop {
-            async_openai::types::Stop::String(s) => vec![s.clone()],
+            dynamo_async_openai::types::Stop::String(s) => vec![s.clone()],
-            async_openai::types::Stop::StringArray(arr) => arr.clone(),
+            dynamo_async_openai::types::Stop::StringArray(arr) => arr.clone(),
        })
    }