chore: Bring async-openai into repo as request starter (#2520)

Co-authored-by: Graham King <grahamk@nvidia.com>

chore: Bring async-openai into repo as request starter (#2520)
Co-authored-by: Graham King <grahamk@nvidia.com>
199b9a30 · nachiketb-nvidia · GitHub · 26d9f159 · 199b9a30 · 199b9a30
Unverified Commit 199b9a30 authored Aug 19, 2025 by nachiketb-nvidia Committed by GitHub Aug 19, 2025
14 changed files
--- a/lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/aggregator.rs
@@ -35,7 +35,7 @@ pub struct DeltaAggregator {
    /// Timestamp (Unix epoch) indicating when the response was created.
    created: u32,
    /// Optional usage statistics for the completion request.
-    usage: Option<async_openai::types::CompletionUsage>,
+    usage: Option<dynamo_async_openai::types::CompletionUsage>,
    /// Optional system fingerprint for version tracking.
    system_fingerprint: Option<String>,
    /// Map of incremental response choices, keyed by index.
@@ -43,7 +43,7 @@ pub struct DeltaAggregator {
    /// Optional error message if an error occurs during aggregation.
    error: Option<String>,
    /// Optional service tier information for the response.
-    service_tier: Option<async_openai::types::ServiceTierResponse>,
+    service_tier: Option<dynamo_async_openai::types::ServiceTierResponse>,
 }

 /// Represents the accumulated state of a single chat choice during streaming aggregation.
@@ -53,13 +53,13 @@ struct DeltaChoice {
    /// The accumulated text content for the choice.
    text: String,
    /// The role associated with this message (e.g., `system`, `user`, `assistant`).
-    role: Option<async_openai::types::Role>,
+    role: Option<dynamo_async_openai::types::Role>,
    /// The reason the completion was finished (if applicable).
-    finish_reason: Option<async_openai::types::FinishReason>,
+    finish_reason: Option<dynamo_async_openai::types::FinishReason>,
    /// Optional log probabilities for the chat choice.
-    logprobs: Option<async_openai::types::ChatChoiceLogprobs>,
+    logprobs: Option<dynamo_async_openai::types::ChatChoiceLogprobs>,
    // Optional tool calls for the chat choice.
-    tool_calls: Option<Vec<async_openai::types::ChatCompletionMessageToolCall>>,
+    tool_calls: Option<Vec<dynamo_async_openai::types::ChatCompletionMessageToolCall>>,
 }

 impl Default for DeltaAggregator {
@@ -182,7 +182,8 @@ impl DeltaAggregator {
                    }
                    choice.tool_calls = Some(tool_calls);
                    choice.text.clear();
-                    choice.finish_reason = Some(async_openai::types::FinishReason::ToolCalls);
+                    choice.finish_reason =
+                        Some(dynamo_async_openai::types::FinishReason::ToolCalls);
                }
            }
        }
@@ -191,13 +192,13 @@ impl DeltaAggregator {
        let mut choices: Vec<_> = aggregator
            .choices
            .into_values()
-            .map(async_openai::types::ChatChoice::from)
+            .map(dynamo_async_openai::types::ChatChoice::from)
            .collect();

        choices.sort_by(|a, b| a.index.cmp(&b.index));

        // Construct the final response object.
-        let inner = async_openai::types::CreateChatCompletionResponse {
+        let inner = dynamo_async_openai::types::CreateChatCompletionResponse {
            id: aggregator.id,
            created: aggregator.created,
            usage: aggregator.usage,
@@ -215,14 +216,14 @@ impl DeltaAggregator {
 }

 #[allow(deprecated)]
-impl From<DeltaChoice> for async_openai::types::ChatChoice {
-    /// Converts a [`DeltaChoice`] into an [`async_openai::types::ChatChoice`].
+impl From<DeltaChoice> for dynamo_async_openai::types::ChatChoice {
+    /// Converts a [`DeltaChoice`] into a [`dynamo_async_openai::types::ChatChoice`].
    ///
    /// # Note
    /// The `function_call` field is deprecated.
    fn from(delta: DeltaChoice) -> Self {
-        async_openai::types::ChatChoice {
-            message: async_openai::types::ChatCompletionResponseMessage {
+        dynamo_async_openai::types::ChatChoice {
+            message: dynamo_async_openai::types::ChatCompletionResponseMessage {
                role: delta.role.expect("delta should have a Role"),
                content: if delta.tool_calls.is_some() {
                    None
@@ -282,25 +283,25 @@ mod tests {
    fn create_test_delta(
        index: u32,
        text: &str,
-        role: Option<async_openai::types::Role>,
-        finish_reason: Option<async_openai::types::FinishReason>,
+        role: Option<dynamo_async_openai::types::Role>,
+        finish_reason: Option<dynamo_async_openai::types::FinishReason>,
    ) -> Annotated<NvCreateChatCompletionStreamResponse> {
        // ALLOW: function_call is deprecated
-        let delta = async_openai::types::ChatCompletionStreamResponseDelta {
+        let delta = dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
            content: Some(text.to_string()),
            function_call: None,
            tool_calls: None,
            role,
            refusal: None,
        };
-        let choice = async_openai::types::ChatChoiceStream {
+        let choice = dynamo_async_openai::types::ChatChoiceStream {
            index,
            delta,
            finish_reason,
            logprobs: None,
        };

-        let inner = async_openai::types::CreateChatCompletionStreamResponse {
+        let inner = dynamo_async_openai::types::CreateChatCompletionStreamResponse {
            id: "test_id".to_string(),
            model: "meta/llama-3.1-8b-instruct".to_string(),
            created: 1234567890,
@@ -347,8 +348,12 @@ mod tests {
    #[tokio::test]
    async fn test_single_delta() {
        // Create a sample delta
-        let annotated_delta =
-            create_test_delta(0, "Hello,", Some(async_openai::types::Role::User), None);
+        let annotated_delta = create_test_delta(
+            0,
+            "Hello,",
+            Some(dynamo_async_openai::types::Role::User),
+            None,
+        );

        // Create a stream
        let stream = Box::pin(stream::iter(vec![annotated_delta]));
@@ -371,7 +376,7 @@ mod tests {
        assert_eq!(choice.index, 0);
        assert_eq!(choice.message.content.as_ref().unwrap(), "Hello,");
        assert!(choice.finish_reason.is_none());
-        assert_eq!(choice.message.role, async_openai::types::Role::User);
+        assert_eq!(choice.message.role, dynamo_async_openai::types::Role::User);
        assert!(response.inner.service_tier.is_none());
    }

@@ -380,13 +385,17 @@ mod tests {
        // Create multiple deltas with the same choice index
        // One will have a MessageRole and no FinishReason,
        // the other will have a FinishReason and no MessageRole
-        let annotated_delta1 =
-            create_test_delta(0, "Hello,", Some(async_openai::types::Role::User), None);
+        let annotated_delta1 = create_test_delta(
+            0,
+            "Hello,",
+            Some(dynamo_async_openai::types::Role::User),
+            None,
+        );
        let annotated_delta2 = create_test_delta(
            0,
            " world!",
            None,
-            Some(async_openai::types::FinishReason::Stop),
+            Some(dynamo_async_openai::types::FinishReason::Stop),
        );

        // Create a stream
@@ -407,9 +416,9 @@ mod tests {
        assert_eq!(choice.message.content.as_ref().unwrap(), "Hello, world!");
        assert_eq!(
            choice.finish_reason,
-            Some(async_openai::types::FinishReason::Stop)
+            Some(dynamo_async_openai::types::FinishReason::Stop)
        );
-        assert_eq!(choice.message.role, async_openai::types::Role::User);
+        assert_eq!(choice.message.role, dynamo_async_openai::types::Role::User);
    }

    #[allow(deprecated)]
@@ -417,7 +426,7 @@ mod tests {
    async fn test_multiple_choices() {
        // Create a delta with multiple choices
        // ALLOW: function_call is deprecated
-        let delta = async_openai::types::CreateChatCompletionStreamResponse {
+        let delta = dynamo_async_openai::types::CreateChatCompletionStreamResponse {
            id: "test_id".to_string(),
            model: "test_model".to_string(),
            created: 1234567890,
@@ -425,28 +434,28 @@ mod tests {
            usage: None,
            system_fingerprint: None,
            choices: vec![
-                async_openai::types::ChatChoiceStream {
+                dynamo_async_openai::types::ChatChoiceStream {
                    index: 0,
-                    delta: async_openai::types::ChatCompletionStreamResponseDelta {
-                        role: Some(async_openai::types::Role::Assistant),
+                    delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
+                        role: Some(dynamo_async_openai::types::Role::Assistant),
                        content: Some("Choice 0".to_string()),
                        function_call: None,
                        tool_calls: None,
                        refusal: None,
                    },
-                    finish_reason: Some(async_openai::types::FinishReason::Stop),
+                    finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop),
                    logprobs: None,
                },
-                async_openai::types::ChatChoiceStream {
+                dynamo_async_openai::types::ChatChoiceStream {
                    index: 1,
-                    delta: async_openai::types::ChatCompletionStreamResponseDelta {
-                        role: Some(async_openai::types::Role::Assistant),
+                    delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
+                        role: Some(dynamo_async_openai::types::Role::Assistant),
                        content: Some("Choice 1".to_string()),
                        function_call: None,
                        tool_calls: None,
                        refusal: None,
                    },
-                    finish_reason: Some(async_openai::types::FinishReason::Stop),
+                    finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop),
                    logprobs: None,
                },
            ],
@@ -479,18 +488,24 @@ mod tests {
        assert_eq!(choice0.message.content.as_ref().unwrap(), "Choice 0");
        assert_eq!(
            choice0.finish_reason,
-            Some(async_openai::types::FinishReason::Stop)
+            Some(dynamo_async_openai::types::FinishReason::Stop)
+        );
+        assert_eq!(
+            choice0.message.role,
+            dynamo_async_openai::types::Role::Assistant
        );
-        assert_eq!(choice0.message.role, async_openai::types::Role::Assistant);

        let choice1 = &response.inner.choices[1];
        assert_eq!(choice1.index, 1);
        assert_eq!(choice1.message.content.as_ref().unwrap(), "Choice 1");
        assert_eq!(
            choice1.finish_reason,
-            Some(async_openai::types::FinishReason::Stop)
+            Some(dynamo_async_openai::types::FinishReason::Stop)
+        );
+        assert_eq!(
+            choice1.message.role,
+            dynamo_async_openai::types::Role::Assistant
        );
-        assert_eq!(choice1.message.role, async_openai::types::Role::Assistant);
    }

    #[tokio::test]
@@ -502,8 +517,8 @@ mod tests {
        let annotated_delta = create_test_delta(
            0,
            tool_call_json,
-            Some(async_openai::types::Role::Assistant),
-            Some(async_openai::types::FinishReason::ToolCalls),
+            Some(dynamo_async_openai::types::Role::Assistant),
+            Some(dynamo_async_openai::types::FinishReason::ToolCalls),
        );
        let delta = annotated_delta.data.unwrap().inner;

@@ -547,8 +562,11 @@ mod tests {
        // The finish_reason should be ToolCalls
        assert_eq!(
            choice.finish_reason,
-            Some(async_openai::types::FinishReason::ToolCalls)
+            Some(dynamo_async_openai::types::FinishReason::ToolCalls)
+        );
+        assert_eq!(
+            choice.message.role,
+            dynamo_async_openai::types::Role::Assistant
        );
-        assert_eq!(choice.message.role, async_openai::types::Role::Assistant);
    }
 }
--- a/lib/llm/src/protocols/openai/chat_completions/delta.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/delta.rs
@@ -59,9 +59,9 @@ pub struct DeltaGenerator {
    /// Optional system fingerprint for version tracking.
    system_fingerprint: Option<String>,
    /// Optional service tier information for the response.
-    service_tier: Option<async_openai::types::ServiceTierResponse>,
+    service_tier: Option<dynamo_async_openai::types::ServiceTierResponse>,
    /// Tracks token usage for the completion request.
-    usage: async_openai::types::CompletionUsage,
+    usage: dynamo_async_openai::types::CompletionUsage,
    /// Counter tracking the number of messages issued.
    msg_counter: u64,
    /// Configuration options for response generation.
@@ -87,7 +87,7 @@ impl DeltaGenerator {
        // but this will not be an issue until 2106.
        let now: u32 = now.try_into().expect("timestamp exceeds u32::MAX");

-        let usage = async_openai::types::CompletionUsage {
+        let usage = dynamo_async_openai::types::CompletionUsage {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
@@ -122,7 +122,7 @@ impl DeltaGenerator {
        token_ids: Vec<TokenIdType>,
        logprobs: Option<common::llm_backend::LogProbs>,
        top_logprobs: Option<common::llm_backend::TopLogprobs>,
-    ) -> Option<async_openai::types::ChatChoiceLogprobs> {
+    ) -> Option<dynamo_async_openai::types::ChatChoiceLogprobs> {
        if !self.options.enable_logprobs || logprobs.is_none() {
            return None;
        }
@@ -150,22 +150,22 @@ impl DeltaGenerator {
                            let top_t = top_lp.token.clone().unwrap_or_default();
                            let top_tid = top_lp.token_id;
                            found_selected_token = found_selected_token || top_tid == *tid;
-                            async_openai::types::TopLogprobs {
+                            dynamo_async_openai::types::TopLogprobs {
                                token: top_t,
                                logprob: top_lp.logprob as f32,
                                bytes: None,
                            }
                        })
-                        .collect::<Vec<async_openai::types::TopLogprobs>>();
+                        .collect::<Vec<dynamo_async_openai::types::TopLogprobs>>();
                    if !found_selected_token {
                        // If the selected token is not in the top logprobs, add it
-                        converted_top_lps.push(async_openai::types::TopLogprobs {
+                        converted_top_lps.push(dynamo_async_openai::types::TopLogprobs {
                            token: t.clone(),
                            logprob: lp,
                            bytes: None,
                        });
                    }
-                    async_openai::types::ChatCompletionTokenLogprob {
+                    dynamo_async_openai::types::ChatCompletionTokenLogprob {
                        token: t.clone(),
                        logprob: lp,
                        bytes: None,
@@ -175,7 +175,7 @@ impl DeltaGenerator {
                .collect()
        });

-        Some(async_openai::types::ChatChoiceLogprobs {
+        Some(dynamo_async_openai::types::ChatChoiceLogprobs {
            content,
            refusal: None,
        })
@@ -190,28 +190,28 @@ impl DeltaGenerator {
    /// * `logprobs` - Optional log probabilities of the generated tokens.
    ///
    /// # Returns
-    /// * An [`async_openai::types::CreateChatCompletionStreamResponse`] instance representing the choice.
+    /// * A [`dynamo_async_openai::types::CreateChatCompletionStreamResponse`] instance representing the choice.
    #[allow(deprecated)]
    pub fn create_choice(
        &self,
        index: u32,
        text: Option<String>,
-        finish_reason: Option<async_openai::types::FinishReason>,
-        logprobs: Option<async_openai::types::ChatChoiceLogprobs>,
-    ) -> async_openai::types::CreateChatCompletionStreamResponse {
-        let delta = async_openai::types::ChatCompletionStreamResponseDelta {
+        finish_reason: Option<dynamo_async_openai::types::FinishReason>,
+        logprobs: Option<dynamo_async_openai::types::ChatChoiceLogprobs>,
+    ) -> dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+        let delta = dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
            content: text,
            function_call: None,
            tool_calls: None,
            role: if self.msg_counter == 0 {
-                Some(async_openai::types::Role::Assistant)
+                Some(dynamo_async_openai::types::Role::Assistant)
            } else {
                None
            },
            refusal: None,
        };

-        let choice = async_openai::types::ChatChoiceStream {
+        let choice = dynamo_async_openai::types::ChatChoiceStream {
            index,
            delta,
            finish_reason,
@@ -225,7 +225,7 @@ impl DeltaGenerator {
            usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
        }

-        async_openai::types::CreateChatCompletionStreamResponse {
+        dynamo_async_openai::types::CreateChatCompletionStreamResponse {
            id: self.id.clone(),
            object: self.object.clone(),
            created: self.created,
@@ -281,12 +281,18 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes

        // Map backend finish reasons to OpenAI's finish reasons.
        let finish_reason = match delta.finish_reason {
-            Some(common::FinishReason::EoS) => Some(async_openai::types::FinishReason::Stop),
-            Some(common::FinishReason::Stop) => Some(async_openai::types::FinishReason::Stop),
-            Some(common::FinishReason::Length) => Some(async_openai::types::FinishReason::Length),
-            Some(common::FinishReason::Cancelled) => Some(async_openai::types::FinishReason::Stop),
+            Some(common::FinishReason::EoS) => Some(dynamo_async_openai::types::FinishReason::Stop),
+            Some(common::FinishReason::Stop) => {
+                Some(dynamo_async_openai::types::FinishReason::Stop)
+            }
+            Some(common::FinishReason::Length) => {
+                Some(dynamo_async_openai::types::FinishReason::Length)
+            }
+            Some(common::FinishReason::Cancelled) => {
+                Some(dynamo_async_openai::types::FinishReason::Stop)
+            }
            Some(common::FinishReason::ContentFilter) => {
-                Some(async_openai::types::FinishReason::ContentFilter)
+                Some(dynamo_async_openai::types::FinishReason::ContentFilter)
            }
            Some(common::FinishReason::Error(err_msg)) => {
                return Err(anyhow::anyhow!(err_msg));

--- a/lib/llm/src/protocols/openai/completions.rs
+++ b/lib/llm/src/protocols/openai/completions.rs
@@ -37,7 +37,7 @@ pub use delta::DeltaGenerator;
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateCompletionRequest {
    #[serde(flatten)]
-    pub inner: async_openai::types::CreateCompletionRequest,
+    pub inner: dynamo_async_openai::types::CreateCompletionRequest,

    #[serde(flatten)]
    pub common: CommonExt,
@@ -49,25 +49,25 @@ pub struct NvCreateCompletionRequest {
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateCompletionResponse {
    #[serde(flatten)]
-    pub inner: async_openai::types::CreateCompletionResponse,
+    pub inner: dynamo_async_openai::types::CreateCompletionResponse,
 }

-impl ContentProvider for async_openai::types::Choice {
+impl ContentProvider for dynamo_async_openai::types::Choice {
    fn content(&self) -> String {
        self.text.clone()
    }
 }

-pub fn prompt_to_string(prompt: &async_openai::types::Prompt) -> String {
+pub fn prompt_to_string(prompt: &dynamo_async_openai::types::Prompt) -> String {
    match prompt {
-        async_openai::types::Prompt::String(s) => s.clone(),
-        async_openai::types::Prompt::StringArray(arr) => arr.join(" "), // Join strings with spaces
-        async_openai::types::Prompt::IntegerArray(arr) => arr
+        dynamo_async_openai::types::Prompt::String(s) => s.clone(),
+        dynamo_async_openai::types::Prompt::StringArray(arr) => arr.join(" "), // Join strings with spaces
+        dynamo_async_openai::types::Prompt::IntegerArray(arr) => arr
            .iter()
            .map(|&num| num.to_string())
            .collect::<Vec<_>>()
            .join(" "),
-        async_openai::types::Prompt::ArrayOfIntegerArray(arr) => arr
+        dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(arr) => arr
            .iter()
            .map(|inner| {
                inner
@@ -226,10 +226,10 @@ impl ResponseFactory {

    pub fn make_response(
        &self,
-        choice: async_openai::types::Choice,
-        usage: Option<async_openai::types::CompletionUsage>,
+        choice: dynamo_async_openai::types::Choice,
+        usage: Option<dynamo_async_openai::types::CompletionUsage>,
    ) -> NvCreateCompletionResponse {
-        let inner = async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_async_openai::types::CreateCompletionResponse {
            id: self.id.clone(),
            object: self.object.clone(),
            created: self.created,
@@ -300,7 +300,7 @@ impl TryFrom<NvCreateCompletionRequest> for common::CompletionRequest {
    }
 }

-impl TryFrom<common::StreamingCompletionResponse> for async_openai::types::Choice {
+impl TryFrom<common::StreamingCompletionResponse> for dynamo_async_openai::types::Choice {
    type Error = anyhow::Error;

    fn try_from(response: common::StreamingCompletionResponse) -> Result<Self, Self::Error> {
@@ -321,10 +321,10 @@ impl TryFrom<common::StreamingCompletionResponse> for async_openai::types::Choic
        // TODO handle aggregating logprobs
        let logprobs = None;

-        let finish_reason: Option<async_openai::types::CompletionFinishReason> =
+        let finish_reason: Option<dynamo_async_openai::types::CompletionFinishReason> =
            response.delta.finish_reason.map(Into::into);

-        let choice = async_openai::types::Choice {
+        let choice = dynamo_async_openai::types::Choice {
            text,
            index,
            logprobs,

--- a/lib/llm/src/protocols/openai/completions/aggregator.rs
+++ b/lib/llm/src/protocols/openai/completions/aggregator.rs
@@ -30,7 +30,7 @@ pub struct DeltaAggregator {
    id: String,
    model: String,
    created: u32,
-    usage: Option<async_openai::types::CompletionUsage>,
+    usage: Option<dynamo_async_openai::types::CompletionUsage>,
    system_fingerprint: Option<String>,
    choices: HashMap<u32, DeltaChoice>,
    error: Option<String>,
@@ -40,7 +40,7 @@ struct DeltaChoice {
    index: u32,
    text: String,
    finish_reason: Option<FinishReason>,
-    logprobs: Option<async_openai::types::Logprobs>,
+    logprobs: Option<dynamo_async_openai::types::Logprobs>,
 }

 impl Default for DeltaAggregator {
@@ -112,15 +112,15 @@ impl DeltaAggregator {

                        // Handle CompletionFinishReason -> FinishReason conversation
                        state_choice.finish_reason = match choice.finish_reason {
-                            Some(async_openai::types::CompletionFinishReason::Stop) => {
+                            Some(dynamo_async_openai::types::CompletionFinishReason::Stop) => {
                                Some(FinishReason::Stop)
                            }
-                            Some(async_openai::types::CompletionFinishReason::Length) => {
+                            Some(dynamo_async_openai::types::CompletionFinishReason::Length) => {
                                Some(FinishReason::Length)
                            }
-                            Some(async_openai::types::CompletionFinishReason::ContentFilter) => {
-                                Some(FinishReason::ContentFilter)
-                            }
+                            Some(
+                                dynamo_async_openai::types::CompletionFinishReason::ContentFilter,
+                            ) => Some(FinishReason::ContentFilter),
                            None => None,
                        };
                    }
@@ -140,12 +140,12 @@ impl DeltaAggregator {
        let mut choices: Vec<_> = aggregator
            .choices
            .into_values()
-            .map(async_openai::types::Choice::from)
+            .map(dynamo_async_openai::types::Choice::from)
            .collect();

        choices.sort_by(|a, b| a.index.cmp(&b.index));

-        let inner = async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_async_openai::types::CreateCompletionResponse {
            id: aggregator.id,
            created: aggregator.created,
            usage: aggregator.usage,
@@ -161,11 +161,11 @@ impl DeltaAggregator {
    }
 }

-impl From<DeltaChoice> for async_openai::types::Choice {
+impl From<DeltaChoice> for dynamo_async_openai::types::Choice {
    fn from(delta: DeltaChoice) -> Self {
        let finish_reason = delta.finish_reason.map(Into::into);

-        async_openai::types::Choice {
+        dynamo_async_openai::types::Choice {
            index: delta.index,
            text: delta.text,
            finish_reason,
@@ -210,13 +210,13 @@ mod tests {
            .and_then(|s| FinishReason::from_str(s).ok())
            .map(Into::into);

-        let inner = async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_async_openai::types::CreateCompletionResponse {
            id: "test_id".to_string(),
            model: "meta/llama-3.1-8b".to_string(),
            created: 1234567890,
            usage: None,
            system_fingerprint: None,
-            choices: vec![async_openai::types::Choice {
+            choices: vec![dynamo_async_openai::types::Choice {
                index,
                text: text.to_string(),
                finish_reason,
@@ -283,11 +283,11 @@ mod tests {
        assert_eq!(choice.text, "Hello,".to_string());
        assert_eq!(
            choice.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Length)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Length)
        );
        assert_eq!(
            choice.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Length)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Length)
        );
        assert!(choice.logprobs.is_none());
    }
@@ -318,34 +318,34 @@ mod tests {
        assert_eq!(choice.text, "Hello, world!".to_string());
        assert_eq!(
            choice.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
        );
        assert_eq!(
            choice.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
        );
    }

    #[tokio::test]
    async fn test_multiple_choices() {
        // Create a delta with multiple choices
-        let inner = async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_async_openai::types::CreateCompletionResponse {
            id: "test_id".to_string(),
            model: "meta/llama-3.1-8b".to_string(),
            created: 1234567890,
            usage: None,
            system_fingerprint: None,
            choices: vec![
-                async_openai::types::Choice {
+                dynamo_async_openai::types::Choice {
                    index: 0,
                    text: "Choice 0".to_string(),
-                    finish_reason: Some(async_openai::types::CompletionFinishReason::Stop),
+                    finish_reason: Some(dynamo_async_openai::types::CompletionFinishReason::Stop),
                    logprobs: None,
                },
-                async_openai::types::Choice {
+                dynamo_async_openai::types::Choice {
                    index: 1,
                    text: "Choice 1".to_string(),
-                    finish_reason: Some(async_openai::types::CompletionFinishReason::Stop),
+                    finish_reason: Some(dynamo_async_openai::types::CompletionFinishReason::Stop),
                    logprobs: None,
                },
            ],
@@ -379,11 +379,11 @@ mod tests {
        assert_eq!(choice0.text, "Choice 0".to_string());
        assert_eq!(
            choice0.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
        );
        assert_eq!(
            choice0.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
        );

        let choice1 = &response.inner.choices[1];
@@ -391,11 +391,11 @@ mod tests {
        assert_eq!(choice1.text, "Choice 1".to_string());
        assert_eq!(
            choice1.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
        );
        assert_eq!(
            choice1.finish_reason,
-            Some(async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
        );
    }
 }
--- a/lib/llm/src/protocols/openai/completions/delta.rs
+++ b/lib/llm/src/protocols/openai/completions/delta.rs
@@ -42,7 +42,7 @@ pub struct DeltaGenerator {
    created: u32,
    model: String,
    system_fingerprint: Option<String>,
-    usage: async_openai::types::CompletionUsage,
+    usage: dynamo_async_openai::types::CompletionUsage,
    options: DeltaGeneratorOptions,
 }

@@ -59,7 +59,7 @@ impl DeltaGenerator {

        // Previously, our home-rolled CompletionUsage impl'd Default
        // PR !387 - https://github.com/64bit/async-openai/pull/387
-        let usage = async_openai::types::CompletionUsage {
+        let usage = dynamo_async_openai::types::CompletionUsage {
            completion_tokens: 0,
            prompt_tokens: 0,
            total_tokens: 0,
@@ -88,7 +88,7 @@ impl DeltaGenerator {
        token_ids: Vec<TokenIdType>,
        logprobs: Option<common::llm_backend::LogProbs>,
        top_logprobs: Option<common::llm_backend::TopLogprobs>,
-    ) -> Option<async_openai::types::Logprobs> {
+    ) -> Option<dynamo_async_openai::types::Logprobs> {
        if !self.options.enable_logprobs || logprobs.is_none() {
            return None;
        }
@@ -116,16 +116,16 @@ impl DeltaGenerator {
                            let top_t = top_lp.token.clone().unwrap_or_default();
                            let top_tid = top_lp.token_id;
                            found_selected_token = found_selected_token || top_tid == *tid;
-                            async_openai::types::TopLogprobs {
+                            dynamo_async_openai::types::TopLogprobs {
                                token: top_t,
                                logprob: top_lp.logprob as f32,
                                bytes: None,
                            }
                        })
-                        .collect::<Vec<async_openai::types::TopLogprobs>>();
+                        .collect::<Vec<dynamo_async_openai::types::TopLogprobs>>();
                    if !found_selected_token {
                        // If the selected token is not in the top logprobs, add it
-                        converted_top_lps.push(async_openai::types::TopLogprobs {
+                        converted_top_lps.push(dynamo_async_openai::types::TopLogprobs {
                            token: t.clone(),
                            logprob: *lp,
                            bytes: None,
@@ -136,7 +136,7 @@ impl DeltaGenerator {
                .collect()
        });

-        Some(async_openai::types::Logprobs {
+        Some(dynamo_async_openai::types::Logprobs {
            tokens: toks.iter().map(|(t, _)| t.clone()).collect(),
            token_logprobs: tok_lps.into_iter().map(Some).collect(),
            text_offset: vec![],
@@ -148,8 +148,8 @@ impl DeltaGenerator {
        &self,
        index: u32,
        text: Option<String>,
-        finish_reason: Option<async_openai::types::CompletionFinishReason>,
-        logprobs: Option<async_openai::types::Logprobs>,
+        finish_reason: Option<dynamo_async_openai::types::CompletionFinishReason>,
+        logprobs: Option<dynamo_async_openai::types::Logprobs>,
    ) -> NvCreateCompletionResponse {
        // todo - update for tool calling

@@ -158,13 +158,13 @@ impl DeltaGenerator {
            usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
        }

-        let inner = async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_async_openai::types::CreateCompletionResponse {
            id: self.id.clone(),
            object: self.object.clone(),
            created: self.created,
            model: self.model.clone(),
            system_fingerprint: self.system_fingerprint.clone(),
-            choices: vec![async_openai::types::Choice {
+            choices: vec![dynamo_async_openai::types::Choice {
                text: text.unwrap_or_default(),
                index,
                finish_reason,

--- a/lib/llm/src/protocols/openai/embeddings.rs
+++ b/lib/llm/src/protocols/openai/embeddings.rs
@@ -26,7 +26,7 @@ pub use nvext::{NvExt, NvExtProvider};
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateEmbeddingRequest {
    #[serde(flatten)]
-    pub inner: async_openai::types::CreateEmbeddingRequest,
+    pub inner: dynamo_async_openai::types::CreateEmbeddingRequest,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
@@ -41,17 +41,17 @@ pub struct NvCreateEmbeddingRequest {
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateEmbeddingResponse {
    #[serde(flatten)]
-    pub inner: async_openai::types::CreateEmbeddingResponse,
+    pub inner: dynamo_async_openai::types::CreateEmbeddingResponse,
 }

 impl NvCreateEmbeddingResponse {
    pub fn empty() -> Self {
        Self {
-            inner: async_openai::types::CreateEmbeddingResponse {
+            inner: dynamo_async_openai::types::CreateEmbeddingResponse {
                object: "list".to_string(),
                model: "embedding".to_string(),
                data: vec![],
-                usage: async_openai::types::EmbeddingUsage {
+                usage: dynamo_async_openai::types::EmbeddingUsage {
                    prompt_tokens: 0,
                    total_tokens: 0,
                },

--- a/lib/llm/src/protocols/openai/embeddings/aggregator.rs
+++ b/lib/llm/src/protocols/openai/embeddings/aggregator.rs
@@ -145,16 +145,16 @@ mod tests {
    use futures::stream;

    fn create_test_embedding_response(
-        embeddings: Vec<async_openai::types::Embedding>,
+        embeddings: Vec<dynamo_async_openai::types::Embedding>,
        prompt_tokens: u32,
        total_tokens: u32,
    ) -> Annotated<NvCreateEmbeddingResponse> {
        let response = NvCreateEmbeddingResponse {
-            inner: async_openai::types::CreateEmbeddingResponse {
+            inner: dynamo_async_openai::types::CreateEmbeddingResponse {
                object: "list".to_string(),
                model: "test-model".to_string(),
                data: embeddings,
-                usage: async_openai::types::EmbeddingUsage {
+                usage: dynamo_async_openai::types::EmbeddingUsage {
                    prompt_tokens,
                    total_tokens,
                },
@@ -178,7 +178,7 @@ mod tests {

    #[tokio::test]
    async fn test_single_embedding() {
-        let embedding = async_openai::types::Embedding {
+        let embedding = dynamo_async_openai::types::Embedding {
            index: 0,
            object: "embedding".to_string(),
            embedding: vec![0.1, 0.2, 0.3],
@@ -200,13 +200,13 @@ mod tests {

    #[tokio::test]
    async fn test_multiple_embeddings() {
-        let embedding1 = async_openai::types::Embedding {
+        let embedding1 = dynamo_async_openai::types::Embedding {
            index: 0,
            object: "embedding".to_string(),
            embedding: vec![0.1, 0.2, 0.3],
        };

-        let embedding2 = async_openai::types::Embedding {
+        let embedding2 = dynamo_async_openai::types::Embedding {
            index: 1,
            object: "embedding".to_string(),
            embedding: vec![0.4, 0.5, 0.6],

--- a/lib/llm/src/protocols/openai/responses.rs
+++ b/lib/llm/src/protocols/openai/responses.rs
@@ -13,11 +13,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use async_openai::types::responses::{
+use dynamo_async_openai::types::responses::{
    Content, Input, OutputContent, OutputMessage, OutputStatus, OutputText, Response,
    Role as ResponseRole, Status,
 };
-use async_openai::types::{
+use dynamo_async_openai::types::{
    ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
    ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
 };
@@ -33,7 +33,7 @@ use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateResponse {
    #[serde(flatten)]
-    pub inner: async_openai::types::responses::CreateResponse,
+    pub inner: dynamo_async_openai::types::responses::CreateResponse,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
@@ -42,7 +42,7 @@ pub struct NvCreateResponse {
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvResponse {
    #[serde(flatten)]
-    pub inner: async_openai::types::responses::Response,
+    pub inner: dynamo_async_openai::types::responses::Response,
 }

 /// Implements `NvExtProvider` for `NvCreateResponse`,
@@ -256,8 +256,8 @@ impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {

 #[cfg(test)]
 mod tests {
-    use async_openai::types::responses::{CreateResponse, Input};
-    use async_openai::types::{
+    use dynamo_async_openai::types::responses::{CreateResponse, Input};
+    use dynamo_async_openai::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessageContent,
    };

@@ -341,15 +341,15 @@ mod tests {
    fn test_into_nvresponse_from_chat_response() {
        let now = 1_726_000_000;
        let chat_resp = NvCreateChatCompletionResponse {
-            inner: async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
                id: "chatcmpl-xyz".into(),
-                choices: vec![async_openai::types::ChatChoice {
+                choices: vec![dynamo_async_openai::types::ChatChoice {
                    index: 0,
-                    message: async_openai::types::ChatCompletionResponseMessage {
+                    message: dynamo_async_openai::types::ChatCompletionResponseMessage {
                        content: Some("This is a reply".into()),
                        refusal: None,
                        tool_calls: None,
-                        role: async_openai::types::Role::Assistant,
+                        role: dynamo_async_openai::types::Role::Assistant,
                        function_call: None,
                        audio: None,
                    },

--- a/lib/llm/src/protocols/openai/validate.rs
+++ b/lib/llm/src/protocols/openai/validate.rs
@@ -224,15 +224,15 @@ pub fn validate_user(user: Option<&str>) -> Result<(), anyhow::Error> {
 }

 /// Validates stop sequences
-pub fn validate_stop(stop: &Option<async_openai::types::Stop>) -> Result<(), anyhow::Error> {
+pub fn validate_stop(stop: &Option<dynamo_async_openai::types::Stop>) -> Result<(), anyhow::Error> {
    if let Some(stop_value) = stop {
        match stop_value {
-            async_openai::types::Stop::String(s) => {
+            dynamo_async_openai::types::Stop::String(s) => {
                if s.is_empty() {
                    anyhow::bail!("Stop sequence cannot be empty");
                }
            }
-            async_openai::types::Stop::StringArray(sequences) => {
+            dynamo_async_openai::types::Stop::StringArray(sequences) => {
                if sequences.is_empty() {
                    anyhow::bail!("Stop sequences array cannot be empty");
                }
@@ -260,7 +260,7 @@ pub fn validate_stop(stop: &Option<async_openai::types::Stop>) -> Result<(), any

 /// Validates messages array
 pub fn validate_messages(
-    messages: &[async_openai::types::ChatCompletionRequestMessage],
+    messages: &[dynamo_async_openai::types::ChatCompletionRequestMessage],
 ) -> Result<(), anyhow::Error> {
    if messages.is_empty() {
        anyhow::bail!("Messages array cannot be empty");
@@ -284,7 +284,7 @@ pub fn validate_top_logprobs(top_logprobs: Option<u8>) -> Result<(), anyhow::Err

 /// Validates tools array
 pub fn validate_tools(
-    tools: &Option<&[async_openai::types::ChatCompletionTool]>,
+    tools: &Option<&[dynamo_async_openai::types::ChatCompletionTool]>,
 ) -> Result<(), anyhow::Error> {
    let tools = match tools {
        Some(val) => val,
@@ -356,7 +356,7 @@ pub fn validate_metadata(metadata: &Option<serde_json::Value>) -> Result<(), any

 /// Validates reasoning effort parameter
 pub fn validate_reasoning_effort(
-    _reasoning_effort: &Option<async_openai::types::ReasoningEffort>,
+    _reasoning_effort: &Option<dynamo_async_openai::types::ReasoningEffort>,
 ) -> Result<(), anyhow::Error> {
    // TODO ADD HERE
    // ReasoningEffort is an enum, so if it exists, it's valid by definition
@@ -366,7 +366,7 @@ pub fn validate_reasoning_effort(

 /// Validates service tier parameter
 pub fn validate_service_tier(
-    _service_tier: &Option<async_openai::types::ServiceTier>,
+    _service_tier: &Option<dynamo_async_openai::types::ServiceTier>,
 ) -> Result<(), anyhow::Error> {
    // TODO ADD HERE
    // ServiceTier is an enum, so if it exists, it's valid by definition
@@ -379,14 +379,14 @@ pub fn validate_service_tier(
 //

 /// Validates prompt
-pub fn validate_prompt(prompt: &async_openai::types::Prompt) -> Result<(), anyhow::Error> {
+pub fn validate_prompt(prompt: &dynamo_async_openai::types::Prompt) -> Result<(), anyhow::Error> {
    match prompt {
-        async_openai::types::Prompt::String(s) => {
+        dynamo_async_openai::types::Prompt::String(s) => {
            if s.is_empty() {
                anyhow::bail!("Prompt string cannot be empty");
            }
        }
-        async_openai::types::Prompt::StringArray(arr) => {
+        dynamo_async_openai::types::Prompt::StringArray(arr) => {
            if arr.is_empty() {
                anyhow::bail!("Prompt string array cannot be empty");
            }
@@ -396,7 +396,7 @@ pub fn validate_prompt(prompt: &async_openai::types::Prompt) -> Result<(), anyho
                }
            }
        }
-        async_openai::types::Prompt::IntegerArray(arr) => {
+        dynamo_async_openai::types::Prompt::IntegerArray(arr) => {
            if arr.is_empty() {
                anyhow::bail!("Prompt integer array cannot be empty");
            }
@@ -411,7 +411,7 @@ pub fn validate_prompt(prompt: &async_openai::types::Prompt) -> Result<(), anyho
                }
            }
        }
-        async_openai::types::Prompt::ArrayOfIntegerArray(arr) => {
+        dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(arr) => {
            if arr.is_empty() {
                anyhow::bail!("Prompt array of integer arrays cannot be empty");
            }

--- a/lib/llm/tests/http-service.rs
+++ b/lib/llm/tests/http-service.rs
@@ -14,8 +14,8 @@
 // limitations under the License.

 use anyhow::Error;
-use async_openai::config::OpenAIConfig;
 use async_stream::stream;
+use dynamo_async_openai::config::OpenAIConfig;
 use dynamo_llm::http::{
    client::{
        GenericBYOTClient, HttpClientConfig, HttpRequestContext, NvCustomClient, PureOpenAIClient,
@@ -311,16 +311,16 @@ async fn test_http_service() {

    let client = reqwest::Client::new();

-    let message = async_openai::types::ChatCompletionRequestMessage::User(
-        async_openai::types::ChatCompletionRequestUserMessage {
-            content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+    let message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+        dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+            content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                "hi".to_string(),
            ),
            name: None,
        },
    );

-    let mut request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let mut request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("foo")
        .messages(vec![message])
        .build()
@@ -483,7 +483,7 @@ async fn test_http_service() {
    // ==== ChatCompletions / Unary / Error ====

    // ==== Completions / Unary / Error ====
-    let mut request = async_openai::types::CreateCompletionRequestArgs::default()
+    let mut request = dynamo_async_openai::types::CreateCompletionRequestArgs::default()
        .model("bar")
        .prompt("hi")
        .build()
@@ -642,12 +642,13 @@ async fn test_pure_openai_client() {
    wait_for_service_ready(port).await;

    // Test successful streaming request
-    let request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("foo")
        .messages(vec![
-            async_openai::types::ChatCompletionRequestMessage::User(
-                async_openai::types::ChatCompletionRequestUserMessage {
-                    content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+                dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+                    content:
+                        dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                            "Hi".to_string(),
                        ),
                    name: None,
@@ -674,12 +675,13 @@ async fn test_pure_openai_client() {
    assert!(count > 0, "Should receive at least one response");

    // Test error case with invalid model
-    let request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("bar") // This model will fail
        .messages(vec![
-            async_openai::types::ChatCompletionRequestMessage::User(
-                async_openai::types::ChatCompletionRequestUserMessage {
-                    content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+                dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+                    content:
+                        dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                            "Hi".to_string(),
                        ),
                    name: None,
@@ -707,12 +709,13 @@ async fn test_pure_openai_client() {

    // Test context management
    let ctx = HttpRequestContext::new();
-    let request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("foo")
        .messages(vec![
-            async_openai::types::ChatCompletionRequestMessage::User(
-                async_openai::types::ChatCompletionRequestUserMessage {
-                    content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+                dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+                    content:
+                        dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                            "Hi".to_string(),
                        ),
                    name: None,
@@ -751,12 +754,13 @@ async fn test_nv_custom_client() {
    wait_for_service_ready(port).await;

    // Test successful streaming request
-    let inner_request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let inner_request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("foo")
        .messages(vec![
-            async_openai::types::ChatCompletionRequestMessage::User(
-                async_openai::types::ChatCompletionRequestUserMessage {
-                    content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+                dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+                    content:
+                        dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                            "Hi".to_string(),
                        ),
                    name: None,
@@ -789,12 +793,13 @@ async fn test_nv_custom_client() {
    assert!(count > 0, "Should receive at least one response");

    // Test error case with invalid model
-    let inner_request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let inner_request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("bar") // This model will fail
        .messages(vec![
-            async_openai::types::ChatCompletionRequestMessage::User(
-                async_openai::types::ChatCompletionRequestUserMessage {
-                    content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+                dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+                    content:
+                        dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                            "Hi".to_string(),
                        ),
                    name: None,
@@ -828,12 +833,13 @@ async fn test_nv_custom_client() {

    // Test context management
    let ctx = HttpRequestContext::new();
-    let inner_request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let inner_request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("foo")
        .messages(vec![
-            async_openai::types::ChatCompletionRequestMessage::User(
-                async_openai::types::ChatCompletionRequestUserMessage {
-                    content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+                dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+                    content:
+                        dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                            "Hi".to_string(),
                        ),
                    name: None,
@@ -987,16 +993,16 @@ async fn test_client_disconnect_cancellation_unary() {

    let client = reqwest::Client::new();

-    let message = async_openai::types::ChatCompletionRequestMessage::User(
-        async_openai::types::ChatCompletionRequestUserMessage {
-            content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+    let message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+        dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+            content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                "This will take a long time".to_string(),
            ),
            name: None,
        },
    );

-    let request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("slow-model")
        .messages(vec![message])
        .stream(false) // Test unary response
@@ -1078,16 +1084,16 @@ async fn test_client_disconnect_cancellation_streaming() {

    let client = reqwest::Client::new();

-    let message = async_openai::types::ChatCompletionRequestMessage::User(
-        async_openai::types::ChatCompletionRequestUserMessage {
-            content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+    let message = dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+        dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+            content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                "This will stream for a long time".to_string(),
            ),
            name: None,
        },
    );

-    let request = async_openai::types::CreateChatCompletionRequestArgs::default()
+    let request = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default()
        .model("slow-stream-model")
        .messages(vec![message])
        .stream(true) // Test streaming response

--- a/lib/llm/tests/logprob_analysis_integration.rs
+++ b/lib/llm/tests/logprob_analysis_integration.rs
@@ -10,7 +10,7 @@ use dynamo_llm::perf::logprobs::analyze_logprob_sensitivity;
 use dynamo_llm::perf::{RecordedStream, TimestampedResponse};
 use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;

-use async_openai::types::{
+use dynamo_async_openai::types::{
    ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
    ChatCompletionTokenLogprob, CreateChatCompletionStreamResponse, FinishReason, Role,
    TopLogprobs,

--- a/lib/llm/tests/openai_completions.rs
+++ b/lib/llm/tests/openai_completions.rs
@@ -13,7 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use async_openai::types::CreateCompletionRequestArgs;
+use dynamo_async_openai::types::CreateCompletionRequestArgs;
 use dynamo_llm::protocols::openai::{completions::NvCreateCompletionRequest, validate};
 use serde::{Deserialize, Serialize};


--- a/lib/llm/tests/preprocessor.rs
+++ b/lib/llm/tests/preprocessor.rs
@@ -231,31 +231,31 @@ const TOOLS: &str = r#"
 "#;

 // Notes:
-// protocols::openai::chat_completions::ChatCompletionMessage -> async_openai::types::ChatCompletionRequestMessage
-// protocols::openai::chat_completions::Tool -> async_openai::types::ChatCompletionTool
-// protocols::openai::chat_completions::ToolChoiceType -> async_openai::types::ChatCompletionToolChoiceOption
+// protocols::openai::chat_completions::ChatCompletionMessage -> dynamo_async_openai::types::ChatCompletionRequestMessage
+// protocols::openai::chat_completions::Tool -> dynamo_async_openai::types::ChatCompletionTool
+// protocols::openai::chat_completions::ToolChoiceType -> dynamo_async_openai::types::ChatCompletionToolChoiceOption
 #[derive(Serialize, Deserialize)]
 struct Request {
-    messages: Vec<async_openai::types::ChatCompletionRequestMessage>,
-    tools: Option<Vec<async_openai::types::ChatCompletionTool>>,
-    tool_choice: Option<async_openai::types::ChatCompletionToolChoiceOption>,
+    messages: Vec<dynamo_async_openai::types::ChatCompletionRequestMessage>,
+    tools: Option<Vec<dynamo_async_openai::types::ChatCompletionTool>>,
+    tool_choice: Option<dynamo_async_openai::types::ChatCompletionToolChoiceOption>,
 }

 impl Request {
    fn from(
        messages: &str,
        tools: Option<&str>,
-        tool_choice: Option<async_openai::types::ChatCompletionToolChoiceOption>,
+        tool_choice: Option<dynamo_async_openai::types::ChatCompletionToolChoiceOption>,
        model: String,
    ) -> NvCreateChatCompletionRequest {
-        let messages: Vec<async_openai::types::ChatCompletionRequestMessage> =
+        let messages: Vec<dynamo_async_openai::types::ChatCompletionRequestMessage> =
            serde_json::from_str(messages).unwrap();
-        let tools: Option<Vec<async_openai::types::ChatCompletionTool>> =
+        let tools: Option<Vec<dynamo_async_openai::types::ChatCompletionTool>> =
            tools.map(|x| serde_json::from_str(x).unwrap());
        //let tools = tools.unwrap();
        //let tool_choice = tool_choice.unwrap();

-        let mut inner = async_openai::types::CreateChatCompletionRequestArgs::default();
+        let mut inner = dynamo_async_openai::types::CreateChatCompletionRequestArgs::default();
        inner.model(model);
        inner.messages(messages);
        if let Some(tools) = tools {
@@ -326,7 +326,7 @@ async fn test_single_turn_with_tools() {
        let request = Request::from(
            SINGLE_CHAT_MESSAGE,
            Some(TOOLS),
-            Some(async_openai::types::ChatCompletionToolChoiceOption::Auto),
+            Some(dynamo_async_openai::types::ChatCompletionToolChoiceOption::Auto),
            mdc.slug().to_string(),
        );
        let formatted_prompt = formatter.render(&request).unwrap();
@@ -433,7 +433,7 @@ async fn test_multi_turn_with_system_with_tools() {
        let request = Request::from(
            THREE_TURN_CHAT_MESSAGE_WITH_SYSTEM,
            Some(TOOLS),
-            Some(async_openai::types::ChatCompletionToolChoiceOption::Auto),
+            Some(dynamo_async_openai::types::ChatCompletionToolChoiceOption::Auto),
            mdc.slug().to_string(),
        );
        let formatted_prompt = formatter.render(&request).unwrap();

--- a/lib/llm/tests/test_common_ext.rs
+++ b/lib/llm/tests/test_common_ext.rs
@@ -226,11 +226,11 @@ fn test_completions_common_overrides_nvext() {
 fn test_serialization_preserves_structure() {
    // Test that serialization preserves the flattened structure
    let request = NvCreateChatCompletionRequest {
-        inner: async_openai::types::CreateChatCompletionRequest {
+        inner: dynamo_async_openai::types::CreateChatCompletionRequest {
            model: "test-model".to_string(),
-            messages: vec![async_openai::types::ChatCompletionRequestMessage::User(
-                async_openai::types::ChatCompletionRequestUserMessage {
-                    content: async_openai::types::ChatCompletionRequestUserMessageContent::Text(
+            messages: vec![dynamo_async_openai::types::ChatCompletionRequestMessage::User(
+                dynamo_async_openai::types::ChatCompletionRequestUserMessage {
+                    content: dynamo_async_openai::types::ChatCompletionRequestUserMessageContent::Text(
                        "Hello".to_string(),
                    ),
                    ..Default::default()