refactor(2/3): rename dynamo-async-openai to dynamo-protocols (#7565)

b6a3b0c6 · ishandhanani · GitHub · c84c0934 · b6a3b0c6 · b6a3b0c6
Unverified commit b6a3b0c6, authored Apr 01, 2026 by ishandhanani; committed by GitHub Apr 01, 2026
20 changed files
--- a/lib/llm/src/preprocessor/speculative_prefill.rs
+++ b/lib/llm/src/preprocessor/speculative_prefill.rs
@@ -12,7 +12,7 @@ use std::pin::Pin;
 use std::sync::Arc;

 use anyhow::Result;
-use dynamo_async_openai::types::{
+use dynamo_protocols::types::{
    ChatCompletionMessageContent, ChatCompletionRequestAssistantMessage,
    ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestMessage,
 };

--- a/lib/llm/src/protocols/anthropic/stream_converter.rs
+++ b/lib/llm/src/protocols/anthropic/stream_converter.rs
@@ -10,7 +10,7 @@
 use std::collections::HashSet;

 use axum::response::sse::Event;
-use dynamo_async_openai::types::ChatCompletionMessageContent;
+use dynamo_protocols::types::ChatCompletionMessageContent;
 use uuid::Uuid;

 use super::types::{
@@ -136,17 +136,15 @@ impl AnthropicStreamConverter {
            // Track finish reason
            if let Some(ref fr) = choice.finish_reason {
                self.stop_reason = Some(match fr {
-                    dynamo_async_openai::types::FinishReason::Stop => AnthropicStopReason::EndTurn,
-                    dynamo_async_openai::types::FinishReason::Length => {
-                        AnthropicStopReason::MaxTokens
-                    }
-                    dynamo_async_openai::types::FinishReason::ToolCalls => {
+                    dynamo_protocols::types::FinishReason::Stop => AnthropicStopReason::EndTurn,
+                    dynamo_protocols::types::FinishReason::Length => AnthropicStopReason::MaxTokens,
+                    dynamo_protocols::types::FinishReason::ToolCalls => {
                        AnthropicStopReason::ToolUse
                    }
-                    dynamo_async_openai::types::FinishReason::ContentFilter => {
+                    dynamo_protocols::types::FinishReason::ContentFilter => {
                        AnthropicStopReason::EndTurn
                    }
-                    dynamo_async_openai::types::FinishReason::FunctionCall => {
+                    dynamo_protocols::types::FinishReason::FunctionCall => {
                        AnthropicStopReason::ToolUse
                    }
                });
@@ -478,17 +476,15 @@ impl AnthropicStreamConverter {

            if let Some(ref fr) = choice.finish_reason {
                self.stop_reason = Some(match fr {
-                    dynamo_async_openai::types::FinishReason::Stop => AnthropicStopReason::EndTurn,
-                    dynamo_async_openai::types::FinishReason::Length => {
-                        AnthropicStopReason::MaxTokens
-                    }
-                    dynamo_async_openai::types::FinishReason::ToolCalls => {
+                    dynamo_protocols::types::FinishReason::Stop => AnthropicStopReason::EndTurn,
+                    dynamo_protocols::types::FinishReason::Length => AnthropicStopReason::MaxTokens,
+                    dynamo_protocols::types::FinishReason::ToolCalls => {
                        AnthropicStopReason::ToolUse
                    }
-                    dynamo_async_openai::types::FinishReason::ContentFilter => {
+                    dynamo_protocols::types::FinishReason::ContentFilter => {
                        AnthropicStopReason::EndTurn
                    }
-                    dynamo_async_openai::types::FinishReason::FunctionCall => {
+                    dynamo_protocols::types::FinishReason::FunctionCall => {
                        AnthropicStopReason::ToolUse
                    }
                });
@@ -734,7 +730,7 @@ impl AnthropicStreamConverter {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use dynamo_async_openai::types::{
+    use dynamo_protocols::types::{
        ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionMessageToolCallChunk,
        ChatCompletionStreamResponseDelta, ChatCompletionToolType, FunctionCallStream,
    };
@@ -742,7 +738,7 @@ mod tests {
    fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
        #[allow(deprecated)]
        NvCreateChatCompletionStreamResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                id: "chat-1".into(),
                choices: vec![ChatChoiceStream {
                    index: 0,
@@ -777,7 +773,7 @@ mod tests {
    ) -> NvCreateChatCompletionStreamResponse {
        #[allow(deprecated)]
        NvCreateChatCompletionStreamResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                id: "chat-1".into(),
                choices: vec![ChatChoiceStream {
                    index: 0,
@@ -932,7 +928,7 @@ mod tests {
    fn reasoning_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
        #[allow(deprecated)]
        NvCreateChatCompletionStreamResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                id: "chat-1".into(),
                choices: vec![ChatChoiceStream {
                    index: 0,

--- a/lib/llm/src/protocols/anthropic/types.rs
+++ b/lib/llm/src/protocols/anthropic/types.rs
@@ -3,15 +3,15 @@

 //! Anthropic Messages API conversion logic.
 //!
-//! Pure protocol types live in `dynamo_async_openai::types::anthropic`.
+//! Pure protocol types live in `dynamo_protocols::types::anthropic`.
 //! This module provides bidirectional conversion to/from the internal
 //! chat completions format used by the Dynamo engine.

 // Re-export all pure Anthropic protocol types so existing `use crate::protocols::anthropic::*`
 // continues to work throughout dynamo-llm.
-pub use dynamo_async_openai::types::anthropic::*;
+pub use dynamo_protocols::types::anthropic::*;

-use dynamo_async_openai::types::{
+use dynamo_protocols::types::{
    ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice,
    ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent,
    ChatCompletionRequestMessage, ChatCompletionRequestMessageContentPartImage,
@@ -96,10 +96,10 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
        // Convert stop_sequences -> stop
        let stop = req
            .stop_sequences
-            .map(dynamo_async_openai::types::Stop::StringArray);
+            .map(dynamo_protocols::types::Stop::StringArray);

        Ok(NvCreateChatCompletionRequest {
-            inner: dynamo_async_openai::types::CreateChatCompletionRequest {
+            inner: dynamo_protocols::types::CreateChatCompletionRequest {
                messages,
                model: req.model,
                temperature: req.temperature,
@@ -109,7 +109,7 @@ impl TryFrom<AnthropicCreateMessageRequest> for NvCreateChatCompletionRequest {
                tools,
                tool_choice,
                stream: Some(true), // Always stream internally
-                stream_options: Some(dynamo_async_openai::types::ChatCompletionStreamOptions {
+                stream_options: Some(dynamo_protocols::types::ChatCompletionStreamOptions {
                    include_usage: true,
                    continuous_usage_stats: false,
                }),
@@ -350,7 +350,7 @@ fn convert_assistant_blocks(
                tool_calls.push(ChatCompletionMessageToolCall {
                    id: id.clone(),
                    r#type: ChatCompletionToolType::Function,
-                    function: dynamo_async_openai::types::FunctionCall {
+                    function: dynamo_protocols::types::FunctionCall {
                        name: name.clone(),
                        arguments: serde_json::to_string(input).unwrap_or_default(),
                    },
@@ -487,11 +487,11 @@ pub fn chat_completion_to_anthropic_response(
    if let Some(choice) = choice {
        // Map finish_reason
        stop_reason = choice.finish_reason.map(|fr| match fr {
-            dynamo_async_openai::types::FinishReason::Stop => AnthropicStopReason::EndTurn,
-            dynamo_async_openai::types::FinishReason::Length => AnthropicStopReason::MaxTokens,
-            dynamo_async_openai::types::FinishReason::ToolCalls => AnthropicStopReason::ToolUse,
-            dynamo_async_openai::types::FinishReason::ContentFilter => AnthropicStopReason::EndTurn,
-            dynamo_async_openai::types::FinishReason::FunctionCall => AnthropicStopReason::ToolUse,
+            dynamo_protocols::types::FinishReason::Stop => AnthropicStopReason::EndTurn,
+            dynamo_protocols::types::FinishReason::Length => AnthropicStopReason::MaxTokens,
+            dynamo_protocols::types::FinishReason::ToolCalls => AnthropicStopReason::ToolUse,
+            dynamo_protocols::types::FinishReason::ContentFilter => AnthropicStopReason::EndTurn,
+            dynamo_protocols::types::FinishReason::FunctionCall => AnthropicStopReason::ToolUse,
        });

        // Extract tool calls
@@ -523,8 +523,8 @@ pub fn chat_completion_to_anthropic_response(

        // Extract text content
        let text = match choice.message.content {
-            Some(dynamo_async_openai::types::ChatCompletionMessageContent::Text(t)) => Some(t),
-            Some(dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_)) => {
+            Some(dynamo_protocols::types::ChatCompletionMessageContent::Text(t)) => Some(t),
+            Some(dynamo_protocols::types::ChatCompletionMessageContent::Parts(_)) => {
                tracing::warn!(
                    "Multimodal (Parts) content in chat completion response replaced with placeholder text in Anthropic conversion."
                );
@@ -821,24 +821,22 @@ mod tests {
    #[test]
    fn test_chat_completion_to_anthropic_response() {
        let chat_resp = NvCreateChatCompletionResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                id: "chatcmpl-xyz".into(),
-                choices: vec![dynamo_async_openai::types::ChatChoice {
+                choices: vec![dynamo_protocols::types::ChatChoice {
                    index: 0,
-                    message: dynamo_async_openai::types::ChatCompletionResponseMessage {
-                        content: Some(
-                            dynamo_async_openai::types::ChatCompletionMessageContent::Text(
+                    message: dynamo_protocols::types::ChatCompletionResponseMessage {
+                        content: Some(dynamo_protocols::types::ChatCompletionMessageContent::Text(
                            "Hello!".to_string(),
-                            ),
-                        ),
+                        )),
                        refusal: None,
                        tool_calls: None,
-                        role: dynamo_async_openai::types::Role::Assistant,
+                        role: dynamo_protocols::types::Role::Assistant,
                        function_call: None,
                        audio: None,
                        reasoning_content: None,
                    },
-                    finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop),
+                    finish_reason: Some(dynamo_protocols::types::FinishReason::Stop),
                    stop_reason: None,
                    logprobs: None,
                }],
@@ -847,7 +845,7 @@ mod tests {
                service_tier: None,
                system_fingerprint: None,
                object: "chat.completion".to_string(),
-                usage: Some(dynamo_async_openai::types::CompletionUsage {
+                usage: Some(dynamo_protocols::types::CompletionUsage {
                    prompt_tokens: 10,
                    completion_tokens: 5,
                    total_tokens: 15,

--- a/lib/llm/src/protocols/common.rs
+++ b/lib/llm/src/protocols/common.rs
@@ -90,27 +90,27 @@ impl std::str::FromStr for FinishReason {
    }
 }

-impl From<FinishReason> for dynamo_async_openai::types::CompletionFinishReason {
+impl From<FinishReason> for dynamo_protocols::types::CompletionFinishReason {
    fn from(reason: FinishReason) -> Self {
        match reason {
            FinishReason::EoS | FinishReason::Stop | FinishReason::Cancelled => {
-                dynamo_async_openai::types::CompletionFinishReason::Stop
+                dynamo_protocols::types::CompletionFinishReason::Stop
            }
            FinishReason::ContentFilter => {
-                dynamo_async_openai::types::CompletionFinishReason::ContentFilter
+                dynamo_protocols::types::CompletionFinishReason::ContentFilter
            }
-            FinishReason::Length => dynamo_async_openai::types::CompletionFinishReason::Length,
-            FinishReason::Error(_) => dynamo_async_openai::types::CompletionFinishReason::Stop,
+            FinishReason::Length => dynamo_protocols::types::CompletionFinishReason::Length,
+            FinishReason::Error(_) => dynamo_protocols::types::CompletionFinishReason::Stop,
        }
    }
 }

-impl From<dynamo_async_openai::types::CompletionFinishReason> for FinishReason {
-    fn from(reason: dynamo_async_openai::types::CompletionFinishReason) -> Self {
+impl From<dynamo_protocols::types::CompletionFinishReason> for FinishReason {
+    fn from(reason: dynamo_protocols::types::CompletionFinishReason) -> Self {
        match reason {
-            dynamo_async_openai::types::CompletionFinishReason::Stop => FinishReason::Stop,
-            dynamo_async_openai::types::CompletionFinishReason::Length => FinishReason::Length,
-            dynamo_async_openai::types::CompletionFinishReason::ContentFilter => {
+            dynamo_protocols::types::CompletionFinishReason::Stop => FinishReason::Stop,
+            dynamo_protocols::types::CompletionFinishReason::Length => FinishReason::Length,
+            dynamo_protocols::types::CompletionFinishReason::ContentFilter => {
                FinishReason::ContentFilter
            }
        }

--- a/lib/llm/src/protocols/common/llm_backend.rs
+++ b/lib/llm/src/protocols/common/llm_backend.rs
@@ -6,8 +6,8 @@ use serde::{Deserialize, Serialize};
 pub use super::FinishReason;
 pub use super::preprocessor::PreprocessedRequest;
 use crate::protocols::TokenIdType;
-use dynamo_async_openai::types::CompletionUsage;
-use dynamo_async_openai::types::StopReason;
+use dynamo_protocols::types::CompletionUsage;
+use dynamo_protocols::types::StopReason;
 use dynamo_runtime::error::DynamoError;
 use dynamo_runtime::protocols::maybe_error::MaybeError;


--- a/lib/llm/src/protocols/common/preprocessor.rs
+++ b/lib/llm/src/protocols/common/preprocessor.rs
@@ -86,7 +86,7 @@ pub struct PrefillResult {
    pub disaggregated_params: serde_json::Value,
    /// Prompt token details produced during prefill
    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub prompt_tokens_details: Option<dynamo_async_openai::types::PromptTokensDetails>,
+    pub prompt_tokens_details: Option<dynamo_protocols::types::PromptTokensDetails>,
 }

 /// Optional multimodal routing-only data.

--- a/lib/llm/src/protocols/openai.rs
+++ b/lib/llm/src/protocols/openai.rs
@@ -230,15 +230,15 @@ pub(crate) fn convert_backend_top_logprobs(
    selected_token: &str,
    selected_token_id: TokenIdType,
    selected_logprob: f32,
-) -> Vec<dynamo_async_openai::types::TopLogprobs> {
+) -> Vec<dynamo_protocols::types::TopLogprobs> {
    let mut found_selected = false;
-    let mut result: Vec<dynamo_async_openai::types::TopLogprobs> = top_lps
+    let mut result: Vec<dynamo_protocols::types::TopLogprobs> = top_lps
        .iter()
        .map(|top_lp| {
            let tok = top_lp.token.clone().unwrap_or_default();
            found_selected = found_selected || top_lp.token_id == selected_token_id;
            let bytes = top_lp.bytes.clone().or_else(|| token_to_utf8_bytes(&tok));
-            dynamo_async_openai::types::TopLogprobs {
+            dynamo_protocols::types::TopLogprobs {
                token: tok,
                logprob: top_lp.logprob as f32,
                bytes,
@@ -247,7 +247,7 @@ pub(crate) fn convert_backend_top_logprobs(
        .collect();

    if !found_selected {
-        result.push(dynamo_async_openai::types::TopLogprobs {
+        result.push(dynamo_protocols::types::TopLogprobs {
            token: selected_token.to_string(),
            logprob: selected_logprob,
            bytes: token_to_utf8_bytes(selected_token),
@@ -277,7 +277,7 @@ pub trait DeltaGeneratorExt<ResponseType: Send + 'static + std::fmt::Debug>:
    fn is_continuous_usage_enabled(&self) -> bool;

    /// Get the current usage statistics with properly calculated total_tokens.
-    fn get_usage(&self) -> dynamo_async_openai::types::CompletionUsage;
+    fn get_usage(&self) -> dynamo_protocols::types::CompletionUsage;

    /// Returns the request tracker if available, for accessing worker timing metrics.
    fn tracker(&self) -> Option<std::sync::Arc<common::timing::RequestTracker>> {

--- a/lib/llm/src/protocols/openai/chat_completions.rs
+++ b/lib/llm/src/protocols/openai/chat_completions.rs
@@ -35,7 +35,7 @@ pub use delta::DeltaGenerator;
 #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateChatCompletionRequest {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateChatCompletionRequest,
+    pub inner: dynamo_protocols::types::CreateChatCompletionRequest,

    #[serde(flatten, default)]
    pub common: CommonExt,
@@ -68,7 +68,7 @@ pub struct NvCreateChatCompletionRequest {
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub struct NvCreateChatCompletionResponse {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateChatCompletionResponse,
+    pub inner: dynamo_protocols::types::CreateChatCompletionResponse,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<serde_json::Value>,
 }
@@ -78,7 +78,7 @@ pub struct NvCreateChatCompletionResponse {
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub struct NvCreateChatCompletionStreamResponse {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse,
+    pub inner: dynamo_protocols::types::CreateChatCompletionStreamResponse,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<serde_json::Value>,
 }
@@ -202,7 +202,7 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {

        // 2) OpenAI `response_format` (applies to assistant content, not tool calls)
        if let Some(response_format) = self.inner.response_format.as_ref() {
-            use dynamo_async_openai::types::ResponseFormat;
+            use dynamo_protocols::types::ResponseFormat;
            match response_format {
                ResponseFormat::Text => {}
                ResponseFormat::JsonObject => {
@@ -289,8 +289,8 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
    /// * `None` if no stop conditions are defined.
    fn get_stop(&self) -> Option<Vec<String>> {
        self.inner.stop.as_ref().map(|stop| match stop {
-            dynamo_async_openai::types::Stop::String(s) => vec![s.clone()],
-            dynamo_async_openai::types::Stop::StringArray(arr) => arr.clone(),
+            dynamo_protocols::types::Stop::String(s) => vec![s.clone()],
+            dynamo_protocols::types::Stop::StringArray(arr) => arr.clone(),
        })
    }


--- a/lib/llm/src/protocols/openai/chat_completions/aggregator.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/aggregator.rs
--- a/lib/llm/src/protocols/openai/chat_completions/delta.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/delta.rs
@@ -32,7 +32,7 @@ impl NvCreateChatCompletionRequest {
            // For non-streaming requests (stream=false), enable usage by default
            if self.inner.stream_options.is_none() {
                self.inner.stream_options =
-                    Some(dynamo_async_openai::types::ChatCompletionStreamOptions {
+                    Some(dynamo_protocols::types::ChatCompletionStreamOptions {
                        include_usage: true,
                        continuous_usage_stats: false,
                    });
@@ -116,9 +116,9 @@ pub struct DeltaGenerator {
    /// Optional system fingerprint for version tracking.
    system_fingerprint: Option<String>,
    /// Optional service tier information for the response.
-    service_tier: Option<dynamo_async_openai::types::ServiceTierResponse>,
+    service_tier: Option<dynamo_protocols::types::ServiceTierResponse>,
    /// Tracks token usage for the completion request.
-    usage: dynamo_async_openai::types::CompletionUsage,
+    usage: dynamo_protocols::types::CompletionUsage,
    /// Counter tracking the number of messages issued.
    msg_counter: u64,
    /// Configuration options for response generation.
@@ -147,7 +147,7 @@ impl DeltaGenerator {
        // but this will not be an issue until 2106.
        let now: u32 = now.try_into().expect("timestamp exceeds u32::MAX");

-        let usage = dynamo_async_openai::types::CompletionUsage {
+        let usage = dynamo_protocols::types::CompletionUsage {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
@@ -194,7 +194,7 @@ impl DeltaGenerator {
        token_ids: &[TokenIdType],
        logprobs: Option<common::llm_backend::LogProbs>,
        top_logprobs: Option<common::llm_backend::TopLogprobs>,
-    ) -> Option<dynamo_async_openai::types::ChatChoiceLogprobs> {
+    ) -> Option<dynamo_protocols::types::ChatChoiceLogprobs> {
        if !self.options.enable_logprobs || logprobs.is_none() {
            return None;
        }
@@ -216,7 +216,7 @@ impl DeltaGenerator {
                .zip(top_logprobs)
                .map(|(((t, tid), lp), top_lps)| {
                    let converted = convert_backend_top_logprobs(&top_lps, t, *tid, lp);
-                    dynamo_async_openai::types::ChatCompletionTokenLogprob {
+                    dynamo_protocols::types::ChatCompletionTokenLogprob {
                        token: t.clone(),
                        logprob: lp,
                        bytes: token_to_utf8_bytes(t),
@@ -226,7 +226,7 @@ impl DeltaGenerator {
                .collect()
        });

-        Some(dynamo_async_openai::types::ChatChoiceLogprobs {
+        Some(dynamo_protocols::types::ChatChoiceLogprobs {
            content,
            refusal: None,
        })
@@ -242,22 +242,22 @@ impl DeltaGenerator {
    /// * `stop_reason` - Optional stop string or token that triggered the stop.
    ///
    /// # Returns
-    /// * An [`dynamo_async_openai::types::CreateChatCompletionStreamResponse`] instance representing the choice.
+    /// * An [`NvCreateChatCompletionStreamResponse`] whose `inner` is the [`dynamo_protocols::types::CreateChatCompletionStreamResponse`] chunk representing the choice.
    #[allow(deprecated)]
    pub fn create_choice(
        &mut self,
        index: u32,
        text: Option<String>,
-        finish_reason: Option<dynamo_async_openai::types::FinishReason>,
-        logprobs: Option<dynamo_async_openai::types::ChatChoiceLogprobs>,
-        stop_reason: Option<dynamo_async_openai::types::StopReason>,
+        finish_reason: Option<dynamo_protocols::types::FinishReason>,
+        logprobs: Option<dynamo_protocols::types::ChatChoiceLogprobs>,
+        stop_reason: Option<dynamo_protocols::types::StopReason>,
    ) -> NvCreateChatCompletionStreamResponse {
-        let delta = dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
-            content: text.map(dynamo_async_openai::types::ChatCompletionMessageContent::Text),
+        let delta = dynamo_protocols::types::ChatCompletionStreamResponseDelta {
+            content: text.map(dynamo_protocols::types::ChatCompletionMessageContent::Text),
            function_call: None,
            tool_calls: None,
            role: if self.msg_counter == 0 {
-                Some(dynamo_async_openai::types::Role::Assistant)
+                Some(dynamo_protocols::types::Role::Assistant)
            } else {
                None
            },
@@ -265,7 +265,7 @@ impl DeltaGenerator {
            reasoning_content: None,
        };

-        let choice = dynamo_async_openai::types::ChatChoiceStream {
+        let choice = dynamo_protocols::types::ChatChoiceStream {
            index,
            delta,
            finish_reason,
@@ -279,7 +279,7 @@ impl DeltaGenerator {
        // all intermediate chunks should have usage: null
        // The final usage chunk will be sent separately with empty choices
        NvCreateChatCompletionStreamResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                id: self.id.clone(),
                object: self.object.clone(),
                created: self.created,
@@ -306,7 +306,7 @@ impl DeltaGenerator {
        let usage = self.get_usage();

        NvCreateChatCompletionStreamResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                id: self.id.clone(),
                object: self.object.clone(),
                created: self.created,
@@ -330,7 +330,7 @@ impl DeltaGenerator {
        self.options.continuous_usage_stats
    }

-    pub fn get_usage(&self) -> dynamo_async_openai::types::CompletionUsage {
+    pub fn get_usage(&self) -> dynamo_protocols::types::CompletionUsage {
        let mut usage = self.usage.clone();
        usage.total_tokens = usage.prompt_tokens.saturating_add(usage.completion_tokens);
        usage
@@ -387,18 +387,16 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes

        // Map backend finish reasons to OpenAI's finish reasons.
        let finish_reason = match delta.finish_reason {
-            Some(common::FinishReason::EoS) => Some(dynamo_async_openai::types::FinishReason::Stop),
-            Some(common::FinishReason::Stop) => {
-                Some(dynamo_async_openai::types::FinishReason::Stop)
-            }
+            Some(common::FinishReason::EoS) => Some(dynamo_protocols::types::FinishReason::Stop),
+            Some(common::FinishReason::Stop) => Some(dynamo_protocols::types::FinishReason::Stop),
            Some(common::FinishReason::Length) => {
-                Some(dynamo_async_openai::types::FinishReason::Length)
+                Some(dynamo_protocols::types::FinishReason::Length)
            }
            Some(common::FinishReason::Cancelled) => {
-                Some(dynamo_async_openai::types::FinishReason::Stop)
+                Some(dynamo_protocols::types::FinishReason::Stop)
            }
            Some(common::FinishReason::ContentFilter) => {
-                Some(dynamo_async_openai::types::FinishReason::ContentFilter)
+                Some(dynamo_protocols::types::FinishReason::ContentFilter)
            }
            Some(common::FinishReason::Error(err_msg)) => {
                return Err(anyhow::anyhow!(err_msg));
@@ -490,7 +488,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
        DeltaGenerator::is_continuous_usage_enabled(self)
    }

-    fn get_usage(&self) -> dynamo_async_openai::types::CompletionUsage {
+    fn get_usage(&self) -> dynamo_protocols::types::CompletionUsage {
        DeltaGenerator::get_usage(self)
    }

@@ -502,7 +500,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
 #[cfg(test)]
 mod tests {
    use super::*;
-    use dynamo_async_openai::types::{
+    use dynamo_protocols::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
        ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
    };

--- a/lib/llm/src/protocols/openai/chat_completions/jail.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/jail.rs
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0

 use async_stream::stream;
-use dynamo_async_openai::types::{
+use dynamo_protocols::types::{
    ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionMessageToolCallChunk,
    ChatCompletionStreamResponseDelta, FinishReason, FunctionCallStream, Role,
 };
@@ -116,7 +116,7 @@ fn create_choice_stream(
    content: &str,
    tool_calls: Option<Vec<ChatCompletionMessageToolCallChunk>>,
    finish_reason: Option<FinishReason>,
-    stop_reason: Option<dynamo_async_openai::types::StopReason>,
+    stop_reason: Option<dynamo_protocols::types::StopReason>,
    logprobs: Option<ChatChoiceLogprobs>,
 ) -> ChatChoiceStream {
    #[allow(deprecated)]
@@ -124,9 +124,9 @@ fn create_choice_stream(
        index,
        delta: ChatCompletionStreamResponseDelta {
            role,
-            content: Some(
-                dynamo_async_openai::types::ChatCompletionMessageContent::Text(content.to_string()),
-            ),
+            content: Some(dynamo_protocols::types::ChatCompletionMessageContent::Text(
+                content.to_string(),
+            )),
            tool_calls,
            function_call: None,
            refusal: None,
@@ -543,8 +543,8 @@ impl JailedStream {
                        if let Some(ref content) = choice.delta.content {
                            // Jailing only applies to text content
                            let text_content = match content {
-                                dynamo_async_openai::types::ChatCompletionMessageContent::Text(text) => Some(text.as_str()),
-                                dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_) => None,
+                                dynamo_protocols::types::ChatCompletionMessageContent::Text(text) => Some(text.as_str()),
+                                dynamo_protocols::types::ChatCompletionMessageContent::Parts(_) => None,
                            };

                            if let Some(text) = text_content {
@@ -676,7 +676,7 @@ impl JailedStream {
                tracing::debug!("Stream ended while jailed, releasing accumulated content");
                // Create a finalization response carrying forward real stream metadata
                let dummy_response = NvCreateChatCompletionStreamResponse {
-                    inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+                    inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                        id: last_stream_id,
                        object: "chat.completion.chunk".to_string(),
                        created: last_stream_created,
@@ -932,7 +932,7 @@ impl JailedStream {
        ChatCompletionMessageToolCallChunk {
            index,
            id: Some(format!("call-{}", Uuid::new_v4())),
-            r#type: Some(dynamo_async_openai::types::ChatCompletionToolType::Function),
+            r#type: Some(dynamo_protocols::types::ChatCompletionToolType::Function),
            function: Some(FunctionCallStream {
                name: Some(name),
                arguments: Some(arguments),

--- a/lib/llm/src/protocols/openai/completions.rs
+++ b/lib/llm/src/protocols/openai/completions.rs
@@ -27,7 +27,7 @@ pub use delta::DeltaGenerator;
 #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateCompletionRequest {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateCompletionRequest,
+    pub inner: dynamo_protocols::types::CreateCompletionRequest,

    #[serde(flatten)]
    pub common: CommonExt,
@@ -47,27 +47,27 @@ pub struct NvCreateCompletionRequest {
 #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateCompletionResponse {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateCompletionResponse,
+    pub inner: dynamo_protocols::types::CreateCompletionResponse,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<serde_json::Value>,
 }

-impl ContentProvider for dynamo_async_openai::types::Choice {
+impl ContentProvider for dynamo_protocols::types::Choice {
    fn content(&self) -> String {
        self.text.clone()
    }
 }

-pub fn prompt_to_string(prompt: &dynamo_async_openai::types::Prompt) -> String {
+pub fn prompt_to_string(prompt: &dynamo_protocols::types::Prompt) -> String {
    match prompt {
-        dynamo_async_openai::types::Prompt::String(s) => s.clone(),
-        dynamo_async_openai::types::Prompt::StringArray(arr) => arr.join(" "), // Join strings with spaces
-        dynamo_async_openai::types::Prompt::IntegerArray(arr) => arr
+        dynamo_protocols::types::Prompt::String(s) => s.clone(),
+        dynamo_protocols::types::Prompt::StringArray(arr) => arr.join(" "), // Join strings with spaces
+        dynamo_protocols::types::Prompt::IntegerArray(arr) => arr
            .iter()
            .map(|&num| num.to_string())
            .collect::<Vec<_>>()
            .join(" "),
-        dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(arr) => arr
+        dynamo_protocols::types::Prompt::ArrayOfIntegerArray(arr) => arr
            .iter()
            .map(|inner| {
                inner
@@ -82,12 +82,12 @@ pub fn prompt_to_string(prompt: &dynamo_async_openai::types::Prompt) -> String {
 }

 /// Get the batch size from a prompt (1 for single prompts, array length for batch prompts)
-pub fn get_prompt_batch_size(prompt: &dynamo_async_openai::types::Prompt) -> usize {
+pub fn get_prompt_batch_size(prompt: &dynamo_protocols::types::Prompt) -> usize {
    match prompt {
-        dynamo_async_openai::types::Prompt::String(_) => 1,
-        dynamo_async_openai::types::Prompt::IntegerArray(_) => 1,
-        dynamo_async_openai::types::Prompt::StringArray(arr) => arr.len(),
-        dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(arr) => arr.len(),
+        dynamo_protocols::types::Prompt::String(_) => 1,
+        dynamo_protocols::types::Prompt::IntegerArray(_) => 1,
+        dynamo_protocols::types::Prompt::StringArray(arr) => arr.len(),
+        dynamo_protocols::types::Prompt::ArrayOfIntegerArray(arr) => arr.len(),
    }
 }

@@ -95,21 +95,21 @@ pub fn get_prompt_batch_size(prompt: &dynamo_async_openai::types::Prompt) -> usi
 /// For single prompts, returns a clone regardless of index.
 /// For batch prompts, returns the prompt at the specified index.
 pub fn extract_single_prompt(
-    prompt: &dynamo_async_openai::types::Prompt,
+    prompt: &dynamo_protocols::types::Prompt,
    index: usize,
-) -> dynamo_async_openai::types::Prompt {
+) -> dynamo_protocols::types::Prompt {
    match prompt {
-        dynamo_async_openai::types::Prompt::String(s) => {
-            dynamo_async_openai::types::Prompt::String(s.clone())
+        dynamo_protocols::types::Prompt::String(s) => {
+            dynamo_protocols::types::Prompt::String(s.clone())
        }
-        dynamo_async_openai::types::Prompt::IntegerArray(arr) => {
-            dynamo_async_openai::types::Prompt::IntegerArray(arr.clone())
+        dynamo_protocols::types::Prompt::IntegerArray(arr) => {
+            dynamo_protocols::types::Prompt::IntegerArray(arr.clone())
        }
-        dynamo_async_openai::types::Prompt::StringArray(arr) => {
-            dynamo_async_openai::types::Prompt::String(arr[index].clone())
+        dynamo_protocols::types::Prompt::StringArray(arr) => {
+            dynamo_protocols::types::Prompt::String(arr[index].clone())
        }
-        dynamo_async_openai::types::Prompt::ArrayOfIntegerArray(arr) => {
-            dynamo_async_openai::types::Prompt::IntegerArray(arr[index].clone())
+        dynamo_protocols::types::Prompt::ArrayOfIntegerArray(arr) => {
+            dynamo_protocols::types::Prompt::IntegerArray(arr[index].clone())
        }
    }
 }
@@ -241,7 +241,7 @@ impl OpenAIStopConditionsProvider for NvCreateCompletionRequest {
    }

    fn get_stop(&self) -> Option<Vec<String>> {
-        use dynamo_async_openai::types::Stop;
+        use dynamo_protocols::types::Stop;
        self.inner.stop.as_ref().map(|s| match s {
            Stop::String(s) => vec![s.clone()],
            Stop::StringArray(arr) => arr.clone(),
@@ -287,10 +287,10 @@ impl ResponseFactory {

    pub fn make_response(
        &self,
-        choice: dynamo_async_openai::types::Choice,
-        usage: Option<dynamo_async_openai::types::CompletionUsage>,
+        choice: dynamo_protocols::types::Choice,
+        usage: Option<dynamo_protocols::types::CompletionUsage>,
    ) -> NvCreateCompletionResponse {
-        let inner = dynamo_async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_protocols::types::CreateCompletionResponse {
            id: self.id.clone(),
            object: self.object.clone(),
            created: self.created,
@@ -361,7 +361,7 @@ impl TryFrom<NvCreateCompletionRequest> for common::CompletionRequest {
    }
 }

-impl TryFrom<common::StreamingCompletionResponse> for dynamo_async_openai::types::Choice {
+impl TryFrom<common::StreamingCompletionResponse> for dynamo_protocols::types::Choice {
    type Error = anyhow::Error;

    fn try_from(response: common::StreamingCompletionResponse) -> Result<Self, Self::Error> {
@@ -382,10 +382,10 @@ impl TryFrom<common::StreamingCompletionResponse> for dynamo_async_openai::types
        // TODO handle aggregating logprobs
        let logprobs = None;

-        let finish_reason: Option<dynamo_async_openai::types::CompletionFinishReason> =
+        let finish_reason: Option<dynamo_protocols::types::CompletionFinishReason> =
            response.delta.finish_reason.map(Into::into);

-        let choice = dynamo_async_openai::types::Choice {
+        let choice = dynamo_protocols::types::Choice {
            text,
            index,
            logprobs,

--- a/lib/llm/src/protocols/openai/completions/aggregator.rs
+++ b/lib/llm/src/protocols/openai/completions/aggregator.rs
@@ -20,7 +20,7 @@ pub struct DeltaAggregator {
    id: String,
    model: String,
    created: u32,
-    usage: Option<dynamo_async_openai::types::CompletionUsage>,
+    usage: Option<dynamo_protocols::types::CompletionUsage>,
    system_fingerprint: Option<String>,
    choices: HashMap<u32, DeltaChoice>,
    error: Option<String>,
@@ -31,7 +31,7 @@ struct DeltaChoice {
    index: u32,
    text: String,
    finish_reason: Option<FinishReason>,
-    logprobs: Option<dynamo_async_openai::types::Logprobs>,
+    logprobs: Option<dynamo_protocols::types::Logprobs>,
 }

 impl Default for DeltaAggregator {
@@ -109,14 +109,14 @@ impl DeltaAggregator {

-                        // Handle CompletionFinishReason -> FinishReason conversation
+                        // Handle CompletionFinishReason -> FinishReason conversion
                        state_choice.finish_reason = match choice.finish_reason {
-                            Some(dynamo_async_openai::types::CompletionFinishReason::Stop) => {
+                            Some(dynamo_protocols::types::CompletionFinishReason::Stop) => {
                                Some(FinishReason::Stop)
                            }
-                            Some(dynamo_async_openai::types::CompletionFinishReason::Length) => {
+                            Some(dynamo_protocols::types::CompletionFinishReason::Length) => {
                                Some(FinishReason::Length)
                            }
                            Some(
-                                dynamo_async_openai::types::CompletionFinishReason::ContentFilter,
+                                dynamo_protocols::types::CompletionFinishReason::ContentFilter,
                            ) => Some(FinishReason::ContentFilter),
                            None => None,
                        };
@@ -124,7 +124,7 @@ impl DeltaAggregator {
                        // Update logprobs
                        if let Some(logprobs) = &choice.logprobs {
                            let state_lps = state_choice.logprobs.get_or_insert(
-                                dynamo_async_openai::types::Logprobs {
+                                dynamo_protocols::types::Logprobs {
                                    tokens: Vec::new(),
                                    token_logprobs: Vec::new(),
                                    top_logprobs: Vec::new(),
@@ -155,12 +155,12 @@ impl DeltaAggregator {
        let mut choices: Vec<_> = aggregator
            .choices
            .into_values()
-            .map(dynamo_async_openai::types::Choice::from)
+            .map(dynamo_protocols::types::Choice::from)
            .collect();

        choices.sort_by(|a, b| a.index.cmp(&b.index));

-        let inner = dynamo_async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_protocols::types::CreateCompletionResponse {
            id: aggregator.id,
            created: aggregator.created,
            usage: aggregator.usage,
@@ -179,11 +179,11 @@ impl DeltaAggregator {
    }
 }

-impl From<DeltaChoice> for dynamo_async_openai::types::Choice {
+impl From<DeltaChoice> for dynamo_protocols::types::Choice {
    fn from(delta: DeltaChoice) -> Self {
        let finish_reason = delta.finish_reason.map(Into::into);

-        dynamo_async_openai::types::Choice {
+        dynamo_protocols::types::Choice {
            index: delta.index,
            text: delta.text,
            finish_reason,
@@ -231,11 +231,11 @@ mod tests {
            .and_then(|s| FinishReason::from_str(s).ok())
            .map(Into::into);

-        let logprobs = logprob.map(|lp| dynamo_async_openai::types::Logprobs {
+        let logprobs = logprob.map(|lp| dynamo_protocols::types::Logprobs {
            tokens: vec![text.to_string()],
            token_logprobs: vec![Some(lp)],
            top_logprobs: vec![
-                serde_json::to_value(dynamo_async_openai::types::TopLogprobs {
+                serde_json::to_value(dynamo_protocols::types::TopLogprobs {
                    token: text.to_string(),
                    logprob: lp,
                    bytes: None,
@@ -245,13 +245,13 @@ mod tests {
            text_offset: vec![0],
        });

-        let inner = dynamo_async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_protocols::types::CreateCompletionResponse {
            id: "test_id".to_string(),
            model: "meta/llama-3.1-8b".to_string(),
            created: 1234567890,
            usage: None,
            system_fingerprint: None,
-            choices: vec![dynamo_async_openai::types::Choice {
+            choices: vec![dynamo_protocols::types::Choice {
                index,
                text: text.to_string(),
                finish_reason,
@@ -319,11 +319,11 @@ mod tests {
        assert_eq!(choice.text, "Hello,".to_string());
        assert_eq!(
            choice.finish_reason,
-            Some(dynamo_async_openai::types::CompletionFinishReason::Length)
+            Some(dynamo_protocols::types::CompletionFinishReason::Length)
        );
        assert_eq!(
            choice.finish_reason,
-            Some(dynamo_async_openai::types::CompletionFinishReason::Length)
+            Some(dynamo_protocols::types::CompletionFinishReason::Length)
        );
        assert!(choice.logprobs.is_none());
    }
@@ -355,7 +355,7 @@ mod tests {
        assert_eq!(choice.text, "Hello, world!".to_string());
        assert_eq!(
            choice.finish_reason,
-            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_protocols::types::CompletionFinishReason::Stop)
        );
        assert_eq!(choice.logprobs.as_ref().unwrap().tokens.len(), 2);
        assert_eq!(
@@ -367,23 +367,23 @@ mod tests {
    #[tokio::test]
    async fn test_multiple_choices() {
        // Create a delta with multiple choices
-        let inner = dynamo_async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_protocols::types::CreateCompletionResponse {
            id: "test_id".to_string(),
            model: "meta/llama-3.1-8b".to_string(),
            created: 1234567890,
            usage: None,
            system_fingerprint: None,
            choices: vec![
-                dynamo_async_openai::types::Choice {
+                dynamo_protocols::types::Choice {
                    index: 0,
                    text: "Choice 0".to_string(),
-                    finish_reason: Some(dynamo_async_openai::types::CompletionFinishReason::Stop),
+                    finish_reason: Some(dynamo_protocols::types::CompletionFinishReason::Stop),
                    logprobs: None,
                },
-                dynamo_async_openai::types::Choice {
+                dynamo_protocols::types::Choice {
                    index: 1,
                    text: "Choice 1".to_string(),
-                    finish_reason: Some(dynamo_async_openai::types::CompletionFinishReason::Stop),
+                    finish_reason: Some(dynamo_protocols::types::CompletionFinishReason::Stop),
                    logprobs: None,
                },
            ],
@@ -418,11 +418,11 @@ mod tests {
        assert_eq!(choice0.text, "Choice 0".to_string());
        assert_eq!(
            choice0.finish_reason,
-            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_protocols::types::CompletionFinishReason::Stop)
        );
        assert_eq!(
            choice0.finish_reason,
-            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_protocols::types::CompletionFinishReason::Stop)
        );

        let choice1 = &response.inner.choices[1];
@@ -430,11 +430,11 @@ mod tests {
        assert_eq!(choice1.text, "Choice 1".to_string());
        assert_eq!(
            choice1.finish_reason,
-            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_protocols::types::CompletionFinishReason::Stop)
        );
        assert_eq!(
            choice1.finish_reason,
-            Some(dynamo_async_openai::types::CompletionFinishReason::Stop)
+            Some(dynamo_protocols::types::CompletionFinishReason::Stop)
        );
    }
 }
--- a/lib/llm/src/protocols/openai/completions/delta.rs
+++ b/lib/llm/src/protocols/openai/completions/delta.rs
@@ -31,7 +31,7 @@ impl NvCreateCompletionRequest {
            // For non-streaming requests (stream=false), enable usage by default
            if self.inner.stream_options.is_none() {
                self.inner.stream_options =
-                    Some(dynamo_async_openai::types::ChatCompletionStreamOptions {
+                    Some(dynamo_protocols::types::ChatCompletionStreamOptions {
                        include_usage: true,
                        continuous_usage_stats: false,
                    });
@@ -95,7 +95,7 @@ pub struct DeltaGenerator {
    created: u32,
    model: String,
    system_fingerprint: Option<String>,
-    usage: dynamo_async_openai::types::CompletionUsage,
+    usage: dynamo_protocols::types::CompletionUsage,
    options: DeltaGeneratorOptions,
    tracker: Option<Arc<RequestTracker>>,
 }
@@ -113,7 +113,7 @@ impl DeltaGenerator {

        // Previously, our home-rolled CompletionUsage impl'd Default
        // PR !387 - https://github.com/64bit/async-openai/pull/387
-        let usage = dynamo_async_openai::types::CompletionUsage {
+        let usage = dynamo_protocols::types::CompletionUsage {
            completion_tokens: 0,
            prompt_tokens: 0,
            total_tokens: 0,
@@ -154,7 +154,7 @@ impl DeltaGenerator {
        token_ids: Vec<TokenIdType>,
        logprobs: Option<common::llm_backend::LogProbs>,
        top_logprobs: Option<common::llm_backend::TopLogprobs>,
-    ) -> Option<dynamo_async_openai::types::Logprobs> {
+    ) -> Option<dynamo_protocols::types::Logprobs> {
        if !self.options.enable_logprobs || logprobs.is_none() {
            return None;
        }
@@ -181,7 +181,7 @@ impl DeltaGenerator {
                .collect()
        });

-        Some(dynamo_async_openai::types::Logprobs {
+        Some(dynamo_protocols::types::Logprobs {
            tokens: toks.iter().map(|(t, _)| t.clone()).collect(),
            token_logprobs: tok_lps.into_iter().map(Some).collect(),
            text_offset: vec![],
@@ -193,21 +193,21 @@ impl DeltaGenerator {
        &self,
        index: u32,
        text: Option<String>,
-        finish_reason: Option<dynamo_async_openai::types::CompletionFinishReason>,
-        logprobs: Option<dynamo_async_openai::types::Logprobs>,
+        finish_reason: Option<dynamo_protocols::types::CompletionFinishReason>,
+        logprobs: Option<dynamo_protocols::types::Logprobs>,
    ) -> NvCreateCompletionResponse {
        // todo - update for tool calling

        // According to OpenAI spec: when stream_options.include_usage is true,
        // all intermediate chunks should have usage: null
        // The final usage chunk will be sent separately with empty choices
-        let inner = dynamo_async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_protocols::types::CreateCompletionResponse {
            id: self.id.clone(),
            object: self.object.clone(),
            created: self.created,
            model: self.model.clone(),
            system_fingerprint: self.system_fingerprint.clone(),
-            choices: vec![dynamo_async_openai::types::Choice {
+            choices: vec![dynamo_protocols::types::Choice {
                text: text.unwrap_or_default(),
                index,
                finish_reason,
@@ -231,7 +231,7 @@ impl DeltaGenerator {
    pub fn create_usage_chunk(&self) -> NvCreateCompletionResponse {
        let usage = self.get_usage();

-        let inner = dynamo_async_openai::types::CreateCompletionResponse {
+        let inner = dynamo_protocols::types::CreateCompletionResponse {
            id: self.id.clone(),
            object: self.object.clone(),
            created: self.created,
@@ -254,7 +254,7 @@ impl DeltaGenerator {
        self.options.continuous_usage_stats
    }

-    pub fn get_usage(&self) -> dynamo_async_openai::types::CompletionUsage {
+    pub fn get_usage(&self) -> dynamo_protocols::types::CompletionUsage {
        let mut usage = self.usage.clone();
        usage.total_tokens = usage.prompt_tokens.saturating_add(usage.completion_tokens);
        usage
@@ -377,7 +377,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
        DeltaGenerator::is_continuous_usage_enabled(self)
    }

-    fn get_usage(&self) -> dynamo_async_openai::types::CompletionUsage {
+    fn get_usage(&self) -> dynamo_protocols::types::CompletionUsage {
        DeltaGenerator::get_usage(self)
    }


--- a/lib/llm/src/protocols/openai/embeddings.rs
+++ b/lib/llm/src/protocols/openai/embeddings.rs
@@ -15,7 +15,7 @@ pub use nvext::{NvExt, NvExtProvider};
 #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateEmbeddingRequest {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateEmbeddingRequest,
+    pub inner: dynamo_protocols::types::CreateEmbeddingRequest,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
@@ -30,17 +30,17 @@ pub struct NvCreateEmbeddingRequest {
 #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateEmbeddingResponse {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateEmbeddingResponse,
+    pub inner: dynamo_protocols::types::CreateEmbeddingResponse,
 }

 impl NvCreateEmbeddingResponse {
    pub fn empty() -> Self {
        Self {
-            inner: dynamo_async_openai::types::CreateEmbeddingResponse {
+            inner: dynamo_protocols::types::CreateEmbeddingResponse {
                object: "list".to_string(),
                model: "embedding".to_string(),
                data: vec![],
-                usage: dynamo_async_openai::types::EmbeddingUsage {
+                usage: dynamo_protocols::types::EmbeddingUsage {
                    prompt_tokens: 0,
                    total_tokens: 0,
                },

--- a/lib/llm/src/protocols/openai/embeddings/aggregator.rs
+++ b/lib/llm/src/protocols/openai/embeddings/aggregator.rs
@@ -133,16 +133,16 @@ mod tests {
    use futures::stream;

    fn create_test_embedding_response(
-        embeddings: Vec<dynamo_async_openai::types::Embedding>,
+        embeddings: Vec<dynamo_protocols::types::Embedding>,
        prompt_tokens: u32,
        total_tokens: u32,
    ) -> Annotated<NvCreateEmbeddingResponse> {
        let response = NvCreateEmbeddingResponse {
-            inner: dynamo_async_openai::types::CreateEmbeddingResponse {
+            inner: dynamo_protocols::types::CreateEmbeddingResponse {
                object: "list".to_string(),
                model: "test-model".to_string(),
                data: embeddings,
-                usage: dynamo_async_openai::types::EmbeddingUsage {
+                usage: dynamo_protocols::types::EmbeddingUsage {
                    prompt_tokens,
                    total_tokens,
                },
@@ -166,7 +166,7 @@ mod tests {

    #[tokio::test]
    async fn test_single_embedding() {
-        let embedding = dynamo_async_openai::types::Embedding {
+        let embedding = dynamo_protocols::types::Embedding {
            index: 0,
            object: "embedding".to_string(),
            embedding: vec![0.1, 0.2, 0.3],
@@ -188,13 +188,13 @@ mod tests {

    #[tokio::test]
    async fn test_multiple_embeddings() {
-        let embedding1 = dynamo_async_openai::types::Embedding {
+        let embedding1 = dynamo_protocols::types::Embedding {
            index: 0,
            object: "embedding".to_string(),
            embedding: vec![0.1, 0.2, 0.3],
        };

-        let embedding2 = dynamo_async_openai::types::Embedding {
+        let embedding2 = dynamo_protocols::types::Embedding {
            index: 1,
            object: "embedding".to_string(),
            embedding: vec![0.4, 0.5, 0.6],

--- a/lib/llm/src/protocols/openai/images.rs
+++ b/lib/llm/src/protocols/openai/images.rs
@@ -14,7 +14,7 @@ pub use nvext::{NvExt, NvExtProvider};
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateImageRequest {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::CreateImageRequest,
+    pub inner: dynamo_protocols::types::CreateImageRequest,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
@@ -28,13 +28,13 @@ pub struct NvCreateImageRequest {
 #[derive(Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvImagesResponse {
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::ImagesResponse,
+    pub inner: dynamo_protocols::types::ImagesResponse,
 }

 impl NvImagesResponse {
    pub fn empty() -> Self {
        Self {
-            inner: dynamo_async_openai::types::ImagesResponse {
+            inner: dynamo_protocols::types::ImagesResponse {
                created: 0,
                data: vec![],
            },

--- a/lib/llm/src/protocols/openai/nvext.rs
+++ b/lib/llm/src/protocols/openai/nvext.rs
@@ -216,7 +216,7 @@ pub struct AgentHints {

-// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
+// Re-export CacheControl types from dynamo-protocols where they are canonically defined
 // alongside the Anthropic protocol types they originate from.
-pub use dynamo_async_openai::types::anthropic::{CacheControl, CacheControlType};
+pub use dynamo_protocols::types::anthropic::{CacheControl, CacheControlType};

 impl Default for NvExt {
    fn default() -> Self {

--- a/lib/llm/src/protocols/openai/responses/mod.rs
+++ b/lib/llm/src/protocols/openai/responses/mod.rs
@@ -3,7 +3,7 @@

 pub mod stream_converter;

-use dynamo_async_openai::types::responses::{
+use dynamo_protocols::types::responses::{
    AssistantRole, FunctionCallOutput, FunctionToolCall, IncludeEnum, InputContent, InputItem,
    InputParam, InputRole, InputTokenDetails, Instructions, Item, MessageItem, OutputItem,
    OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent, OutputTokenDetails,
@@ -11,7 +11,7 @@ use dynamo_async_openai::types::responses::{
    ServiceTier, Status, Summary, SummaryPart, TextResponseFormatConfiguration, Tool,
    ToolChoiceOptions, ToolChoiceParam, Truncation,
 };
-use dynamo_async_openai::types::{
+use dynamo_protocols::types::{
    ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice,
    ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent,
    ChatCompletionRequestMessage, ChatCompletionRequestMessageContentPartImage,
@@ -38,7 +38,7 @@ use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
 pub struct NvCreateResponse {
    /// Flattened CreateResponse fields (model, input, temperature, etc.)
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::responses::CreateResponse,
+    pub inner: dynamo_protocols::types::responses::CreateResponse,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
@@ -48,7 +48,7 @@ pub struct NvCreateResponse {
 pub struct NvResponse {
    /// Flattened Response fields.
    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::responses::Response,
+    pub inner: dynamo_protocols::types::responses::Response,

    /// NVIDIA extension field for response metadata (worker IDs, etc.)
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -144,12 +144,12 @@ impl OpenAIStopConditionsProvider for NvCreateResponse {

 /// Convert a Responses API ImageDetail to the Chat Completions ImageDetail.
 fn convert_image_detail(
-    detail: &dynamo_async_openai::types::responses::ImageDetail,
+    detail: &dynamo_protocols::types::responses::ImageDetail,
 ) -> ChatImageDetail {
    match detail {
-        dynamo_async_openai::types::responses::ImageDetail::Auto => ChatImageDetail::Auto,
-        dynamo_async_openai::types::responses::ImageDetail::Low => ChatImageDetail::Low,
-        dynamo_async_openai::types::responses::ImageDetail::High => ChatImageDetail::High,
+        dynamo_protocols::types::responses::ImageDetail::Auto => ChatImageDetail::Auto,
+        dynamo_protocols::types::responses::ImageDetail::Low => ChatImageDetail::Low,
+        dynamo_protocols::types::responses::ImageDetail::High => ChatImageDetail::High,
    }
 }

@@ -316,7 +316,7 @@ fn convert_input_items_to_messages(
                            tool_calls: Some(vec![ChatCompletionMessageToolCall {
                                id: fc.call_id.clone(),
                                r#type: ChatCompletionToolType::Function,
-                                function: dynamo_async_openai::types::FunctionCall {
+                                function: dynamo_protocols::types::FunctionCall {
                                    name: fc.name.clone(),
                                    arguments: fc.arguments.clone(),
                                },
@@ -349,10 +349,10 @@ fn convert_input_items_to_messages(
            InputItem::EasyMessage(easy) => {
                // Handle easy input messages based on role
                let content_text = match &easy.content {
-                    dynamo_async_openai::types::responses::EasyInputContent::Text(text) => {
+                    dynamo_protocols::types::responses::EasyInputContent::Text(text) => {
                        text.clone()
                    }
-                    dynamo_async_openai::types::responses::EasyInputContent::ContentList(parts) => {
+                    dynamo_protocols::types::responses::EasyInputContent::ContentList(parts) => {
                        convert_input_content_to_text(parts)
                    }
                };
@@ -737,10 +737,8 @@ pub fn chat_completion_to_response(
        // Handle text content -- also parse <tool_call> blocks from models
        // that emit tool calls as text (e.g. Qwen3)
        let content_text = match choice.message.content {
-            Some(dynamo_async_openai::types::ChatCompletionMessageContent::Text(text)) => {
-                Some(text)
-            }
-            Some(dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_)) => {
+            Some(dynamo_protocols::types::ChatCompletionMessageContent::Text(text)) => Some(text),
+            Some(dynamo_protocols::types::ChatCompletionMessageContent::Parts(_)) => {
                tracing::warn!(
                    "Multimodal content in responses API not yet supported, using placeholder"
                );
@@ -880,12 +878,12 @@ pub fn chat_completion_to_response(

 #[cfg(test)]
 mod tests {
-    use dynamo_async_openai::types::responses::{
+    use dynamo_protocols::types::responses::{
        CreateResponse, FunctionCallOutput, FunctionCallOutputItemParam, FunctionTool,
        FunctionToolCall, ImageDetail, InputContent, InputImageContent, InputItem, InputMessage,
        InputParam, InputRole, InputTextContent, Item, MessageItem, Tool,
    };
-    use dynamo_async_openai::types::{
+    use dynamo_protocols::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessageContent,
    };

@@ -1167,19 +1165,17 @@ mod tests {
    fn test_into_nvresponse_from_chat_response() {
        let now = 1_726_000_000;
        let chat_resp = NvCreateChatCompletionResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                id: "chatcmpl-xyz".into(),
-                choices: vec![dynamo_async_openai::types::ChatChoice {
+                choices: vec![dynamo_protocols::types::ChatChoice {
                    index: 0,
-                    message: dynamo_async_openai::types::ChatCompletionResponseMessage {
-                        content: Some(
-                            dynamo_async_openai::types::ChatCompletionMessageContent::Text(
+                    message: dynamo_protocols::types::ChatCompletionResponseMessage {
+                        content: Some(dynamo_protocols::types::ChatCompletionMessageContent::Text(
                            "This is a reply".to_string(),
-                            ),
-                        ),
+                        )),
                        refusal: None,
                        tool_calls: None,
-                        role: dynamo_async_openai::types::Role::Assistant,
+                        role: dynamo_protocols::types::Role::Assistant,
                        function_call: None,
                        audio: None,
                        reasoning_content: None,
@@ -1225,22 +1221,22 @@ mod tests {
    fn test_response_with_tool_calls() {
        let now = 1_726_000_000;
        let chat_resp = NvCreateChatCompletionResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                id: "chatcmpl-xyz".into(),
-                choices: vec![dynamo_async_openai::types::ChatChoice {
+                choices: vec![dynamo_protocols::types::ChatChoice {
                    index: 0,
-                    message: dynamo_async_openai::types::ChatCompletionResponseMessage {
+                    message: dynamo_protocols::types::ChatCompletionResponseMessage {
                        content: None,
                        refusal: None,
                        tool_calls: Some(vec![ChatCompletionMessageToolCall {
                            id: "call_abc".into(),
                            r#type: ChatCompletionToolType::Function,
-                            function: dynamo_async_openai::types::FunctionCall {
+                            function: dynamo_protocols::types::FunctionCall {
                                name: "get_weather".into(),
                                arguments: r#"{"location":"SF"}"#.into(),
                            },
                        }]),
-                        role: dynamo_async_openai::types::Role::Assistant,
+                        role: dynamo_protocols::types::Role::Assistant,
                        function_call: None,
                        audio: None,
                        reasoning_content: None,
@@ -1335,8 +1331,8 @@ thinking

    #[test]
    fn test_reasoning_effort_mapped_to_chat_completion() {
-        use dynamo_async_openai::types::ReasoningEffort;
-        use dynamo_async_openai::types::responses::Reasoning;
+        use dynamo_protocols::types::ReasoningEffort;
+        use dynamo_protocols::types::responses::Reasoning;

        let mut req = make_response_with_input("think hard");
        req.inner.reasoning = Some(Reasoning {
@@ -1357,8 +1353,8 @@ thinking

    #[test]
    fn test_text_format_json_object_mapped() {
-        use dynamo_async_openai::types::ResponseFormat;
-        use dynamo_async_openai::types::responses::{
+        use dynamo_protocols::types::ResponseFormat;
+        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };

@@ -1374,10 +1370,10 @@ thinking

    #[test]
    fn test_text_format_json_schema_mapped() {
-        use dynamo_async_openai::types::responses::{
+        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };
-        use dynamo_async_openai::types::{ResponseFormat, ResponseFormatJsonSchema};
+        use dynamo_protocols::types::{ResponseFormat, ResponseFormatJsonSchema};

        let schema = ResponseFormatJsonSchema {
            name: "city".into(),
@@ -1402,7 +1398,7 @@ thinking

    #[test]
    fn test_text_format_plain_text_leaves_response_format_none() {
-        use dynamo_async_openai::types::responses::{
+        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };

@@ -1418,8 +1414,8 @@ thinking

    #[test]
    fn test_service_tier_mapped_to_chat_completion() {
-        use dynamo_async_openai::types::ServiceTier as ChatServiceTier;
-        use dynamo_async_openai::types::responses::ServiceTier as RespServiceTier;
+        use dynamo_protocols::types::ServiceTier as ChatServiceTier;
+        use dynamo_protocols::types::responses::ServiceTier as RespServiceTier;

        let mut req = make_response_with_input("priority");
        req.inner.service_tier = Some(RespServiceTier::Priority);
@@ -1430,8 +1426,8 @@ thinking

    #[test]
    fn test_response_echoes_reasoning() {
-        use dynamo_async_openai::types::ReasoningEffort;
-        use dynamo_async_openai::types::responses::Reasoning;
+        use dynamo_protocols::types::ReasoningEffort;
+        use dynamo_protocols::types::responses::Reasoning;

        let params = ResponseParams {
            reasoning: Some(Reasoning {
@@ -1442,7 +1438,7 @@ thinking
        };

        let chat_resp = NvCreateChatCompletionResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![],
                created: 0,
                id: "test".into(),
@@ -1462,7 +1458,7 @@ thinking

    #[test]
    fn test_response_echoes_text_format() {
-        use dynamo_async_openai::types::responses::{
+        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };

@@ -1475,7 +1471,7 @@ thinking
        };

        let chat_resp = NvCreateChatCompletionResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![],
                created: 0,
                id: "test".into(),
@@ -1495,7 +1491,7 @@ thinking

    #[test]
    fn test_response_echoes_service_tier() {
-        use dynamo_async_openai::types::responses::ServiceTier;
+        use dynamo_protocols::types::responses::ServiceTier;

        let params = ResponseParams {
            service_tier: Some(ServiceTier::Flex),
@@ -1503,7 +1499,7 @@ thinking
        };

        let chat_resp = NvCreateChatCompletionResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![],
                created: 0,
                id: "test".into(),
@@ -1522,7 +1518,7 @@ thinking

    #[test]
    fn test_output_message_deserializes_without_id_and_status() {
-        use dynamo_async_openai::types::responses::{InputItem, Item, MessageItem};
+        use dynamo_protocols::types::responses::{InputItem, Item, MessageItem};

        let json = serde_json::json!({
            "role": "assistant",
@@ -1544,7 +1540,7 @@ thinking

    #[test]
    fn test_output_message_with_id_and_status_still_works() {
-        use dynamo_async_openai::types::responses::{InputItem, Item, MessageItem, OutputStatus};
+        use dynamo_protocols::types::responses::{InputItem, Item, MessageItem, OutputStatus};

        let json = serde_json::json!({
            "role": "assistant",
@@ -1567,17 +1563,17 @@ thinking
    // ── PR2: include filtering + truncation echo-back tests ──

    fn make_chat_resp_with_text(text: &str) -> NvCreateChatCompletionResponse {
-        use dynamo_async_openai::types::{
+        use dynamo_protocols::types::{
            ChatChoice, ChatCompletionMessageContent, ChatCompletionResponseMessage, FinishReason,
        };
        NvCreateChatCompletionResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![ChatChoice {
                    index: 0,
                    #[allow(deprecated)]
                    message: ChatCompletionResponseMessage {
                        content: Some(ChatCompletionMessageContent::Text(text.into())),
-                        role: dynamo_async_openai::types::Role::Assistant,
+                        role: dynamo_protocols::types::Role::Assistant,
                        tool_calls: None,
                        refusal: None,
                        reasoning_content: None,
@@ -1622,7 +1618,7 @@ thinking

    #[test]
    fn test_include_logprobs_kept_when_requested() {
-        use dynamo_async_openai::types::responses::IncludeEnum;
+        use dynamo_protocols::types::responses::IncludeEnum;

        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams {
@@ -1650,7 +1646,7 @@ thinking

    #[test]
    fn test_truncation_auto_echoed_back() {
-        use dynamo_async_openai::types::responses::Truncation;
+        use dynamo_protocols::types::responses::Truncation;

        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams {

--- a/lib/llm/src/protocols/openai/responses/stream_converter.rs
+++ b/lib/llm/src/protocols/openai/responses/stream_converter.rs
@@ -12,7 +12,7 @@
 use std::time::{SystemTime, UNIX_EPOCH};

 use axum::response::sse::Event;
-use dynamo_async_openai::types::responses::{
+use dynamo_protocols::types::responses::{
    AssistantRole, FunctionToolCall, InputTokenDetails, Instructions, OutputContent, OutputItem,
    OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent, OutputTokenDetails,
    Response, ResponseCompletedEvent, ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
@@ -24,7 +24,7 @@ use dynamo_async_openai::types::responses::{
 };
 use uuid::Uuid;

-use dynamo_async_openai::types::ChatCompletionMessageContent;
+use dynamo_protocols::types::ChatCompletionMessageContent;

 use super::ResponseParams;
 use crate::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
@@ -673,7 +673,7 @@ fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
 mod tests {
    use super::*;
    use crate::protocols::unified::ResponsesContext;
-    use dynamo_async_openai::types::{
+    use dynamo_protocols::types::{
        ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionMessageToolCallChunk,
        ChatCompletionStreamResponseDelta, ChatCompletionToolType, FunctionCallStream,
    };
@@ -704,7 +704,7 @@ mod tests {
    ) -> NvCreateChatCompletionStreamResponse {
        #[allow(deprecated)]
        NvCreateChatCompletionStreamResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                id: "chat-1".into(),
                choices: vec![ChatChoiceStream {
                    index: 0,
@@ -742,7 +742,7 @@ mod tests {
    fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
        #[allow(deprecated)]
        NvCreateChatCompletionStreamResponse {
-            inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
+            inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
                id: "chat-1".into(),
                choices: vec![ChatChoiceStream {
                    index: 0,