// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 pub mod stream_converter; use std::collections::HashMap; use dynamo_protocols::types::responses::{ AssistantRole, FunctionCallOutput, FunctionToolCall, IncludeEnum, InputContent, InputItem, InputOutputMessageContent, InputParam, InputRole, InputTokenDetails, Instructions, Item, MessageItem, OutputItem, OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent, OutputTokenDetails, PromptCacheRetention, Reasoning, ReasoningItem, Response, ResponseTextParam, ResponseUsage, Role as ResponseRole, ServiceTier, Status, SummaryPart, SummaryTextContent, TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam, Truncation, }; use dynamo_protocols::types::{ ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestMessage, ChatCompletionRequestMessageContentPartImage, ChatCompletionRequestMessageContentPartText, ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent, ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent, ChatCompletionRequestUserMessage, ChatCompletionRequestUserMessageContent, ChatCompletionRequestUserMessageContentPart, ChatCompletionTool, ChatCompletionToolChoiceOption, ChatCompletionToolType, CreateChatCompletionRequest, FunctionName, FunctionObject, FunctionType, ImageDetail as ChatImageDetail, ImageUrl, ReasoningContent, ResponseFormat, ServiceTier as ChatServiceTier, }; use dynamo_runtime::protocols::annotated::AnnotationsProvider; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; use uuid::Uuid; use validator::Validate; use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionResponse}; use super::nvext::{NvExt, NvExtProvider}; use super::{OpenAISamplingOptionsProvider, 
OpenAIStopConditionsProvider};

/// Request body for `POST /v1/responses`. Uses a plain
/// `#[derive(Deserialize)]` — the relaxed input shapes are handled by
/// Dynamo-owning the input chain in `dynamo_protocols::types::responses`
/// (see that crate's `CLAUDE.md`), not by a custom pre-parse JSON patcher.
/// An earlier iteration of this type carried a hand-written `impl Deserialize`
/// that walked `serde_json::Value` to inject synthetic defaults for missing
/// `id` / `status` / `annotations`; that was replaced by typed ownership for
/// correctness and to avoid the double-deserialize cost.
#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateResponse {
    /// Flattened CreateResponse fields (model, input, temperature, etc.).
    ///
    /// `CreateResponse` and its `input` chain (`InputParam`, `InputItem`,
    /// `Item`, `MessageItem`, `InputOutputMessage`, `InputOutputMessageContent`,
    /// `InputOutputTextContent`) are Dynamo-owned in `dynamo-protocols`. They
    /// mirror upstream async-openai but accept the relaxed shapes real clients
    /// emit (optional `id` / `status` / `content` on assistant messages,
    /// optional `annotations` on `output_text` parts). See
    /// `dynamo_protocols::types::responses` for the full rationale.
    #[serde(flatten)]
    #[schema(value_type = Object)]
    pub inner: dynamo_protocols::types::responses::CreateResponse,

    /// NVIDIA extension block; omitted from the serialized request when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
}

/// Response body for the Responses API: the flattened `Response` plus the
/// extra top-level fields that are added when the value is serialized.
#[derive(ToSchema, Deserialize, Validate, Debug, Clone)]
pub struct NvResponse {
    /// Flattened Response fields (includes upstream + extended spec fields).
    #[serde(flatten)]
    #[schema(value_type = Object)]
    pub inner: dynamo_protocols::types::responses::Response,

    /// NVIDIA extension field for response metadata (worker IDs, etc.)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<serde_json::Value>,

    /// OpenResponses spec requires these as non-null scalars on every response,
    /// but async-openai's `Response` doesn't model them. Populated from the
    /// originating request. Surfaced during serialization (see `Serialize`
    /// impl below); not persisted as top-level fields on the inner struct.
    #[serde(default)]
    pub presence_penalty: f32,
    #[serde(default)]
    pub frequency_penalty: f32,
    #[serde(default)]
    pub store: bool,
}

/// Patch an already-serialized `Response` JSON object to match the
/// OpenResponses spec. Applied both to one-shot `NvResponse` serialization
/// and to every `Response` embedded inside a streaming event payload.
///
/// Reconciles two spec gaps between upstream async-openai's `Response` and
/// the OpenResponses spec:
///
/// 1. Fields the spec requires as `T | null` that upstream marks
///    `Option<T>` with `skip_serializing_if = Option::is_none`. These are
///    silently dropped when None; the spec wants them present as null.
/// 2. Fields the spec requires (`presence_penalty`, `frequency_penalty`,
///    `store`) that are absent from upstream `Response` entirely.
///
/// Rather than fork the upstream output chain (which would cascade into
/// `OutputItem`, streaming events, and a long tail of sub-types, per
/// `lib/protocols/CLAUDE.md`), we patch the serialized JSON. Adds a
/// single `serde_json::to_value` round-trip per response, which is
/// negligible next to tokenization/inference cost.
pub(crate) fn patch_response_for_spec(
    obj: &mut serde_json::Map<String, serde_json::Value>,
    presence_penalty: f32,
    frequency_penalty: f32,
    store: bool,
) {
    // Only fill in keys that are missing: an existing value must never be
    // overwritten with null.
    for key in dynamo_protocols::types::responses::SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS {
        obj.entry(*key).or_insert(serde_json::Value::Null);
    }

    // These three are always written, replacing any value already present.
    obj.insert(
        "presence_penalty".into(),
        serde_json::json!(presence_penalty),
    );
    obj.insert(
        "frequency_penalty".into(),
        serde_json::json!(frequency_penalty),
    );
    obj.insert("store".into(), serde_json::json!(store));
}

/// Serializes `inner` to a JSON value, patches it with
/// `patch_response_for_spec`, appends `nvext` when present, and forwards the
/// result to the caller's serializer.
impl Serialize for NvResponse {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let mut value = serde_json::to_value(&self.inner).map_err(serde::ser::Error::custom)?;

        // If `inner` did not serialize to a JSON object there is nothing to
        // patch; emit it unchanged.
        let serde_json::Value::Object(obj) = &mut value else {
            return value.serialize(serializer);
        };

        patch_response_for_spec(
            obj,
            self.presence_penalty,
            self.frequency_penalty,
            self.store,
        );
        if let Some(nvext) = &self.nvext {
            obj.insert("nvext".into(), nvext.clone());
        }

        value.serialize(serializer)
    }
}

/// Implements `NvExtProvider` for `NvCreateResponse`,
/// providing access to NVIDIA-specific extensions.
impl NvExtProvider for NvCreateResponse {
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }

    /// Always `None` for this request type.
    fn raw_prompt(&self) -> Option<String> {
        None
    }
}

/// Implements `AnnotationsProvider` for `NvCreateResponse`,
/// enabling retrieval and management of request annotations.
impl AnnotationsProvider for NvCreateResponse { fn annotations(&self) -> Option> { self.nvext .as_ref() .and_then(|nvext| nvext.annotations.clone()) } fn has_annotation(&self, annotation: &str) -> bool { self.nvext .as_ref() .and_then(|nvext| nvext.annotations.as_ref()) .map(|annotations| annotations.contains(&annotation.to_string())) .unwrap_or(false) } } impl OpenAISamplingOptionsProvider for NvCreateResponse { fn get_temperature(&self) -> Option { self.inner.temperature } fn get_top_p(&self) -> Option { self.inner.top_p } fn get_frequency_penalty(&self) -> Option { None } fn get_presence_penalty(&self) -> Option { None } fn nvext(&self) -> Option<&NvExt> { self.nvext.as_ref() } fn get_seed(&self) -> Option { None } fn get_n(&self) -> Option { None } fn get_best_of(&self) -> Option { None } } impl OpenAIStopConditionsProvider for NvCreateResponse { #[allow(deprecated)] fn get_max_tokens(&self) -> Option { self.inner.max_output_tokens } fn get_min_tokens(&self) -> Option { None } fn get_stop(&self) -> Option> { None } fn nvext(&self) -> Option<&NvExt> { self.nvext.as_ref() } } // --------------------------------------------------------------------------- // Responses API -> Chat Completions conversion // --------------------------------------------------------------------------- /// Convert a Responses API ImageDetail to the Chat Completions ImageDetail. /// The responses module re-exports an `ImageDetail` from the upstream async-openai /// crate which is distinct from `dynamo_protocols::types::ImageDetail` (chat). /// We bridge via serde to avoid direct cross-crate type dependencies. fn convert_image_detail_str(detail: &impl serde::Serialize) -> ChatImageDetail { match serde_json::to_value(detail) .ok() .and_then(|v| v.as_str().map(String::from)) .as_deref() { Some("low") => ChatImageDetail::Low, Some("high") => ChatImageDetail::High, _ => ChatImageDetail::Auto, } } /// Convert a slice of InputContent to ChatCompletionRequestUserMessageContent. 
fn convert_input_content_to_user_content( content: &[InputContent], ) -> Result { // If there's a single InputText, treat as simple text if content.len() == 1 && let InputContent::InputText(t) = &content[0] { return Ok(ChatCompletionRequestUserMessageContent::Text( t.text.clone(), )); } let mut chat_parts = Vec::with_capacity(content.len()); for part in content { match part { InputContent::InputText(t) => { chat_parts.push(ChatCompletionRequestUserMessageContentPart::Text( ChatCompletionRequestMessageContentPartText { text: t.text.clone(), }, )); } InputContent::InputImage(img) => { if img.file_id.is_some() && img.image_url.is_none() { return Err(anyhow::anyhow!( "Image input by file_id is not yet supported" )); } let url_str = img .image_url .as_deref() .ok_or_else(|| anyhow::anyhow!("input_image requires image_url"))?; let url = url::Url::parse(url_str) .map_err(|e| anyhow::anyhow!("Invalid image URL '{}': {}", url_str, e))?; chat_parts.push(ChatCompletionRequestUserMessageContentPart::ImageUrl( ChatCompletionRequestMessageContentPartImage { image_url: ImageUrl { url, detail: Some(convert_image_detail_str(&img.detail)), uuid: None, }, }, )); } // TODO: handle InputVideo / InputAudio when upstream adds them InputContent::InputFile(_) => { return Err(anyhow::anyhow!("File input content is not yet supported")); } } } Ok(ChatCompletionRequestUserMessageContent::Array(chat_parts)) } /// Convert a slice of InputContent to a plain text string (for system/developer/assistant messages). fn convert_input_content_to_text(content: &[InputContent]) -> String { content .iter() .filter_map(|p| match p { InputContent::InputText(t) => Some(t.text.as_str()), _ => None, }) .collect::>() .join("") } /// Counterpart to `convert_input_content_to_text` for upstream's /// `InputContent`. 
Upstream's enum appears inside `FunctionCallOutput::Content` /// and `EasyInputContent::ContentList`, neither of which is Dynamo-owned, so /// payloads deserialized through those paths land as upstream variants. fn convert_upstream_input_content_to_text( content: &[dynamo_protocols::types::responses::UpstreamInputContent], ) -> String { use dynamo_protocols::types::responses::UpstreamInputContent; content .iter() .filter_map(|p| match p { UpstreamInputContent::InputText(t) => Some(t.text.as_str()), _ => None, }) .collect::>() .join("") } /// Accumulator for consecutive assistant-side items (OutputMessage, FunctionCall, /// Reasoning, assistant EasyMessage). Chat Completions represents an assistant /// turn as a single message carrying `content`, `tool_calls`, and /// `reasoning_content`, so we coalesce adjacent assistant-side Responses input /// items before emitting. /// /// `touched` records whether any assistant-side item was seen in the current /// run. Without it, a standalone assistant message with empty text (or only /// Refusal content parts that this converter currently strips) would be lost /// entirely — breaking turn boundaries the model relies on. #[derive(Default)] struct PendingAssistant { content: Option, reasoning_content: Option, tool_calls: Vec, touched: bool, } impl PendingAssistant { fn push_text(&mut self, text: &str) { self.touched = true; if text.is_empty() { return; } match self.content.as_mut() { Some(existing) => existing.push_str(text), None => self.content = Some(text.to_string()), } } /// Route prior-turn reasoning summary text into the pending assistant's /// `reasoning_content`. Codex and the Agents SDK round-trip `Item::Reasoning` /// mid-turn so the model can see its own chain-of-thought as input context. 
fn push_reasoning(&mut self, text: &str) { self.touched = true; if text.is_empty() { return; } match self.reasoning_content.as_mut() { Some(existing) => existing.push_str(text), None => self.reasoning_content = Some(text.to_string()), } } fn push_tool_call(&mut self, call: ChatCompletionMessageToolCall) { self.touched = true; self.tool_calls.push(call); } fn flush_into(self, out: &mut Vec) { if !self.touched { return; } // Content rules: // - real text pushed → emit Some(Text(text)) // - pure tool-call turn (no text, has tool_calls) → emit None, matching // Chat Completions spec's nullable-content contract and the converter's // behavior before the coalescing refactor. // - turn-boundary preservation (assistant item seen but no text, no // tool_calls) → emit Some(Text("")) so adjacent user turns aren't // silently merged. let content = if self.content.is_some() || self.tool_calls.is_empty() { Some( self.content .map(ChatCompletionRequestAssistantMessageContent::Text) .unwrap_or_else(|| { ChatCompletionRequestAssistantMessageContent::Text(String::new()) }), ) } else { None }; out.push(ChatCompletionRequestMessage::Assistant( ChatCompletionRequestAssistantMessage { content, reasoning_content: self.reasoning_content.map(ReasoningContent::Text), refusal: None, name: None, audio: None, tool_calls: if self.tool_calls.is_empty() { None } else { Some(self.tool_calls) }, #[allow(deprecated)] function_call: None, }, )); } } /// Convert InputParam::Items to a Vec of ChatCompletionRequestMessages. 
fn convert_input_items_to_messages( items: &[InputItem], ) -> Result, anyhow::Error> { let mut messages = Vec::with_capacity(items.len()); let mut pending = PendingAssistant::default(); for item in items { match item { InputItem::Item(inner_item) => match inner_item { Item::Message(msg_item) => match msg_item { MessageItem::Input(msg) => { std::mem::take(&mut pending).flush_into(&mut messages); let chat_msg = match msg.role { InputRole::System | InputRole::Developer => { let text = convert_input_content_to_text(&msg.content); ChatCompletionRequestMessage::System( ChatCompletionRequestSystemMessage { content: ChatCompletionRequestSystemMessageContent::Text( text, ), name: None, }, ) } InputRole::User => { let content = convert_input_content_to_user_content(&msg.content)?; ChatCompletionRequestMessage::User( ChatCompletionRequestUserMessage { content, name: None, }, ) } }; messages.push(chat_msg); } MessageItem::Output(out_msg) => { // Fold Refusal parts into the assistant's text content // (same turn-position they appeared in). Upstream // `ChatCompletionRequestAssistantMessage` has a // dedicated `refusal` field, but most chat templates // render only `content`; putting refusal text inline // preserves it across turns without requiring template // awareness of a separate refusal field. 
let text = out_msg .content .iter() .map(|c| match c { InputOutputMessageContent::OutputText(t) => t.text.as_str(), InputOutputMessageContent::Refusal(r) => r.refusal.as_str(), }) .collect::>() .join(""); pending.push_text(&text); } }, Item::FunctionCall(fc) => { pending.push_tool_call(ChatCompletionMessageToolCall { id: fc.call_id.clone(), r#type: FunctionType::Function, function: dynamo_protocols::types::FunctionCall { name: fc.name.clone(), arguments: fc.arguments.clone(), }, }); } Item::FunctionCallOutput(fco) => { std::mem::take(&mut pending).flush_into(&mut messages); let output_text = match &fco.output { FunctionCallOutput::Text(text) => text.clone(), FunctionCallOutput::Content(parts) => { convert_upstream_input_content_to_text(parts) } }; messages.push(ChatCompletionRequestMessage::Tool( ChatCompletionRequestToolMessage { content: ChatCompletionRequestToolMessageContent::Text(output_text), tool_call_id: fco.call_id.clone(), }, )); } Item::Reasoning(r) => { let text = r .summary .iter() .map(|SummaryPart::SummaryText(t)| t.text.as_str()) .collect::>() .join(""); pending.push_reasoning(&text); } other => { // Unknown / unsupported variants (ComputerCall, WebSearchCall, // tool-output items other than FunctionCallOutput, etc.). We do // not have a faithful Chat Completions mapping, but silently // consuming them without flushing would let a following // FunctionCall coalesce with tool_calls from a different // semantic turn. Flush first, then skip. 
tracing::debug!( "Skipping unsupported input item type during conversion: {:?}", std::mem::discriminant(other) ); std::mem::take(&mut pending).flush_into(&mut messages); } }, InputItem::EasyMessage(easy) => { let content_text = match &easy.content { dynamo_protocols::types::responses::EasyInputContent::Text(text) => { text.clone() } dynamo_protocols::types::responses::EasyInputContent::ContentList(parts) => { convert_upstream_input_content_to_text(parts) } }; match easy.role { ResponseRole::System | ResponseRole::Developer => { std::mem::take(&mut pending).flush_into(&mut messages); messages.push(ChatCompletionRequestMessage::System( ChatCompletionRequestSystemMessage { content: ChatCompletionRequestSystemMessageContent::Text( content_text, ), name: None, }, )); } ResponseRole::User => { std::mem::take(&mut pending).flush_into(&mut messages); messages.push(ChatCompletionRequestMessage::User( ChatCompletionRequestUserMessage { content: ChatCompletionRequestUserMessageContent::Text( content_text, ), name: None, }, )); } ResponseRole::Assistant => { pending.push_text(&content_text); } } } InputItem::ItemReference(_) => { // Skip item references } } } pending.flush_into(&mut messages); Ok(messages) } /// Convert Responses API Tool to ChatCompletionTool. fn convert_tools(tools: &[Tool]) -> Vec { tools .iter() .filter_map(|tool| match tool { Tool::Function(f) => Some(ChatCompletionTool { r#type: ChatCompletionToolType::Function, function: FunctionObject { name: f.name.clone(), description: f.description.clone(), parameters: f.parameters.clone(), strict: f.strict, }, }), _ => None, // Only function tools are forwarded to chat completions }) .collect() } /// Convert Responses API ToolChoiceParam to ChatCompletionToolChoiceOption. 
fn convert_tool_choice(tc: &ToolChoiceParam) -> ChatCompletionToolChoiceOption { match tc { ToolChoiceParam::Mode(mode) => match mode { ToolChoiceOptions::None => ChatCompletionToolChoiceOption::None, ToolChoiceOptions::Auto => ChatCompletionToolChoiceOption::Auto, ToolChoiceOptions::Required => ChatCompletionToolChoiceOption::Required, }, ToolChoiceParam::Function(f) => { ChatCompletionToolChoiceOption::Named(ChatCompletionNamedToolChoice { r#type: ChatCompletionToolType::Function, function: FunctionName { name: f.name.clone(), }, }) } ToolChoiceParam::Hosted(_) => { // Hosted tools are not forwarded to chat completions ChatCompletionToolChoiceOption::Auto } _ => { // Other tool choice types (AllowedTools, Mcp, Custom, etc.) default to auto ChatCompletionToolChoiceOption::Auto } } } /// Convert Responses API `text.format` to Chat Completions `response_format`. fn convert_text_format(text: &ResponseTextParam) -> Option { match &text.format { TextResponseFormatConfiguration::Text => None, TextResponseFormatConfiguration::JsonObject => Some(ResponseFormat::JsonObject), TextResponseFormatConfiguration::JsonSchema(s) => Some(ResponseFormat::JsonSchema { json_schema: s.clone(), }), } } /// Convert Responses API `ServiceTier` to Chat Completions `ServiceTier`. /// These are structurally identical enums in different modules. 
fn convert_service_tier(tier: &ServiceTier) -> ChatServiceTier { match tier { ServiceTier::Auto => ChatServiceTier::Auto, ServiceTier::Default => ChatServiceTier::Default, ServiceTier::Flex => ChatServiceTier::Flex, ServiceTier::Scale => ChatServiceTier::Scale, ServiceTier::Priority => ChatServiceTier::Priority, } } impl TryFrom for NvCreateChatCompletionRequest { type Error = anyhow::Error; fn try_from(resp: NvCreateResponse) -> Result { let mut messages = Vec::new(); // Prepend instructions as system message if present if let Some(instructions) = &resp.inner.instructions { messages.push(ChatCompletionRequestMessage::System( ChatCompletionRequestSystemMessage { content: ChatCompletionRequestSystemMessageContent::Text(instructions.clone()), name: None, }, )); } // Convert input to messages match &resp.inner.input { InputParam::Text(text) => { messages.push(ChatCompletionRequestMessage::User( ChatCompletionRequestUserMessage { content: ChatCompletionRequestUserMessageContent::Text(text.clone()), name: None, }, )); } InputParam::Items(items) => { let item_messages = convert_input_items_to_messages(items)?; messages.extend(item_messages); } } let top_logprobs = convert_top_logprobs(resp.inner.top_logprobs); // Convert tools if present let tools = resp .inner .tools .as_ref() .map(|t| convert_tools(t)) .filter(|t: &Vec<_>| !t.is_empty()); // Convert tool_choice if present let tool_choice = resp.inner.tool_choice.as_ref().map(convert_tool_choice); // Determine stream setting: respect caller's preference, default to true for aggregation let stream = resp.inner.stream.or(Some(true)); // Map reasoning.effort to reasoning_effort let reasoning_effort = resp.inner.reasoning.as_ref().and_then(|r| r.effort.clone()); // Map text.format to response_format let response_format = resp.inner.text.as_ref().and_then(convert_text_format); // Map service_tier let service_tier = resp.inner.service_tier.as_ref().map(convert_service_tier); Ok(NvCreateChatCompletionRequest { inner: 
CreateChatCompletionRequest { messages, model: resp.inner.model.unwrap_or_default(), temperature: resp.inner.temperature, top_p: resp.inner.top_p, max_completion_tokens: resp.inner.max_output_tokens, store: resp.inner.store, parallel_tool_calls: resp.inner.parallel_tool_calls, top_logprobs, metadata: resp .inner .metadata .map(|m| serde_json::to_value(m).unwrap_or_default()), stream, tools, tool_choice, reasoning_effort, response_format, service_tier, ..Default::default() }, common: Default::default(), nvext: resp.nvext, chat_template_args: None, media_io_kwargs: None, unsupported_fields: Default::default(), }) } } fn convert_top_logprobs(input: Option) -> Option { input.map(|x| x.min(20)) } /// Parse `` blocks from model text output. /// Returns a list of (name, arguments_json) tuples. /// Returns an empty vec immediately if no `` tag is present. fn parse_tool_call_text(text: &str) -> Vec<(String, String)> { if !text.contains("") { return Vec::new(); } let mut results = Vec::new(); let mut search_start = 0; while let Some(start) = text[search_start..].find("") { let abs_start = search_start + start + "".len(); if let Some(end) = text[abs_start..].find("") { let block = text[abs_start..abs_start + end].trim(); if let Ok(parsed) = serde_json::from_str::(block) { let name = parsed .get("name") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); let arguments = if let Some(args) = parsed.get("arguments") { if args.is_string() { args.as_str().unwrap_or("{}").to_string() } else { serde_json::to_string(args).unwrap_or_else(|_| "{}".to_string()) } } else { "{}".to_string() }; if !name.is_empty() { results.push((name, arguments)); } } search_start = abs_start + end + "".len(); } else { break; } } results } /// Strip `...` blocks and any `...` blocks from text. /// Returns the original string (no allocation) if no tags are present. 
fn strip_tool_call_text(text: &str) -> std::borrow::Cow<'_, str> { let has_tool = text.contains(""); let has_think = text.contains(""); if !has_tool && !has_think { return std::borrow::Cow::Borrowed(text); } fn strip_tag(input: &mut String, open: &str, close: &str) { while let Some(start) = input.find(open) { if let Some(end_offset) = input[start..].find(close) { input.replace_range(start..start + end_offset + close.len(), ""); } else { input.truncate(start); break; } } } let mut result = text.to_string(); if has_tool { strip_tag(&mut result, "", ""); } if has_think { strip_tag(&mut result, "", ""); } std::borrow::Cow::Owned(result) } // --------------------------------------------------------------------------- // Chat Completions -> Responses API response conversion // --------------------------------------------------------------------------- /// Request parameters to echo back in Response objects. /// Extracted from the incoming CreateResponse request so that /// response objects reflect actual request values. #[derive(Clone, Debug, Default)] pub struct ResponseParams { pub model: Option, pub temperature: Option, pub top_p: Option, pub max_output_tokens: Option, pub parallel_tool_calls: Option, pub store: Option, pub tools: Option>, pub tool_choice: Option, pub instructions: Option, pub reasoning: Option, pub text: Option, pub service_tier: Option, pub include: Option>, pub truncation: Option, /// OpenResponses spec requires these fields on the response body. Upstream /// `CreateResponse` doesn't model them on the request yet, so for now they /// pass through as `None`; the response serializer defaults to 0.0 (the /// effective sglang default). Wired through `ResponseParams` anyway so /// that when upstream relaxes or we shadow `CreateResponse`, threading a /// real value becomes a one-line change at the request-extraction site. pub presence_penalty: Option, pub frequency_penalty: Option, /// Pass-through metadata fields. 
Codex and other clients send these as /// hints for OpenAI's caching/moderation backends; Dynamo doesn't act on /// them, but the spec includes them on the response body so we echo back /// what the caller sent rather than silently dropping. Echoing makes /// receipt observable to the client without needing a real backend. pub prompt_cache_key: Option, pub prompt_cache_retention: Option, pub safety_identifier: Option, } /// Normalize tools so that `FunctionTool.strict` is always set. /// The upstream type uses `skip_serializing_if = "Option::is_none"` on `strict`, /// so `None` causes the field to be omitted during JSON serialization. /// Schema validators (Zod, etc.) expect `strict` to always be present. /// OpenAI defaults `strict` to `true`. pub(super) fn normalize_tools(tools: Vec) -> Vec { tools .into_iter() .map(|tool| match tool { Tool::Function(mut ft) => { if ft.strict.is_none() { ft.strict = Some(true); } Tool::Function(ft) } other => other, }) .collect() } /// Build an assistant text message output item. fn make_text_message(id: String, text: String) -> OutputItem { OutputItem::Message(OutputMessage { id, role: AssistantRole::Assistant, status: OutputStatus::Completed, phase: None, content: vec![OutputMessageContent::OutputText(OutputTextContent { text, annotations: vec![], logprobs: Some(vec![]), })], }) } /// Build a function call output item with generated IDs. fn make_function_call(name: String, arguments: String) -> OutputItem { OutputItem::FunctionCall(FunctionToolCall { arguments, call_id: format!("call_{}", Uuid::new_v4().simple()), namespace: None, name, id: Some(format!("fc_{}", Uuid::new_v4().simple())), status: Some(OutputStatus::Completed), }) } /// Convert a ChatCompletion response into a Responses API response object, /// echoing back the actual request parameters from `params`. 
pub fn chat_completion_to_response( nv_resp: NvCreateChatCompletionResponse, params: &ResponseParams, api_context: Option<&crate::protocols::unified::ResponsesContext>, ) -> Result { let nvext = nv_resp.nvext.clone(); let chat_resp = nv_resp.inner; let message_id = format!("msg_{}", Uuid::new_v4().simple()); let response_id = format!("resp_{}", Uuid::new_v4().simple()); let choice = chat_resp.choices.into_iter().next(); let mut output = Vec::new(); if let Some(choice) = choice { // Handle structured tool calls if let Some(tool_calls) = choice.message.tool_calls { for tc in &tool_calls { output.push(OutputItem::FunctionCall(FunctionToolCall { arguments: tc.function.arguments.clone(), call_id: tc.id.clone(), namespace: None, name: tc.function.name.clone(), id: Some(format!("fc_{}", Uuid::new_v4().simple())), status: Some(OutputStatus::Completed), })); } } // Map reasoning_content to a Reasoning output item if let Some(reasoning_text) = choice.message.reasoning_content && !reasoning_text.is_empty() { output.push(OutputItem::Reasoning(ReasoningItem { id: format!("rs_{}", Uuid::new_v4().simple()), summary: vec![SummaryPart::SummaryText(SummaryTextContent { text: reasoning_text, })], content: None, encrypted_content: None, status: Some(OutputStatus::Completed), })); } // Handle text content -- also parse blocks from models // that emit tool calls as text (e.g. 
Qwen3) let content_text = match choice.message.content { Some(dynamo_protocols::types::ChatCompletionMessageContent::Text(text)) => Some(text), Some(dynamo_protocols::types::ChatCompletionMessageContent::Parts(_)) => { tracing::warn!( "Multimodal content in responses API not yet supported, using placeholder" ); Some("[multimodal content]".to_string()) } None => None, }; if let Some(content_text) = content_text && !content_text.is_empty() { let parsed_calls = parse_tool_call_text(&content_text); if !parsed_calls.is_empty() { for (name, arguments) in parsed_calls { output.push(make_function_call(name, arguments)); } let remaining = strip_tool_call_text(&content_text); if !remaining.trim().is_empty() { output.push(make_text_message( message_id.clone(), remaining.into_owned(), )); } } else { output.push(make_text_message(message_id.clone(), content_text)); } } if output.is_empty() { output.push(make_text_message(message_id, String::new())); } } else { tracing::warn!("No choices in chat completion response, using empty content"); output.push(make_text_message(message_id, String::new())); } // Apply `include` filtering: strip logprobs from output text unless // the caller explicitly requested them via `message.output_text.logprobs`. 
let keep_logprobs = params .include .as_ref() .is_some_and(|inc| inc.contains(&IncludeEnum::MessageOutputTextLogprobs)); for item in &mut output { if let OutputItem::Message(msg) = item { for content in &mut msg.content { if let OutputMessageContent::OutputText(text) = content && (!keep_logprobs || text.logprobs.is_none()) { text.logprobs = Some(Vec::new()); } } } } let created_at = chat_resp.created as u64; let response = Response { id: response_id, object: "response".to_string(), created_at, completed_at: Some(created_at), model: if chat_resp.model == "unknown" { params.model.clone().unwrap_or(chat_resp.model) } else { chat_resp.model }, status: Status::Completed, output, // Spec-required defaults (OpenResponses requires these as non-null) background: Some(false), metadata: Some(HashMap::new()), parallel_tool_calls: params.parallel_tool_calls.or(Some(true)), temperature: params.temperature.or(Some(1.0)), text: Some(params.text.clone().unwrap_or(ResponseTextParam { format: TextResponseFormatConfiguration::Text, verbosity: None, })), tool_choice: params .tool_choice .clone() .or(Some(ToolChoiceParam::Mode(ToolChoiceOptions::Auto))), tools: Some( params .tools .clone() .map(normalize_tools) .unwrap_or_default(), ), top_p: params.top_p.or(Some(1.0)), truncation: Some(params.truncation.unwrap_or(Truncation::Disabled)), // Nullable but required to be present (null is valid) billing: None, conversation: None, error: None, incomplete_details: None, instructions: params.instructions.clone().map(Instructions::Text), max_output_tokens: params.max_output_tokens, previous_response_id: api_context.and_then(|ctx| ctx.previous_response_id.clone()), prompt: None, prompt_cache_key: params.prompt_cache_key.clone(), prompt_cache_retention: params.prompt_cache_retention, reasoning: params.reasoning.clone(), safety_identifier: params.safety_identifier.clone(), service_tier: Some(params.service_tier.unwrap_or(ServiceTier::Auto)), top_logprobs: Some(0), usage: chat_resp.usage.map(|u| 
ResponseUsage { input_tokens: u.prompt_tokens, input_tokens_details: InputTokenDetails { cached_tokens: u .prompt_tokens_details .map(|d| d.cached_tokens.unwrap_or(0)) .unwrap_or(0), }, output_tokens: u.completion_tokens, output_tokens_details: OutputTokenDetails { reasoning_tokens: u .completion_tokens_details .map(|d| d.reasoning_tokens.unwrap_or(0)) .unwrap_or(0), }, total_tokens: u.total_tokens, }), }; Ok(NvResponse { inner: response, nvext, presence_penalty: params.presence_penalty.unwrap_or(0.0), frequency_penalty: params.frequency_penalty.unwrap_or(0.0), store: params.store.unwrap_or(false), }) } #[cfg(test)] mod tests { use dynamo_protocols::types::responses::{ CreateResponse, FunctionCallOutput, FunctionCallOutputItemParam, FunctionTool, FunctionToolCall, InputContent, InputImageContent, InputItem, InputMessage, InputOutputMessage, InputOutputMessageContent, InputOutputTextContent, InputParam, InputRole, InputTextContent, Item, MessageItem, Tool, }; use dynamo_protocols::types::{ ChatCompletionRequestMessage, ChatCompletionRequestUserMessageContent, }; use super::*; use crate::types::openai::chat_completions::NvCreateChatCompletionResponse; fn make_response_with_input(text: &str) -> NvCreateResponse { NvCreateResponse { inner: CreateResponse { input: InputParam::Text(text.into()), model: Some("test-model".into()), max_output_tokens: Some(1024), temperature: Some(0.5), top_p: Some(0.9), top_logprobs: Some(15), ..Default::default() }, nvext: Some(NvExt { annotations: Some(vec!["debug".into(), "trace".into()]), ..Default::default() }), } } #[test] fn test_annotations_trait_behavior() { let req = make_response_with_input("hello"); assert_eq!( req.annotations(), Some(vec!["debug".to_string(), "trace".to_string()]) ); assert!(req.has_annotation("debug")); assert!(req.has_annotation("trace")); assert!(!req.has_annotation("missing")); } #[test] fn test_openai_sampling_trait_behavior() { let req = make_response_with_input("hello"); assert_eq!(req.get_temperature(), 
Some(0.5)); assert_eq!(req.get_top_p(), Some(0.9)); assert_eq!(req.get_frequency_penalty(), None); assert_eq!(req.get_presence_penalty(), None); } #[test] fn test_openai_stop_conditions_trait_behavior() { let req = make_response_with_input("hello"); assert_eq!(req.get_max_tokens(), Some(1024)); assert_eq!(req.get_min_tokens(), None); assert_eq!(req.get_stop(), None); } #[test] fn test_into_nvcreate_chat_completion_request() { let nv_req: NvCreateChatCompletionRequest = make_response_with_input("hi there").try_into().unwrap(); assert_eq!(nv_req.inner.model, "test-model"); assert_eq!(nv_req.inner.temperature, Some(0.5)); assert_eq!(nv_req.inner.top_p, Some(0.9)); assert_eq!(nv_req.inner.max_completion_tokens, Some(1024)); assert_eq!(nv_req.inner.top_logprobs, Some(15)); assert_eq!(nv_req.inner.stream, Some(true)); let messages = &nv_req.inner.messages; assert_eq!(messages.len(), 1); match &messages[0] { ChatCompletionRequestMessage::User(user_msg) => match &user_msg.content { ChatCompletionRequestUserMessageContent::Text(t) => { assert_eq!(t, "hi there"); } _ => panic!("unexpected user content type"), }, _ => panic!("expected user message"), } } #[test] fn test_store_mapped_to_chat_completion_request() { let mut req = make_response_with_input("audit me"); req.inner.store = Some(true); let nv_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); assert_eq!(nv_req.inner.store, Some(true)); } #[test] fn test_instructions_prepended_as_system_message() { let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Text("hello".into()), model: Some("test-model".into()), instructions: Some("You are a helpful assistant.".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 2); match &messages[0] { ChatCompletionRequestMessage::System(sys) => match &sys.content { ChatCompletionRequestSystemMessageContent::Text(t) => { 
assert_eq!(t, "You are a helpful assistant."); } _ => panic!("expected text content"), }, _ => panic!("expected system message first"), } } #[test] fn test_input_items_multi_turn() { let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "Be concise.".into(), })], role: InputRole::System, status: None, }))), InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "What is 2+2?".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::Message(MessageItem::Output(InputOutputMessage { id: Some("msg_1".into()), role: AssistantRole::Assistant, status: Some(OutputStatus::Completed), phase: None, content: vec![InputOutputMessageContent::OutputText( InputOutputTextContent { text: "4".into(), annotations: vec![], logprobs: None, }, )], }))), InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "And 3+3?".into(), })], role: InputRole::User, status: None, }))), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 4); assert!(matches!( messages[0], ChatCompletionRequestMessage::System(_) )); assert!(matches!(messages[1], ChatCompletionRequestMessage::User(_))); assert!(matches!( messages[2], ChatCompletionRequestMessage::Assistant(_) )); assert!(matches!(messages[3], ChatCompletionRequestMessage::User(_))); } #[test] fn test_input_items_with_image() { let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![InputItem::Item(Item::Message(MessageItem::Input( InputMessage { content: vec![ InputContent::InputText(InputTextContent { text: "What is in this image?".into(), }), 
InputContent::InputImage(InputImageContent { detail: Default::default(), // ImageDetail::Auto file_id: None, image_url: Some("https://example.com/cat.jpg".into()), }), ], role: InputRole::User, status: None, }, )))]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 1); match &messages[0] { ChatCompletionRequestMessage::User(u) => match &u.content { ChatCompletionRequestUserMessageContent::Array(parts) => { assert_eq!(parts.len(), 2); } _ => panic!("expected array content"), }, _ => panic!("expected user message"), } } #[test] fn test_function_call_input_items() { let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "What's the weather?".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: r#"{"location":"SF"}"#.into(), call_id: "call_123".into(), namespace: None, name: "get_weather".into(), id: None, status: None, })), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "call_123".into(), output: FunctionCallOutput::Text(r#"{"temp":"72F"}"#.into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); assert!(matches!(messages[0], ChatCompletionRequestMessage::User(_))); assert!(matches!( messages[1], ChatCompletionRequestMessage::Assistant(_) )); assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_))); } #[test] fn test_function_call_with_interstitial_assistant_message_is_coalesced() { // Regression: prior turn was `function_call` + assistant 
text + `function_call_output`. // The converter must emit a SINGLE assistant chat message carrying both `content` // and `tool_calls`, otherwise chat templates that require a tool message to // immediately follow its assistant tool_call (e.g. MiniMax) will reject the input. let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "What's the weather?".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: r#"{"location":"SF"}"#.into(), call_id: "call_123".into(), namespace: None, name: "get_weather".into(), id: None, status: None, })), InputItem::Item(Item::Message(MessageItem::Output(InputOutputMessage { id: Some("msg_interstitial".into()), role: AssistantRole::Assistant, status: Some(OutputStatus::Completed), phase: None, content: vec![InputOutputMessageContent::OutputText( InputOutputTextContent { text: "\n\n".into(), annotations: vec![], logprobs: None, }, )], }))), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "call_123".into(), output: FunctionCallOutput::Text(r#"{"temp":"72F"}"#.into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!( messages.len(), 3, "expected coalesced [user, assistant, tool]" ); assert!(matches!(messages[0], ChatCompletionRequestMessage::User(_))); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { let tool_calls = a.tool_calls.as_ref().expect("tool_calls must be present"); assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls[0].id, "call_123"); assert_eq!(tool_calls[0].function.name, "get_weather"); match a .content .as_ref() .expect("content must carry interstitial text") { 
ChatCompletionRequestAssistantMessageContent::Text(t) => { assert_eq!(t, "\n\n"); } _ => panic!("expected text content"), } } _ => panic!("expected a single merged assistant message at index 1"), } assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_))); } #[test] fn test_easy_message_assistant_coalesced_with_adjacent_function_call() { // The same coalescing rule applies to EasyInputMessage shape (string content, // role=assistant, no `type:"message"` discriminator). use dynamo_protocols::types::responses::{ EasyInputContent, EasyInputMessage, Role as ResponseRole, }; let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::EasyMessage(EasyInputMessage { role: ResponseRole::User, content: EasyInputContent::Text("x".into()), ..Default::default() }), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: "{}".into(), call_id: "c".into(), namespace: None, name: "f".into(), id: None, status: None, })), InputItem::EasyMessage(EasyInputMessage { role: ResponseRole::Assistant, content: EasyInputContent::Text("".into()), ..Default::default() }), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c".into(), output: FunctionCallOutput::Text("x".into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { assert!(a.tool_calls.is_some()); assert_eq!(a.tool_calls.as_ref().unwrap().len(), 1); } _ => panic!("expected merged assistant message"), } assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_))); } #[test] fn test_standalone_assistant_message_with_empty_content_preserves_turn() { // A prior assistant turn that produced no text (empty content or // refusal-only parts the converter strips) must still emit an 
assistant // message. Otherwise adjacent user turns get silently merged, which // breaks strict-alternation chat templates and distorts the context // the model sees. let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "first question".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::Message(MessageItem::Output(InputOutputMessage { id: None, role: AssistantRole::Assistant, status: None, phase: None, content: vec![InputOutputMessageContent::OutputText( InputOutputTextContent { text: "".into(), annotations: vec![], logprobs: None, }, )], }))), InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "second question".into(), })], role: InputRole::User, status: None, }))), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!( messages.len(), 3, "empty assistant turn must not be silently dropped" ); assert!(matches!(messages[0], ChatCompletionRequestMessage::User(_))); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { assert!(a.tool_calls.is_none()); match a.content.as_ref().expect("empty turn still emits content") { ChatCompletionRequestAssistantMessageContent::Text(t) => { assert_eq!(t, ""); } _ => panic!("expected text content"), } } _ => panic!("expected assistant turn boundary preserved"), } assert!(matches!(messages[2], ChatCompletionRequestMessage::User(_))); } #[test] fn test_easy_assistant_message_with_empty_content_preserves_turn() { // Same turn-boundary preservation applies to EasyInputMessage shape. 
use dynamo_protocols::types::responses::{ EasyInputContent, EasyInputMessage, Role as ResponseRole, }; let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::EasyMessage(EasyInputMessage { role: ResponseRole::User, content: EasyInputContent::Text("first".into()), ..Default::default() }), InputItem::EasyMessage(EasyInputMessage { role: ResponseRole::Assistant, content: EasyInputContent::Text("".into()), ..Default::default() }), InputItem::EasyMessage(EasyInputMessage { role: ResponseRole::User, content: EasyInputContent::Text("second".into()), ..Default::default() }), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); assert!(matches!( messages[1], ChatCompletionRequestMessage::Assistant(_) )); } #[test] fn test_pure_function_call_turn_emits_null_content() { // Chat Completions spec allows `content: null` on assistant messages // that carry only `tool_calls`. Some Jinja templates gate on // `{% if message.content is not none %}`; we must not emit // `content: ""` for pure-tool-call turns. Turn-boundary cases (empty // OutputMessage with no tool_calls) still emit `Some(Text(""))`. 
let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "hi".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: "{}".into(), call_id: "c".into(), namespace: None, name: "f".into(), id: None, status: None, })), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c".into(), output: FunctionCallOutput::Text("ok".into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { assert!( a.content.is_none(), "pure tool-call turn must have content: null, got {:?}", a.content ); assert!(a.tool_calls.is_some()); } _ => panic!("expected assistant message"), } } #[test] fn test_reasoning_item_routed_into_reasoning_content() { // Regression: Codex / Agents SDK round-trip Item::Reasoning mid-turn. // The converter must route the reasoning summary into the coalesced // assistant message's `reasoning_content`, not silently drop it. 
use dynamo_protocols::types::responses::{ReasoningItem, SummaryPart, SummaryTextContent}; let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "solve".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::Reasoning(ReasoningItem { id: "rs_1".into(), summary: vec![SummaryPart::SummaryText(SummaryTextContent { text: "thinking step 1".into(), })], content: None, encrypted_content: None, status: None, })), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: "{}".into(), call_id: "c".into(), namespace: None, name: "f".into(), id: None, status: None, })), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c".into(), output: FunctionCallOutput::Text("ok".into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { match a .reasoning_content .as_ref() .expect("reasoning must be preserved") { ReasoningContent::Text(t) => assert_eq!(t, "thinking step 1"), _ => panic!("expected Text reasoning content"), } assert!(a.tool_calls.is_some()); } _ => panic!("expected assistant message with reasoning + tool_calls"), } } #[test] fn test_unsupported_item_variant_flushes_pending() { // Sequence: function_call → (an unsupported tool-output variant) → // function_call → function_call_output. Without a flush on the // catch-all, the two FunctionCalls would coalesce into a single // assistant `tool_calls` list despite being different semantic turns. 
use dynamo_protocols::types::responses::{ ComputerCallOutputItemParam, ComputerScreenshotImage, ComputerScreenshotImageType, }; let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "go".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: "{}".into(), call_id: "c1".into(), namespace: None, name: "f".into(), id: None, status: None, })), InputItem::Item(Item::ComputerCallOutput(ComputerCallOutputItemParam { call_id: "cc1".into(), output: ComputerScreenshotImage { r#type: ComputerScreenshotImageType::ComputerScreenshot, image_url: None, file_id: None, }, acknowledged_safety_checks: None, id: None, status: None, })), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: "{}".into(), call_id: "c2".into(), namespace: None, name: "f".into(), id: None, status: None, })), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c2".into(), output: FunctionCallOutput::Text("ok".into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; // Expected: User, Assistant(tc=[c1]), Assistant(tc=[c2]), Tool(c2) // Without the catch-all flush, we'd get Assistant(tc=[c1,c2]) instead. 
assert!(messages.len() >= 4, "catch-all must flush pending"); let tc_msgs: Vec<_> = messages .iter() .filter_map(|m| match m { ChatCompletionRequestMessage::Assistant(a) => a.tool_calls.as_ref(), _ => None, }) .collect(); assert_eq!( tc_msgs.len(), 2, "two tool-call turns must not coalesce across unsupported variant" ); assert_eq!(tc_msgs[0].len(), 1); assert_eq!(tc_msgs[0][0].id, "c1"); assert_eq!(tc_msgs[1].len(), 1); assert_eq!(tc_msgs[1][0].id, "c2"); } #[test] fn test_function_call_then_output_text_then_output_merges_to_one_turn() { // Canonical MiniMax repro (the Codex/Agents-SDK sequence that first // broke): user → function_call → assistant text → function_call_output. // Must yield 3 chat messages: user, assistant(content + tool_calls), // tool. Any other shape breaks the chat template's tool-call pairing. let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "call say".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: r#"{"x":"hi"}"#.into(), call_id: "c".into(), namespace: None, name: "say".into(), id: None, status: None, })), InputItem::Item(Item::Message(MessageItem::Output(InputOutputMessage { id: None, role: AssistantRole::Assistant, status: None, phase: None, content: vec![InputOutputMessageContent::OutputText( InputOutputTextContent { text: "\n\n\n".into(), annotations: vec![], logprobs: None, }, )], }))), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c".into(), output: FunctionCallOutput::Text("hi".into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); assert!(matches!(messages[0], 
ChatCompletionRequestMessage::User(_))); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { let tool_calls = a.tool_calls.as_ref().expect("tool_calls present"); assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls[0].id, "c"); match a.content.as_ref().expect("text content present") { ChatCompletionRequestAssistantMessageContent::Text(t) => { assert_eq!(t, "\n\n\n"); } _ => panic!("expected text content"), } } _ => panic!("expected merged assistant message"), } assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_))); } #[test] fn test_output_text_then_function_call_then_output_merges_to_one_turn() { // Reverse ordering: assistant text before the function_call. The // coalescer's accumulator is order-agnostic — both orderings must // produce the same merged assistant message. let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "call say".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::Message(MessageItem::Output(InputOutputMessage { id: None, role: AssistantRole::Assistant, status: None, phase: None, content: vec![InputOutputMessageContent::OutputText( InputOutputTextContent { text: "let me call it".into(), annotations: vec![], logprobs: None, }, )], }))), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: r#"{"x":"hi"}"#.into(), call_id: "c".into(), namespace: None, name: "say".into(), id: None, status: None, })), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c".into(), output: FunctionCallOutput::Text("hi".into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); match &messages[1] { 
ChatCompletionRequestMessage::Assistant(a) => { assert_eq!(a.tool_calls.as_ref().expect("tool_calls present").len(), 1); match a.content.as_ref().expect("content present") { ChatCompletionRequestAssistantMessageContent::Text(t) => { assert_eq!(t, "let me call it"); } _ => panic!("expected text content"), } } _ => panic!("expected merged assistant message"), } } #[test] fn test_multiple_function_calls_merge_into_single_assistant_message() { // Parallel tool calls (`parallel_tool_calls: true`) produce multiple // adjacent Item::FunctionCall items. They must coalesce into a single // assistant message carrying all tool_calls. let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "do two things".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: "{}".into(), call_id: "c1".into(), namespace: None, name: "f".into(), id: None, status: None, })), InputItem::Item(Item::FunctionCall(FunctionToolCall { arguments: "{}".into(), call_id: "c2".into(), namespace: None, name: "g".into(), id: None, status: None, })), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c1".into(), output: FunctionCallOutput::Text("r1".into()), id: None, status: None, })), InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam { call_id: "c2".into(), output: FunctionCallOutput::Text("r2".into()), id: None, status: None, })), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; // user, assistant(tc=[c1, c2]), tool(c1), tool(c2) assert_eq!(messages.len(), 4); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { let tool_calls = a.tool_calls.as_ref().expect("tool_calls present"); 
assert_eq!(tool_calls.len(), 2, "parallel tool_calls must coalesce"); assert_eq!(tool_calls[0].id, "c1"); assert_eq!(tool_calls[1].id, "c2"); assert!(a.content.is_none(), "pure-tool-call turn has null content"); } _ => panic!("expected single merged assistant message"), } assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_))); assert!(matches!(messages[3], ChatCompletionRequestMessage::Tool(_))); } #[test] fn test_refusal_content_folded_into_assistant_text() { // Refusal parts in a prior assistant turn must survive to the next // turn. We fold refusal text into the assistant's `content` so // templates render it identically to normal content; otherwise the // model loses visibility into what it previously refused. use dynamo_protocols::types::responses::RefusalContent; let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Items(vec![ InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "try again".into(), })], role: InputRole::User, status: None, }))), InputItem::Item(Item::Message(MessageItem::Output(InputOutputMessage { id: None, role: AssistantRole::Assistant, status: None, phase: None, content: vec![InputOutputMessageContent::Refusal(RefusalContent { refusal: "I cannot help with that.".into(), })], }))), InputItem::Item(Item::Message(MessageItem::Input(InputMessage { content: vec![InputContent::InputText(InputTextContent { text: "ok different question".into(), })], role: InputRole::User, status: None, }))), ]), model: Some("test-model".into()), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); let messages = &chat_req.inner.messages; assert_eq!(messages.len(), 3); match &messages[1] { ChatCompletionRequestMessage::Assistant(a) => { match a.content.as_ref().expect("refusal folded into content") { ChatCompletionRequestAssistantMessageContent::Text(t) => { assert_eq!(t, "I cannot help with that."); 
} _ => panic!("expected text content"), } } _ => panic!("expected assistant message carrying folded refusal"), } } #[test] fn test_tools_conversion() { let req = NvCreateResponse { inner: CreateResponse { input: InputParam::Text("hello".into()), model: Some("test-model".into()), tools: Some(vec![Tool::Function(FunctionTool { name: "get_weather".into(), parameters: Some(serde_json::json!({ "type": "object", "properties": { "location": {"type": "string"} }, "required": ["location"] })), strict: Some(true), description: Some("Get weather info".into()), defer_loading: None, })]), ..Default::default() }, nvext: None, }; let chat_req: NvCreateChatCompletionRequest = req.try_into().unwrap(); assert!(chat_req.inner.tools.is_some()); let tools = chat_req.inner.tools.unwrap(); assert_eq!(tools.len(), 1); assert_eq!(tools[0].function.name, "get_weather"); } #[allow(deprecated)] #[test] fn test_into_nvresponse_from_chat_response() { let now = 1_726_000_000; let chat_resp = NvCreateChatCompletionResponse { inner: dynamo_protocols::types::CreateChatCompletionResponse { id: "chatcmpl-xyz".into(), choices: vec![dynamo_protocols::types::ChatChoice { index: 0, message: dynamo_protocols::types::ChatCompletionResponseMessage { content: Some(dynamo_protocols::types::ChatCompletionMessageContent::Text( "This is a reply".to_string(), )), refusal: None, tool_calls: None, role: dynamo_protocols::types::Role::Assistant, function_call: None, audio: None, reasoning_content: None, }, finish_reason: None, stop_reason: None, logprobs: None, }], created: now, model: "llama-3.1-8b-instruct".into(), service_tier: None, system_fingerprint: None, object: "chat.completion".to_string(), usage: None, }, nvext: None, }; let wrapped = chat_completion_to_response(chat_resp, &ResponseParams::default(), None).unwrap(); assert_eq!(wrapped.inner.model, "llama-3.1-8b-instruct"); assert_eq!(wrapped.inner.status, Status::Completed); assert_eq!(wrapped.inner.object, "response"); 
assert!(wrapped.inner.id.starts_with("resp_"));
        let msg = match &wrapped.inner.output[0] {
            OutputItem::Message(m) => m,
            _ => panic!("Expected Message variant"),
        };
        assert_eq!(msg.role, AssistantRole::Assistant);
        match &msg.content[0] {
            OutputMessageContent::OutputText(txt) => {
                assert_eq!(txt.text, "This is a reply");
            }
            _ => panic!("Expected OutputText content"),
        }
    }

    /// A chat completion whose only content is a tool call converts into a
    /// single `OutputItem::FunctionCall` carrying the same call id and name.
    // NOTE(review): presumably needed for the deprecated `function_call`
    // field on `ChatCompletionResponseMessage` — confirm.
    #[allow(deprecated)]
    #[test]
    fn test_response_with_tool_calls() {
        let now = 1_726_000_000;
        let chat_resp = NvCreateChatCompletionResponse {
            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                id: "chatcmpl-xyz".into(),
                choices: vec![dynamo_protocols::types::ChatChoice {
                    index: 0,
                    message: dynamo_protocols::types::ChatCompletionResponseMessage {
                        content: None,
                        refusal: None,
                        tool_calls: Some(vec![ChatCompletionMessageToolCall {
                            id: "call_abc".into(),
                            r#type: FunctionType::Function,
                            function: dynamo_protocols::types::FunctionCall {
                                name: "get_weather".into(),
                                arguments: r#"{"location":"SF"}"#.into(),
                            },
                        }]),
                        role: dynamo_protocols::types::Role::Assistant,
                        function_call: None,
                        audio: None,
                        reasoning_content: None,
                    },
                    finish_reason: None,
                    stop_reason: None,
                    logprobs: None,
                }],
                created: now,
                model: "test-model".into(),
                service_tier: None,
                system_fingerprint: None,
                object: "chat.completion".to_string(),
                usage: None,
            },
            nvext: None,
        };

        let wrapped =
            chat_completion_to_response(chat_resp, &ResponseParams::default(), None).unwrap();

        assert_eq!(wrapped.inner.output.len(), 1);
        match &wrapped.inner.output[0] {
            OutputItem::FunctionCall(fc) => {
                assert_eq!(fc.call_id, "call_abc");
                assert_eq!(fc.name, "get_weather");
            }
            _ => panic!("Expected FunctionCall output"),
        }
    }

    /// `convert_top_logprobs` passes small values through, caps anything
    /// above 20 at 20, and maps `None` to `None`.
    #[test]
    fn test_convert_top_logprobs_clamped() {
        assert_eq!(convert_top_logprobs(Some(5)), Some(5));
        assert_eq!(convert_top_logprobs(Some(21)), Some(20));
        assert_eq!(convert_top_logprobs(Some(255)), Some(20));
        assert_eq!(convert_top_logprobs(None), None);
    }

    /// A single tool-call block embedded in free text yields one
    /// `(name, arguments-json)` pair.
    #[test]
    fn test_parse_tool_call_text() {
        // Standard Qwen3 format
        // NOTE(review): the `<tool_call>` markers were missing from the copy
        // of this fixture under review (angle-bracket tags stripped); they are
        // restored here per the Qwen3 convention — confirm against
        // `parse_tool_call_text`.
        let text = r#"Let me check the weather.
<tool_call>
{"name": "get_weather", "arguments": {"location": "San Francisco"}}
</tool_call>"#;
        let calls = parse_tool_call_text(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].0, "get_weather");
        let args: serde_json::Value = serde_json::from_str(&calls[0].1).unwrap();
        assert_eq!(args["location"], "San Francisco");
    }

    /// Two consecutive tool-call blocks are returned in source order.
    #[test]
    fn test_parse_tool_call_text_multiple() {
        // NOTE(review): `<tool_call>` markers restored (see
        // `test_parse_tool_call_text`) — confirm exact fixture text.
        let text = r#"<tool_call>
{"name": "func_a", "arguments": {"x": 1}}
</tool_call>
<tool_call>
{"name": "func_b", "arguments": {"y": 2}}
</tool_call>"#;
        let calls = parse_tool_call_text(text);
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].0, "func_a");
        assert_eq!(calls[1].0, "func_b");
    }

    /// Plain text without any tool-call block produces no calls.
    #[test]
    fn test_parse_tool_call_text_no_calls() {
        let text = "Just a regular message with no tool calls.";
        let calls = parse_tool_call_text(text);
        assert!(calls.is_empty());
    }

    /// After stripping, neither the opening nor the closing tool-call marker
    /// remains in the text.
    #[test]
    fn test_strip_tool_call_text() {
        // NOTE(review): the markers below were empty strings in the copy under
        // review; `contains("")` is true for every string, so the negated
        // assertions could never pass. Restored to the tool-call tags —
        // confirm which markers the original test checked.
        let text = r#"thinking
<tool_call>
{"name": "f", "arguments": {}}
</tool_call>"#;
        let stripped = strip_tool_call_text(text);
        assert!(!stripped.contains("<tool_call>"));
        assert!(!stripped.contains("</tool_call>"));
    }

    // ── PR1: reasoning / text.format / service_tier pass-through tests ──

    /// `reasoning.effort` on the Responses request is carried over to
    /// `reasoning_effort` on the chat completion request.
    #[test]
    fn test_reasoning_effort_mapped_to_chat_completion() {
        use dynamo_protocols::types::ReasoningEffort;
        use dynamo_protocols::types::responses::Reasoning;

        let mut req = make_response_with_input("think hard");
        req.inner.reasoning = Some(Reasoning {
            effort: Some(ReasoningEffort::Medium),
            ..Default::default()
        });

        let chat: NvCreateChatCompletionRequest = req.try_into().unwrap();
        assert_eq!(chat.inner.reasoning_effort, Some(ReasoningEffort::Medium));
    }

    /// Without a `reasoning` block, `reasoning_effort` stays `None`.
    #[test]
    fn test_reasoning_none_leaves_chat_field_none() {
        let req = make_response_with_input("no reasoning");
        let chat: NvCreateChatCompletionRequest = req.try_into().unwrap();
        assert_eq!(chat.inner.reasoning_effort, None);
    }

    /// `text.format = json_object` maps to `ResponseFormat::JsonObject`.
    #[test]
    fn test_text_format_json_object_mapped() {
        use dynamo_protocols::types::ResponseFormat;
        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };

        let mut req = make_response_with_input("give json");
        req.inner.text = Some(ResponseTextParam {
            format: TextResponseFormatConfiguration::JsonObject,
            verbosity: None,
        });

        let chat: NvCreateChatCompletionRequest = req.try_into().unwrap();
        assert_eq!(chat.inner.response_format, Some(ResponseFormat::JsonObject));
    }

    /// `text.format = json_schema` maps to `ResponseFormat::JsonSchema` with
    /// the schema preserved unchanged.
    #[test]
    fn test_text_format_json_schema_mapped() {
        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };
        use dynamo_protocols::types::{ResponseFormat, ResponseFormatJsonSchema};

        let schema = ResponseFormatJsonSchema {
            name: "city".into(),
            description: None,
            schema: Some(serde_json::json!({"type": "object"})),
            strict: Some(true),
        };
        let mut req = make_response_with_input("structured");
        req.inner.text = Some(ResponseTextParam {
            format: TextResponseFormatConfiguration::JsonSchema(schema.clone()),
            verbosity: None,
        });

        let chat: NvCreateChatCompletionRequest = req.try_into().unwrap();
        assert_eq!(
            chat.inner.response_format,
            Some(ResponseFormat::JsonSchema {
                json_schema: schema
            })
        );
    }

    /// `text.format = text` leaves `response_format` unset.
    #[test]
    fn test_text_format_plain_text_leaves_response_format_none() {
        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };

        let mut req = make_response_with_input("plain");
        req.inner.text = Some(ResponseTextParam {
            format: TextResponseFormatConfiguration::Text,
            verbosity: None,
        });

        let chat: NvCreateChatCompletionRequest = req.try_into().unwrap();
        assert_eq!(chat.inner.response_format, None);
    }

    /// The Responses `service_tier` maps to the matching chat-completions
    /// `ServiceTier` variant.
    #[test]
    fn test_service_tier_mapped_to_chat_completion() {
        use dynamo_protocols::types::ServiceTier as ChatServiceTier;
        use dynamo_protocols::types::responses::ServiceTier as RespServiceTier;

        let mut req = make_response_with_input("priority");
        req.inner.service_tier = Some(RespServiceTier::Priority);

        let chat: NvCreateChatCompletionRequest = req.try_into().unwrap();
        assert_eq!(chat.inner.service_tier, Some(ChatServiceTier::Priority));
    }

    /// `parallel_tool_calls` is forwarded unchanged to the chat request.
    #[test]
    fn test_parallel_tool_calls_mapped_to_chat_completion() {
        let mut req = make_response_with_input("parallel tools off");
        req.inner.parallel_tool_calls = Some(false);

        let chat: NvCreateChatCompletionRequest = req.try_into().unwrap();
        assert_eq!(chat.inner.parallel_tool_calls, Some(false));
    }

    /// The `reasoning` request parameter is echoed back on the response.
    #[test]
    fn test_response_echoes_reasoning() {
        use dynamo_protocols::types::ReasoningEffort;
        use dynamo_protocols::types::responses::Reasoning;

        let params = ResponseParams {
            reasoning: Some(Reasoning {
                effort: Some(ReasoningEffort::High),
                ..Default::default()
            }),
            ..Default::default()
        };
        let chat_resp = NvCreateChatCompletionResponse {
            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![],
                created: 0,
                id: "test".into(),
                model: "m".into(),
                service_tier: None,
                system_fingerprint: None,
                object: "chat.completion".into(),
                usage: None,
            },
            nvext: None,
        };

        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        let reasoning = resp.inner.reasoning.unwrap();
        assert_eq!(reasoning.effort, Some(ReasoningEffort::High));
    }

    /// The `text` request parameter is echoed back on the response.
    #[test]
    fn test_response_echoes_text_format() {
        use dynamo_protocols::types::responses::{
            ResponseTextParam, TextResponseFormatConfiguration,
        };

        let params = ResponseParams {
            text: Some(ResponseTextParam {
                format: TextResponseFormatConfiguration::JsonObject,
                verbosity: None,
            }),
            ..Default::default()
        };
        let chat_resp = NvCreateChatCompletionResponse {
            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![],
                created: 0,
                id: "test".into(),
                model: "m".into(),
                service_tier: None,
                system_fingerprint: None,
                object: "chat.completion".into(),
                usage: None,
            },
            nvext: None,
        };

        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        let text = resp.inner.text.unwrap();
        assert_eq!(text.format, TextResponseFormatConfiguration::JsonObject);
    }

    /// The `service_tier` request parameter is echoed back on the response.
    #[test]
    fn test_response_echoes_service_tier() {
        use dynamo_protocols::types::responses::ServiceTier;

        let params = ResponseParams {
            service_tier: Some(ServiceTier::Flex),
            ..Default::default()
        };
        let chat_resp = NvCreateChatCompletionResponse {
            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![],
                created: 0,
                id: "test".into(),
                model: "m".into(),
                service_tier: None,
                system_fingerprint: None,
                object: "chat.completion".into(),
                usage: None,
            },
            nvext: None,
        };

        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        assert_eq!(resp.inner.service_tier, Some(ServiceTier::Flex));
    }

    /// The `parallel_tool_calls` request parameter is echoed back on the
    /// response.
    #[test]
    fn test_response_echoes_parallel_tool_calls() {
        let params = ResponseParams {
            parallel_tool_calls: Some(false),
            ..Default::default()
        };
        let chat_resp = NvCreateChatCompletionResponse {
            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![],
                created: 0,
                id: "test".into(),
                model: "m".into(),
                service_tier: None,
                system_fingerprint: None,
                object: "chat.completion".into(),
                usage: None,
            },
            nvext: None,
        };

        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        assert_eq!(resp.inner.parallel_tool_calls, Some(false));
    }

    /// An assistant message without `id`, `status` or `annotations`
    /// deserializes into `MessageItem::Output` with both optional fields unset.
    #[test]
    fn test_bare_assistant_output_message_deserializes_via_owned_types() {
        // Regression: upstream async-openai's OutputMessage required `id` and
        // `status`. Dynamo-owned types make them optional so real-world client
        // shapes (no id/status, no annotations) round-trip successfully.
        let json = serde_json::json!({
            "role": "assistant",
            "content": [{"type": "output_text", "text": "Hello!"}],
            "type": "message"
        });

        let item: InputItem =
            serde_json::from_value(json).expect("relaxed deserialize should succeed");
        match item {
            InputItem::Item(Item::Message(MessageItem::Output(msg))) => {
                assert_eq!(msg.role, AssistantRole::Assistant);
                assert!(msg.id.is_none());
                assert!(msg.status.is_none());
            }
            other => panic!("Expected Item::Message(Output), got {:?}", other),
        }
    }

    /// A four-item input mixing user, function-call, bare assistant and
    /// function-call-output items deserializes into `NvCreateResponse`.
    #[test]
    fn test_nvcreate_response_accepts_bare_assistant_messages() {
        // End-to-end: a real Codex-style payload with an interstitial assistant
        // text item (no id/status/annotations) deserializes into NvCreateResponse
        // via the standard derive on our Dynamo-owned CreateResponse chain.
        let body = serde_json::json!({
            "model": "m",
            "input": [
                {"type": "message", "role": "user", "content": [
                    {"type": "input_text", "text": "hi"}
                ]},
                {"type": "function_call", "call_id": "c", "name": "f", "arguments": "{}"},
                {"type": "message", "role": "assistant", "content": [
                    {"type": "output_text", "text": "\n\n\n"}
                ]},
                {"type": "function_call_output", "call_id": "c", "output": "x"}
            ]
        });

        let req: NvCreateResponse =
            serde_json::from_value(body).expect("relaxed deserialize should succeed");
        let items = match &req.inner.input {
            InputParam::Items(items) => items,
            _ => panic!("expected Items input"),
        };
        assert_eq!(items.len(), 4);
        match &items[2] {
            InputItem::Item(Item::Message(MessageItem::Output(out))) => {
                assert_eq!(out.role, AssistantRole::Assistant);
            }
            other => panic!("expected MessageItem::Output, got {:?}", other),
        }
    }

    /// The fully-populated shape (with `id`, `status` and `annotations`)
    /// still deserializes and keeps the supplied values.
    #[test]
    fn test_output_message_with_id_and_status_still_works() {
        use dynamo_protocols::types::responses::{InputItem, Item, MessageItem, OutputStatus};

        let json = serde_json::json!({
            "role": "assistant",
            "id": "msg_abc123",
            "status": "completed",
            "content": [{"type": "output_text", "text": "Hello!", "annotations": []}],
            "type": "message"
        });

        let item: InputItem = serde_json::from_value(json).unwrap();
        match item {
            InputItem::Item(Item::Message(MessageItem::Output(msg))) => {
                assert_eq!(msg.id.as_deref(), Some("msg_abc123"));
                assert_eq!(msg.status, Some(OutputStatus::Completed));
            }
            other => panic!("Expected Item::Message(Output), got {:?}", other),
        }
    }

    // ── PR2: include filtering + truncation echo-back tests ──

    /// Builds a one-choice chat completion whose assistant message is plain
    /// `text`, finished with `FinishReason::Stop` and carrying no usage data.
    fn make_chat_resp_with_text(text: &str) -> NvCreateChatCompletionResponse {
        use dynamo_protocols::types::{
            ChatChoice, ChatCompletionMessageContent, ChatCompletionResponseMessage, FinishReason,
        };

        NvCreateChatCompletionResponse {
            inner: dynamo_protocols::types::CreateChatCompletionResponse {
                choices: vec![ChatChoice {
                    index: 0,
                    #[allow(deprecated)]
                    message: ChatCompletionResponseMessage {
                        content: Some(ChatCompletionMessageContent::Text(text.into())),
                        role: dynamo_protocols::types::Role::Assistant,
                        tool_calls: None,
                        refusal: None,
                        reasoning_content: None,
                        function_call: None,
                        audio: None,
                    },
                    finish_reason: Some(FinishReason::Stop),
                    stop_reason: None,
                    logprobs: None,
                }],
                created: 0,
                id: "test".into(),
                model: "m".into(),
                service_tier: None,
                system_fingerprint: None,
                object: "chat.completion".into(),
                usage: None,
            },
            nvext: None,
        }
    }

    /// With default params every output-text part carries an empty
    /// `logprobs` array rather than `None`.
    #[test]
    fn test_include_logprobs_empty_by_default() {
        // OpenResponses schema requires `logprobs` to be an array. When the
        // caller did not request them via `include`, emit an empty array
        // rather than null.
        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams::default();
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();

        // Guard against a vacuous pass when no text part is produced; mirrors
        // `test_include_logprobs_kept_when_requested`.
        let mut found_text = false;
        for item in &resp.inner.output {
            if let OutputItem::Message(msg) = item {
                for content in &msg.content {
                    if let OutputMessageContent::OutputText(t) = content {
                        found_text = true;
                        assert_eq!(
                            t.logprobs.as_deref(),
                            Some(&[][..]),
                            "logprobs should be an empty array by default"
                        );
                    }
                }
            }
        }
        assert!(found_text, "Expected text output");
    }

    /// When `include` requests output-text logprobs, the field is present on
    /// every output-text part.
    #[test]
    fn test_include_logprobs_kept_when_requested() {
        use dynamo_protocols::types::responses::IncludeEnum;

        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams {
            include: Some(vec![IncludeEnum::MessageOutputTextLogprobs]),
            ..Default::default()
        };
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();

        let mut found_text = false;
        for item in &resp.inner.output {
            if let OutputItem::Message(msg) = item {
                for content in &msg.content {
                    if let OutputMessageContent::OutputText(t) = content {
                        found_text = true;
                        assert!(
                            t.logprobs.is_some(),
                            "logprobs should be preserved when included"
                        );
                    }
                }
            }
        }
        assert!(found_text, "Expected text output");
    }

    /// `truncation = auto` on the request is echoed back on the response.
    #[test]
    fn test_truncation_auto_echoed_back() {
        use dynamo_protocols::types::responses::Truncation;

        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams {
            truncation: Some(Truncation::Auto),
            ..Default::default()
        };
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        assert_eq!(resp.inner.truncation, Some(Truncation::Auto));
    }

    /// With default params the response reports `truncation = disabled`.
    #[test]
    fn test_truncation_defaults_to_disabled() {
        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams::default();
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        assert_eq!(resp.inner.truncation, Some(Truncation::Disabled));
    }

    /// Pass-through metadata fields the OpenResponses spec includes on the
    /// response body. Codex sends `prompt_cache_key` on every request; we
    /// echo it back so the caller can confirm receipt without enforcing any
    /// caching semantics. Same pattern for `prompt_cache_retention` and
    /// `safety_identifier`.
    #[test]
    fn test_response_echoes_passthrough_metadata() {
        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams {
            prompt_cache_key: Some("cache-key-codex-1".into()),
            prompt_cache_retention: Some(PromptCacheRetention::InMemory),
            safety_identifier: Some("user-abc".into()),
            ..Default::default()
        };
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();

        assert_eq!(
            resp.inner.prompt_cache_key.as_deref(),
            Some("cache-key-codex-1")
        );
        assert_eq!(
            resp.inner.prompt_cache_retention,
            Some(PromptCacheRetention::InMemory)
        );
        assert_eq!(resp.inner.safety_identifier.as_deref(), Some("user-abc"));
    }

    /// Validate the JSON wire shape of NvResponse matches the OpenResponses
    /// spec: required scalars always present, nullable-required fields
    /// emitted as `null` when None.
    #[test]
    fn test_response_wire_format_shape() {
        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams::default();
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        let json = serde_json::to_value(&resp).unwrap();

        // Required scalars the spec mandates on every response. Upstream
        // async-openai's Response struct doesn't model these; NvResponse's
        // custom serializer injects them.
        assert_eq!(json["frequency_penalty"], 0.0);
        assert_eq!(json["presence_penalty"], 0.0);
        assert_eq!(json["store"], false);

        // Other required fields with expected values
        assert_eq!(json["object"], "response");
        assert_eq!(json["status"], "completed");
        assert_eq!(json["metadata"], serde_json::json!({}));
        assert!(json["output"].is_array());
        assert!(json["output"][0].get("id").is_some());
        assert!(json["output"][0].get("status").is_some());

        // Nullable-required fields must be present as null (not missing).
        for key in [
            "error",
            "incomplete_details",
            "billing",
            "conversation",
            "safety_identifier",
            "max_tool_calls",
            "instructions",
            "previous_response_id",
            "prompt_cache_key",
            "reasoning",
        ] {
            assert_eq!(
                json.get(key),
                Some(&serde_json::Value::Null),
                "expected {key} to be present as null"
            );
        }

        // nvext should be omitted when None
        assert!(json.get("nvext").is_none());
    }
}