Unverified Commit c12fe501 authored by nachiketb-nvidia's avatar nachiketb-nvidia Committed by GitHub
Browse files

chore: remove flatten for chat response types, add reasoning_content (#2543)

Change the chat completion response objects from wrapper structs to type aliases of the dynamo_async_openai types.

Implement the aggregator trait for the chat completion response types.

Add `reasoning_content` to the response message and the stream delta message in lib/async-openai.
parent a0ddcbce
......@@ -50,6 +50,7 @@ jobs:
# Set GITHUB_TOKEN to avoid github rate limits on URL checks
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
cd docs
set -euo pipefail
# Run lychee against all files in repo
lychee \
......
......@@ -449,6 +449,9 @@ pub struct ChatCompletionResponseMessage {
/// If the audio output modality is requested, this object contains data about the audio response from the model. [Learn more](https://platform.openai.com/docs/guides/audio).
#[serde(skip_serializing_if = "Option::is_none")]
pub audio: Option<ChatCompletionResponseMessageAudio>,
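// NOTE(review): unlike `audio` above, this field has no `skip_serializing_if` attribute, so
// (assuming this struct derives `Serialize` like its neighbours) a `None` value is emitted as
// `null` instead of being omitted — confirm this is intended.
/// NVIDIA-specific extension to the chat completion response message: the reasoning content, if any.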
pub reasoning_content: Option<String>,
}
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
......@@ -1021,6 +1024,9 @@ pub struct ChatCompletionStreamResponseDelta {
pub role: Option<Role>,
/// The refusal message generated by the model.
pub refusal: Option<String>,
/// NVIDIA-specific extension to the streamed delta: the reasoning content carried by this chunk, if any.
pub reasoning_content: Option<String>,
}
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
......
......@@ -396,7 +396,7 @@ impl
//tracing::trace!("from_assistant: {from_assistant}");
#[allow(deprecated)]
let inner = dynamo_async_openai::types::CreateChatCompletionStreamResponse{
let delta = NvCreateChatCompletionStreamResponse {
id: c.id,
choices: vec![dynamo_async_openai::types::ChatChoiceStream{
index: 0,
......@@ -407,6 +407,7 @@ impl
tool_calls: None,
refusal: None,
function_call: None,
reasoning_content: None,
},
logprobs: None,
finish_reason,
......@@ -418,7 +419,6 @@ impl
system_fingerprint: Some(c.system_fingerprint),
service_tier: None,
};
let delta = NvCreateChatCompletionStreamResponse{inner};
let ann = Annotated{
id: None,
data: Some(delta),
......
......@@ -204,18 +204,12 @@ impl
for c in prompt.chars() {
// we are returning characters not tokens, so there will be some postprocessing overhead
tokio::time::sleep(*TOKEN_ECHO_DELAY).await;
let inner = deltas.create_choice(0, Some(c.to_string()), None, None);
let response = NvCreateChatCompletionStreamResponse {
inner,
};
let response = deltas.create_choice(0, Some(c.to_string()), None, None);
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
id += 1;
}
let inner = deltas.create_choice(0, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
let response = NvCreateChatCompletionStreamResponse {
inner,
};
let response = deltas.create_choice(0, None, Some(dynamo_async_openai::types::FinishReason::Stop), None);
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
};
......
......@@ -233,7 +233,7 @@ async fn evaluate(
match (item.data.as_ref(), item.event.as_deref()) {
(Some(data), _) => {
// Normal case
let choice = data.inner.choices.first();
let choice = data.choices.first();
let chat_comp = choice.as_ref().unwrap();
if let Some(c) = &chat_comp.delta.content {
output += c;
......
......@@ -143,7 +143,7 @@ async fn main_loop(
match (item.data.as_ref(), item.event.as_deref()) {
(Some(data), _) => {
// Normal case
let entry = data.inner.choices.first();
let entry = data.choices.first();
let chat_comp = entry.as_ref().unwrap();
if let Some(c) = &chat_comp.delta.content {
let _ = stdout.write(c.as_bytes());
......
......@@ -31,6 +31,7 @@ use super::{
service_v2, RouteDoc,
};
use crate::preprocessor::LLMMetricAnnotation;
use crate::protocols::openai::chat_completions::aggregator::ChatCompletionAggregator;
use crate::protocols::openai::{
chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionResponse},
completions::{NvCreateCompletionRequest, NvCreateCompletionResponse},
......
......@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse {
fn extract_logprobs_by_choice(&self) -> HashMap<u32, Vec<TokenLogProbs>> {
let mut result = HashMap::new();
for choice in &self.inner.choices {
for choice in &self.choices {
let choice_index = choice.index;
let choice_logprobs = choice
......@@ -574,8 +574,7 @@ mod tests {
use approx::assert_abs_diff_eq;
use dynamo_async_openai::types::{
ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
ChatCompletionTokenLogprob, CreateChatCompletionStreamResponse, FinishReason, Role,
TopLogprobs,
ChatCompletionTokenLogprob, FinishReason, Role, TopLogprobs,
};
use futures::StreamExt;
use std::sync::Arc;
......@@ -949,7 +948,7 @@ mod tests {
token_logprobs: Vec<ChatCompletionTokenLogprob>,
) -> NvCreateChatCompletionStreamResponse {
#[expect(deprecated)]
let inner = CreateChatCompletionStreamResponse {
NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices: vec![ChatChoiceStream {
index: 0,
......@@ -959,6 +958,7 @@ mod tests {
tool_calls: None,
role: Some(Role::Assistant),
refusal: None,
reasoning_content: None,
},
finish_reason: Some(FinishReason::Stop),
logprobs: Some(ChatChoiceLogprobs {
......@@ -972,9 +972,7 @@ mod tests {
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
};
NvCreateChatCompletionStreamResponse { inner }
}
}
fn create_mock_response_with_multiple_choices(
......@@ -992,6 +990,7 @@ mod tests {
tool_calls: None,
role: Some(Role::Assistant),
refusal: None,
reasoning_content: None,
},
finish_reason: Some(FinishReason::Stop),
logprobs: Some(ChatChoiceLogprobs {
......@@ -1001,7 +1000,7 @@ mod tests {
})
.collect();
let inner = CreateChatCompletionStreamResponse {
NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices,
created: 1234567890,
......@@ -1010,9 +1009,7 @@ mod tests {
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
};
NvCreateChatCompletionStreamResponse { inner }
}
}
#[test]
......@@ -1331,7 +1328,7 @@ mod tests {
fn test_logprob_extractor_with_missing_data() {
// Test with choice that has no logprobs
#[expect(deprecated)]
let inner = CreateChatCompletionStreamResponse {
let response = NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices: vec![ChatChoiceStream {
index: 0,
......@@ -1341,6 +1338,7 @@ mod tests {
tool_calls: None,
role: Some(Role::Assistant),
refusal: None,
reasoning_content: None,
},
finish_reason: Some(FinishReason::Stop),
logprobs: None, // No logprobs
......@@ -1353,7 +1351,6 @@ mod tests {
usage: None,
};
let response = NvCreateChatCompletionStreamResponse { inner };
let logprobs = response.extract_logprobs_by_choice();
assert_eq!(logprobs.len(), 1);
assert!(logprobs.values().any(|v| v.is_empty()));
......@@ -1556,9 +1553,8 @@ mod tests {
fn create_mock_response() -> NvCreateChatCompletionStreamResponse {
// Create a mock response for testing
// In practice, this would have real logprobs data
use dynamo_async_openai::types::CreateChatCompletionStreamResponse;
let inner = CreateChatCompletionStreamResponse {
NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices: vec![],
created: 1234567890,
......@@ -1567,9 +1563,7 @@ mod tests {
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
};
NvCreateChatCompletionStreamResponse { inner }
}
}
// Mock context for testing
......
......@@ -27,7 +27,7 @@ use super::{
OpenAIStopConditionsProvider,
};
mod aggregator;
pub mod aggregator;
mod delta;
pub use aggregator::DeltaAggregator;
......@@ -59,11 +59,7 @@ pub struct NvCreateChatCompletionRequest {
/// # Notes
/// - This is a type alias for the base OpenAI unary chat completion response;
///   its fields are accessed directly (no `inner` wrapper, no `serde(flatten)`).
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateChatCompletionResponse {
#[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateChatCompletionResponse,
}
pub type NvCreateChatCompletionResponse = dynamo_async_openai::types::CreateChatCompletionResponse;
/// A response structure for streamed chat completions, embedding OpenAI's
/// `CreateChatCompletionStreamResponse`.
......@@ -71,11 +67,8 @@ pub struct NvCreateChatCompletionResponse {
/// # Notes
/// - This is a type alias for the base OpenAI streaming chat completion response;
///   its fields are accessed directly (no `inner` wrapper, no `serde(flatten)`).
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateChatCompletionStreamResponse {
#[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse,
}
pub type NvCreateChatCompletionStreamResponse =
dynamo_async_openai::types::CreateChatCompletionStreamResponse;
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
/// providing access to NVIDIA-specific extensions.
......
......@@ -110,21 +110,21 @@ impl DeltaAggregator {
if aggregator.error.is_none() && delta.data.is_some() {
// Extract the data payload from the delta.
let delta = delta.data.unwrap();
aggregator.id = delta.inner.id;
aggregator.model = delta.inner.model;
aggregator.created = delta.inner.created;
aggregator.service_tier = delta.inner.service_tier;
aggregator.id = delta.id;
aggregator.model = delta.model;
aggregator.created = delta.created;
aggregator.service_tier = delta.service_tier;
// Aggregate usage statistics if available.
if let Some(usage) = delta.inner.usage {
if let Some(usage) = delta.usage {
aggregator.usage = Some(usage);
}
if let Some(system_fingerprint) = delta.inner.system_fingerprint {
if let Some(system_fingerprint) = delta.system_fingerprint {
aggregator.system_fingerprint = Some(system_fingerprint);
}
// Aggregate choices incrementally.
for choice in delta.inner.choices {
for choice in delta.choices {
let state_choice =
aggregator
.choices
......@@ -198,7 +198,7 @@ impl DeltaAggregator {
choices.sort_by(|a, b| a.index.cmp(&b.index));
// Construct the final response object.
let inner = dynamo_async_openai::types::CreateChatCompletionResponse {
let response = NvCreateChatCompletionResponse {
id: aggregator.id,
created: aggregator.created,
usage: aggregator.usage,
......@@ -209,8 +209,6 @@ impl DeltaAggregator {
service_tier: aggregator.service_tier,
};
let response = NvCreateChatCompletionResponse { inner };
Ok(response)
}
}
......@@ -234,6 +232,7 @@ impl From<DeltaChoice> for dynamo_async_openai::types::ChatChoice {
refusal: None,
function_call: None,
audio: None,
reasoning_content: None,
},
index: delta.index,
finish_reason: delta.finish_reason,
......@@ -242,35 +241,48 @@ impl From<DeltaChoice> for dynamo_async_openai::types::ChatChoice {
}
}
impl NvCreateChatCompletionResponse {
/// Converts an SSE stream into a [`NvCreateChatCompletionResponse`].
/// Trait for aggregating chat completion responses from streams.
/// The `async_fn_in_trait` lint is allowed here because these async functions are not intended to be used outside of this library.
#[allow(async_fn_in_trait)]
pub trait ChatCompletionAggregator {
/// Aggregates an annotated stream of chat completion responses into a final response.
///
/// # Arguments
/// * `stream` - A stream of SSE messages containing chat completion responses.
/// * `stream` - A stream of annotated chat completion responses.
///
/// # Returns
/// * `Ok(NvCreateChatCompletionResponse)` if aggregation succeeds.
/// * `Err(String)` if an error occurs.
pub async fn from_sse_stream(
stream: DataStream<Result<Message, SseCodecError>>,
) -> Result<NvCreateChatCompletionResponse, String> {
let stream = convert_sse_stream::<NvCreateChatCompletionStreamResponse>(stream);
NvCreateChatCompletionResponse::from_annotated_stream(stream).await
}
async fn from_annotated_stream(
stream: impl Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>>,
) -> Result<NvCreateChatCompletionResponse, String>;
/// Aggregates an annotated stream of chat completion responses into a final response.
/// Converts an SSE stream into a [`NvCreateChatCompletionResponse`].
///
/// # Arguments
/// * `stream` - A stream of annotated chat completion responses.
/// * `stream` - A stream of SSE messages containing chat completion responses.
///
/// # Returns
/// * `Ok(NvCreateChatCompletionResponse)` if aggregation succeeds.
/// * `Err(String)` if an error occurs.
pub async fn from_annotated_stream(
async fn from_sse_stream(
stream: DataStream<Result<Message, SseCodecError>>,
) -> Result<NvCreateChatCompletionResponse, String>;
}
/// Aggregation entry points for the unary chat completion response type.
///
/// Both methods produce a single [`NvCreateChatCompletionResponse`] from a stream
/// of chat completion stream responses.
impl ChatCompletionAggregator for dynamo_async_openai::types::CreateChatCompletionResponse {
    /// Hands the annotated stream to [`DeltaAggregator::apply`] and returns its result unchanged.
    async fn from_annotated_stream(
        stream: impl Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>>,
    ) -> Result<NvCreateChatCompletionResponse, String> {
        DeltaAggregator::apply(stream).await
    }

    /// Converts the SSE messages with `convert_sse_stream`, then aggregates the
    /// resulting annotated stream through `from_annotated_stream`.
    async fn from_sse_stream(
        stream: DataStream<Result<Message, SseCodecError>>,
    ) -> Result<NvCreateChatCompletionResponse, String> {
        let annotated = convert_sse_stream::<NvCreateChatCompletionStreamResponse>(stream);
        Self::from_annotated_stream(annotated).await
    }
}
#[cfg(test)]
......@@ -293,6 +305,7 @@ mod tests {
tool_calls: None,
role,
refusal: None,
reasoning_content: None,
};
let choice = dynamo_async_openai::types::ChatChoiceStream {
index,
......@@ -301,7 +314,7 @@ mod tests {
logprobs: None,
};
let inner = dynamo_async_openai::types::CreateChatCompletionStreamResponse {
let data = NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
model: "meta/llama-3.1-8b-instruct".to_string(),
created: 1234567890,
......@@ -312,8 +325,6 @@ mod tests {
object: "chat.completion".to_string(),
};
let data = NvCreateChatCompletionStreamResponse { inner };
Annotated {
data: Some(data),
id: Some("test_id".to_string()),
......@@ -336,13 +347,13 @@ mod tests {
let response = result.unwrap();
// Verify that the response is empty and has default values
assert_eq!(response.inner.id, "");
assert_eq!(response.inner.model, "");
assert_eq!(response.inner.created, 0);
assert!(response.inner.usage.is_none());
assert!(response.inner.system_fingerprint.is_none());
assert_eq!(response.inner.choices.len(), 0);
assert!(response.inner.service_tier.is_none());
assert_eq!(response.id, "");
assert_eq!(response.model, "");
assert_eq!(response.created, 0);
assert!(response.usage.is_none());
assert!(response.system_fingerprint.is_none());
assert_eq!(response.choices.len(), 0);
assert!(response.service_tier.is_none());
}
#[tokio::test]
......@@ -366,18 +377,18 @@ mod tests {
let response = result.unwrap();
// Verify the response fields
assert_eq!(response.inner.id, "test_id");
assert_eq!(response.inner.model, "meta/llama-3.1-8b-instruct");
assert_eq!(response.inner.created, 1234567890);
assert!(response.inner.usage.is_none());
assert!(response.inner.system_fingerprint.is_none());
assert_eq!(response.inner.choices.len(), 1);
let choice = &response.inner.choices[0];
assert_eq!(response.id, "test_id");
assert_eq!(response.model, "meta/llama-3.1-8b-instruct");
assert_eq!(response.created, 1234567890);
assert!(response.usage.is_none());
assert!(response.system_fingerprint.is_none());
assert_eq!(response.choices.len(), 1);
let choice = &response.choices[0];
assert_eq!(choice.index, 0);
assert_eq!(choice.message.content.as_ref().unwrap(), "Hello,");
assert!(choice.finish_reason.is_none());
assert_eq!(choice.message.role, dynamo_async_openai::types::Role::User);
assert!(response.inner.service_tier.is_none());
assert!(response.service_tier.is_none());
}
#[tokio::test]
......@@ -410,8 +421,8 @@ mod tests {
let response = result.unwrap();
// Verify the response fields
assert_eq!(response.inner.choices.len(), 1);
let choice = &response.inner.choices[0];
assert_eq!(response.choices.len(), 1);
let choice = &response.choices[0];
assert_eq!(choice.index, 0);
assert_eq!(choice.message.content.as_ref().unwrap(), "Hello, world!");
assert_eq!(
......@@ -426,7 +437,7 @@ mod tests {
async fn test_multiple_choices() {
// Create a delta with multiple choices
// ALLOW: function_call is deprecated
let delta = dynamo_async_openai::types::CreateChatCompletionStreamResponse {
let data = NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
model: "test_model".to_string(),
created: 1234567890,
......@@ -442,6 +453,7 @@ mod tests {
function_call: None,
tool_calls: None,
refusal: None,
reasoning_content: None,
},
finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop),
logprobs: None,
......@@ -454,6 +466,7 @@ mod tests {
function_call: None,
tool_calls: None,
refusal: None,
reasoning_content: None,
},
finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop),
logprobs: None,
......@@ -462,8 +475,6 @@ mod tests {
object: "chat.completion".to_string(),
};
let data = NvCreateChatCompletionStreamResponse { inner: delta };
// Wrap it in Annotated and create a stream
let annotated_delta = Annotated {
data: Some(data),
......@@ -481,9 +492,9 @@ mod tests {
let mut response = result.unwrap();
// Verify the response fields
assert_eq!(response.inner.choices.len(), 2);
response.inner.choices.sort_by(|a, b| a.index.cmp(&b.index)); // Ensure the choices are ordered
let choice0 = &response.inner.choices[0];
assert_eq!(response.choices.len(), 2);
response.choices.sort_by(|a, b| a.index.cmp(&b.index)); // Ensure the choices are ordered
let choice0 = &response.choices[0];
assert_eq!(choice0.index, 0);
assert_eq!(choice0.message.content.as_ref().unwrap(), "Choice 0");
assert_eq!(
......@@ -495,7 +506,7 @@ mod tests {
dynamo_async_openai::types::Role::Assistant
);
let choice1 = &response.inner.choices[1];
let choice1 = &response.choices[1];
assert_eq!(choice1.index, 1);
assert_eq!(choice1.message.content.as_ref().unwrap(), "Choice 1");
assert_eq!(
......@@ -520,9 +531,7 @@ mod tests {
Some(dynamo_async_openai::types::Role::Assistant),
Some(dynamo_async_openai::types::FinishReason::ToolCalls),
);
let delta = annotated_delta.data.unwrap().inner;
let data = NvCreateChatCompletionStreamResponse { inner: delta };
let data = annotated_delta.data.unwrap();
// Wrap it in Annotated and create a stream
let annotated_delta = Annotated {
......@@ -541,8 +550,8 @@ mod tests {
let response = result.unwrap();
// There should be one choice
assert_eq!(response.inner.choices.len(), 1);
let choice = &response.inner.choices[0];
assert_eq!(response.choices.len(), 1);
let choice = &response.choices[0];
// The tool_calls field should be present and parsed
assert!(choice.message.tool_calls.is_some());
......
......@@ -209,6 +209,7 @@ impl DeltaGenerator {
None
},
refusal: None,
reasoning_content: None,
};
let choice = dynamo_async_openai::types::ChatChoiceStream {
......@@ -304,9 +305,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateChatCompletionStreamRes
let index = 0;
let stream_response = self.create_choice(index, delta.text, finish_reason, logprobs);
Ok(NvCreateChatCompletionStreamResponse {
inner: stream_response,
})
Ok(stream_response)
}
fn get_isl(&self) -> Option<u32> {
......
......@@ -199,7 +199,7 @@ impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
type Error = anyhow::Error;
fn try_from(nv_resp: NvCreateChatCompletionResponse) -> Result<Self, Self::Error> {
let chat_resp = nv_resp.inner;
let chat_resp = nv_resp;
let content_text = chat_resp
.choices
.into_iter()
......@@ -341,28 +341,27 @@ mod tests {
fn test_into_nvresponse_from_chat_response() {
let now = 1_726_000_000;
let chat_resp = NvCreateChatCompletionResponse {
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
content: Some("This is a reply".into()),
refusal: None,
tool_calls: None,
role: dynamo_async_openai::types::Role::Assistant,
function_call: None,
audio: None,
},
finish_reason: None,
logprobs: None,
}],
created: now,
model: "llama-3.1-8b-instruct".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
},
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
content: Some("This is a reply".into()),
refusal: None,
tool_calls: None,
role: dynamo_async_openai::types::Role::Assistant,
function_call: None,
audio: None,
reasoning_content: None,
},
finish_reason: None,
logprobs: None,
}],
created: now,
model: "llama-3.1-8b-instruct".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
};
let wrapped: NvResponse = chat_resp.try_into().unwrap();
......
......@@ -16,7 +16,8 @@
use dynamo_llm::protocols::{
codec::{create_message_stream, Message, SseCodecError},
openai::{
chat_completions::NvCreateChatCompletionResponse, completions::NvCreateCompletionResponse,
chat_completions::{aggregator::ChatCompletionAggregator, NvCreateChatCompletionResponse},
completions::NvCreateCompletionResponse,
},
ContentProvider, DataStream,
};
......@@ -43,7 +44,6 @@ async fn test_openai_chat_stream() {
// todo: provide a cleaner way to extract the content from choices
assert_eq!(
result
.inner
.choices
.first()
.unwrap()
......@@ -65,7 +65,6 @@ async fn test_openai_chat_edge_case_multi_line_data() {
assert_eq!(
result
.inner
.choices
.first()
.unwrap()
......@@ -86,7 +85,6 @@ async fn test_openai_chat_edge_case_comments_per_response() {
assert_eq!(
result
.inner
.choices
.first()
.unwrap()
......
......@@ -100,11 +100,7 @@ impl
let stream = stream! {
tokio::time::sleep(std::time::Duration::from_millis(max_tokens)).await;
for i in 0..10 {
let inner = generator.create_choice(i,Some(format!("choice {i}")), None, None);
let output = NvCreateChatCompletionStreamResponse {
inner,
};
let output = generator.create_choice(i,Some(format!("choice {i}")), None, None);
yield Annotated::from_data(output);
}
......
......@@ -12,8 +12,7 @@ use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStrea
use dynamo_async_openai::types::{
ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
ChatCompletionTokenLogprob, CreateChatCompletionStreamResponse, FinishReason, Role,
TopLogprobs,
ChatCompletionTokenLogprob, FinishReason, Role, TopLogprobs,
};
// Type aliases to simplify complex test data structures
......@@ -387,6 +386,7 @@ fn create_response_with_linear_probs(
tool_calls: None,
role: Some(Role::Assistant),
refusal: None,
reasoning_content: None,
},
finish_reason: Some(FinishReason::Stop),
logprobs: Some(ChatChoiceLogprobs {
......@@ -395,7 +395,7 @@ fn create_response_with_linear_probs(
}),
};
let inner = CreateChatCompletionStreamResponse {
NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices: vec![choice],
created: 1234567890,
......@@ -404,9 +404,7 @@ fn create_response_with_linear_probs(
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
};
NvCreateChatCompletionStreamResponse { inner }
}
}
fn create_multi_choice_response(
......@@ -466,6 +464,7 @@ fn create_multi_choice_response(
tool_calls: None,
role: Some(Role::Assistant),
refusal: None,
reasoning_content: None,
},
finish_reason: Some(FinishReason::Stop),
logprobs: Some(ChatChoiceLogprobs {
......@@ -476,7 +475,7 @@ fn create_multi_choice_response(
})
.collect();
let inner = CreateChatCompletionStreamResponse {
NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices,
created: 1234567890,
......@@ -485,7 +484,5 @@ fn create_multi_choice_response(
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
};
NvCreateChatCompletionStreamResponse { inner }
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment