"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "e15932bb60e645e533a4b2f999bec9c60328e6d3"
Unverified commit e994caeb, authored by KrishnanPrash and committed by GitHub
Browse files

feat: Adding support for `response_format` field (#5127)


Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
parent 6306afa6
...@@ -177,21 +177,45 @@ impl CommonExtProvider for NvCreateChatCompletionRequest { ...@@ -177,21 +177,45 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {
return Some(value); return Some(value);
} }
let tool_choice = self.inner.tool_choice.as_ref()?; // 1) Tool-call guided decoding (highest precedence after explicit guided_json)
let tools = self.inner.tools.as_deref()?; if let (Some(tool_choice), Some(tools)) =
(self.inner.tool_choice.as_ref(), self.inner.tools.as_deref())
{
match tools::get_json_schema_from_tools(Some(tool_choice), Some(tools)) { match tools::get_json_schema_from_tools(Some(tool_choice), Some(tools)) {
Ok(schema) => schema, Ok(Some(schema)) => return Some(schema),
Ok(None) => {}
Err(err) => { Err(err) => {
tracing::warn!( tracing::warn!(
error = %err, error = %err,
"failed to derive guided_json from tool_choice" "failed to derive guided_json from tool_choice"
); );
None
} }
} }
} }
// 2) OpenAI `response_format` (applies to assistant content, not tool calls)
if let Some(response_format) = self.inner.response_format.as_ref() {
use dynamo_async_openai::types::ResponseFormat;
match response_format {
ResponseFormat::Text => {}
ResponseFormat::JsonObject => {
// Minimal JSON Schema for "any JSON object"
return Some(serde_json::json!({
"type": "object"
}));
}
ResponseFormat::JsonSchema { json_schema } => {
// validate_response_format ensures schema is present when type=json_schema
if let Some(schema) = json_schema.schema.clone() {
return Some(schema);
}
}
}
}
None
}
fn get_guided_regex(&self) -> Option<String> { fn get_guided_regex(&self) -> Option<String> {
self.common.guided_regex.clone() self.common.guided_regex.clone()
} }
...@@ -325,7 +349,7 @@ impl ValidateRequest for NvCreateChatCompletionRequest { ...@@ -325,7 +349,7 @@ impl ValidateRequest for NvCreateChatCompletionRequest {
// none for prediction // none for prediction
// none for audio // none for audio
validate::validate_presence_penalty(self.inner.presence_penalty)?; validate::validate_presence_penalty(self.inner.presence_penalty)?;
// none for response_format validate::validate_response_format(&self.inner.response_format)?;
// none for seed // none for seed
validate::validate_service_tier(&self.inner.service_tier)?; validate::validate_service_tier(&self.inner.service_tier)?;
validate::validate_stop(&self.inner.stop)?; validate::validate_stop(&self.inner.stop)?;
......
...@@ -111,6 +111,42 @@ pub fn validate_no_unsupported_fields( ...@@ -111,6 +111,42 @@ pub fn validate_no_unsupported_fields(
Ok(()) Ok(())
} }
/// Checks that an optional chat-completions `response_format` is usable.
///
/// Dynamo translates two structured formats into guided decoding:
/// - `{"type":"json_object"}` -> a JSON-object schema
/// - `{"type":"json_schema","json_schema":{"schema": ...}}` -> the supplied schema
///
/// An absent value and `{"type":"text"}` are accepted as "no structured constraint".
///
/// # Errors
///
/// For the `json_schema` variant, returns an error when `json_schema.name` is
/// empty or when `json_schema.schema` is missing.
pub fn validate_response_format(
    response_format: &Option<dynamo_async_openai::types::ResponseFormat>,
) -> Result<(), anyhow::Error> {
    use dynamo_async_openai::types::ResponseFormat;

    // Only the `json_schema` variant carries fields that need checking;
    // everything else is valid as-is.
    let spec = match response_format {
        Some(ResponseFormat::JsonSchema { json_schema }) => json_schema,
        Some(ResponseFormat::Text) | Some(ResponseFormat::JsonObject) | None => return Ok(()),
    };

    // The name is checked before the schema, so an input failing both
    // reports the name error.
    anyhow::ensure!(
        !spec.name.is_empty(),
        "`response_format.json_schema.name` cannot be empty"
    );
    anyhow::ensure!(
        spec.schema.is_some(),
        "`response_format.json_schema.schema` is required when `response_format.type` is `json_schema`"
    );
    Ok(())
}
/// Validates the temperature parameter /// Validates the temperature parameter
pub fn validate_temperature(temperature: Option<f32>) -> Result<(), anyhow::Error> { pub fn validate_temperature(temperature: Option<f32>) -> Result<(), anyhow::Error> {
if let Some(temp) = temperature if let Some(temp) = temperature
......
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use std::sync::Arc;
use anyhow::Result;
use dynamo_llm::backend::{Decoder, StopTrigger};
use dynamo_llm::protocols::common::StopConditions;
use dynamo_llm::tokenizers::{self, Encoding, traits as tokenizer_traits};
// Token ids used throughout these tests; `TestTokenizer::decode` maps each one
// to a fixed string and renders any other id as "?".
const HI: u32 = 1; // decodes to "hi"
const STOP: u32 = 2; // decodes to "STOP"
const THERE: u32 = 3; // decodes to "there"
const EOS: u32 = 99; // decodes to "</s>", or to nothing when special tokens are skipped
/// Stateless stand-in tokenizer for these tests: its encoder returns empty
/// results and its decoder maps the fixed token-id constants to fixed strings.
struct TestTokenizer;
/// Stub encoder: both methods ignore their input and return empty results.
impl tokenizer_traits::Encoder for TestTokenizer {
    /// Returns an `Encoding::Sp` wrapping an empty vector, whatever the input.
    fn encode(&self, _input: &str) -> Result<Encoding> {
        let nothing = Vec::new();
        Ok(Encoding::Sp(nothing))
    }

    /// Returns an empty batch, whatever the inputs.
    fn encode_batch(&self, _inputs: &[&str]) -> Result<Vec<Encoding>> {
        Ok(Vec::new())
    }
}
/// Table-driven decoder used by the tests.
impl tokenizer_traits::Decoder for TestTokenizer {
    /// Concatenates the fixed string for each id, with no separator.
    ///
    /// `HI`, `STOP` and `THERE` map to "hi", "STOP" and "there". `EOS` maps to
    /// "</s>" unless `skip_special` is set, in which case it contributes
    /// nothing. Any other id is rendered as "?".
    fn decode(&self, ids: &[u32], skip_special: bool) -> Result<String> {
        let mut text = String::new();
        for &token in ids {
            let piece = match token {
                HI => "hi",
                STOP => "STOP",
                THERE => "there",
                // The special token is dropped entirely when requested.
                EOS if skip_special => continue,
                EOS => "</s>",
                _ => "?",
            };
            text.push_str(piece);
        }
        Ok(text)
    }
}
// Empty impl: nothing is overridden here. It lets `TestTokenizer` be used as a
// `dyn Tokenizer` (presumably the trait builds on the Encoder and Decoder impls
// — confirm against the trait definition).
impl tokenizer_traits::Tokenizer for TestTokenizer {}
/// Builds a `Decoder` backed by `TestTokenizer` with the requested stop conditions.
///
/// `max_tokens` and `min_tokens` populate the `StopConditions` fields of the
/// same name, `hidden_stop_ids` becomes `stop_token_ids_hidden`, and
/// `stop_sequences` is converted to owned strings for `stop`. All remaining
/// `StopConditions` fields keep their defaults. `include_stop_str` is passed
/// straight through to `Decoder::new`.
fn make_decoder(
    max_tokens: Option<u32>,
    min_tokens: Option<u32>,
    hidden_stop_ids: Option<Vec<u32>>,
    stop_sequences: Option<Vec<&str>>,
    include_stop_str: bool,
) -> Decoder {
    let conditions = StopConditions {
        stop: stop_sequences.map(|seqs| seqs.into_iter().map(str::to_owned).collect()),
        stop_token_ids_hidden: hidden_stop_ids,
        min_tokens,
        max_tokens,
        ..Default::default()
    };

    let test_tokenizer: Arc<dyn tokenizer_traits::Tokenizer> = Arc::new(TestTokenizer);
    // The stream is created with an empty id slice and `false` as its last argument.
    let stream = tokenizers::DecodeStream::new(test_tokenizer, &[], false);

    Decoder::new(stream, conditions, include_stop_str)
}
/// With no stop conditions configured, every token is decoded and no stop
/// trigger is reported.
#[test]
fn normal_completion_no_stop() {
    let mut dec = make_decoder(None, None, None, None, false);
    let input = [HI, THERE];
    let outcome = dec.process_token_ids(&input).unwrap();

    assert_eq!(outcome.text.as_deref(), Some("hithere"));
    assert!(outcome.stop_trigger.is_none());
}
/// A hidden stop token ends generation: its text is left out of the output and
/// the trigger reports the token id.
#[test]
fn hidden_stop_token_excluded() {
    let hidden_ids = Some(vec![EOS]);
    let mut dec = make_decoder(None, None, hidden_ids, None, false);
    let input = [HI, EOS];
    let outcome = dec.process_token_ids(&input).unwrap();

    assert_eq!(outcome.text.as_deref(), Some("hi"));
    assert!(matches!(
        outcome.stop_trigger,
        Some(StopTrigger::HiddenStopTokenDetected(id)) if id == EOS
    ));
}
/// With `include_stop_str = false`, a matched stop sequence is removed from the
/// text and reported as a hidden stop sequence.
#[test]
fn include_stop_str_false_excludes() {
    let stop_strings = Some(vec!["STOP"]);
    let mut dec = make_decoder(None, None, None, stop_strings, false);
    let input = [HI, STOP, THERE];
    let outcome = dec.process_token_ids(&input).unwrap();

    assert_eq!(outcome.text.as_deref(), Some("hi"));
    assert!(matches!(
        outcome.stop_trigger,
        Some(StopTrigger::HiddenStopSequenceDetected(ref s)) if s == "STOP"
    ));
}
/// With `include_stop_str = true`, a matched stop sequence stays in the text
/// and is reported as a visible stop sequence.
#[test]
fn include_stop_str_true_includes() {
    let stop_strings = Some(vec!["STOP"]);
    let mut dec = make_decoder(None, None, None, stop_strings, true);
    let input = [HI, STOP, THERE];
    let outcome = dec.process_token_ids(&input).unwrap();

    assert_eq!(outcome.text.as_deref(), Some("hiSTOP"));
    assert!(matches!(
        outcome.stop_trigger,
        Some(StopTrigger::VisibleStopSequenceDetected(ref s)) if s == "STOP"
    ));
}
/// Tokens after the stop token are not processed: only two tokens are recorded
/// and the text stops at "hi".
#[test]
fn trailing_tokens_ignored_after_stop() {
    let hidden_ids = Some(vec![EOS]);
    let mut dec = make_decoder(None, None, hidden_ids, None, false);
    let input = [HI, EOS, THERE];
    let outcome = dec.process_token_ids(&input).unwrap();

    assert_eq!(outcome.text.as_deref(), Some("hi"));
    assert_eq!(outcome.tokens.len(), 2);
}
/// With `min_tokens = 3` and only two tokens processed, the hidden stop token
/// does not fire: its text is emitted and no stop trigger is set.
#[test]
fn min_tokens_delays_stop() {
    let hidden_ids = Some(vec![EOS]);
    let mut dec = make_decoder(None, Some(3), hidden_ids, None, false);
    let input = [HI, EOS];
    let outcome = dec.process_token_ids(&input).unwrap();

    assert_eq!(outcome.text.as_deref(), Some("hi</s>"));
    assert!(outcome.stop_trigger.is_none());
}
/// When the same token is both a hidden stop id and the text of a stop
/// sequence, the stop-token trigger is the one reported.
#[test]
fn stop_token_priority_over_sequence() {
    let hidden_ids = Some(vec![STOP]);
    let stop_strings = Some(vec!["STOP"]);
    let mut dec = make_decoder(None, None, hidden_ids, stop_strings, false);
    let input = [HI, STOP];
    let outcome = dec.process_token_ids(&input).unwrap();

    assert_eq!(outcome.text.as_deref(), Some("hi"));
    assert!(matches!(
        outcome.stop_trigger,
        Some(StopTrigger::HiddenStopTokenDetected(id)) if id == STOP
    ));
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment