Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
e994caeb
Unverified
Commit
e994caeb
authored
Jan 07, 2026
by
KrishnanPrash
Committed by
GitHub
Jan 07, 2026
Browse files
feat: Adding support for `response_format` field (#5127)
Signed-off-by:
Krishnan Prashanth
<
kprashanth@nvidia.com
>
parent
6306afa6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
209 additions
and
12 deletions
+209
-12
lib/llm/src/protocols/openai/chat_completions.rs
lib/llm/src/protocols/openai/chat_completions.rs
+36
-12
lib/llm/src/protocols/openai/validate.rs
lib/llm/src/protocols/openai/validate.rs
+36
-0
lib/llm/tests/test_stop_behavior.rs
lib/llm/tests/test_stop_behavior.rs
+137
-0
No files found.
lib/llm/src/protocols/openai/chat_completions.rs
View file @
e994caeb
...
@@ -177,19 +177,43 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {
...
@@ -177,19 +177,43 @@ impl CommonExtProvider for NvCreateChatCompletionRequest {
return
Some
(
value
);
return
Some
(
value
);
}
}
let
tool_choice
=
self
.inner.tool_choice
.as_ref
()
?
;
// 1) Tool-call guided decoding (highest precedence after explicit guided_json)
let
tools
=
self
.inner.tools
.as_deref
()
?
;
if
let
(
Some
(
tool_choice
),
Some
(
tools
))
=
(
self
.inner.tool_choice
.as_ref
(),
self
.inner.tools
.as_deref
())
match
tools
::
get_json_schema_from_tools
(
Some
(
tool_choice
),
Some
(
tools
))
{
{
Ok
(
schema
)
=>
schema
,
match
tools
::
get_json_schema_from_tools
(
Some
(
tool_choice
),
Some
(
tools
))
{
Err
(
err
)
=>
{
Ok
(
Some
(
schema
))
=>
return
Some
(
schema
),
tracing
::
warn!
(
Ok
(
None
)
=>
{}
error
=
%
err
,
Err
(
err
)
=>
{
"failed to derive guided_json from tool_choice"
tracing
::
warn!
(
);
error
=
%
err
,
None
"failed to derive guided_json from tool_choice"
);
}
}
}
}
}
// 2) OpenAI `response_format` (applies to assistant content, not tool calls)
if
let
Some
(
response_format
)
=
self
.inner.response_format
.as_ref
()
{
use
dynamo_async_openai
::
types
::
ResponseFormat
;
match
response_format
{
ResponseFormat
::
Text
=>
{}
ResponseFormat
::
JsonObject
=>
{
// Minimal JSON Schema for "any JSON object"
return
Some
(
serde_json
::
json!
({
"type"
:
"object"
}));
}
ResponseFormat
::
JsonSchema
{
json_schema
}
=>
{
// validate_response_format ensures schema is present when type=json_schema
if
let
Some
(
schema
)
=
json_schema
.schema
.clone
()
{
return
Some
(
schema
);
}
}
}
}
None
}
}
fn
get_guided_regex
(
&
self
)
->
Option
<
String
>
{
fn
get_guided_regex
(
&
self
)
->
Option
<
String
>
{
...
@@ -325,7 +349,7 @@ impl ValidateRequest for NvCreateChatCompletionRequest {
...
@@ -325,7 +349,7 @@ impl ValidateRequest for NvCreateChatCompletionRequest {
// none for prediction
// none for prediction
// none for audio
// none for audio
validate
::
validate_presence_penalty
(
self
.inner.presence_penalty
)
?
;
validate
::
validate_presence_penalty
(
self
.inner.presence_penalty
)
?
;
// none for
response_format
validate
::
validate_response_format
(
&
self
.inner.
response_format
)
?
;
// none for seed
// none for seed
validate
::
validate_service_tier
(
&
self
.inner.service_tier
)
?
;
validate
::
validate_service_tier
(
&
self
.inner.service_tier
)
?
;
validate
::
validate_stop
(
&
self
.inner.stop
)
?
;
validate
::
validate_stop
(
&
self
.inner.stop
)
?
;
...
...
lib/llm/src/protocols/openai/validate.rs
View file @
e994caeb
...
@@ -111,6 +111,42 @@ pub fn validate_no_unsupported_fields(
...
@@ -111,6 +111,42 @@ pub fn validate_no_unsupported_fields(
Ok
(())
Ok
(())
}
}
/// Validates response_format for chat completions.
///
/// Dynamo currently supports translating:
/// - `{"type":"json_object"}` -> guided decoding JSON object schema
/// - `{"type":"json_schema","json_schema":{"schema": ...}}` -> guided decoding JSON schema
///
/// `{"type":"text"}` is accepted and means no structured constraint.
pub
fn
validate_response_format
(
response_format
:
&
Option
<
dynamo_async_openai
::
types
::
ResponseFormat
>
,
)
->
Result
<
(),
anyhow
::
Error
>
{
use
dynamo_async_openai
::
types
::
ResponseFormat
;
let
Some
(
fmt
)
=
response_format
else
{
return
Ok
(());
};
match
fmt
{
ResponseFormat
::
Text
=>
Ok
(()),
ResponseFormat
::
JsonObject
=>
Ok
(()),
ResponseFormat
::
JsonSchema
{
json_schema
}
=>
{
// Validate name field format
if
json_schema
.name
.is_empty
()
{
anyhow
::
bail!
(
"`response_format.json_schema.name` cannot be empty"
);
}
// Validate schema presence
if
json_schema
.schema
.is_none
()
{
anyhow
::
bail!
(
"`response_format.json_schema.schema` is required when `response_format.type` is `json_schema`"
);
}
Ok
(())
}
}
}
/// Validates the temperature parameter
/// Validates the temperature parameter
pub
fn
validate_temperature
(
temperature
:
Option
<
f32
>
)
->
Result
<
(),
anyhow
::
Error
>
{
pub
fn
validate_temperature
(
temperature
:
Option
<
f32
>
)
->
Result
<
(),
anyhow
::
Error
>
{
if
let
Some
(
temp
)
=
temperature
if
let
Some
(
temp
)
=
temperature
...
...
lib/llm/tests/test_stop_behavior.rs
0 → 100644
View file @
e994caeb
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
std
::
sync
::
Arc
;
use
anyhow
::
Result
;
use
dynamo_llm
::
backend
::{
Decoder
,
StopTrigger
};
use
dynamo_llm
::
protocols
::
common
::
StopConditions
;
use
dynamo_llm
::
tokenizers
::{
self
,
Encoding
,
traits
as
tokenizer_traits
};
// Token ids understood by `TestTokenizer`'s decoder.
/// Decodes to `"hi"`.
const HI: u32 = 1;
/// Decodes to `"STOP"`.
const STOP: u32 = 2;
/// Decodes to `"there"`.
const THERE: u32 = 3;
/// Decodes to `"</s>"`, or to nothing when special tokens are skipped.
const EOS: u32 = 99;
/// Stateless stand-in tokenizer used by the stop-behavior tests in this file.
struct TestTokenizer;
// Stub encoder: every call ignores its input and returns an empty result.
impl tokenizer_traits::Encoder for TestTokenizer {
    /// Returns an `Encoding::Sp` holding no ids, whatever the text is.
    fn encode(&self, _text: &str) -> Result<Encoding> {
        let no_ids = Vec::new();
        Ok(Encoding::Sp(no_ids))
    }

    /// Returns an empty list of encodings, whatever the inputs are.
    fn encode_batch(&self, _texts: &[&str]) -> Result<Vec<Encoding>> {
        Ok(Vec::new())
    }
}
// Table-driven decoder: each id maps to a fixed string and the pieces are concatenated
// with no separator.
impl tokenizer_traits::Decoder for TestTokenizer {
    /// Concatenates the text of each id in `ids`.
    ///
    /// `HI`, `STOP` and `THERE` map to `"hi"`, `"STOP"` and `"there"`. `EOS` maps to
    /// `"</s>"`, unless `skip_special` is set, in which case it contributes nothing.
    /// Every other id maps to `"?"`.
    fn decode(&self, ids: &[u32], skip_special: bool) -> Result<String> {
        let mut text = String::new();
        for &id in ids {
            let piece = match id {
                HI => "hi",
                STOP => "STOP",
                THERE => "there",
                // The only special token; dropped entirely when skipping specials.
                EOS if skip_special => continue,
                EOS => "</s>",
                _ => "?",
            };
            text.push_str(piece);
        }
        Ok(text)
    }
}
// Empty impl: `TestTokenizer` adds nothing of its own to `Tokenizer`.
// NOTE(review): presumably the trait is satisfied by the Encoder and Decoder impls in this file; confirm.
impl tokenizer_traits::Tokenizer for TestTokenizer {}
fn
make_decoder
(
max_tokens
:
Option
<
u32
>
,
min_tokens
:
Option
<
u32
>
,
hidden_stop_ids
:
Option
<
Vec
<
u32
>>
,
stop_sequences
:
Option
<
Vec
<&
str
>>
,
include_stop_str
:
bool
,
)
->
Decoder
{
let
tokenizer
:
Arc
<
dyn
tokenizer_traits
::
Tokenizer
>
=
Arc
::
new
(
TestTokenizer
);
let
decode_stream
=
tokenizers
::
DecodeStream
::
new
(
tokenizer
,
&
[],
false
);
let
stop_conditions
=
StopConditions
{
max_tokens
,
min_tokens
,
stop_token_ids_hidden
:
hidden_stop_ids
,
stop
:
stop_sequences
.map
(|
v
|
v
.into_iter
()
.map
(
String
::
from
)
.collect
()),
..
Default
::
default
()
};
Decoder
::
new
(
decode_stream
,
stop_conditions
,
include_stop_str
)
}
/// With no stop conditions configured, `[HI, THERE]` decodes to `"hithere"` and no stop
/// trigger is reported.
#[test]
fn normal_completion_no_stop() {
    let mut dec = make_decoder(None, None, None, None, false);
    let input = [HI, THERE];

    let out = dec.process_token_ids(&input).unwrap();

    assert_eq!(out.text.as_deref(), Some("hithere"));
    assert!(out.stop_trigger.is_none());
}
/// With `EOS` registered as a hidden stop token id, `[HI, EOS]` yields only `"hi"` and
/// reports `HiddenStopTokenDetected(EOS)`.
#[test]
fn hidden_stop_token_excluded() {
    let hidden = vec![EOS];
    let mut dec = make_decoder(None, None, Some(hidden), None, false);
    let input = [HI, EOS];

    let out = dec.process_token_ids(&input).unwrap();

    assert_eq!(out.text.as_deref(), Some("hi"));
    assert!(matches!(
        out.stop_trigger,
        Some(StopTrigger::HiddenStopTokenDetected(EOS))
    ));
}
/// With stop sequence `"STOP"` and `include_stop_str = false`, `[HI, STOP, THERE]` yields
/// `"hi"` and reports `HiddenStopSequenceDetected("STOP")`.
#[test]
fn include_stop_str_false_excludes() {
    let sequences = vec!["STOP"];
    let mut dec = make_decoder(None, None, None, Some(sequences), false);
    let input = [HI, STOP, THERE];

    let out = dec.process_token_ids(&input).unwrap();

    assert_eq!(out.text.as_deref(), Some("hi"));
    assert!(matches!(
        &out.stop_trigger,
        Some(StopTrigger::HiddenStopSequenceDetected(seq)) if seq == "STOP"
    ));
}
/// With stop sequence `"STOP"` and `include_stop_str = true`, `[HI, STOP, THERE]` yields
/// `"hiSTOP"` and reports `VisibleStopSequenceDetected("STOP")`.
#[test]
fn include_stop_str_true_includes() {
    let sequences = vec!["STOP"];
    let mut dec = make_decoder(None, None, None, Some(sequences), true);
    let input = [HI, STOP, THERE];

    let out = dec.process_token_ids(&input).unwrap();

    assert_eq!(out.text.as_deref(), Some("hiSTOP"));
    assert!(matches!(
        &out.stop_trigger,
        Some(StopTrigger::VisibleStopSequenceDetected(seq)) if seq == "STOP"
    ));
}
/// With `EOS` as a hidden stop token id, `[HI, EOS, THERE]` yields text `"hi"` and exactly
/// two entries in `tokens`.
#[test]
fn trailing_tokens_ignored_after_stop() {
    let hidden = vec![EOS];
    let mut dec = make_decoder(None, None, Some(hidden), None, false);
    let input = [HI, EOS, THERE];

    let out = dec.process_token_ids(&input).unwrap();

    assert_eq!(out.text.as_deref(), Some("hi"));
    assert_eq!(out.tokens.len(), 2);
}
/// With `min_tokens = 3` and `EOS` as a hidden stop token id, the two-token input
/// `[HI, EOS]` decodes to `"hi</s>"` and no stop trigger is reported.
#[test]
fn min_tokens_delays_stop() {
    let hidden = vec![EOS];
    let mut dec = make_decoder(None, Some(3), Some(hidden), None, false);
    let input = [HI, EOS];

    let out = dec.process_token_ids(&input).unwrap();

    assert_eq!(out.text.as_deref(), Some("hi</s>"));
    assert!(out.stop_trigger.is_none());
}
/// When token id `STOP` is a hidden stop token and `"STOP"` is also a stop sequence,
/// `[HI, STOP]` yields `"hi"` and the reported trigger is `HiddenStopTokenDetected(STOP)`.
#[test]
fn stop_token_priority_over_sequence() {
    let hidden = vec![STOP];
    let sequences = vec!["STOP"];
    let mut dec = make_decoder(None, None, Some(hidden), Some(sequences), false);
    let input = [HI, STOP];

    let out = dec.process_token_ids(&input).unwrap();

    assert_eq!(out.text.as_deref(), Some("hi"));
    assert!(matches!(
        out.stop_trigger,
        Some(StopTrigger::HiddenStopTokenDetected(STOP))
    ));
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment