Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2422b83d
Unverified
Commit
2422b83d
authored
Sep 02, 2025
by
Graham King
Committed by
GitHub
Sep 02, 2025
Browse files
fix: Do not apply chat template to completions (#2718)
Signed-off-by:
Graham King
<
grahamk@nvidia.com
>
parent
2c30e41f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
119 additions
and
90 deletions
+119
-90
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+116
-58
lib/parsers/src/reasoning/base_parser.rs
lib/parsers/src/reasoning/base_parser.rs
+0
-29
lib/parsers/src/reasoning/mod.rs
lib/parsers/src/reasoning/mod.rs
+3
-3
No files found.
lib/llm/src/preprocessor.rs
View file @
2422b83d
...
...
@@ -24,6 +24,7 @@ use tracing;
use
crate
::
model_card
::{
ModelDeploymentCard
,
ModelInfo
,
TokenizerKind
};
use
crate
::
preprocessor
::
prompt
::
OAIChatLikeRequest
;
use
crate
::
protocols
::
common
::
preprocessor
::
PreprocessedRequestBuilder
;
use
crate
::
tokenizers
::
Encoding
;
use
dynamo_runtime
::
engine
::{
AsyncEngine
,
AsyncEngineContextProvider
,
ResponseStream
};
...
...
@@ -151,10 +152,108 @@ impl OpenAIPreprocessor {
&
self
,
request
:
&
R
,
)
->
Result
<
(
PreprocessedRequest
,
HashMap
<
String
,
String
>
)
>
{
let
mut
annotations
=
HashMap
::
new
();
let
mut
builder
=
self
.builder
(
request
)
?
;
let
formatted_prompt
=
self
.apply_template
(
request
)
?
;
let
annotations
=
self
.gather_tokens
(
request
,
&
mut
builder
,
formatted_prompt
)
?
;
Ok
((
builder
.build
()
?
,
annotations
))
}
/// Creates a [`PreprocessedRequestBuilder`] pre-populated from `request`.
///
/// The builder receives, in this order: the request's model name, its stop
/// conditions (with this model's EOS token ids merged into the hidden stop
/// token list), sampling options, output options, annotations, `mdc_sum`
/// (cloned from `self.mdcsum`), an empty `estimated_prefix_hit_num_blocks`,
/// and — when the request carries `nvext` — its `backend_instance_id`.
///
/// No token ids are set here; the caller is expected to fill those in
/// separately before calling `build()`.
///
/// # Errors
///
/// Propagates any error returned by `extract_stop_conditions`,
/// `extract_sampling_options` or `extract_output_options`.
pub fn builder<
    R: OAIChatLikeRequest
        + AnnotationsProvider
        + SamplingOptionsProvider
        + StopConditionsProvider
        + OutputOptionsProvider
        + NvExtProvider,
>(
    &self,
    request: &R,
) -> Result<PreprocessedRequestBuilder> {
    let mut builder = PreprocessedRequest::builder();
    builder.model(request.model());

    let mut stop_conditions = request.extract_stop_conditions()?;
    match &mut stop_conditions.stop_token_ids_hidden {
        // The request already has hidden stop tokens: append only the EOS
        // ids that are not present yet.
        Some(hidden) => {
            for eos_id in self.model_info.eos_token_ids() {
                if !hidden.contains(&eos_id) {
                    hidden.push(eos_id);
                }
            }
        }
        // No hidden stop tokens on the request: use the EOS ids as-is.
        None => {
            stop_conditions.stop_token_ids_hidden = Some(self.model_info.eos_token_ids());
        }
    }

    // apply ignore eos if not already set
    stop_conditions.apply_ignore_eos();

    // EOS ids are only handed to the builder when `ignore_eos` is not `Some(true)`.
    if stop_conditions.ignore_eos != Some(true) {
        builder.eos_token_ids(self.model_info.eos_token_ids());
    }
    builder.stop_conditions(stop_conditions);

    let sampling_options = request.extract_sampling_options()?;
    builder.sampling_options(sampling_options);

    let output_options = request.extract_output_options()?;
    builder.output_options(output_options);

    let request_annotations = request.annotations().unwrap_or_default();
    builder.annotations(request_annotations);

    builder.mdc_sum(Some(self.mdcsum.clone()));
    builder.estimated_prefix_hit_num_blocks(None);

    // Extract backend_instance_id from nvext if present
    if let Some(ext) = request.nvext() {
        builder.backend_instance_id(ext.backend_instance_id);
    }

    Ok(builder)
}
/// Produces the prompt string for a single-text request, or `None` when the
/// request is not of that shape.
///
/// Returns `Ok(None)` unless `prompt_input_type()` is `PromptInput::Text` and
/// `extract_text()` yields `TextInput::Single`. Otherwise the result is:
///
/// * the request's `raw_prompt()` when `nvext.use_raw_prompt` is `Some(true)`
///   and a raw prompt is available;
/// * the output of `self.formatter.render(request)` in every other case
///   (a warning is logged first if a raw prompt was requested but missing).
///
/// # Errors
///
/// Propagates any error returned by `self.formatter.render`.
pub fn apply_template<
    R: OAIChatLikeRequest
        + AnnotationsProvider
        + SamplingOptionsProvider
        + StopConditionsProvider
        + OutputOptionsProvider
        + NvExtProvider,
>(
    &self,
    request: &R,
) -> Result<Option<String>> {
    // Guard: only text prompts are handled here.
    if !matches!(request.prompt_input_type(), PromptInput::Text(_)) {
        return Ok(None);
    }
    // Guard: and only a single text input (checked after the type guard, so
    // `extract_text` is not called for other input types).
    if !matches!(request.extract_text(), Some(TextInput::Single(_))) {
        return Ok(None);
    }

    let wants_raw = request
        .nvext()
        .and_then(|ext| ext.use_raw_prompt)
        .unwrap_or(false);

    if wants_raw {
        if let Some(raw) = request.raw_prompt() {
            return Ok(Some(raw));
        }
        tracing::warn!("Raw prompt requested but not available");
    }

    let rendered = self.formatter.render(request)?;
    Ok(Some(rendered))
}
pub
fn
gather_tokens
<
R
:
OAIChatLikeRequest
+
AnnotationsProvider
+
SamplingOptionsProvider
+
StopConditionsProvider
+
OutputOptionsProvider
+
NvExtProvider
,
>
(
&
self
,
request
:
&
R
,
builder
:
&
mut
PreprocessedRequestBuilder
,
formatted_prompt
:
Option
<
String
>
,
)
->
Result
<
HashMap
<
String
,
String
>>
{
let
mut
annotations
=
HashMap
::
new
();
// match request type before any conversion/processing
match
request
.prompt_input_type
()
{
PromptInput
::
Tokens
(
_
)
=>
{
...
...
@@ -177,22 +276,16 @@ impl OpenAIPreprocessor {
PromptInput
::
Text
(
_
)
=>
{
if
let
Some
(
text_input
)
=
request
.extract_text
()
{
match
text_input
{
TextInput
::
Single
(
_
)
=>
{
let
use_raw_prompt
=
request
.nvext
()
.is_some_and
(|
ext
|
ext
.use_raw_prompt
.unwrap_or
(
false
));
let
formatted_prompt
=
if
use_raw_prompt
{
match
request
.raw_prompt
()
{
Some
(
prompt
)
=>
prompt
,
None
=>
{
tracing
::
warn!
(
"Raw prompt requested but not available"
);
self
.formatter
.render
(
request
)
?
}
TextInput
::
Single
(
raw_prompt
)
=>
{
if
let
Some
(
f
)
=
formatted_prompt
.as_ref
()
&&
request
.has_annotation
(
ANNOTATION_FORMATTED_PROMPT
)
{
annotations
.insert
(
ANNOTATION_FORMATTED_PROMPT
.to_string
(),
f
.to_string
());
}
}
else
{
self
.formatter
.render
(
request
)
?
}
;
// Completions will use raw_prompt, no template
let
prompt
=
formatted_prompt
.unwrap_or
(
raw_prompt
)
;
// Check if backend_instance_id is present and token_data is provided
let
has_backend_instance_id
=
request
...
...
@@ -215,22 +308,15 @@ impl OpenAIPreprocessor {
tracing
::
warn!
(
"backend_instance_id provided but no token_data; tokenizing prompt"
);
let
encoding
=
self
.tokenizer
.encode
(
&
formatted_
prompt
)
?
;
let
encoding
=
self
.tokenizer
.encode
(
&
prompt
)
?
;
(
encoding
.token_ids
()
.to_vec
(),
false
)
}
}
else
{
// No backend_instance_id provided, continue the normal flow.
let
encoding
=
self
.tokenizer
.encode
(
&
formatted_
prompt
)
?
;
let
encoding
=
self
.tokenizer
.encode
(
&
prompt
)
?
;
(
encoding
.token_ids
()
.to_vec
(),
false
)
};
if
request
.has_annotation
(
ANNOTATION_FORMATTED_PROMPT
)
{
annotations
.insert
(
ANNOTATION_FORMATTED_PROMPT
.to_string
(),
formatted_prompt
,
);
}
if
request
.has_annotation
(
ANNOTATION_TOKEN_IDS
)
&&
!
skip_token_annotation
{
...
...
@@ -258,37 +344,7 @@ impl OpenAIPreprocessor {
}
}
}
let
mut
stop_conditions
=
request
.extract_stop_conditions
()
?
;
if
let
Some
(
stop_tokens
)
=
&
mut
stop_conditions
.stop_token_ids_hidden
{
for
eos_token
in
self
.model_info
.eos_token_ids
()
{
if
!
stop_tokens
.contains
(
&
eos_token
)
{
stop_tokens
.push
(
eos_token
);
}
}
}
else
{
stop_conditions
.stop_token_ids_hidden
=
Some
(
self
.model_info
.eos_token_ids
());
}
// apply ignore eos if not already set
stop_conditions
.apply_ignore_eos
();
if
!
stop_conditions
.ignore_eos
.unwrap_or
(
false
)
{
builder
.eos_token_ids
(
self
.model_info
.eos_token_ids
());
}
builder
.stop_conditions
(
stop_conditions
);
builder
.sampling_options
(
request
.extract_sampling_options
()
?
);
builder
.output_options
(
request
.extract_output_options
()
?
);
builder
.annotations
(
request
.annotations
()
.unwrap_or_default
());
builder
.mdc_sum
(
Some
(
self
.mdcsum
.clone
()));
builder
.estimated_prefix_hit_num_blocks
(
None
);
// Extract backend_instance_id from nvext if present
if
let
Some
(
nvext
)
=
request
.nvext
()
{
builder
.backend_instance_id
(
nvext
.backend_instance_id
);
}
Ok
((
builder
.build
()
?
,
annotations
))
Ok
(
annotations
)
}
/// Preprocess an embedding request, handling both text and token ID inputs.
...
...
@@ -581,7 +637,9 @@ impl
let
response_generator
=
request
.response_generator
(
context
.id
()
.to_string
());
let
mut
response_generator
=
Box
::
new
(
response_generator
);
// convert the chat completion request to a common completion request
let
(
common_request
,
annotations
)
=
self
.preprocess_request
(
&
request
)
?
;
let
mut
builder
=
self
.builder
(
&
request
)
?
;
let
annotations
=
self
.gather_tokens
(
&
request
,
&
mut
builder
,
None
)
?
;
let
common_request
=
builder
.build
()
?
;
// update isl
response_generator
.update_isl
(
common_request
.token_ids
.len
()
as
u32
);
...
...
lib/parsers/src/reasoning/base_parser.rs
View file @
2422b83d
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
tracing
as
log
;
use
crate
::{
ParserResult
,
ReasoningParser
};
...
...
@@ -34,13 +33,8 @@ impl BasicReasoningParser {
impl
ReasoningParser
for
BasicReasoningParser
{
fn
detect_and_parse_reasoning
(
&
mut
self
,
text
:
&
str
,
_
token_ids
:
&
[
u32
])
->
ParserResult
{
log
::
debug!
(
"detect_and_parse_reasoning called with text: {:?}"
,
text
);
let
in_reasoning
=
self
._in_reasoning
||
text
.contains
(
&
self
.think_start_token
);
log
::
debug!
(
"in_reasoning: {}"
,
in_reasoning
);
if
!
in_reasoning
{
log
::
debug!
(
"No reasoning detected, returning normal text."
);
return
ParserResult
{
normal_text
:
text
.to_string
(),
reasoning_text
:
String
::
new
(),
...
...
@@ -49,15 +43,8 @@ impl ReasoningParser for BasicReasoningParser {
// The text is considered to be in a reasoning block.
let
processed_text
=
text
.replace
(
&
self
.think_start_token
,
""
)
.trim
()
.to_string
();
log
::
debug!
(
"Processed text after removing think_start_token: {:?}"
,
processed_text
);
if
!
processed_text
.contains
(
&
self
.think_end_token
)
{
log
::
debug!
(
"Reasoning truncated, think_end_token not found. Returning reasoning text."
);
// Assume reasoning was truncated before `think_end_token`
return
ParserResult
{
normal_text
:
String
::
new
(),
...
...
@@ -73,9 +60,6 @@ impl ReasoningParser for BasicReasoningParser {
.map
(|
s
|
s
.trim
()
.to_string
())
.unwrap_or_default
();
log
::
debug!
(
"Extracted reasoning_text: {:?}"
,
reasoning_text
);
log
::
debug!
(
"Extracted normal_text: {:?}"
,
normal_text
);
ParserResult
{
normal_text
,
reasoning_text
,
...
...
@@ -92,19 +76,6 @@ impl ReasoningParser for BasicReasoningParser {
let
mut
current_text
=
self
._buffer
.to_string
();
// If the current text is a prefix of the think token, keep buffering
log
::
debug!
(
"parse_reasoning_streaming_incremental called with text: {:?}"
,
text
);
log
::
debug!
(
"current buffer: {:?}"
,
self
._buffer
);
log
::
debug!
(
"current_text: {:?}"
,
current_text
);
log
::
debug!
(
"in_reasoning: {}, stripped_think_start: {}, stream_reasoning: {}"
,
self
._in_reasoning
,
self
.stripped_think_start
,
self
.stream_reasoning
);
if
self
.think_start_token
.starts_with
(
&
current_text
)
&&
self
.think_start_token
.as_str
()
!=
current_text
.as_str
()
{
...
...
lib/parsers/src/reasoning/mod.rs
View file @
2422b83d
...
...
@@ -144,7 +144,7 @@ impl ReasoningParserType {
}
pub
fn
get_reasoning_parser_from_name
(
name
:
&
str
)
->
ReasoningParserWrapper
{
tracing
::
debug!
(
"Selected reasoning parser
: {}"
,
name
);
tracing
::
debug!
(
parser_name
=
name
,
"Selected reasoning parser
"
);
match
name
.to_lowercase
()
.as_str
()
{
"deepseek_r1"
=>
Self
::
DeepseekR1
.get_reasoning_parser
(),
"basic"
=>
Self
::
Basic
.get_reasoning_parser
(),
...
...
@@ -156,8 +156,8 @@ impl ReasoningParserType {
"mistral"
=>
Self
::
Mistral
.get_reasoning_parser
(),
_
=>
{
tracing
::
warn!
(
"Unknown reasoning parser type '{}', falling back to Basic Reasoning Parser"
,
name
parser_name
=
name
,
"Unknown reasoning parser type, falling back to Basic Reasoning Parser"
,
);
Self
::
Basic
.get_reasoning_parser
()
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment