Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
745d7e4c
Unverified
Commit
745d7e4c
authored
Mar 12, 2026
by
brluo
Committed by
GitHub
Mar 12, 2026
Browse files
fix: respect per-request chat_template_kwargs for DeepSeek V3.2 thinking mode (#7286)
Co-authored-by:
kangclzjc
<
kangz@nvidia.com
>
parent
387100c8
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
338 additions
and
2 deletions
+338
-2
lib/llm/src/preprocessor.rs
lib/llm/src/preprocessor.rs
+61
-1
lib/llm/src/preprocessor/prompt/deepseek_v32.rs
lib/llm/src/preprocessor/prompt/deepseek_v32.rs
+277
-1
No files found.
lib/llm/src/preprocessor.rs
View file @
745d7e4c
...
...
@@ -1090,6 +1090,8 @@ impl OpenAIPreprocessor {
/// For kimi_k25: disabled when chat_template_args contains "thinking": false.
/// For nemotron_nano: disabled when chat_template_args contains "enable_thinking": false
/// or "force_nonempty_content": true.
/// For deepseek_r1: disabled when chat_template_args contains "thinking": false
/// or "thinking_mode": "chat".
fn
is_reasoning_disabled_by_request
(
reasoning_parser
:
Option
<&
str
>
,
chat_template_args
:
Option
<&
std
::
collections
::
HashMap
<
String
,
serde_json
::
Value
>>
,
...
...
@@ -1118,6 +1120,17 @@ impl OpenAIPreprocessor {
}
false
}
Some
(
"deepseek_r1"
)
=>
{
if
let
Some
(
args
)
=
chat_template_args
{
if
let
Some
(
thinking
)
=
args
.get
(
"thinking"
)
{
return
thinking
==
&
serde_json
::
Value
::
Bool
(
false
);
}
if
let
Some
(
mode
)
=
args
.get
(
"thinking_mode"
)
.and_then
(|
v
|
v
.as_str
())
{
return
mode
==
"chat"
;
}
}
false
}
_
=>
false
,
}
}
...
...
@@ -1500,6 +1513,22 @@ mod tests {
);
m
};
let
thinking_mode_chat
=
{
let
mut
m
=
std
::
collections
::
HashMap
::
new
();
m
.insert
(
"thinking_mode"
.to_string
(),
serde_json
::
Value
::
String
(
"chat"
.to_string
()),
);
m
};
let
thinking_mode_thinking
=
{
let
mut
m
=
std
::
collections
::
HashMap
::
new
();
m
.insert
(
"thinking_mode"
.to_string
(),
serde_json
::
Value
::
String
(
"thinking"
.to_string
()),
);
m
};
let
empty_args
=
std
::
collections
::
HashMap
::
new
();
// (parser, args, expected_disabled, description)
...
...
@@ -1528,11 +1557,42 @@ mod tests {
false
,
"kimi_k25 + empty args → enabled"
,
),
// deepseek_r1 uses "thinking" bool or "thinking_mode" string
(
Some
(
"deepseek_r1"
),
Some
(
&
thinking_false
),
true
,
"deepseek_r1 + thinking=false → disabled"
,
),
(
Some
(
"deepseek_r1"
),
Some
(
&
thinking_true
),
false
,
"deepseek_r1 + thinking=true → enabled"
,
),
(
Some
(
"deepseek_r1"
),
Some
(
&
thinking_mode_chat
),
true
,
"deepseek_r1 + thinking_mode=chat → disabled"
,
),
(
Some
(
"deepseek_r1"
),
Some
(
&
thinking_mode_thinking
),
false
,
"deepseek_r1 + thinking_mode=thinking → enabled"
,
),
(
Some
(
"deepseek_r1"
),
None
,
false
,
"deepseek_r1 + no args → enabled"
,
),
(
Some
(
"deepseek_r1"
),
Some
(
&
empty_args
),
false
,
"deepseek_r1
→ never dis
abled"
,
"deepseek_r1
+ empty args → en
abled"
,
),
(
Some
(
"basic"
),
...
...
lib/llm/src/preprocessor/prompt/deepseek_v32.rs
View file @
745d7e4c
...
...
@@ -459,6 +459,33 @@ impl DeepSeekV32Formatter {
/// Build a formatter whose default mode is [`ThinkingMode::Chat`].
pub fn new_chat() -> Self {
    let default_mode = ThinkingMode::Chat;
    Self::new(default_mode)
}
/// Pick the thinking mode to use for one request.
///
/// A per-request override found in `chat_template_args` wins over the formatter's
/// configured mode. Keys are consulted in this order:
/// 1. `"thinking"` — honoured only when the value is a JSON boolean
///    (`true` → thinking, `false` → chat).
/// 2. `"thinking_mode"` — honoured only when the value is the string `"chat"` or
///    `"thinking"`.
///
/// Absent args, values of another JSON type, and unrecognised mode strings all fall
/// back to `self.thinking_mode`.
fn resolve_thinking_mode(
    &self,
    args: Option<&std::collections::HashMap<String, serde_json::Value>>,
) -> ThinkingMode {
    // Boolean convention: checked first, so it takes precedence when both keys exist.
    let from_flag = args
        .and_then(|overrides| overrides.get("thinking"))
        .and_then(|value| value.as_bool())
        .map(|enabled| {
            if enabled {
                ThinkingMode::Thinking
            } else {
                ThinkingMode::Chat
            }
        });

    from_flag
        .or_else(|| {
            // String convention: only the two known mode names are accepted.
            let requested = args
                .and_then(|overrides| overrides.get("thinking_mode"))
                .and_then(|value| value.as_str())?;
            match requested {
                "chat" => Some(ThinkingMode::Chat),
                "thinking" => Some(ThinkingMode::Thinking),
                _ => None,
            }
        })
        .unwrap_or(self.thinking_mode)
}
}
impl
super
::
OAIPromptFormatter
for
DeepSeekV32Formatter
{
...
...
@@ -467,6 +494,8 @@ impl super::OAIPromptFormatter for DeepSeekV32Formatter {
}
fn
render
(
&
self
,
req
:
&
dyn
super
::
OAIChatLikeRequest
)
->
Result
<
String
>
{
let
thinking_mode
=
self
.resolve_thinking_mode
(
req
.chat_template_args
());
// Get messages from request
let
messages_value
=
req
.messages
();
...
...
@@ -532,7 +561,7 @@ impl super::OAIPromptFormatter for DeepSeekV32Formatter {
// Encode with native implementation
encode_messages
(
&
messages_array
,
self
.
thinking_mode
,
thinking_mode
,
true
,
// always add BOS token
)
}
...
...
@@ -597,6 +626,7 @@ mod tests {
messages
:
JsonValue
,
tools
:
Option
<
JsonValue
>
,
response_format
:
Option
<
JsonValue
>
,
chat_template_args
:
Option
<
std
::
collections
::
HashMap
<
String
,
JsonValue
>>
,
}
impl
MockRequest
{
...
...
@@ -605,6 +635,7 @@ mod tests {
messages
,
tools
:
None
,
response_format
:
None
,
chat_template_args
:
None
,
}
}
...
...
@@ -617,6 +648,14 @@ mod tests {
self
.response_format
=
Some
(
response_format
);
self
}
fn
with_chat_template_args
(
mut
self
,
args
:
std
::
collections
::
HashMap
<
String
,
JsonValue
>
,
)
->
Self
{
self
.chat_template_args
=
Some
(
args
);
self
}
}
impl
super
::
super
::
OAIChatLikeRequest
for
MockRequest
{
...
...
@@ -643,6 +682,12 @@ mod tests {
/// The mock always asks for a generation prompt.
fn should_add_generation_prompt(&self) -> bool {
    true
}
/// Borrow the per-request template arguments stored on the mock, if any were set.
fn chat_template_args(
    &self,
) -> Option<&std::collections::HashMap<String, serde_json::Value>> {
    Option::as_ref(&self.chat_template_args)
}
}
#[test]
...
...
@@ -990,4 +1035,235 @@ mod tests {
"Should not contain Response Format section when not provided"
);
}
// ==================== Thinking Mode Override Tests ====================
#[test]
fn test_chat_mode_via_thinking_false() {
    // Verifies that a per-request `"thinking": false` switches a formatter built with
    // `new_thinking()` into chat mode for that render.
    use super::super::OAIPromptFormatter;

    let args = std::collections::HashMap::from([("thinking".to_string(), json!(false))]);
    let request = MockRequest::new(json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]))
    .with_chat_template_args(args);

    let formatter = DeepSeekV32Formatter::new_thinking();
    let result = formatter.render(&request).unwrap();

    // Tail of the prompt for the failure message. Slicing a `String` at an arbitrary
    // byte offset panics when the offset falls inside a multi-byte UTF-8 character,
    // which would hide the real assertion message; move forward to the next boundary.
    // `is_char_boundary(len)` is always true, so the loop terminates.
    let mut tail_start = result.len().saturating_sub(80);
    while !result.is_char_boundary(tail_start) {
        tail_start += 1;
    }
    let tail = &result[tail_start..];

    // In chat mode, the last user message should be followed by </think> (closing tag)
    // rather than <think> (opening tag)
    assert!(
        result.ends_with(&format!(
            "{}{}",
            tokens::ASSISTANT_START,
            tokens::THINKING_END
        )),
        "Chat mode should end with </think> after Assistant token, got: ...{}",
        tail,
    );
    assert!(
        !result.ends_with(&format!(
            "{}{}",
            tokens::ASSISTANT_START,
            tokens::THINKING_START
        )),
        "Chat mode should NOT end with <think>",
    );
}
#[test]
fn test_explicit_thinking_true_via_args() {
    // An explicit `"thinking": true` on a thinking-default formatter keeps thinking mode.
    use super::super::OAIPromptFormatter;

    let mut overrides = std::collections::HashMap::new();
    overrides.insert("thinking".to_string(), json!(true));

    let conversation = json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]);
    let req = MockRequest::new(conversation).with_chat_template_args(overrides);

    let prompt = DeepSeekV32Formatter::new_thinking()
        .render(&req)
        .unwrap();

    let expected_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_START);
    assert!(
        prompt.ends_with(&expected_tail),
        "Thinking mode should end with <think> after Assistant token",
    );
}
#[test]
fn test_chat_mode_via_thinking_mode_string() {
    // The string convention `"thinking_mode": "chat"` selects chat mode.
    use super::super::OAIPromptFormatter;

    let mut overrides = std::collections::HashMap::new();
    overrides.insert("thinking_mode".to_string(), json!("chat"));

    let conversation = json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]);
    let req = MockRequest::new(conversation).with_chat_template_args(overrides);

    let prompt = DeepSeekV32Formatter::new_thinking()
        .render(&req)
        .unwrap();

    let expected_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_END);
    assert!(
        prompt.ends_with(&expected_tail),
        "thinking_mode='chat' should produce chat mode (ends with </think>)",
    );
}
#[test]
fn test_thinking_mode_string_thinking() {
    // The string convention `"thinking_mode": "thinking"` selects thinking mode.
    use super::super::OAIPromptFormatter;

    let mut overrides = std::collections::HashMap::new();
    overrides.insert("thinking_mode".to_string(), json!("thinking"));

    let conversation = json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]);
    let req = MockRequest::new(conversation).with_chat_template_args(overrides);

    let prompt = DeepSeekV32Formatter::new_thinking()
        .render(&req)
        .unwrap();

    let expected_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_START);
    assert!(
        prompt.ends_with(&expected_tail),
        "thinking_mode='thinking' should produce thinking mode (ends with <think>)",
    );
}
#[test]
fn test_default_thinking_mode_without_args() {
    // With no chat_template_args on the request, each formatter uses its own default.
    use super::super::OAIPromptFormatter;

    let conversation = json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]);
    let req = MockRequest::new(conversation);

    // Formatter constructed with new_thinking(): prompt should open a <think> block.
    let thinking_prompt = DeepSeekV32Formatter::new_thinking()
        .render(&req)
        .unwrap();
    let thinking_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_START);
    assert!(
        thinking_prompt.ends_with(&thinking_tail),
        "Default (new_thinking) should produce thinking mode",
    );

    // Formatter constructed with new_chat(): prompt should end with the closing tag.
    let chat_prompt = DeepSeekV32Formatter::new_chat().render(&req).unwrap();
    let chat_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_END);
    assert!(
        chat_prompt.ends_with(&chat_tail),
        "Default (new_chat) should produce chat mode",
    );
}
#[test]
fn test_thinking_false_overrides_default_thinking() {
    // The request-level `"thinking": false` must beat a thinking-default formatter.
    use super::super::OAIPromptFormatter;

    let mut overrides = std::collections::HashMap::new();
    overrides.insert("thinking".to_string(), json!(false));

    let conversation = json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]);
    let req = MockRequest::new(conversation).with_chat_template_args(overrides);

    // Default is thinking; the override should flip this render to chat.
    let prompt = DeepSeekV32Formatter::new_thinking()
        .render(&req)
        .unwrap();

    let expected_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_END);
    assert!(
        prompt.ends_with(&expected_tail),
        "Per-request thinking=false should override new_thinking() default",
    );
}
#[test]
fn test_thinking_true_overrides_default_chat() {
    // The request-level `"thinking": true` must beat a chat-default formatter.
    use super::super::OAIPromptFormatter;

    let mut overrides = std::collections::HashMap::new();
    overrides.insert("thinking".to_string(), json!(true));

    let conversation = json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]);
    let req = MockRequest::new(conversation).with_chat_template_args(overrides);

    // Default is chat; the override should flip this render to thinking.
    let prompt = DeepSeekV32Formatter::new_chat().render(&req).unwrap();

    let expected_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_START);
    assert!(
        prompt.ends_with(&expected_tail),
        "Per-request thinking=true should override new_chat() default",
    );
}
#[test]
fn test_thinking_bool_takes_precedence_over_thinking_mode_string() {
    // When both conventions are supplied and disagree, the boolean key decides.
    use super::super::OAIPromptFormatter;

    let mut overrides = std::collections::HashMap::new();
    overrides.insert("thinking".to_string(), json!(false));
    overrides.insert("thinking_mode".to_string(), json!("thinking"));

    let conversation = json!([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]);
    let req = MockRequest::new(conversation).with_chat_template_args(overrides);

    let prompt = DeepSeekV32Formatter::new_thinking()
        .render(&req)
        .unwrap();

    // "thinking": false is expected to win over "thinking_mode": "thinking",
    // so the prompt must end in chat form.
    let expected_tail = format!("{}{}", tokens::ASSISTANT_START, tokens::THINKING_END);
    assert!(
        prompt.ends_with(&expected_tail),
        "Boolean 'thinking' key should take precedence over 'thinking_mode' string",
    );
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment