Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3bbaacbe
Unverified
Commit
3bbaacbe
authored
Mar 28, 2025
by
Ce Gao
Committed by
GitHub
Mar 28, 2025
Browse files
[Bugfix][Frontend] Eliminate regex based check in reasoning full generator (#14821)
Signed-off-by:
Ce Gao
<
cegao@tensorchord.ai
>
parent
a10314c6
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
89 additions
and
18 deletions
+89
-18
tests/reasoning/test_deepseekr1_reasoning_parser.py
tests/reasoning/test_deepseekr1_reasoning_parser.py
+64
-0
vllm/reasoning/deepseek_r1_reasoning_parser.py
vllm/reasoning/deepseek_r1_reasoning_parser.py
+25
-18
No files found.
tests/reasoning/test_deepseekr1_reasoning_parser.py
View file @
3bbaacbe
...
...
@@ -90,6 +90,40 @@ SHORTEST_REASONING_WITH_THINK = {
"content"
:
"This is the rest"
,
"is_reasoning_end"
:
True
,
}
THINK_NO_END
=
{
"output"
:
"<think>This is a reasoning section"
,
"reasoning_content"
:
"This is a reasoning section"
,
"content"
:
None
,
"is_reasoning_end"
:
False
,
}
EMPTY
=
{
"output"
:
""
,
"reasoning_content"
:
""
,
"content"
:
None
,
"is_reasoning_end"
:
False
,
}
EMPTY_STREAMING
=
{
"output"
:
""
,
"reasoning_content"
:
None
,
"content"
:
None
,
"is_reasoning_end"
:
False
,
}
NEW_LINE
=
{
"output"
:
"
\n
<think>This is a reasoning section</think>
\n
This is the rest"
,
"reasoning_content"
:
"This is a reasoning section"
,
"content"
:
"
\n
This is the rest"
,
"is_reasoning_end"
:
True
,
}
# Streaming cannot handle new lines at the beginning of the output
# because we need to support <think>...</think> and </think>...
# We cannot know if the text before <think> is reasoning content
# or not.
NEW_LINE_STREAMING
=
{
"output"
:
"
\n
<think>This is a reasoning section</think>
\n
This is the rest"
,
"reasoning_content"
:
"
\n
This is a reasoning section"
,
"content"
:
"
\n
This is the rest"
,
"is_reasoning_end"
:
True
,
}
TEST_CASES
=
[
pytest
.
param
(
...
...
@@ -182,6 +216,36 @@ TEST_CASES = [
SHORTEST_REASONING_WITH_THINK
,
id
=
"shortest_with_think_streaming"
,
),
pytest
.
param
(
False
,
THINK_NO_END
,
id
=
"think_no_end"
,
),
pytest
.
param
(
True
,
THINK_NO_END
,
id
=
"think_no_end_streaming"
,
),
pytest
.
param
(
False
,
EMPTY
,
id
=
"empty"
,
),
pytest
.
param
(
True
,
EMPTY_STREAMING
,
id
=
"empty_streaming"
,
),
pytest
.
param
(
False
,
NEW_LINE
,
id
=
"new_line"
,
),
pytest
.
param
(
True
,
NEW_LINE_STREAMING
,
id
=
"new_line_streaming"
,
),
]
...
...
vllm/reasoning/deepseek_r1_reasoning_parser.py
View file @
3bbaacbe
# SPDX-License-Identifier: Apache-2.0
import
re
from
collections.abc
import
Sequence
from
typing
import
Optional
,
Union
...
...
@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
def
__init__
(
self
,
tokenizer
:
PreTrainedTokenizerBase
):
super
().
__init__
(
tokenizer
)
self
.
reasoning_regex
=
re
.
compile
(
rf
"
{
self
.
start_token
}
(.*?)
{
self
.
end_token
}
"
,
re
.
DOTALL
)
if
not
self
.
model_tokenizer
:
raise
ValueError
(
"The model tokenizer must be passed to the ReasoningParser "
...
...
@@ -143,23 +139,34 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
def
extract_reasoning_content
(
self
,
model_output
:
str
,
request
:
ChatCompletionRequest
)
->
tuple
[
Optional
[
str
],
Optional
[
str
]]:
"""
Extract reasoning content from the model output.
For text <think>abc</think>xyz:
- 'abc' goes to reasoning_content
- 'xyz' goes to content
Returns:
tuple[Optional[str], Optional[str]]: reasoning content and content
"""
# Check if the start token is present in the model output, remove it
# if it is present.
model_output_parts
=
model_output
.
partition
(
self
.
start_token
)
model_output
=
model_output_parts
[
2
]
if
model_output_parts
[
1
]
else
model_output_parts
[
0
]
# DeepSeek R1 doesn't generate <think> now.
# Thus we assume the reasoning content is always at the start.
# Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
if
self
.
end_token
not
in
model_output
:
return
model_output
,
None
else
:
# Add a start token if it's missing to keep compatibility.
if
self
.
start_token
not
in
model_output
:
model_output
=
f
"
{
self
.
start_token
}{
model_output
}
"
# Use a regex to find the reasoning content
reasoning_content
=
self
.
reasoning_regex
.
findall
(
model_output
)[
0
]
end_index
=
len
(
f
"
{
self
.
start_token
}{
reasoning_content
}{
self
.
end_token
}
"
)
final_output
=
model_output
[
end_index
:]
if
len
(
final_output
)
==
0
:
return
reasoning_content
,
None
return
reasoning_content
,
final_output
reasoning_content
,
_
,
content
=
model_output
.
partition
(
self
.
end_token
)
# If the end token is not found, return the model output as is.
# It should not happen since we already checked for the presence
# of the end token.
# If generation stops right after end-of-think, return null content
final_content
=
content
or
None
return
reasoning_content
,
final_content
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment