"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "cd9e5b8340b4c9dc093b7bd6f960fc2fdddf2b98"
Unverified Commit 3bbaacbe authored by Ce Gao, committed by GitHub
Browse files

[Bugfix][Frontend] Eliminate regex based check in reasoning full generator (#14821)


Signed-off-by: Ce Gao <cegao@tensorchord.ai>
parent a10314c6
...@@ -90,6 +90,40 @@ SHORTEST_REASONING_WITH_THINK = { ...@@ -90,6 +90,40 @@ SHORTEST_REASONING_WITH_THINK = {
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
# Fixture: the output opens a <think> block but never emits </think>.
# Expected result: the text after the start token is all reasoning content,
# there is no regular content, and reasoning is not marked as ended.
THINK_NO_END = {
"output": "<think>This is a reasoning section",
"reasoning_content": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
# Fixture: empty model output.
# Expected result here is an empty-string reasoning_content with no content.
EMPTY = {
"output": "",
"reasoning_content": "",
"content": None,
"is_reasoning_end": False,
}
# Fixture: same empty output, but with reasoning_content expected to be None
# rather than "" (used by the "empty_streaming" test case).
EMPTY_STREAMING = {
"output": "",
"reasoning_content": None,
"content": None,
"is_reasoning_end": False,
}
# Fixture: the output starts with a newline before <think>.
# Expected result: the leading newline is not part of reasoning_content, and
# the newline after </think> is kept at the start of content.
NEW_LINE = {
"output": "\n<think>This is a reasoning section</think>\nThis is the rest",
"reasoning_content": "This is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
# Streaming cannot strip a newline at the beginning of the output,
# because it has to support both <think>...</think> and </think>...
# forms: it cannot know whether the text before <think> is reasoning
# content or not. The leading newline is therefore expected to remain
# in reasoning_content for this fixture.
NEW_LINE_STREAMING = {
"output": "\n<think>This is a reasoning section</think>\nThis is the rest",
"reasoning_content": "\nThis is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
TEST_CASES = [ TEST_CASES = [
pytest.param( pytest.param(
...@@ -182,6 +216,36 @@ TEST_CASES = [ ...@@ -182,6 +216,36 @@ TEST_CASES = [
SHORTEST_REASONING_WITH_THINK, SHORTEST_REASONING_WITH_THINK,
id="shortest_with_think_streaming", id="shortest_with_think_streaming",
), ),
pytest.param(
False,
THINK_NO_END,
id="think_no_end",
),
pytest.param(
True,
THINK_NO_END,
id="think_no_end_streaming",
),
pytest.param(
False,
EMPTY,
id="empty",
),
pytest.param(
True,
EMPTY_STREAMING,
id="empty_streaming",
),
pytest.param(
False,
NEW_LINE,
id="new_line",
),
pytest.param(
True,
NEW_LINE_STREAMING,
id="new_line_streaming",
),
] ]
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import re
from collections.abc import Sequence from collections.abc import Sequence
from typing import Optional, Union from typing import Optional, Union
...@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser): ...@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
def __init__(self, tokenizer: PreTrainedTokenizerBase): def __init__(self, tokenizer: PreTrainedTokenizerBase):
super().__init__(tokenizer) super().__init__(tokenizer)
self.reasoning_regex = re.compile(
rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
if not self.model_tokenizer: if not self.model_tokenizer:
raise ValueError( raise ValueError(
"The model tokenizer must be passed to the ReasoningParser " "The model tokenizer must be passed to the ReasoningParser "
...@@ -143,23 +139,34 @@ class DeepSeekR1ReasoningParser(ReasoningParser): ...@@ -143,23 +139,34 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
def extract_reasoning_content( def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest self, model_output: str, request: ChatCompletionRequest
) -> tuple[Optional[str], Optional[str]]: ) -> tuple[Optional[str], Optional[str]]:
"""
Extract reasoning content from the model output.
For text <think>abc</think>xyz:
- 'abc' goes to reasoning_content
- 'xyz' goes to content
Returns:
tuple[Optional[str], Optional[str]]: reasoning content and content
"""
# Check if the start token is present in the model output, remove it
# if it is present.
model_output_parts = model_output.partition(self.start_token)
model_output = model_output_parts[2] if model_output_parts[
1] else model_output_parts[0]
# DeepSeek R1 doesn't generate <think> now. # DeepSeek R1 doesn't generate <think> now.
# Thus we assume the reasoning content is always at the start. # Thus we assume the reasoning content is always at the start.
# Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
if self.end_token not in model_output: if self.end_token not in model_output:
return model_output, None return model_output, None
else: else:
# Add a start token if it's missing to keep compatibility. reasoning_content, _, content = model_output.partition(
if self.start_token not in model_output: self.end_token)
model_output = f"{self.start_token}{model_output}" # If the end token is not found, return the model output as is.
# Use a regex to find the reasoning content # It should not happen since we already checked for the presence
reasoning_content = self.reasoning_regex.findall(model_output)[0] # of the end token.
# If generation stops right after end-of-think, return null content
end_index = len( final_content = content or None
f"{self.start_token}{reasoning_content}{self.end_token}") return reasoning_content, final_content
final_output = model_output[end_index:]
if len(final_output) == 0:
return reasoning_content, None
return reasoning_content, final_output
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment