[Bugfix][Frontend] Eliminate regex based check in reasoning full generator (#14821)

Signed-off-by: Ce Gao <cegao@tensorchord.ai>

[Bugfix][Frontend] Eliminate regex based check in reasoning full generator (#14821)
Signed-off-by: Ce Gao <cegao@tensorchord.ai>
3bbaacbe · Ce Gao · GitHub · a10314c6 · 3bbaacbe · 3bbaacbe
Unverified Commit 3bbaacbe authored Mar 28, 2025 by Ce Gao Committed by GitHub Mar 28, 2025
Showing 2 changed files with 89 additions and 18 deletions

tests/reasoning/test_deepseekr1_reasoning_parser.py tests/reasoning/test_deepseekr1_reasoning_parser.py +64 -0

vllm/reasoning/deepseek_r1_reasoning_parser.py vllm/reasoning/deepseek_r1_reasoning_parser.py +25 -18

No files found.
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -90,6 +90,40 @@ SHORTEST_REASONING_WITH_THINK = {
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
+THINK_NO_END = {
+    "output": "<think>This is a reasoning section",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY = {
+    "output": "",
+    "reasoning_content": "",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning_content": None,
+    "content": None,
+    "is_reasoning_end": False,
+}
+NEW_LINE = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
+# Streaming cannot strip text that precedes <think>, because it must
+# support both "<think>...</think>" and output that has no <think> and
+# only ends its reasoning with </think>. It cannot tell whether text
+# seen before <think> is reasoning, so the leading newline is kept.
+NEW_LINE_STREAMING = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "\nThis is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
 TEST_CASES = [
    pytest.param(
@@ -182,6 +216,36 @@ TEST_CASES = [
        SHORTEST_REASONING_WITH_THINK,
        id="shortest_with_think_streaming",
    ),
+    pytest.param(
+        False,
+        THINK_NO_END,
+        id="think_no_end",
+    ),
+    pytest.param(
+        True,
+        THINK_NO_END,
+        id="think_no_end_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
+    pytest.param(
+        False,
+        NEW_LINE,
+        id="new_line",
+    ),
+    pytest.param(
+        True,
+        NEW_LINE_STREAMING,
+        id="new_line_streaming",
+    ),
 ]

--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
 # SPDX-License-Identifier: Apache-2.0
-import re
 from collections.abc import Sequence
 from typing import Optional, Union
@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
    def __init__(self, tokenizer: PreTrainedTokenizerBase):
        super().__init__(tokenizer)
-        self.reasoning_regex = re.compile(
-            rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ReasoningParser "
@@ -143,23 +139,34 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
    def extract_reasoning_content(
            self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+        # If the start token is present, keep only the text after it; the
+        # token itself and anything preceding it are dropped.
+        model_output_parts = model_output.partition(self.start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
        # DeepSeek R1 doesn't generate <think> now.
        # Thus we assume the reasoning content is always at the start.
        # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
        if self.end_token not in model_output:
            return model_output, None
        else:
-            # Add a start token if it's missing to keep compatibility.
+            reasoning_content, _, content = model_output.partition(
-            if self.start_token not in model_output:
+                self.end_token)
-                model_output = f"{self.start_token}{model_output}"
+            # The end token is guaranteed to be present in this branch,
-            # Use a regex to find the reasoning content
+            # because the check above already returned when it was missing,
-            reasoning_content = self.reasoning_regex.findall(model_output)[0]
+            # so partition() always splits on the first end token.
+            # If generation stops right after end-of-think, return null content
-            end_index = len(
+            final_content = content or None
-                f"{self.start_token}{reasoning_content}{self.end_token}")
+            return reasoning_content, final_content
-            final_output = model_output[end_index:]
-            if len(final_output) == 0:
-                return reasoning_content, None
-            return reasoning_content, final_output