`reasoning_content` -> `reasoning` (#27752)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

`reasoning_content` -> `reasoning` (#27752)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
d9ab1ad9 · Harry Mellor · GitHub · 608bb144 · d9ab1ad9 · d9ab1ad9
Unverified commit d9ab1ad9, authored Nov 08, 2025 by Harry Mellor; committed by GitHub on Nov 08, 2025
6 changed files
--- a/vllm/reasoning/minimax_m2_reasoning_parser.py
+++ b/vllm/reasoning/minimax_m2_reasoning_parser.py
@@ -48,7 +48,7 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        return input_ids

-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -61,7 +61,7 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
            delta_text = "<think>" + delta_text
        return DeltaMessage(content=delta_text)

-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
        return None, "<think>" + model_output
--- a/vllm/reasoning/olmo3_reasoning_parser.py
+++ b/vllm/reasoning/olmo3_reasoning_parser.py
@@ -115,7 +115,7 @@ class Olmo3ReasoningBuffer:
            if end_think_idx > 0:
                # this covers the case there's content before
                # the end of the reasoning block
-                return DeltaMessage(reasoning_content=pretext)
+                return DeltaMessage(reasoning=pretext)

        if self.state == Olmo3ReasoningState.REASONING:
            # we are inside reasoning block, return and empty
@@ -124,7 +124,7 @@ class Olmo3ReasoningBuffer:
                text_buffer,
                self.buffer,
            ) = self.buffer, ""
-            return DeltaMessage(reasoning_content=text_buffer)
+            return DeltaMessage(reasoning=text_buffer)

        if self.state == Olmo3ReasoningState.CONTENT:
            # we are outside reasoning block, return and empty
@@ -250,7 +250,7 @@ class Olmo3ReasoningParser(ReasoningParser):
        # this id is not part of content, so just return [] here.
        return []

-    def extract_reasoning_content(
+    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
@@ -271,14 +271,14 @@ class Olmo3ReasoningParser(ReasoningParser):

        re_match = self.reasoning_regex.match(model_output)
        if re_match:
-            reasoning_content = re_match.group("reasoning") or None
+            reasoning = re_match.group("reasoning") or None
            content = re_match.group("content") or None
-            return reasoning_content, content
+            return reasoning, content

        # no reasoning content
        return None, model_output

-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,

--- a/vllm/reasoning/qwen3_reasoning_parser.py
+++ b/vllm/reasoning/qwen3_reasoning_parser.py
@@ -27,7 +27,7 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
        """The token that ends reasoning content."""
        return "</think>"

-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
        """
@@ -37,7 +37,7 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
        to be present, unlike other models that work with just the end token.

        For text <think>abc</think>xyz:
-        - 'abc' goes to reasoning_content
+        - 'abc' goes to reasoning
        - 'xyz' goes to content

        Returns:
@@ -61,7 +61,7 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
            return None, model_output

        # Extract reasoning content from the model output.
-        reasoning_content, _, content = model_output.partition(self.end_token)
+        reasoning, _, content = model_output.partition(self.end_token)

        final_content = content or None
-        return reasoning_content, final_content
+        return reasoning, final_content
--- a/vllm/reasoning/step3_reasoning_parser.py
+++ b/vllm/reasoning/step3_reasoning_parser.py
@@ -40,7 +40,7 @@ class Step3ReasoningParser(ReasoningParser):
                "token in the tokenizer!"
            )

-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -54,7 +54,7 @@ class Step3ReasoningParser(ReasoningParser):
        Handles streaming output where previous + delta = current.
        Uses token IDs for faster processing.
        For text "abc</think>xyz":
-        - 'abc' goes to reasoning_content
+        - 'abc' goes to reasoning
        - 'xyz' goes to content
        """
        # Skip single special token
@@ -64,10 +64,10 @@ class Step3ReasoningParser(ReasoningParser):
        if self.think_end_token_id in delta_token_ids:
            # </think> in delta, extract reasoning content and remaining content
            end_index = delta_text.find(self.think_end_token)
-            reasoning_content = delta_text[:end_index]
+            reasoning = delta_text[:end_index]
            content = delta_text[end_index + len(self.think_end_token) :]
            return DeltaMessage(
-                reasoning_content=reasoning_content,
+                reasoning=reasoning,
                content=content if content else None,
            )
        elif self.think_end_token_id in previous_token_ids:
@@ -75,9 +75,9 @@ class Step3ReasoningParser(ReasoningParser):
            return DeltaMessage(content=delta_text)
        else:
            # No </think> seen yet, everything is reasoning
-            return DeltaMessage(reasoning_content=delta_text)
+            return DeltaMessage(reasoning=delta_text)

-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
        # Check if the model output contains the </think> token
@@ -87,7 +87,7 @@ class Step3ReasoningParser(ReasoningParser):
        else:
            # Find the first occurrence of </think>
            end_index = model_output.find(self.think_end_token)
-            reasoning_content = model_output[:end_index]
+            reasoning = model_output[:end_index]

            # Content after </think> token
            content = model_output[end_index + len(self.think_end_token) :]
@@ -95,7 +95,7 @@ class Step3ReasoningParser(ReasoningParser):
            if len(content) == 0:
                content = None

-            return reasoning_content, content
+            return reasoning, content

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        return self.think_end_token_id in input_ids

--- a/vllm/transformers_utils/chat_templates/template_minicpmv45.jinja
+++ b/vllm/transformers_utils/chat_templates/template_minicpmv45.jinja
@@ -30,18 +30,18 @@
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {%- set content = message.content %}
-        {%- set reasoning_content = '' %}
-        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
-            {%- set reasoning_content = message.reasoning_content %}
+        {%- set reasoning = '' %}
+        {%- if message.reasoning is defined and message.reasoning is not none %}
+            {%- set reasoning = message.reasoning %}
        {%- else %}
            {%- if '</think>' in message.content %}
                {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
-                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set reasoning = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
-            {%- if loop.last or (not loop.last and reasoning_content) %}
-                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- if loop.last or (not loop.last and reasoning) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\n' + content }}
            {%- endif %}

--- a/vllm/transformers_utils/tokenizers/mistral.py
+++ b/vllm/transformers_utils/tokenizers/mistral.py
@@ -121,8 +121,8 @@ def _prepare_apply_chat_template_tools_and_messages(
    #
    # [1]: https://github.com/mistralai/mistral-common/blob/f4a06998b75ed78bbf5aaf569590b772ea26c9f6/src/mistral_common/protocol/instruct/messages.py#L80
    for message in messages:
-        # Remove reasoning_content as unsupported by Mistral
-        _ = message.pop("reasoning_content", None)  # type: ignore
+        # Remove reasoning as unsupported by Mistral
+        _ = message.pop("reasoning", None)  # type: ignore

    # The Mistral client, in comparison to the OpenAI client, requires the
    # "parameters" dict and the "description" string to be present