`reasoning_content` -> `reasoning` (#27752)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

`reasoning_content` -> `reasoning` (#27752)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
d9ab1ad9 · Harry Mellor · GitHub · 608bb144 · d9ab1ad9 · d9ab1ad9
Unverified commit d9ab1ad9, authored Nov 08, 2025 by Harry Mellor; committed by GitHub on Nov 08, 2025
20 changed files
--- a/tests/reasoning/test_seedoss_reasoning_parser.py
+++ b/tests/reasoning/test_seedoss_reasoning_parser.py
@@ -28,49 +28,49 @@ def seedoss_tokenizer():
 SIMPLE_REASONING: dict[str, Any] = {
    "output": "This is a reasoning section</seed:think>This is the rest",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 COMPLETE_REASONING: dict[str, Any] = {
    "output": "This is a reasoning section</seed:think>",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
 }
 NO_CONTENT: dict[str, Any] = {
    "output": "This is content",
-    "reasoning_content": "This is content",
+    "reasoning": "This is content",
    "content": None,
    "is_reasoning_end": False,
 }
 NO_REASONING_STREAMING: dict[str, Any] = {
    "output": "This is a reasoning section",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
 }
 MULTIPLE_LINES: dict[str, Any] = {
    "output": "This\nThat</seed:think>This is the rest\nThat",
-    "reasoning_content": "This\nThat",
+    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
 }
 WITH_START_TOKEN: dict[str, Any] = {
    "output": ("<seed:think>This is a reasoning section</seed:think>This is the rest"),
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 ONLY_END_TOKEN: dict[str, Any] = {
    "output": "Some reasoning</seed:think>This is the rest",
-    "reasoning_content": "Some reasoning",
+    "reasoning": "Some reasoning",
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 NO_TOKENS: dict[str, Any] = {
    "output": "This is just content without any reasoning tokens",
-    "reasoning_content": "This is just content without any reasoning tokens",
+    "reasoning": "This is just content without any reasoning tokens",
    "content": None,
    "is_reasoning_end": False,
 }
@@ -95,7 +95,7 @@ def test_simple_reasoning(seedoss_tokenizer, streaming):
        parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming
    )
-    assert reasoning == SIMPLE_REASONING["reasoning_content"]
+    assert reasoning == SIMPLE_REASONING["reasoning"]
    assert content == SIMPLE_REASONING["content"]
@@ -109,7 +109,7 @@ def test_complete_reasoning(seedoss_tokenizer, streaming):
        parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming
    )
-    assert reasoning == COMPLETE_REASONING["reasoning_content"]
+    assert reasoning == COMPLETE_REASONING["reasoning"]
    assert content == COMPLETE_REASONING["content"]
@@ -123,7 +123,7 @@ def test_no_content(seedoss_tokenizer, streaming):
        parser, [cast(str, NO_CONTENT["output"])], streaming=streaming
    )
-    assert reasoning == NO_CONTENT["reasoning_content"]
+    assert reasoning == NO_CONTENT["reasoning"]
    assert content == NO_CONTENT["content"]
@@ -137,7 +137,7 @@ def test_multiple_lines(seedoss_tokenizer, streaming):
        parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming
    )
-    assert reasoning == MULTIPLE_LINES["reasoning_content"]
+    assert reasoning == MULTIPLE_LINES["reasoning"]
    assert content == MULTIPLE_LINES["content"]
@@ -151,7 +151,7 @@ def test_with_start_token(seedoss_tokenizer, streaming):
        parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming
    )
-    assert reasoning == WITH_START_TOKEN["reasoning_content"]
+    assert reasoning == WITH_START_TOKEN["reasoning"]
    assert content == WITH_START_TOKEN["content"]
@@ -168,7 +168,7 @@ def test_only_end_token(seedoss_tokenizer, streaming):
        parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming
    )
-    assert reasoning == ONLY_END_TOKEN["reasoning_content"]
+    assert reasoning == ONLY_END_TOKEN["reasoning"]
    assert content == ONLY_END_TOKEN["content"]
@@ -182,7 +182,7 @@ def test_no_tokens(seedoss_tokenizer, streaming):
        parser, [cast(str, NO_TOKENS["output"])], streaming=streaming
    )
-    assert reasoning == NO_TOKENS["reasoning_content"]
+    assert reasoning == NO_TOKENS["reasoning"]
    assert content == NO_TOKENS["content"]

--- a/tests/reasoning/utils.py
+++ b/tests/reasoning/utils.py
@@ -9,25 +9,28 @@ from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
 class StreamingReasoningReconstructor:
    def __init__(self):
-        self.reasoning_content = None
+        self.reasoning = None
        self.other_content = None
    def append_delta(self, delta: DeltaMessage):
        # content and the reasoning content should not be present
        # at the same time
-        assert delta.content is None or delta.reasoning_content is None, (
+        assert delta.content is None or delta.reasoning is None, (
            "Both content and reasoning content are present in the delta message"
        )
+        assert delta.reasoning == delta.reasoning_content, (
+            "reasoning_content should be present for backwards compatibility"
+        )
        if delta.content is not None:
            if self.other_content is None:
                self.other_content = delta.content
            else:
                self.other_content += delta.content
        else:
-            if self.reasoning_content is None:
+            if self.reasoning is None:
-                self.reasoning_content = delta.reasoning_content
+                self.reasoning = delta.reasoning
            else:
-                self.reasoning_content += delta.reasoning_content
+                self.reasoning += delta.reasoning
 def run_reasoning_extraction(
@@ -43,7 +46,7 @@ def run_reasoning_extraction(
            request,
        )
        return (
-            reconstructor.reasoning_content,
+            reconstructor.reasoning,
            reconstructor.other_content or None,
        )
    else:
@@ -69,7 +72,7 @@ def run_reasoning_extraction_mistral(
            request,
        )
        return (
-            reconstructor.reasoning_content,
+            reconstructor.reasoning,
            reconstructor.other_content or None,
        )
    else:
@@ -88,7 +91,7 @@ def run_reasoning_extraction_nonstreaming(
    request: ChatCompletionRequest | None = None,
 ) -> tuple[str | None, str | None]:
    request = request or ChatCompletionRequest(messages=[], model="test-model")
-    return reasoning_parser.extract_reasoning_content(
+    return reasoning_parser.extract_reasoning(
        model_output="".join(model_output), request=request
    )
@@ -110,7 +113,7 @@ def run_reasoning_extraction_streaming(
        ]
        current_text = previous_text + delta
        current_tokens = previous_tokens + token_delta
-        delta_message = reasoning_parser.extract_reasoning_content_streaming(
+        delta_message = reasoning_parser.extract_reasoning_streaming(
            previous_text,
            current_text,
            delta,
@@ -142,7 +145,7 @@ def run_reasoning_extraction_streaming_mistral(
        delta = reasoning_parser.model_tokenizer.convert_ids_to_tokens([model_delta])[0]
        current_text = previous_text + delta
        current_tokens = previous_tokens + token_delta
-        delta_message = reasoning_parser.extract_reasoning_content_streaming(
+        delta_message = reasoning_parser.extract_reasoning_streaming(
            previous_text,
            current_text,
            delta,

--- a/tests/tokenization/test_mistral_tokenizer.py
+++ b/tests/tokenization/test_mistral_tokenizer.py
@@ -102,7 +102,7 @@ def test_prepare_apply_chat_template_tools_and_messages(
    assert actual_request == expected_mistral_output
-# Tool use with list content and reasoning_content
+# Tool use with list content and reasoning
 @pytest.mark.parametrize(
    "openai_request,expected_mistral_output",
    [
@@ -115,7 +115,7 @@ def test_prepare_apply_chat_template_tools_and_messages(
                    },
                    {
                        "role": "assistant",
-                        "reasoning_content": None,
+                        "reasoning": None,
                        "content": None,
                        "tool_calls": [
                            {

--- a/tests/tool_use/test_ernie45_moe_tool_parser.py
+++ b/tests/tool_use/test_ernie45_moe_tool_parser.py
@@ -337,7 +337,7 @@ def test_extract_tool_calls_streaming_incremental(
        if (
            delta_message.role is None
            and delta_message.content is None
-            and delta_message.reasoning_content is None
+            and delta_message.reasoning is None
            and len(delta_message.tool_calls) == 0
        ):
            continue

--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -674,10 +674,10 @@ def test_structured_output_with_reasoning_matrices(
    assert output is not None and isinstance(output, RequestOutput)
    prompt = output.prompt
    generated_text = output.outputs[0].text
-    reasoning_content, content = run_reasoning_extraction(reasoner, [generated_text])
+    reasoning, content = run_reasoning_extraction(reasoner, [generated_text])
-    print(f"Prompt: {prompt!r}\nReasoning: {reasoning_content!r}\nContent: {content!r}")
+    print(f"Prompt: {prompt!r}\nReasoning: {reasoning!r}\nContent: {content!r}")
-    assert content is not None and reasoning_content is not None
+    assert content is not None and reasoning is not None
    output_json = json.loads(content)
    jsonschema.validate(instance=output_json, schema=reasoning_schema)

--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -521,15 +521,15 @@ def parse_chat_output(
    is_tool_call = False  # TODO: update this when tool call is supported
    if len(output_msgs) == 0:
        # The generation has stopped during reasoning.
-        reasoning_content = parser.current_content
+        reasoning = parser.current_content
        final_content = None
    elif len(output_msgs) == 1:
        # The generation has stopped during final message.
-        reasoning_content = output_msgs[0].content[0].text
+        reasoning = output_msgs[0].content[0].text
        final_content = parser.current_content
    else:
        reasoning_msg = output_msgs[:-1]
        final_msg = output_msgs[-1]
-        reasoning_content = "\n".join([msg.content[0].text for msg in reasoning_msg])
+        reasoning = "\n".join([msg.content[0].text for msg in reasoning_msg])
        final_content = final_msg.content[0].text
-    return reasoning_content, final_content, is_tool_call
+    return reasoning, final_content, is_tool_call
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -2102,7 +2102,15 @@ class ChatMessage(OpenAIBaseModel):
    tool_calls: list[ToolCall] = Field(default_factory=list)
    # vLLM-specific fields that are not in OpenAI spec
+    reasoning: str | None = None
    reasoning_content: str | None = None
+    """Deprecated: use `reasoning` instead."""
+    @model_validator(mode="after")
+    def handle_deprecated_reasoning_content(self):
+        """Copy reasoning to reasoning_content for backward compatibility."""
+        self.reasoning_content = self.reasoning
+        return self
 class ChatCompletionLogProb(OpenAIBaseModel):
@@ -2156,9 +2164,17 @@ class ChatCompletionResponse(OpenAIBaseModel):
 class DeltaMessage(OpenAIBaseModel):
    role: str | None = None
    content: str | None = None
+    reasoning: str | None = None
    reasoning_content: str | None = None
+    """Deprecated: use `reasoning` instead."""
    tool_calls: list[DeltaToolCall] = Field(default_factory=list)
+    @model_validator(mode="after")
+    def handle_deprecated_reasoning_content(self):
+        """Copy reasoning to reasoning_content for backward compatibility."""
+        self.reasoning_content = self.reasoning
+        return self
 class ChatCompletionResponseStreamChoice(OpenAIBaseModel):
    index: int

--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -759,9 +759,7 @@ class OpenAIServingChat(OpenAIServing):
                            delta_message = DeltaMessage(content=delta_text)
                        elif cur_channel == "analysis":
                            if request.include_reasoning:
-                                delta_message = DeltaMessage(
+                                delta_message = DeltaMessage(reasoning=delta_text)
-                                    reasoning_content=delta_text
-                                )
                            else:
                                delta_message = None
                        elif (
@@ -823,7 +821,7 @@ class OpenAIServingChat(OpenAIServing):
                        ):
                            assert reasoning_parser is not None
                            delta_message = (
-                                reasoning_parser.extract_reasoning_content_streaming(
+                                reasoning_parser.extract_reasoning_streaming(
                                    previous_text,
                                    current_text,
                                    delta_text,
@@ -836,7 +834,7 @@ class OpenAIServingChat(OpenAIServing):
                            # or think end id in prompt_token_ids
                            # i.e {"enable_thinking": False},
                            # set reasoning status to end.
-                            # Only keep 'content', remove 'reasoning_content'.
+                            # Only keep 'content', remove 'reasoning'.
                            if reasoning_parser.is_reasoning_end(
                                as_list(output.token_ids)
                            ) or (
@@ -899,7 +897,7 @@ class OpenAIServingChat(OpenAIServing):
                        if self.reasoning_parser and not reasoning_end_arr[i]:
                            delta_message = (
-                                reasoning_parser.extract_reasoning_content_streaming(
+                                reasoning_parser.extract_reasoning_streaming(
                                    previous_text,
                                    current_text,
                                    delta_text,
@@ -948,7 +946,7 @@ class OpenAIServingChat(OpenAIServing):
                        output_token_ids = as_list(output.token_ids)
                        if not reasoning_end_arr[i]:
                            delta_message = (
-                                reasoning_parser.extract_reasoning_content_streaming(
+                                reasoning_parser.extract_reasoning_streaming(
                                    previous_text,
                                    current_text,
                                    delta_text,
@@ -961,7 +959,7 @@ class OpenAIServingChat(OpenAIServing):
                            # i.e {"enable_thinking": False},
                            # set reasoning status to end.
                            # Remove the text and token ids related
-                            # to 'reasoning_content'.
+                            # to 'reasoning'.
                            if (
                                res.prompt_token_ids
                                and reasoning_parser.is_reasoning_end(
@@ -978,7 +976,7 @@ class OpenAIServingChat(OpenAIServing):
                            # When encountering think end id in delta_token_ids,
                            # set reasoning status to end.
                            # Remove the text and token ids related
-                            # to 'reasoning_content'.
+                            # to 'reasoning'.
                            if reasoning_parser.is_reasoning_end(output_token_ids):
                                reasoning_end_arr[i] = True
                                current_token_ids = (
@@ -1033,15 +1031,13 @@ class OpenAIServingChat(OpenAIServing):
                    # when only reasoning
                    elif self.reasoning_parser:
-                        delta_message = (
+                        delta_message = reasoning_parser.extract_reasoning_streaming(
-                            reasoning_parser.extract_reasoning_content_streaming(
+                            previous_text,
-                                previous_text,
+                            current_text,
-                                current_text,
+                            delta_text,
-                                delta_text,
+                            previous_token_ids,
-                                previous_token_ids,
+                            current_token_ids,
-                                current_token_ids,
+                            output.token_ids,
-                                output.token_ids,
-                            )
                        )
                    # handle streaming just a content delta
                    else:
@@ -1334,9 +1330,9 @@ class OpenAIServingChat(OpenAIServing):
                logprobs = None
            if self.use_harmony:
-                reasoning_content, content, _ = parse_chat_output(token_ids)
+                reasoning, content, _ = parse_chat_output(token_ids)
                if not request.include_reasoning:
-                    reasoning_content = None
+                    reasoning = None
                if self.tool_parser is not None:
                    tool_parser = self.tool_parser(tokenizer)
@@ -1349,14 +1345,14 @@ class OpenAIServingChat(OpenAIServing):
                    content = tool_call_info.content
                    message = ChatMessage(
                        role=role,
-                        reasoning_content=reasoning_content,
+                        reasoning=reasoning,
                        content=content,
                        tool_calls=tool_call_info.tool_calls,
                    )
                else:
                    message = ChatMessage(
                        role=role,
-                        reasoning_content=reasoning_content,
+                        reasoning=reasoning,
                        content=content,
                    )
@@ -1390,13 +1386,13 @@ class OpenAIServingChat(OpenAIServing):
                    return self.create_error_response(str(e))
                # If the reasoning parser is enabled,
                # tool calls are extracted exclusively from the content.
-                reasoning_content, content = reasoning_parser.extract_reasoning_content(
+                reasoning, content = reasoning_parser.extract_reasoning(
                    output.text, request=request
                )
                if not request.include_reasoning:
-                    reasoning_content = None
+                    reasoning = None
            else:
-                reasoning_content = None
+                reasoning = None
                content = output.text
            auto_tools_called = False
@@ -1416,9 +1412,7 @@ class OpenAIServingChat(OpenAIServing):
                not isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
                and request.tool_choice != "required"
            ):
-                message = ChatMessage(
+                message = ChatMessage(role=role, reasoning=reasoning, content=content)
-                    role=role, reasoning_content=reasoning_content, content=content
-                )
            # if the request uses tools and specified a tool choice
            elif (
@@ -1428,7 +1422,7 @@ class OpenAIServingChat(OpenAIServing):
                assert tool_calls is not None and len(tool_calls) > 0
                message = ChatMessage(
                    role=role,
-                    reasoning_content=reasoning_content,
+                    reasoning=reasoning,
                    content="",
                    tool_calls=[tool_call_class(function=tc) for tc in tool_calls],
                )
@@ -1452,15 +1446,13 @@ class OpenAIServingChat(OpenAIServing):
                    role=role,
                    content="",
                    tool_calls=tool_call_class_items,
-                    reasoning_content=reasoning_content,
+                    reasoning=reasoning,
                )
            # if the request doesn't use tool choice
            # OR specifies to not use a tool
            elif not request.tool_choice or request.tool_choice == "none":
-                message = ChatMessage(
+                message = ChatMessage(role=role, reasoning=reasoning, content=content)
-                    role=role, reasoning_content=reasoning_content, content=content
-                )
            # handle when there are tools and tool choice is auto
            elif (
@@ -1476,7 +1468,7 @@ class OpenAIServingChat(OpenAIServing):
                if tool_calls:
                    message = ChatMessage(
                        role=role,
-                        reasoning_content=reasoning_content,
+                        reasoning=reasoning,
                        content=content,
                        tool_calls=[
                            ToolCall(
@@ -1498,7 +1490,7 @@ class OpenAIServingChat(OpenAIServing):
                        ret_content = content
                    message = ChatMessage(
                        role=role,
-                        reasoning_content=reasoning_content,
+                        reasoning=reasoning,
                        content=ret_content,
                    )
@@ -1509,9 +1501,7 @@ class OpenAIServingChat(OpenAIServing):
                    " if tools should be extracted. Returning a standard chat "
                    "completion."
                )
-                message = ChatMessage(
+                message = ChatMessage(role=role, reasoning=reasoning, content=content)
-                    role=role, reasoning_content=reasoning_content, content=content
-                )
            # In OpenAI's API, when a tool is called, the finish_reason is:
            # "tool_calls" for "auto" or "required" tool calls,
            # and "stop" for named tool calls.

--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -778,11 +778,11 @@ class OpenAIServingResponses(OpenAIServing):
                logger.exception("Error in reasoning parser creation.")
                raise e
-            reasoning_content, content = reasoning_parser.extract_reasoning_content(
+            reasoning, content = reasoning_parser.extract_reasoning(
                final_output.text, request=request
            )
        else:
-            reasoning_content = None
+            reasoning = None
            content = final_output.text
        # Log complete response if output logging is enabled
@@ -790,8 +790,8 @@ class OpenAIServingResponses(OpenAIServing):
            output_text = ""
            if content:
                output_text = content
-            elif reasoning_content:
+            elif reasoning:
-                output_text = f"[reasoning: {reasoning_content}]"
+                output_text = f"[reasoning: {reasoning}]"
            if output_text:
                self.request_logger.log_outputs(
@@ -805,15 +805,13 @@ class OpenAIServingResponses(OpenAIServing):
        reasoning_item = None
        message_item = None
-        if reasoning_content:
+        if reasoning:
            reasoning_item = ResponseReasoningItem(
                id=f"rs_{random_uuid()}",
                summary=[],
                type="reasoning",
                content=[
-                    ResponseReasoningTextContent(
+                    ResponseReasoningTextContent(text=reasoning, type="reasoning_text")
-                        text=reasoning_content, type="reasoning_text"
-                    )
                ],
                status=None,  # NOTE: Only the last output item has status.
            )
@@ -1208,15 +1206,13 @@ class OpenAIServingResponses(OpenAIServing):
            if ctx.last_output.outputs:
                output = ctx.last_output.outputs[0]
                if reasoning_parser:
-                    delta_message = (
+                    delta_message = reasoning_parser.extract_reasoning_streaming(
-                        reasoning_parser.extract_reasoning_content_streaming(
+                        previous_text=previous_text,
-                            previous_text=previous_text,
+                        current_text=previous_text + output.text,
-                            current_text=previous_text + output.text,
+                        delta_text=output.text,
-                            delta_text=output.text,
+                        previous_token_ids=previous_token_ids,
-                            previous_token_ids=previous_token_ids,
+                        current_token_ids=previous_token_ids + output.token_ids,
-                            current_token_ids=previous_token_ids + output.token_ids,
+                        delta_token_ids=output.token_ids,
-                            delta_token_ids=output.token_ids,
-                        )
                    )
                else:
                    delta_message = DeltaMessage(
@@ -1228,7 +1224,7 @@ class OpenAIServingResponses(OpenAIServing):
                    continue
                if not first_delta_sent:
                    current_item_id = str(uuid.uuid4())
-                    if delta_message.reasoning_content:
+                    if delta_message.reasoning:
                        yield _increment_sequence_number_and_return(
                            ResponseOutputItemAddedEvent(
                                type="response.output_item.added",
@@ -1280,15 +1276,15 @@ class OpenAIServingResponses(OpenAIServing):
                # same as content or reasoning content
                if (
                    previous_delta_messages
-                    and previous_delta_messages[-1].reasoning_content is not None
+                    and previous_delta_messages[-1].reasoning is not None
                    and delta_message.content is not None
                ):
                    # from reasoning to normal content, send done
                    # event for reasoning
                    reason_content = "".join(
-                        pm.reasoning_content
+                        pm.reasoning
                        for pm in previous_delta_messages
-                        if pm.reasoning_content is not None
+                        if pm.reasoning is not None
                    )
                    yield _increment_sequence_number_and_return(
                        ResponseReasoningTextDoneEvent(
@@ -1356,7 +1352,7 @@ class OpenAIServingResponses(OpenAIServing):
                    # reset previous delta messages
                    previous_delta_messages = []
-                if delta_message.reasoning_content is not None:
+                if delta_message.reasoning is not None:
                    yield _increment_sequence_number_and_return(
                        ResponseReasoningTextDeltaEvent(
                            type="response.reasoning_text.delta",
@@ -1364,7 +1360,7 @@ class OpenAIServingResponses(OpenAIServing):
                            content_index=current_content_index,
                            output_index=current_output_index,
                            item_id=current_item_id,
-                            delta=delta_message.reasoning_content,
+                            delta=delta_message.reasoning,
                        )
                    )
                elif delta_message.content is not None:
@@ -1392,11 +1388,11 @@ class OpenAIServingResponses(OpenAIServing):
                previous_delta_messages.append(delta_message)
        if previous_delta_messages:
-            if previous_delta_messages[-1].reasoning_content is not None:
+            if previous_delta_messages[-1].reasoning is not None:
                reason_content = "".join(
-                    pm.reasoning_content
+                    pm.reasoning
                    for pm in previous_delta_messages
-                    if pm.reasoning_content is not None
+                    if pm.reasoning is not None
                )
                yield _increment_sequence_number_and_return(
                    ResponseReasoningTextDoneEvent(

--- a/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py
@@ -279,7 +279,7 @@ class StreamingXMLToolCallParser:
                    final_delta = DeltaMessage(
                        role=None,
                        content=None,
-                        reasoning_content=None,
+                        reasoning=None,
                        tool_calls=[
                            DeltaToolCall(
                                index=self.tool_call_index - 1,

--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@@ -76,7 +76,7 @@ class ReasoningParser:
        """
    @abstractmethod
-    def extract_reasoning_content(
+    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
@@ -100,7 +100,7 @@ class ReasoningParser:
        """
    @abstractmethod
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,

--- a/vllm/reasoning/basic_parsers.py
+++ b/vllm/reasoning/basic_parsers.py
@@ -76,7 +76,7 @@ class BaseThinkingReasoningParser(ReasoningParser):
        else:
            return input_ids[input_ids.index(self.end_token_id) + 1 :]
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -103,11 +103,10 @@ class BaseThinkingReasoningParser(ReasoningParser):
                # start token in previous, end token in delta,
                # extract reasoning content
                end_index = delta_text.find(self.end_token)
-                reasoning_content = delta_text[:end_index]
+                reasoning = delta_text[:end_index]
                content = delta_text[end_index + len(self.end_token) :]
                return DeltaMessage(
-                    reasoning_content=reasoning_content,
+                    reasoning=reasoning, content=content if content else None
-                    content=content if content else None,
                )
            elif self.end_token_id in previous_token_ids:
                # start token in previous, end token in previous,
@@ -116,30 +115,27 @@ class BaseThinkingReasoningParser(ReasoningParser):
            else:
                # start token in previous, no end token in previous or delta,
                # reasoning content continues
-                return DeltaMessage(reasoning_content=delta_text)
+                return DeltaMessage(reasoning=delta_text)
        elif self.start_token_id in delta_token_ids:
            if self.end_token_id in delta_token_ids:
                # start token in delta, end token in delta,
                # extract reasoning content
                start_index = delta_text.find(self.start_token)
                end_index = delta_text.find(self.end_token)
-                reasoning_content = delta_text[
+                reasoning = delta_text[start_index + len(self.start_token) : end_index]
-                    start_index + len(self.start_token) : end_index
-                ]
                content = delta_text[end_index + len(self.end_token) :]
                return DeltaMessage(
-                    reasoning_content=reasoning_content,
+                    reasoning=reasoning, content=content if content else None
-                    content=content if content else None,
                )
            else:
                # start token in delta, no end token in delta,
                # reasoning content continues
-                return DeltaMessage(reasoning_content=delta_text)
+                return DeltaMessage(reasoning=delta_text)
        else:
            # not find thinking start token
            return DeltaMessage(content=delta_text)
-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
        """
@@ -160,7 +156,7 @@ class BaseThinkingReasoningParser(ReasoningParser):
        if self.end_token not in model_output:
            return model_output, None
        else:
-            reasoning_content, _, content = model_output.partition(self.end_token)
+            reasoning, _, content = model_output.partition(self.end_token)
            # If generation stops right after end-of-think, return null content
            final_content = content or None
-            return reasoning_content, final_content
+            return reasoning, final_content
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -25,7 +25,7 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
        """The token that ends reasoning content."""
        return "</think>"
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -34,7 +34,7 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
-        ret = super().extract_reasoning_content_streaming(
+        ret = super().extract_reasoning_streaming(
            previous_text,
            current_text,
            delta_text,
@@ -51,10 +51,10 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
                # end token in delta with more tokens,
                # extract reasoning content and content
                end_index = delta_text.find(self.end_token)
-                reasoning_content = delta_text[:end_index]
+                reasoning = delta_text[:end_index]
                content = delta_text[end_index + len(self.end_token) :]
                return DeltaMessage(
-                    reasoning_content=reasoning_content,
+                    reasoning=reasoning,
                    content=content if content else None,
                )
            elif self.end_token_id in previous_token_ids:
@@ -62,6 +62,6 @@ class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
                return DeltaMessage(content=delta_text)
            else:
                # no end token in previous or delta, reasoning content continues
-                return DeltaMessage(reasoning_content=delta_text)
+                return DeltaMessage(reasoning=delta_text)
        return ret
--- a/vllm/reasoning/deepseek_v3_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_v3_reasoning_parser.py
@@ -38,12 +38,12 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        return self._parser.extract_content_ids(input_ids)
-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
-        return self._parser.extract_reasoning_content(model_output, request)
+        return self._parser.extract_reasoning(model_output, request)
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -52,7 +52,7 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
-        return self._parser.extract_reasoning_content_streaming(
+        return self._parser.extract_reasoning_streaming(
            previous_text,
            current_text,
            delta_text,

--- a/vllm/reasoning/ernie45_reasoning_parser.py
+++ b/vllm/reasoning/ernie45_reasoning_parser.py
@@ -57,7 +57,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
                "tokens in the tokenizer!"
            )
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -73,7 +73,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
        The Ernie45 thinking model ouput format is
            abc\n</think>\n\n<response>\ndef\n</response>\n
        or  abc\n</think>\ndef
-        - 'abc' goes to reasoning_content
+        - 'abc' goes to reasoning
        - 'def' goes to content
        """
        # Skip single special tokens
@@ -94,7 +94,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
            # </think> in delta with more tokens,
            # extract reasoning content and content
            think_end_index = delta_text.find(self.end_token)
-            reasoning_content = delta_text[:think_end_index]
+            reasoning = delta_text[:think_end_index]
            content = delta_text[think_end_index + len(self.end_token) :]
            content = content.lstrip("\n")
            response_start_idx = content.find(self.response_start_token)
@@ -104,7 +104,7 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
            if response_end_idx != -1:
                content = content[:response_end_idx]
            return DeltaMessage(
-                reasoning_content=reasoning_content,
+                reasoning=reasoning,
                content=content if content else None,
            )
        elif self.end_token_id in previous_token_ids:
@@ -138,9 +138,9 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
            return DeltaMessage(content=content if content else None)
        else:
            # no </think> in previous or delta, reasoning content continues
-            return DeltaMessage(reasoning_content=delta_text)
+            return DeltaMessage(reasoning=delta_text)
-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
        """
@@ -148,14 +148,12 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
        The Ernie45 thinking model ouput format is
            abc\n</think>\n\n\n<response>\ndef\n</response>\n
        or  abc\n</think>\ndef
-        - 'abc' goes to reasoning_content
+        - 'abc' goes to reasoning
        - 'def' goes to content
        Returns:
            tuple[Optional[str], Optional[str]]: reasoning content and content
        """
-        reasoning_content, content = super().extract_reasoning_content(
+        reasoning, content = super().extract_reasoning(model_output, request)
-            model_output, request
-        )
        if content:
            start_idx = content.find(self.response_start_token)
            end_idx = content.rfind(self.response_end_token)
@@ -164,4 +162,4 @@ class Ernie45ReasoningParser(BaseThinkingReasoningParser):
                content = content[start_idx + len(self.response_start_token) : end_idx]
        final_content = content or None
-        return reasoning_content, final_content
+        return reasoning, final_content
--- a/vllm/reasoning/glm4_moe_reasoning_parser.py
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -70,7 +70,7 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
        else:
            return input_ids[input_ids.index(self.think_end_token_id) + 1 :]
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -84,7 +84,7 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
        Handles streaming output where previous + delta = current.
        Uses token IDs for faster processing.
        For text <think>abc</think>xyz:
-        - 'abc' goes to reasoning_content
+        - 'abc' goes to reasoning
        - 'xyz' goes to content
        """
        # Skip single special tokens
@@ -98,10 +98,10 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
                # <think> in previous, </think> in delta,
                # extract reasoning content
                end_index = delta_text.find(self.think_end_token)
-                reasoning_content = delta_text[:end_index]
+                reasoning = delta_text[:end_index]
                content = delta_text[end_index + len(self.think_end_token) :]
                return DeltaMessage(
-                    reasoning_content=reasoning_content,
+                    reasoning=reasoning,
                    content=content if content else None,
                )
            elif self.think_end_token_id in previous_token_ids:
@@ -111,36 +111,36 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
            else:
                # <think> in previous, no </think> in previous or delta,
                # reasoning content continues
-                return DeltaMessage(reasoning_content=delta_text)
+                return DeltaMessage(reasoning=delta_text)
        elif self.think_start_token_id in delta_token_ids:
            if self.think_end_token_id in delta_token_ids:
                # <think> in delta, </think> in delta, extract reasoning content
                start_index = delta_text.find(self.think_start_token)
                end_index = delta_text.find(self.think_end_token)
-                reasoning_content = delta_text[
+                reasoning = delta_text[
                    start_index + len(self.think_start_token) : end_index
                ]
                content = delta_text[end_index + len(self.think_end_token) :]
                return DeltaMessage(
-                    reasoning_content=reasoning_content,
+                    reasoning=reasoning,
                    content=content if content else None,
                )
            else:
                # <think> in delta, no </think> in delta,
                # reasoning content continues
-                return DeltaMessage(reasoning_content=delta_text)
+                return DeltaMessage(reasoning=delta_text)
        else:
            # thinking is disabled, just content
            return DeltaMessage(content=delta_text)
-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
        """
        Extract reasoning content from the model output.
        For text <think>abc</think>xyz:
-        - 'abc' goes to reasoning_content
+        - 'abc' goes to reasoning
        - 'xyz' goes to content
        Returns:
@@ -165,7 +165,7 @@ class Glm4MoeModelReasoningParser(ReasoningParser):
            return None, model_output
        # Extract reasoning content from the model output.
-        reasoning_content, _, content = model_output.partition(self.think_end_token)
+        reasoning, _, content = model_output.partition(self.think_end_token)
        final_content = content or None
-        return reasoning_content, final_content
+        return reasoning, final_content
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -104,7 +104,7 @@ class GptOssReasoningParser(ReasoningParser):
            return []
        return self.model_tokenizer.encode(content)
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -131,9 +131,9 @@ class GptOssReasoningParser(ReasoningParser):
                content_delta = cur_content
        if reasoning_delta is None and content_delta is None:
            return None
-        return DeltaMessage(reasoning_content=reasoning_delta, content=content_delta)
+        return DeltaMessage(reasoning=reasoning_delta, content=content_delta)
-    def extract_reasoning_content(
+    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest,

--- a/vllm/reasoning/granite_reasoning_parser.py
+++ b/vllm/reasoning/granite_reasoning_parser.py
@@ -49,7 +49,7 @@ class GraniteReasoningParser(ReasoningParser):
            len(think_start) for think_start in self.valid_think_starts
        )
-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
        """Extract the reasoning content & content sections, respectively.
@@ -67,12 +67,12 @@ class GraniteReasoningParser(ReasoningParser):
        re_match = self.reasoning_regex.findall(model_output)
        if not re_match:
            return None, model_output
-        reasoning_content, response_content = re_match[0]
+        reasoning, response_content = re_match[0]
        if not response_content:
-            return reasoning_content, None
+            return reasoning, None
-        return reasoning_content, response_content
+        return reasoning, response_content
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -107,12 +107,10 @@ class GraniteReasoningParser(ReasoningParser):
            Union[DeltaMessage, None]
                DeltaMessage with either reasoning content or content, or None.
        """
-        reasoning_content, resp_seq_len, content = self._get_content_sections(
+        reasoning, resp_seq_len, content = self._get_content_sections(current_text)
-            current_text
-        )
        # Either we haven't finished the start of the reasoning sequence,
        # or the model is generating something unexpected.
-        if not reasoning_content:
+        if not reasoning:
            delta_message = self._get_delta_message_with_no_reasoning_bounds(
                current_text, delta_text
            )
@@ -120,16 +118,16 @@ class GraniteReasoningParser(ReasoningParser):
        # the start of response sequence.
        elif not content:
            delta_message = self._get_delta_message_with_no_response_bounds(
-                current_text, reasoning_content, delta_text
+                current_text, reasoning, delta_text
            )
        # We've finished both the start of reasoning and start of response seq.
        else:
            # This should never happen since we matched on the response
            assert resp_seq_len is not None
            delta_message = self._get_delta_message_with_both_bounds(
-                delta_text, reasoning_content, content, current_text, resp_seq_len
+                delta_text, reasoning, content, current_text, resp_seq_len
            )
-        if not delta_message.content and not delta_message.reasoning_content:
+        if not delta_message.content and not delta_message.reasoning:
            return None
        return delta_message
@@ -185,20 +183,20 @@ class GraniteReasoningParser(ReasoningParser):
        # message and append everything to content in the future.
        if was_substr and not is_substr:
            return DeltaMessage(
-                reasoning_content=None,
+                reasoning=None,
                content=current_text,
            )
        if is_substr:
            # Might still be in the special token sequence; return nothing
-            return DeltaMessage(reasoning_content=None, content=None)
+            return DeltaMessage(reasoning=None, content=None)
        # Otherwise the sequence has already been broken and we already
        # corrected; just return the delta text as normal content.
-        return DeltaMessage(reasoning_content=None, content=delta_text)
+        return DeltaMessage(reasoning=None, content=delta_text)
    def _get_delta_message_with_no_response_bounds(
        self,
        current_text: str,
-        reasoning_content: str,
+        reasoning: str,
        delta_text: str,
    ) -> DeltaMessage:
        """Parse the delta message when the current text has both reasoning
@@ -208,7 +206,7 @@ class GraniteReasoningParser(ReasoningParser):
        Args:
            current_text (str): The full previous + delta text.
-            reasoning_content (str): reasoning content from current_text.
+            reasoning (str): reasoning content from current_text.
            delta_text (str): Text to consider and parse content from.
        Returns:
@@ -222,12 +220,12 @@ class GraniteReasoningParser(ReasoningParser):
            current_text.endswith(response_start)
            for response_start in self.valid_response_starts
        )
-        if reasoning_content is None or ends_with_start_response_seq:
+        if reasoning is None or ends_with_start_response_seq:
-            return DeltaMessage(reasoning_content=None, content=None)
+            return DeltaMessage(reasoning=None, content=None)
        # Consider previous / current text only within context of the reasoning
-        previous_text = reasoning_content[: -len(delta_text)]
+        previous_text = reasoning[: -len(delta_text)]
-        current_text = reasoning_content
+        current_text = reasoning
        # We need to be careful about adding unfinished response sequences;
        # Find the place at which we MIGHT be starting a response sequence
@@ -253,32 +251,30 @@ class GraniteReasoningParser(ReasoningParser):
        # Delta only contains potential continued response sequence text.
        if delta_continues_substr:
-            return DeltaMessage(reasoning_content=None, content=None)
+            return DeltaMessage(reasoning=None, content=None)
        if not prev_was_substr:
            # Delta may be starting a new response seq but has other text too.
            if delta_new_substr:
-                return DeltaMessage(
+                return DeltaMessage(reasoning=delta_text[:delta_idx], content=None)
-                    reasoning_content=delta_text[:delta_idx], content=None
-                )
            # Normal case for most reasoning text (no potential special seqs).
-            return DeltaMessage(reasoning_content=delta_text, content=None)
+            return DeltaMessage(reasoning=delta_text, content=None)
        # The substring that previously seemed to be a potential response
        # seq wasn't one; we need to add the content to the delta message,
        # and also slice off the potential response sequence
        elif delta_new_substr:
-            reasoning_content = previous_text[prev_idx:] + delta_text[:delta_idx]
+            reasoning = previous_text[prev_idx:] + delta_text[:delta_idx]
-            return DeltaMessage(reasoning_content=reasoning_content, content=None)
+            return DeltaMessage(reasoning=reasoning, content=None)
        # No new substring yet, and we broke our old one; take the whole delta
        return DeltaMessage(
-            reasoning_content=previous_text[prev_idx:] + delta_text,
+            reasoning=previous_text[prev_idx:] + delta_text,
            content=None,
        )
    def _get_delta_message_with_both_bounds(
        self,
        delta_text: str,
-        reasoning_content: str,
+        reasoning: str,
        response_content: str,
        current_text: str,
        response_seq_len: int,
@@ -288,7 +284,7 @@ class GraniteReasoningParser(ReasoningParser):
        Args:
            delta_text: Text to consider and parse content from.
-            reasoning_content: reasoning content from current_text.
+            reasoning: reasoning content from current_text.
            response_content: response content from current_text.
            current_text: The full previous + delta text.
            response_seq_len: Len of the complete response sequence used.
@@ -301,20 +297,20 @@ class GraniteReasoningParser(ReasoningParser):
        reasoning_end_idx = len(delta_text) - (len(response_content) + response_seq_len)
        if reasoning_end_idx < 0:
-            delta_reasoning_content = None
+            delta_reasoning = None
        else:
            # Get the starting offset
-            start_reasoning_content_idx = (
+            start_reasoning_idx = (
-                len(reasoning_content) + response_seq_len + len(response_content) - 1
+                len(reasoning) + response_seq_len + len(response_content) - 1
            )
            delta_offset = len(current_text) - len(delta_text)
-            start_offset = start_reasoning_content_idx - delta_offset
+            start_offset = start_reasoning_idx - delta_offset
            if start_offset < 0:
                start_offset = 0
-            delta_reasoning_content = delta_text[start_offset:reasoning_end_idx]
+            delta_reasoning = delta_text[start_offset:reasoning_end_idx]
        return DeltaMessage(
-            reasoning_content=delta_reasoning_content,
+            reasoning=delta_reasoning,
            content=delta_content,
        )
@@ -333,7 +329,7 @@ class GraniteReasoningParser(ReasoningParser):
            (if there is one) and the non-reasoning content.
        """
        current_chunk_start = 0
-        start_reasoning_content = None
+        start_reasoning = None
        parsed_content = False
        delimiter_idxs = [
            idx
@@ -344,10 +340,10 @@ class GraniteReasoningParser(ReasoningParser):
        for current_chunk_end in delimiter_idxs:
            current_chunk = current_text[current_chunk_start:current_chunk_end]
            # Check to see if the start of reasoning seq if complete
-            if start_reasoning_content is None:
+            if start_reasoning is None:
                for think_start in self.valid_think_starts:
                    if current_chunk == think_start[:-1]:
-                        start_reasoning_content = current_chunk_end + 1
+                        start_reasoning = current_chunk_end + 1
                        current_chunk_start = current_chunk_end + 1
                        break
@@ -357,13 +353,11 @@ class GraniteReasoningParser(ReasoningParser):
                    if current_chunk[-len(response_start) + 1 :] == response_start[:-1]:
                        # Mark end of reasoning and start response content
                        # after the start of response sequence.
-                        end_reasoning_content = current_chunk_end - len(response_start)
+                        end_reasoning = current_chunk_end - len(response_start)
-                        reasoning_content = current_text[
+                        reasoning = current_text[start_reasoning:end_reasoning]
-                            start_reasoning_content:end_reasoning_content
-                        ]
                        response_content = current_text[current_chunk_end + 1 :]
-                        return reasoning_content, len(response_start), response_content
+                        return reasoning, len(response_start), response_content
-        if start_reasoning_content and not parsed_content:
+        if start_reasoning and not parsed_content:
-            return current_text[start_reasoning_content:], None, None
+            return current_text[start_reasoning:], None, None
        return None, None, None
--- a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
+++ b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
@@ -86,7 +86,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
        # this id is not part of content, so just return [] here.
        return []
-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
        """Extract the reasoning content & content sections, respectively.
@@ -104,27 +104,27 @@ class HunyuanA13BReasoningParser(ReasoningParser):
        re_match = self.full_match_reasoning_regex.findall(model_output)
        if re_match:
-            reasoning_content, response_content = re_match[0]
+            reasoning, response_content = re_match[0]
-            if len(reasoning_content) == 0:
+            if len(reasoning) == 0:
-                reasoning_content = None
+                reasoning = None
            if len(response_content) == 0:
                response_content = None
-            return reasoning_content, response_content
+            return reasoning, response_content
        fallback_regex = self.half_match_reasoning_regex
        fallback_match = fallback_regex.findall(model_output)
        if fallback_match:
-            reasoning_content, response_content = fallback_match[0]
+            reasoning, response_content = fallback_match[0]
            if response_content.endswith(self.response_end_expr):
                response_content = response_content[: -len(self.response_end_expr)]
-            if len(reasoning_content) == 0:
+            if len(reasoning) == 0:
-                reasoning_content = None
+                reasoning = None
            if len(response_content) == 0:
                response_content = None
-            return reasoning_content, response_content
+            return reasoning, response_content
        return None, model_output
@@ -140,7 +140,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
                sub_idx += 1
        return sub_idx == len(subsequence)
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -223,19 +223,15 @@ class HunyuanA13BReasoningParser(ReasoningParser):
                # Return content based on current state
                if self.current_state == "think":
-                    return DeltaMessage(
+                    return DeltaMessage(reasoning=buffered_content, content=None)
-                        reasoning_content=buffered_content, content=None
-                    )
                else:
-                    return DeltaMessage(
+                    return DeltaMessage(reasoning=None, content=buffered_content)
-                        reasoning_content=None, content=buffered_content
-                    )
            else:
                # No buffered content, send normally
                if self.current_state == "think":
-                    return DeltaMessage(reasoning_content=delta_text, content=None)
+                    return DeltaMessage(reasoning=delta_text, content=None)
                else:
-                    return DeltaMessage(reasoning_content=None, content=delta_text)
+                    return DeltaMessage(reasoning=None, content=delta_text)
        # If no content to send in this delta
        return None
--- a/vllm/reasoning/identity_reasoning_parser.py
+++ b/vllm/reasoning/identity_reasoning_parser.py
@@ -36,7 +36,7 @@ class IdentityReasoningParser(ReasoningParser):
        # Identity: return all tokens as content
        return input_ids
-    def extract_reasoning_content_streaming(
+    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
@@ -50,9 +50,9 @@ class IdentityReasoningParser(ReasoningParser):
            return DeltaMessage(content=delta_text)
        return None
-    def extract_reasoning_content(
+    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
-        # No reasoning separation: return None for reasoning_content,
+        # No reasoning separation: return None for reasoning,
        # and full model_output as content
        return None, model_output