Unverified commit 104605cb, authored by Ifta khairul Alam Adil, committed by GitHub
Browse files
parent 96266f11
...@@ -5,7 +5,7 @@ vLLM offers support for reasoning models like [DeepSeek R1](https://huggingface. ...@@ -5,7 +5,7 @@ vLLM offers support for reasoning models like [DeepSeek R1](https://huggingface.
Reasoning models return an additional `reasoning` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models. Reasoning models return an additional `reasoning` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models.
!!! warning !!! warning
`reasoning` used to be called `reasoning_content`. For now, `reasoning_content` will continue to work. However, we encourage you to migrate to `reasoning` in case `reasoning_content` is removed in future. `reasoning` used to be called `reasoning_content`. To migrate, directly replace `reasoning_content` with `reasoning`.
## Supported Models ## Supported Models
......
...@@ -484,7 +484,7 @@ class TestGPTOSSSpeculativeChat: ...@@ -484,7 +484,7 @@ class TestGPTOSSSpeculativeChat:
) )
content = "" content = ""
reasoning_content = "" reasoning = ""
async for chunk in stream: async for chunk in stream:
delta = chunk.choices[0].delta delta = chunk.choices[0].delta
if delta.content: if delta.content:
...@@ -492,9 +492,9 @@ class TestGPTOSSSpeculativeChat: ...@@ -492,9 +492,9 @@ class TestGPTOSSSpeculativeChat:
chunk_reasoning = getattr(delta, "reasoning", None) chunk_reasoning = getattr(delta, "reasoning", None)
if chunk_reasoning: if chunk_reasoning:
reasoning_content += delta.reasoning reasoning += delta.reasoning
assert len(reasoning_content) > 0, "No reasoning was generated." assert len(reasoning) > 0, "No reasoning was generated."
assert content.strip() == "4" assert content.strip() == "4"
......
...@@ -21,119 +21,119 @@ def step3p5_tokenizer(): ...@@ -21,119 +21,119 @@ def step3p5_tokenizer():
SIMPLE_REASONING = { SIMPLE_REASONING = {
"output": "This is a reasoning section</think>This is the rest", "output": "This is a reasoning section</think>This is the rest",
"reasoning_content": "This is a reasoning section", "reasoning": "This is a reasoning section",
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
# need to get into parser again to remove newline after </think> # need to get into parser again to remove newline after </think>
COMPLETE_REASONING = { COMPLETE_REASONING = {
"output": "This is a reasoning section</think>", "output": "This is a reasoning section</think>",
"reasoning_content": "This is a reasoning section", "reasoning": "This is a reasoning section",
"content": None, "content": None,
"is_reasoning_end": False, "is_reasoning_end": False,
} }
NO_CONTENT = { NO_CONTENT = {
"output": "This is content", "output": "This is content",
"reasoning_content": "This is content", "reasoning": "This is content",
"content": None, "content": None,
"is_reasoning_end": False, "is_reasoning_end": False,
} }
NO_REASONING_STREAMING = { NO_REASONING_STREAMING = {
"output": "This is a reasoning section", "output": "This is a reasoning section",
"reasoning_content": "This is a reasoning section", "reasoning": "This is a reasoning section",
"content": None, "content": None,
"is_reasoning_end": False, "is_reasoning_end": False,
} }
MULTIPLE_LINES = { MULTIPLE_LINES = {
"output": "This\nThat</think>This is the rest\nThat", "output": "This\nThat</think>This is the rest\nThat",
"reasoning_content": "This\nThat", "reasoning": "This\nThat",
"content": "This is the rest\nThat", "content": "This is the rest\nThat",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
SHORTEST_REASONING_NO_STREAMING = { SHORTEST_REASONING_NO_STREAMING = {
"output": "</think>This is the rest", "output": "</think>This is the rest",
"reasoning_content": None, "reasoning": None,
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
SHORTEST_REASONING = { SHORTEST_REASONING = {
"output": "</think>This is the rest", "output": "</think>This is the rest",
"reasoning_content": None, "reasoning": None,
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
REASONING_WITH_THINK = { REASONING_WITH_THINK = {
"output": "<think>This is a reasoning section</think>This is the rest", "output": "<think>This is a reasoning section</think>This is the rest",
"reasoning_content": "This is a reasoning section", "reasoning": "This is a reasoning section",
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
COMPLETE_REASONING_WITH_THINK = { COMPLETE_REASONING_WITH_THINK = {
"output": "<think>This is a reasoning section</think>", "output": "<think>This is a reasoning section</think>",
"reasoning_content": "This is a reasoning section", "reasoning": "This is a reasoning section",
"content": None, "content": None,
"is_reasoning_end": False, "is_reasoning_end": False,
} }
MULTIPLE_LINES_WITH_THINK = { MULTIPLE_LINES_WITH_THINK = {
"output": "<think>This\nThat</think>This is the rest\nThat", "output": "<think>This\nThat</think>This is the rest\nThat",
"reasoning_content": "This\nThat", "reasoning": "This\nThat",
"content": "This is the rest\nThat", "content": "This is the rest\nThat",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
SHORTEST_REASONING_NO_STREAMING_WITH_THINK = { SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
"output": "</think>This is the rest", "output": "</think>This is the rest",
"reasoning_content": None, "reasoning": None,
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
SHORTEST_REASONING_WITH_THINK = { SHORTEST_REASONING_WITH_THINK = {
"output": "</think>This is the rest", "output": "</think>This is the rest",
"reasoning_content": None, "reasoning": None,
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
THINK_NO_END = { THINK_NO_END = {
"output": "<think>This is a reasoning section", "output": "<think>This is a reasoning section",
"reasoning_content": "This is a reasoning section", "reasoning": "This is a reasoning section",
"content": None, "content": None,
"is_reasoning_end": False, "is_reasoning_end": False,
} }
EMPTY = { EMPTY = {
"output": "", "output": "",
"reasoning_content": None, "reasoning": None,
"content": None, "content": None,
"is_reasoning_end": False, "is_reasoning_end": False,
} }
EMPTY_STREAMING = { EMPTY_STREAMING = {
"output": "", "output": "",
"reasoning_content": None, "reasoning": None,
"content": None, "content": None,
"is_reasoning_end": False, "is_reasoning_end": False,
} }
NEW_LINE = { NEW_LINE = {
"output": "\n<think>This is a reasoning section</think>\nThis is the rest", "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
"reasoning_content": "This is a reasoning section", "reasoning": "This is a reasoning section",
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
NEW_LINE_STREAMING = { NEW_LINE_STREAMING = {
"output": "\n<think>This is a reasoning section\n</think>\nThis is the rest", "output": "\n<think>This is a reasoning section\n</think>\nThis is the rest",
"reasoning_content": "\nThis is a reasoning section", "reasoning": "\nThis is a reasoning section",
"content": "This is the rest", "content": "This is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
NEW_LINE_STREAMING_COMPLEX_CONTENT = { NEW_LINE_STREAMING_COMPLEX_CONTENT = {
"output": "\n This is a \n reasoning section\n\n\n</think>\n\nThis is the rest", "output": "\n This is a \n reasoning section\n\n\n</think>\n\nThis is the rest",
"reasoning_content": "\n This is a \n reasoning section\n\n", "reasoning": "\n This is a \n reasoning section\n\n",
"content": "\nThis is the rest", "content": "\nThis is the rest",
"is_reasoning_end": True, "is_reasoning_end": True,
} }
MULTI_TURN_PROMPT_CONTENT = { MULTI_TURN_PROMPT_CONTENT = {
"output": "<think> This is last turn's reasoning section </think> hello <think>", "output": "<think> This is last turn's reasoning section </think> hello <think>",
"reasoning_content": "", "reasoning": "",
"content": "", "content": "",
"is_reasoning_end": False, "is_reasoning_end": False,
} }
...@@ -296,7 +296,7 @@ def test_reasoning( ...@@ -296,7 +296,7 @@ def test_reasoning(
print(f"content: {content}") print(f"content: {content}")
test_id = request.node.callspec.id if hasattr(request.node, "callspec") else None test_id = request.node.callspec.id if hasattr(request.node, "callspec") else None
if request.node.callspec.id != "multi_turn_prompt_content": if request.node.callspec.id != "multi_turn_prompt_content":
assert reasoning == param_dict["reasoning_content"] assert reasoning == param_dict["reasoning"]
assert content == param_dict["content"] assert content == param_dict["content"]
# Test is_reasoning_end # Test is_reasoning_end
......
...@@ -61,10 +61,10 @@ class ResponsesParser: ...@@ -61,10 +61,10 @@ class ResponsesParser:
# Store the finish_reason from the output # Store the finish_reason from the output
self.finish_reason = output.finish_reason self.finish_reason = output.finish_reason
reasoning_content, content = self.reasoning_parser_instance.extract_reasoning( reasoning, content = self.reasoning_parser_instance.extract_reasoning(
output.text, request=self.request output.text, request=self.request
) )
if reasoning_content: if reasoning:
self.response_messages.append( self.response_messages.append(
ResponseReasoningItem( ResponseReasoningItem(
type="reasoning", type="reasoning",
...@@ -73,7 +73,7 @@ class ResponsesParser: ...@@ -73,7 +73,7 @@ class ResponsesParser:
content=[ content=[
Content( Content(
type="reasoning_text", type="reasoning_text",
text=reasoning_content, text=reasoning,
) )
], ],
) )
......
...@@ -191,13 +191,13 @@ def _construct_single_message_from_response_item( ...@@ -191,13 +191,13 @@ def _construct_single_message_from_response_item(
], ],
) )
elif isinstance(item, ResponseReasoningItem): elif isinstance(item, ResponseReasoningItem):
reasoning_content = "" reasoning = ""
if item.encrypted_content: if item.encrypted_content:
raise ValueError("Encrypted content is not supported.") raise ValueError("Encrypted content is not supported.")
elif item.content and len(item.content) >= 1: elif item.content and len(item.content) >= 1:
reasoning_content = item.content[0].text reasoning = item.content[0].text
elif len(item.summary) >= 1: elif len(item.summary) >= 1:
reasoning_content = item.summary[0].text reasoning = item.summary[0].text
logger.warning( logger.warning(
"Using summary text as reasoning content for item %s. " "Using summary text as reasoning content for item %s. "
"Please use content instead of summary for " "Please use content instead of summary for "
...@@ -206,7 +206,7 @@ def _construct_single_message_from_response_item( ...@@ -206,7 +206,7 @@ def _construct_single_message_from_response_item(
) )
return { return {
"role": "assistant", "role": "assistant",
"reasoning": reasoning_content, "reasoning": reasoning,
} }
elif isinstance(item, ResponseOutputMessage): elif isinstance(item, ResponseOutputMessage):
return { return {
......
...@@ -199,7 +199,7 @@ class Parser: ...@@ -199,7 +199,7 @@ class Parser:
request: The request object used to generate the output. request: The request object used to generate the output.
Returns: Returns:
A tuple of (reasoning_content, response_content). A tuple of (reasoning, response_content).
""" """
@abstractmethod @abstractmethod
......
...@@ -17,9 +17,7 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser): ...@@ -17,9 +17,7 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
def extract_reasoning( def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest | ResponsesRequest self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]: ) -> tuple[str | None, str | None]:
reasoning_content, final_content = super().extract_reasoning( reasoning, final_content = super().extract_reasoning(model_output, request)
model_output, request
)
chat_template_kwargs = getattr(request, "chat_template_kwargs", None) chat_template_kwargs = getattr(request, "chat_template_kwargs", None)
if ( if (
...@@ -30,6 +28,6 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser): ...@@ -30,6 +28,6 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
) )
and final_content is None and final_content is None
): ):
reasoning_content, final_content = final_content, reasoning_content reasoning, final_content = final_content, reasoning
return reasoning_content, final_content return reasoning, final_content
...@@ -295,7 +295,7 @@ class StreamingXMLToolCallParser: ...@@ -295,7 +295,7 @@ class StreamingXMLToolCallParser:
final_delta = DeltaMessage( final_delta = DeltaMessage(
role=None, role=None,
content=None, content=None,
reasoning_content=None, reasoning=None,
tool_calls=[ tool_calls=[
DeltaToolCall( DeltaToolCall(
index=self.tool_call_index - 1, index=self.tool_call_index - 1,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment