Commit 54f62b3c authored by chenych
Browse files

Update MiniMaxM2ReasoningParser

parent 44181448
...@@ -6,18 +6,23 @@ from collections.abc import Sequence ...@@ -6,18 +6,23 @@ from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest, ChatCompletionRequest,
DeltaMessage, DeltaMessage,
ResponsesRequest,
) )
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer
logger = init_logger(__name__) logger = init_logger(__name__)
@ReasoningParserManager.register_module("minimax_m2")
class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser): class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
""" """
Reasoning parser for MiniMax M2 model. Reasoning parser for MiniMax M2 model.
MiniMax M2 models don't generate <think> start token, only </think> end
token. All content before </think> is reasoning, content after is the
actual response.
""" """
@property @property
...@@ -30,6 +35,45 @@ class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser): ...@@ -30,6 +35,45 @@ class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
"""The token that ends reasoning content.""" """The token that ends reasoning content."""
return "</think>" return "</think>"
def extract_reasoning_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
) -> DeltaMessage | None:
    """
    Extract reasoning content from a delta message for streaming.

    MiniMax M2 models don't generate a <think> start token, so every
    delta is treated as reasoning until the </think> end token shows up;
    everything after it is regular content.

    Args:
        previous_text: Text generated before this delta (unused here).
        current_text: Text generated including this delta (unused here).
        delta_text: Text of the new delta.
        previous_token_ids: Token ids generated before this delta.
        current_token_ids: Token ids including this delta (unused here).
        delta_token_ids: Token ids of the new delta.

    Returns:
        None when the delta carries no text to forward (it is only the
        end token, or nothing remains on either side of the end marker);
        otherwise a DeltaMessage with the reasoning and/or content part.
    """
    end_token_id = self.end_token_id

    # A delta that is exactly the end token has nothing to emit.
    if len(delta_token_ids) == 1 and delta_token_ids[0] == end_token_id:
        return None

    # The end token was already seen: the reasoning phase is over and
    # this delta is plain content.
    if end_token_id in previous_token_ids:
        return DeltaMessage(content=delta_text)

    # The end token arrives inside this delta: split around its text.
    if end_token_id in delta_token_ids:
        # partition() rather than find() + slicing: when the marker text
        # is missing from delta_text (find() would return -1 and the
        # slices would cut off the last character), the whole delta is
        # kept as reasoning instead of being corrupted.
        reasoning, _, content = delta_text.partition(self.end_token)
        if not reasoning and not content:
            # Nothing on either side of the marker; skip the delta just
            # like the single-end-token case above.
            return None
        return DeltaMessage(
            reasoning=reasoning or None,
            content=content or None,
        )

    # No end token yet, so the whole delta is reasoning.
    return DeltaMessage(reasoning=delta_text)
class MiniMaxM2AppendThinkReasoningParser(ReasoningParser): class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
""" """
...@@ -40,7 +84,7 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser): ...@@ -40,7 +84,7 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
super().__init__(tokenizer, *args, **kwargs) super().__init__(tokenizer, *args, **kwargs)
self.end_token_id = self.vocab.get("</think>") self.end_token_id = self.vocab.get("</think>")
def is_reasoning_end(self, input_ids: list[int]) -> bool: def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
end_token_id = self.end_token_id end_token_id = self.end_token_id
return any(input_id == end_token_id for input_id in reversed(input_ids)) return any(input_id == end_token_id for input_id in reversed(input_ids))
...@@ -61,6 +105,6 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser): ...@@ -61,6 +105,6 @@ class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
return DeltaMessage(content=delta_text) return DeltaMessage(content=delta_text)
def extract_reasoning( def extract_reasoning(
self, model_output: str, request: ChatCompletionRequest self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
) -> tuple[str | None, str | None]: ) -> tuple[str | None, str | None]:
return None, "<think>" + model_output return None, "<think>" + model_output
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment