Unverified commit 720af6ab, authored by Roger Young, committed by GitHub
Browse files

[Model][MiniMax-M2] Support MiniMax-M2 Model (#27535)


Signed-off-by: xuebi <xuebi@minimaxi.com>
Co-authored-by: xuebi <xuebi@minimaxi.com>
parent 55cba4a0
......@@ -341,6 +341,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"MiniMaxM1ForCausalLM": _HfExamplesInfo(
"MiniMaxAI/MiniMax-M1-40k", trust_remote_code=True
),
"MiniMaxM2ForCausalLM": _HfExamplesInfo(
"MiniMaxAI/MiniMax-M2", trust_remote_code=True
),
"MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
"MixtralForCausalLM": _HfExamplesInfo(
"mistralai/Mixtral-8x7B-Instruct-v0.1",
......
......@@ -16,6 +16,7 @@ from .kimi_k2_tool_parser import KimiK2ToolParser
from .llama4_pythonic_tool_parser import Llama4PythonicToolParser
from .llama_tool_parser import Llama3JsonToolParser
from .longcat_tool_parser import LongcatFlashToolParser
from .minimax_m2_tool_parser import MinimaxM2ToolParser
from .minimax_tool_parser import MinimaxToolParser
from .mistral_tool_parser import MistralToolParser
from .olmo3_tool_parser import Olmo3PythonicToolParser
......@@ -56,4 +57,5 @@ __all__ = [
"SeedOssToolParser",
"Step3ToolParser",
"OpenAIToolParser",
"MinimaxM2ToolParser",
]
This diff is collapsed.
This diff is collapsed.
......@@ -131,6 +131,7 @@ _TEXT_GENERATION_MODELS = {
"MiniMaxForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
"MiniMaxText01ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
"MiniMaxM1ForCausalLM": ("minimax_text_01", "MiniMaxText01ForCausalLM"),
"MiniMaxM2ForCausalLM": ("minimax_m2", "MiniMaxM2ForCausalLM"),
"MistralForCausalLM": ("llama", "LlamaForCausalLM"),
"MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"),
# transformers's mpt class has lower case
......
......@@ -11,6 +11,7 @@ from .gptoss_reasoning_parser import GptOssReasoningParser
from .granite_reasoning_parser import GraniteReasoningParser
from .hunyuan_a13b_reasoning_parser import HunyuanA13BReasoningParser
from .identity_reasoning_parser import IdentityReasoningParser
from .minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser
from .mistral_reasoning_parser import MistralReasoningParser
from .olmo3_reasoning_parser import Olmo3ReasoningParser
from .qwen3_reasoning_parser import Qwen3ReasoningParser
......@@ -34,4 +35,5 @@ __all__ = [
"Step3ReasoningParser",
"GptOssReasoningParser",
"SeedOSSReasoningParser",
"MiniMaxM2ReasoningParser",
]
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
DeltaMessage,
ResponsesRequest,
)
from vllm.logger import init_logger
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
from vllm.transformers_utils.tokenizer import AnyTokenizer
# Module-level logger, created via vLLM's init_logger with this module's name.
logger = init_logger(__name__)
@ReasoningParserManager.register_module("minimax_m2")
class MiniMaxM2ReasoningParser(BaseThinkingReasoningParser):
    """
    Reasoning parser for the MiniMax M2 model, registered as "minimax_m2".

    This class only supplies the two delimiter strings below; every other
    piece of parsing behaviour is inherited from BaseThinkingReasoningParser.
    """

    @property
    def end_token(self) -> str:
        """Closing delimiter of the reasoning section."""
        return "</think>"

    @property
    def start_token(self) -> str:
        """Opening delimiter of the reasoning section."""
        return "<think>"
@ReasoningParserManager.register_module("minimax_m2_append_think")
class MiniMaxM2AppendThinkReasoningParser(ReasoningParser):
    """
    MiniMax M2 parser variant registered as "minimax_m2_append_think".

    It never separates reasoning from content. The model output is returned
    as regular content with a literal "<think>" prepended, and the reasoning
    part is always reported as absent.

    NOTE(review): presumably the opening "<think>" is already part of the
    prompt and therefore missing from the generated text -- confirm against
    the model's chat template.
    """

    def __init__(self, tokenizer: AnyTokenizer, *args, **kwargs):
        """Initialise the base parser and look up the "</think>" token id.

        ``end_token_id`` is ``None`` when the vocabulary has no such entry.
        """
        super().__init__(tokenizer, *args, **kwargs)
        self.end_token_id = self.vocab.get("</think>")

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """Return True if any id in ``input_ids`` equals ``end_token_id``."""
        marker = self.end_token_id
        # Walk from the tail so a recently emitted marker is found first.
        for token_id in reversed(input_ids):
            if token_id == marker:
                return True
        return False

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """Return ``input_ids`` unchanged; every token counts as content."""
        return input_ids

    def extract_reasoning_content_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """Wrap the streamed delta as plain content.

        Only the first chunk (no previous token ids) gets "<think>"
        prepended; later chunks are forwarded as they are.
        """
        is_first_chunk = len(previous_token_ids) == 0
        content = "<think>" + delta_text if is_first_chunk else delta_text
        return DeltaMessage(content=content)

    def extract_reasoning_content(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
        """Return ``(None, "<think>" + model_output)``.

        The first element (reasoning) is always ``None``; the second is the
        full model output with the opening tag prepended.
        """
        content = "<think>" + model_output
        return None, content
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment