Unverified Commit e1bf04b6 authored by Andrew Xia's avatar Andrew Xia Committed by GitHub
Browse files

[1/N] Initial Implementation of Parser for ResponsesAPI (#32712)


Signed-off-by: Andrew Xia <axia@fb.com>
Co-authored-by: Andrew Xia <axia@fb.com>
parent 02080179
...@@ -36,6 +36,7 @@ class MockHFConfig: ...@@ -36,6 +36,7 @@ class MockHFConfig:
class MockModelConfig: class MockModelConfig:
task = "generate" task = "generate"
runner_type = "generate" runner_type = "generate"
model = MODEL_NAME
tokenizer = MODEL_NAME tokenizer = MODEL_NAME
trust_remote_code = False trust_remote_code = False
tokenizer_mode = "auto" tokenizer_mode = "auto"
......
...@@ -36,6 +36,7 @@ class MockHFConfig: ...@@ -36,6 +36,7 @@ class MockHFConfig:
class MockModelConfig: class MockModelConfig:
task = "generate" task = "generate"
runner_type = "generate" runner_type = "generate"
model = MODEL_NAME
tokenizer = MODEL_NAME tokenizer = MODEL_NAME
trust_remote_code = False trust_remote_code = False
tokenizer_mode = "auto" tokenizer_mode = "auto"
......
...@@ -511,6 +511,7 @@ class MockHFConfig: ...@@ -511,6 +511,7 @@ class MockHFConfig:
class MockModelConfig: class MockModelConfig:
task = "generate" task = "generate"
runner_type = "generate" runner_type = "generate"
model = MODEL_NAME
tokenizer = MODEL_NAME tokenizer = MODEL_NAME
trust_remote_code = False trust_remote_code = False
tokenizer_mode = "auto" tokenizer_mode = "auto"
......
...@@ -71,6 +71,7 @@ from vllm.inputs.data import EmbedsPrompt, TokensPrompt ...@@ -71,6 +71,7 @@ from vllm.inputs.data import EmbedsPrompt, TokensPrompt
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.logprobs import Logprob from vllm.logprobs import Logprob
from vllm.outputs import CompletionOutput, RequestOutput from vllm.outputs import CompletionOutput, RequestOutput
from vllm.parser import ParserManager
from vllm.sampling_params import BeamSearchParams, SamplingParams from vllm.sampling_params import BeamSearchParams, SamplingParams
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tokenizers.mistral import ( from vllm.tokenizers.mistral import (
...@@ -131,13 +132,15 @@ class OpenAIServingChat(OpenAIServing): ...@@ -131,13 +132,15 @@ class OpenAIServingChat(OpenAIServing):
self.logits_processors = self.model_config.logits_processors self.logits_processors = self.model_config.logits_processors
# set up reasoning parser # set up reasoning parser
self.reasoning_parser = self._get_reasoning_parser( self.reasoning_parser = ParserManager.get_reasoning_parser(
reasoning_parser_name=reasoning_parser reasoning_parser_name=reasoning_parser
) )
# set up tool use # set up tool use
self.enable_auto_tools: bool = enable_auto_tools self.enable_auto_tools: bool = enable_auto_tools
self.tool_parser = self._get_tool_parser( self.tool_parser = ParserManager.get_tool_parser(
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools tool_parser_name=tool_parser,
enable_auto_tools=enable_auto_tools,
model_name=self.model_config.model,
) )
self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none
......
...@@ -107,11 +107,10 @@ from vllm.lora.request import LoRARequest ...@@ -107,11 +107,10 @@ from vllm.lora.request import LoRARequest
from vllm.multimodal import MultiModalDataDict from vllm.multimodal import MultiModalDataDict
from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs from vllm.renderers import ChatParams, TokenizeParams, merge_kwargs
from vllm.sampling_params import BeamSearchParams, SamplingParams from vllm.sampling_params import BeamSearchParams, SamplingParams
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager from vllm.tool_parsers import ToolParser
from vllm.tracing import ( from vllm.tracing import (
contains_trace_headers, contains_trace_headers,
extract_trace_headers, extract_trace_headers,
...@@ -246,46 +245,6 @@ class OpenAIServing: ...@@ -246,46 +245,6 @@ class OpenAIServing:
self.model_config = self.models.model_config self.model_config = self.models.model_config
self.max_model_len = self.model_config.max_model_len self.max_model_len = self.model_config.max_model_len
def _get_tool_parser(
self, tool_parser_name: str | None = None, enable_auto_tools: bool = False
) -> Callable[[TokenizerLike], ToolParser] | None:
"""Get the tool parser based on the name."""
parser = None
if not enable_auto_tools or tool_parser_name is None:
return parser
logger.info('"auto" tool choice has been enabled.')
try:
if tool_parser_name == "pythonic" and self.model_config.model.startswith(
"meta-llama/Llama-3.2"
):
logger.warning(
"Llama3.2 models may struggle to emit valid pythonic tool calls"
)
parser = ToolParserManager.get_tool_parser(tool_parser_name)
except Exception as e:
raise TypeError(
"Error: --enable-auto-tool-choice requires "
f"tool_parser:'{tool_parser_name}' which has not "
"been registered"
) from e
return parser
def _get_reasoning_parser(
self,
reasoning_parser_name: str,
) -> Callable[[TokenizerLike], ReasoningParser] | None:
"""Get the reasoning parser based on the name."""
parser = None
if not reasoning_parser_name:
return None
try:
parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
assert parser is not None
except Exception as e:
raise TypeError(f"{reasoning_parser_name=} has not been registered") from e
return parser
async def beam_search( async def beam_search(
self, self,
prompt: PromptType, prompt: PromptType,
......
...@@ -123,6 +123,7 @@ from vllm.logger import init_logger ...@@ -123,6 +123,7 @@ from vllm.logger import init_logger
from vllm.logprobs import Logprob as SampleLogprob from vllm.logprobs import Logprob as SampleLogprob
from vllm.logprobs import SampleLogprobs from vllm.logprobs import SampleLogprobs
from vllm.outputs import CompletionOutput from vllm.outputs import CompletionOutput
from vllm.parser import ParserManager
from vllm.sampling_params import SamplingParams, StructuredOutputsParams from vllm.sampling_params import SamplingParams, StructuredOutputsParams
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.utils import random_uuid from vllm.utils import random_uuid
...@@ -217,8 +218,13 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -217,8 +218,13 @@ class OpenAIServingResponses(OpenAIServing):
self.chat_template_content_format: Final = chat_template_content_format self.chat_template_content_format: Final = chat_template_content_format
self.enable_log_outputs = enable_log_outputs self.enable_log_outputs = enable_log_outputs
self.reasoning_parser = self._get_reasoning_parser( # Set up the unified parser - either a unified parser or fall back to
reasoning_parser_name=reasoning_parser # separate parsers accessed through the parser interface
self.parser = ParserManager.get_parser(
tool_parser_name=tool_parser,
reasoning_parser_name=reasoning_parser,
enable_auto_tools=enable_auto_tools,
model_name=self.model_config.model,
) )
self.enable_prompt_tokens_details = enable_prompt_tokens_details self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.enable_force_include_usage = enable_force_include_usage self.enable_force_include_usage = enable_force_include_usage
...@@ -263,10 +269,6 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -263,10 +269,6 @@ class OpenAIServingResponses(OpenAIServing):
self.tool_call_id_type = "random" self.tool_call_id_type = "random"
self.enable_auto_tools = enable_auto_tools self.enable_auto_tools = enable_auto_tools
# set up tool use
self.tool_parser = self._get_tool_parser(
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools
)
# HACK(woosuk): This is a hack. We should use a better store. # HACK(woosuk): This is a hack. We should use a better store.
# FIXME: If enable_store=True, this may cause a memory leak since we # FIXME: If enable_store=True, this may cause a memory leak since we
# never remove responses from the store. # never remove responses from the store.
...@@ -469,9 +471,13 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -469,9 +471,13 @@ class OpenAIServingResponses(OpenAIServing):
context = ParsableContext( context = ParsableContext(
response_messages=messages, response_messages=messages,
tokenizer=tokenizer, tokenizer=tokenizer,
reasoning_parser_cls=self.reasoning_parser, reasoning_parser_cls=self.parser.reasoning_parser_cls
if self.parser
else None,
request=request, request=request,
tool_parser_cls=self.tool_parser, tool_parser_cls=self.parser.tool_parser_cls
if self.parser
else None,
available_tools=available_tools, available_tools=available_tools,
chat_template=self.chat_template, chat_template=self.chat_template,
chat_template_content_format=self.chat_template_content_format, chat_template_content_format=self.chat_template_content_format,
...@@ -479,8 +485,8 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -479,8 +485,8 @@ class OpenAIServingResponses(OpenAIServing):
else: else:
context = SimpleContext() context = SimpleContext()
if self.reasoning_parser is not None: if self.parser and self.parser.reasoning_parser_cls is not None:
reasoning_parser = self.reasoning_parser(tokenizer) reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
if ( if (
isinstance( isinstance(
struct_out := sampling_params.structured_outputs, struct_out := sampling_params.structured_outputs,
...@@ -617,7 +623,7 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -617,7 +623,7 @@ class OpenAIServingResponses(OpenAIServing):
default_template_content_format=self.chat_template_content_format, default_template_content_format=self.chat_template_content_format,
default_template_kwargs=None, default_template_kwargs=None,
tool_dicts=tool_dicts, tool_dicts=tool_dicts,
tool_parser=self.tool_parser, tool_parser=self.parser.tool_parser_cls if self.parser else None,
) )
return messages, engine_prompts return messages, engine_prompts
...@@ -909,9 +915,9 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -909,9 +915,9 @@ class OpenAIServingResponses(OpenAIServing):
final_output: CompletionOutput, final_output: CompletionOutput,
tokenizer: TokenizerLike, tokenizer: TokenizerLike,
) -> list[ResponseOutputItem]: ) -> list[ResponseOutputItem]:
if self.reasoning_parser: if self.parser and self.parser.reasoning_parser_cls:
try: try:
reasoning_parser = self.reasoning_parser(tokenizer) reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
except RuntimeError as e: except RuntimeError as e:
logger.exception("Error in reasoning parser creation.") logger.exception("Error in reasoning parser creation.")
raise e raise e
...@@ -958,7 +964,7 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -958,7 +964,7 @@ class OpenAIServingResponses(OpenAIServing):
tokenizer=tokenizer, tokenizer=tokenizer,
content=content, content=content,
enable_auto_tools=self.enable_auto_tools, enable_auto_tools=self.enable_auto_tools,
tool_parser_cls=self.tool_parser, tool_parser_cls=self.parser.tool_parser_cls if self.parser else None,
) )
if content or (self.use_harmony and tool_calls): if content or (self.use_harmony and tool_calls):
...@@ -1339,8 +1345,8 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -1339,8 +1345,8 @@ class OpenAIServingResponses(OpenAIServing):
current_output_index = 0 current_output_index = 0
current_item_id = "" current_item_id = ""
reasoning_parser = None reasoning_parser = None
if self.reasoning_parser: if self.parser and self.parser.reasoning_parser_cls:
reasoning_parser = self.reasoning_parser(tokenizer) reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
previous_text = "" previous_text = ""
previous_token_ids: list[int] = [] previous_token_ids: list[int] = []
first_delta_sent = False first_delta_sent = False
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.parser.abstract_parser import (
DelegatingParser,
Parser,
_WrappedParser,
)
from vllm.parser.parser_manager import ParserManager
__all__ = [
    "Parser",
    "DelegatingParser",
    "ParserManager",
    "_WrappedParser",
]

# Table of built-in unified parsers. This is the single source of truth for
# lazy registration: add new parsers here rather than calling
# ParserManager.register_lazy_module by hand.
_PARSERS_TO_REGISTER = {
    "minimax_m2": (  # name
        "minimax_m2_parser",  # filename
        "MiniMaxM2Parser",  # class_name
    ),
}


def register_lazy_parsers():
    """
    Register every entry of `_PARSERS_TO_REGISTER` with `ParserManager`.

    Each entry maps a parser name to a `(filename, class_name)` pair; the
    filename is resolved relative to the `vllm.parser` package. Only the
    name -> (module_path, class_name) mapping is recorded here; the parser
    module itself is not imported by this function.
    """
    for name, (file_name, class_name) in _PARSERS_TO_REGISTER.items():
        module_path = f"vllm.parser.{file_name}"
        ParserManager.register_lazy_module(name, module_path, class_name)


# Register lazy parsers once, at package import time.
register_lazy_parsers()
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import abstractmethod
from collections.abc import Sequence
from functools import cached_property
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ExtractedToolCallInformation,
)
from vllm.entrypoints.openai.responses.protocol import (
ResponsesRequest,
)
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
class Parser:
    """
    Common interface that merges ReasoningParser and ToolParser behaviour
    so callers can parse model output through a single object.

    Two kinds of extraction are covered:
    - reasoning extraction (e.g., chain-of-thought content in <think> tags)
    - tool call extraction (e.g., function calls in XML/JSON format)

    A subclass has two options:
    1. Implement the methods marked abstract with its own parsing logic.
    2. Populate the `reasoning_parser` / `tool_parser` properties and
       forward to those existing parser implementations.

    NOTE: this class does not derive from `abc.ABC`, so the
    `@abstractmethod` markers are not enforced at instantiation time. A
    method left un-overridden has a docstring-only body and returns None.

    Class Attributes:
        reasoning_parser_cls: The ReasoningParser class (not an instance),
            kept for code that needs to instantiate the parser itself.
        tool_parser_cls: The ToolParser class (not an instance), kept for
            code that needs to instantiate the parser itself.
    """

    # Parser classes exposed at class level; subclasses that rely on
    # specific parser classes are expected to override these.
    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
        """
        Store the tokenizer and start with no underlying parsers.

        Args:
            tokenizer: The model's tokenizer, needed for token-based
                parsing operations.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.
        """
        self._tool_parser: ToolParser | None = None
        self._reasoning_parser: ReasoningParser | None = None
        self.model_tokenizer = tokenizer

    @cached_property
    def vocab(self) -> dict[str, int]:
        """Token -> ID mapping obtained from the tokenizer (computed once)."""
        tokenizer = self.model_tokenizer
        return tokenizer.get_vocab()

    @property
    def reasoning_parser(self) -> ReasoningParser | None:
        """Reasoning parser instance backing this parser, or None."""
        return self._reasoning_parser

    @reasoning_parser.setter
    def reasoning_parser(self, parser: ReasoningParser | None) -> None:
        self._reasoning_parser = parser

    @property
    def tool_parser(self) -> ToolParser | None:
        """Tool parser instance backing this parser, or None."""
        return self._tool_parser

    @tool_parser.setter
    def tool_parser(self, parser: ToolParser | None) -> None:
        self._tool_parser = parser

    # ========== Reasoning Parser Methods ==========

    @abstractmethod
    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """
        Tell whether the reasoning section has finished within input_ids.

        Structured engines such as `xgrammar` use this to detect the end
        of the reasoning content in the model output.

        Args:
            input_ids: Token IDs of the model output.

        Returns:
            True when the reasoning content ends inside input_ids.
        """

    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        """
        Tell whether the reasoning section finished during a decode step.

        The default implementation ignores `delta_ids` and simply reuses
        `is_reasoning_end` on the full output.

        Args:
            input_ids: Token IDs of the whole model output.
            delta_ids: The most recent tokens produced by this decode step.

        Returns:
            True when the reasoning content ends in the delta_ids.
        """
        reasoning_finished = self.is_reasoning_end(input_ids)
        return reasoning_finished

    @abstractmethod
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """
        Return the token IDs that belong to the non-reasoning content
        (e.g., everything that follows the </think> tag).

        Args:
            input_ids: Token IDs of the model output.

        Returns:
            Token IDs of the extracted content.
        """

    @abstractmethod
    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """
        Split a complete model output into reasoning and response parts.

        Intended for non-streaming responses, where the full model output
        is available before anything is sent to the client.

        Args:
            model_output: The full string generated by the model.
            request: The request that produced this output.

        Returns:
            A `(reasoning_content, response_content)` tuple.
        """

    @abstractmethod
    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extract reasoning content from one streaming delta.

        Args:
            previous_text: Text decoded from all earlier tokens.
            current_text: Text decoded so far, including this delta.
            delta_text: The text added by this delta.
            previous_token_ids: Token IDs generated before this delta.
            current_token_ids: All token IDs, including this delta.
            delta_token_ids: The token IDs added by this delta.

        Returns:
            A DeltaMessage carrying reasoning and/or content fields, or None.
        """

    # ========== Tool Parser Methods ==========

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """
        Hook for tweaking request parameters for tool calling.

        The base implementation hands the request back untouched;
        subclasses may override it (e.g., to set structured output schemas
        for tool calling).

        Args:
            request: The incoming request.

        Returns:
            The (possibly adjusted) request.
        """
        return request

    @abstractmethod
    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model output.

        Intended for non-streaming responses.

        Args:
            model_output: The full string generated by the model.
            request: The request that produced this output.

        Returns:
            ExtractedToolCallInformation describing the tool calls.
        """

    @abstractmethod
    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Extract tool calls from one streaming delta.

        Args:
            previous_text: Text decoded from all earlier tokens.
            current_text: Text decoded so far, including this delta.
            delta_text: The text added by this delta.
            previous_token_ids: Token IDs generated before this delta.
            current_token_ids: All token IDs, including this delta.
            delta_token_ids: The token IDs added by this delta.
            request: The request being served.

        Returns:
            A DeltaMessage with its tool_calls field set, or None.
        """
class DelegatingParser(Parser):
    """
    A Parser implementation that delegates to separate ReasoningParser and
    ToolParser instances.

    This is the recommended base class for creating model-specific parsers
    that combine existing reasoning and tool parser implementations.
    Subclasses should set `self._reasoning_parser` and `self._tool_parser`
    in their `__init__` method.

    If either parser is None, the corresponding methods will return default
    values (no reasoning extraction, no tool calls).

    Every reasoning method that `Parser` declares with a docstring-only
    body is overridden here, so none of them falls through to a stub that
    returns None. `is_reasoning_end_streaming` is inherited from `Parser`
    and therefore routes through `is_reasoning_end` below.
    """

    # ========== Reasoning Parser Methods ==========

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """
        Check whether reasoning has ended, via the reasoning parser.

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            The reasoning parser's answer, or True when no reasoning
            parser is configured (there is no reasoning section to wait
            for, so the output is all content).
        """
        if self._reasoning_parser is None:
            return True
        return self._reasoning_parser.is_reasoning_end(input_ids)

    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """
        Extract the non-reasoning token IDs, via the reasoning parser.

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            The reasoning parser's result, or `input_ids` unchanged when
            no reasoning parser is configured.
        """
        if self._reasoning_parser is None:
            return input_ids
        return self._reasoning_parser.extract_content_ids(input_ids)

    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """
        Split a complete output into (reasoning, content).

        Without a reasoning parser the whole output is returned as content
        and the reasoning part is None.
        """
        if self._reasoning_parser is None:
            return None, model_output
        return self._reasoning_parser.extract_reasoning(model_output, request)

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extract reasoning from a streaming delta.

        Without a reasoning parser the delta text is passed through as
        plain content.
        """
        if self._reasoning_parser is None:
            return DeltaMessage(content=delta_text)
        return self._reasoning_parser.extract_reasoning_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
        )

    # ========== Tool Parser Methods ==========

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete output.

        Without a tool parser the result reports no tool calls and carries
        the whole output as content.
        """
        if self._tool_parser is None:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )
        return self._tool_parser.extract_tool_calls(model_output, request)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Extract tool calls from a streaming delta.

        Returns None when no tool parser is configured.
        """
        if self._tool_parser is None:
            return None
        return self._tool_parser.extract_tool_calls_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
            request,
        )
class _WrappedParser(DelegatingParser):
    """
    DelegatingParser whose underlying parsers are built from class attributes.

    It exists so that a parser wrapping arbitrary ReasoningParser and
    ToolParser classes can be assembled dynamically. Whatever
    `reasoning_parser_cls` and `tool_parser_cls` resolve to on the
    instance's class at construction time is instantiated with the
    tokenizer; an attribute left as None leaves that parser unset.

    Usage:
        _WrappedParser.reasoning_parser_cls = MyReasoningParser
        _WrappedParser.tool_parser_cls = MyToolParser
        parser = _WrappedParser(tokenizer)
    """

    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike):
        super().__init__(tokenizer)
        # Resolve the parser classes on the concrete class, then build
        # whichever ones are configured.
        owner = type(self)
        reasoning_cls = owner.reasoning_parser_cls
        if reasoning_cls is not None:
            self._reasoning_parser = reasoning_cls(tokenizer)
        tool_cls = owner.tool_parser_cls
        if tool_cls is not None:
            self._tool_parser = tool_cls(tokenizer)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
MiniMax M2 Parser - A unified parser for MiniMax M2 models.
This parser combines the existing MiniMaxM2ReasoningParser and
MinimaxM2ToolParser into a single unified interface by delegating
to those implementations.
"""
from vllm.logger import init_logger
from vllm.parser.abstract_parser import DelegatingParser
from vllm.reasoning.minimax_m2_reasoning_parser import MiniMaxM2ReasoningParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.minimax_m2_tool_parser import MinimaxM2ToolParser
logger = init_logger(__name__)
class MiniMaxM2Parser(DelegatingParser):
    """
    Single entry point for parsing MiniMax M2 output: reasoning extraction
    plus tool call parsing.

    No parsing logic lives here; the work is forwarded to:
    - MiniMaxM2ReasoningParser (reasoning extraction)
    - MinimaxM2ToolParser (tool call parsing)

    Two MiniMax M2 behaviours those parsers deal with:
    1. Reasoning: the model emits only the </think> end token, never the
       <think> start token. Everything before </think> is reasoning and
       everything after it is the actual response.
    2. Tool Calls: the model wraps calls in
       <minimax:tool_call>...</minimax:tool_call>, using
       <invoke name="...">...</invoke> and
       <parameter name="...">...</parameter> inside.
    """

    # Parser classes exposed at class level for code that needs the class
    # rather than an instance.
    reasoning_parser_cls = MiniMaxM2ReasoningParser
    tool_parser_cls = MinimaxM2ToolParser

    def __init__(self, tokenizer: TokenizerLike):
        super().__init__(tokenizer)

        # Build both delegates up front with the shared tokenizer.
        reasoning_delegate = MiniMaxM2ReasoningParser(tokenizer)
        tool_delegate = MinimaxM2ToolParser(tokenizer)
        self._reasoning_parser = reasoning_delegate
        self._tool_parser = tool_delegate

        parser_name = type(self).__name__
        logger.debug("vLLM Successfully initialized parser %s!", parser_name)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations
import importlib
import os
from collections.abc import Callable
from typing import TYPE_CHECKING
from vllm.logger import init_logger
from vllm.utils.collection_utils import is_list_of
from vllm.utils.import_utils import import_from_path
if TYPE_CHECKING:
from vllm.parser.abstract_parser import Parser
from vllm.reasoning import ReasoningParser
from vllm.tool_parsers import ToolParser
logger = init_logger(__name__)
class ParserManager:
    """
    Central registry for Parser implementations.

    Supports two registration modes:
    - Eager registration via `register_module`
    - Lazy registration via `register_lazy_module`

    Both registries are class-level dicts, i.e. shared process-wide state.
    """

    # name -> already imported Parser class
    parsers: dict[str, type[Parser]] = {}
    lazy_parsers: dict[str, tuple[str, str]] = {}  # name -> (module_path, class_name)

    @classmethod
    def get_parser_internal(cls, name: str) -> type[Parser]:
        """
        Retrieve a registered or lazily registered Parser class.

        Eagerly registered parsers take precedence over lazy ones.

        Args:
            name: The registered name of the parser.

        Returns:
            The Parser class.

        Raises:
            KeyError: If no parser is found under the given name.
        """
        if name in cls.parsers:
            return cls.parsers[name]

        if name in cls.lazy_parsers:
            return cls._load_lazy_parser(name)

        registered = ", ".join(cls.list_registered())
        raise KeyError(f"Parser '{name}' not found. Available parsers: {registered}")

    @classmethod
    def _load_lazy_parser(cls, name: str) -> type[Parser]:
        """
        Import and register a lazily loaded parser.

        On success the class is cached in `parsers` so later lookups skip
        the import. Any failure (import error, missing attribute, wrong
        base class) is logged and re-raised unchanged.
        """
        from vllm.parser.abstract_parser import Parser

        module_path, class_name = cls.lazy_parsers[name]
        try:
            mod = importlib.import_module(module_path)
            parser_cls = getattr(mod, class_name)
            if not issubclass(parser_cls, Parser):
                raise TypeError(
                    f"{class_name} in {module_path} is not a Parser subclass."
                )
            cls.parsers[name] = parser_cls  # cache
            return parser_cls
        except Exception as e:
            logger.exception(
                "Failed to import lazy parser '%s' from %s: %s",
                name,
                module_path,
                e,
            )
            raise

    @classmethod
    def _register_module(
        cls,
        module: type[Parser],
        module_name: str | list[str] | None = None,
        force: bool = True,
    ) -> None:
        """
        Register a Parser class immediately.

        Args:
            module: The Parser subclass to register.
            module_name: One name, several names, or None to use the
                class's own `__name__`.
            force: When True (the default) an existing registration under
                the same name is overwritten; when False it is an error.

        Raises:
            TypeError: If `module` is not a Parser subclass or
                `module_name` has an unsupported type.
            KeyError: If `force` is False and a name is already taken.
        """
        from vllm.parser.abstract_parser import Parser

        if not issubclass(module, Parser):
            raise TypeError(
                f"module must be subclass of Parser, but got {type(module)}"
            )

        if module_name is None:
            module_names = [module.__name__]
        elif isinstance(module_name, str):
            module_names = [module_name]
        elif is_list_of(module_name, str):
            module_names = module_name
        else:
            raise TypeError("module_name must be str, list[str], or None.")

        for name in module_names:
            if not force and name in cls.parsers:
                existed = cls.parsers[name]
                raise KeyError(f"{name} is already registered at {existed.__module__}")
            cls.parsers[name] = module

    @classmethod
    def register_lazy_module(cls, name: str, module_path: str, class_name: str) -> None:
        """
        Register a lazy module mapping for delayed import.

        Only the mapping is stored; nothing is imported or validated until
        the parser is looked up. An existing lazy entry under the same
        name is replaced.

        Example:
            ParserManager.register_lazy_module(
                name="minimax_m2",
                module_path="vllm.parser.minimax_m2_parser",
                class_name="MiniMaxM2Parser",
            )
        """
        cls.lazy_parsers[name] = (module_path, class_name)

    @classmethod
    def register_module(
        cls,
        name: str | list[str] | None = None,
        force: bool = True,
        module: type[Parser] | None = None,
    ) -> type[Parser] | Callable[[type[Parser]], type[Parser]]:
        """
        Register a Parser class.

        Can be used as a decorator or called directly. With `module` given
        the class is registered eagerly (honouring `force`). In decorator
        form only a lazy name -> (module, class name) mapping is recorded,
        and `force` is not consulted on that path.

        Usage:
            @ParserManager.register_module("my_parser")
            class MyParser(Parser):
                ...

        Or:
            ParserManager.register_module(module=MyParser)

        Raises:
            TypeError: If `force` is not a bool.
        """
        if not isinstance(force, bool):
            raise TypeError(f"force must be a boolean, but got {type(force)}")

        # Immediate registration
        if module is not None:
            cls._register_module(module=module, module_name=name, force=force)
            return module

        # Decorator usage
        def _decorator(obj: type[Parser]) -> type[Parser]:
            module_path = obj.__module__
            class_name = obj.__name__

            if isinstance(name, str):
                names = [name]
            elif is_list_of(name, str):
                names = name
            else:
                names = [class_name]

            for n in names:
                cls.lazy_parsers[n] = (module_path, class_name)

            return obj

        return _decorator

    @classmethod
    def list_registered(cls) -> list[str]:
        """Return the sorted names of all eager and lazy parsers."""
        return sorted(set(cls.parsers.keys()) | set(cls.lazy_parsers.keys()))

    @classmethod
    def import_parser(cls, plugin_path: str) -> None:
        """
        Import a user-defined parser from an arbitrary path.

        The module name is the file's base name without its extension.
        This is best effort: a failing import is logged and not re-raised.
        """
        module_name = os.path.splitext(os.path.basename(plugin_path))[0]
        try:
            import_from_path(module_name, plugin_path)
        except Exception:
            logger.exception(
                "Failed to load module '%s' from %s.", module_name, plugin_path
            )

    @classmethod
    def get_tool_parser(
        cls,
        tool_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[ToolParser] | None:
        """
        Get the tool parser based on the name.

        Args:
            tool_parser_name: Registered name of the tool parser.
            enable_auto_tools: Whether auto tool choice is enabled.
            model_name: Model name, used only to emit a warning for a
                known-problematic model/parser combination.

        Returns:
            The ToolParser class, or None when auto tool choice is
            disabled or no name was given.

        Raises:
            TypeError: If the named tool parser cannot be resolved.
        """
        # Cheap guard first: nothing to resolve, so skip the import.
        if not enable_auto_tools or tool_parser_name is None:
            return None

        from vllm.tool_parsers import ToolParserManager

        logger.info('"auto" tool choice has been enabled.')

        parser: type[ToolParser] | None = None
        try:
            if (
                tool_parser_name == "pythonic"
                and model_name
                and model_name.startswith("meta-llama/Llama-3.2")
            ):
                logger.warning(
                    "Llama3.2 models may struggle to emit valid pythonic tool calls"
                )
            parser = ToolParserManager.get_tool_parser(tool_parser_name)
        except Exception as e:
            raise TypeError(
                "Error: --enable-auto-tool-choice requires "
                f"tool_parser:'{tool_parser_name}' which has not "
                "been registered"
            ) from e
        return parser

    @classmethod
    def get_reasoning_parser(
        cls,
        reasoning_parser_name: str | None,
    ) -> type[ReasoningParser] | None:
        """
        Get the reasoning parser based on the name.

        Args:
            reasoning_parser_name: Registered name of the reasoning parser.

        Returns:
            The ReasoningParser class, or None when no name was given.

        Raises:
            TypeError: If the named reasoning parser cannot be resolved.
        """
        # Cheap guard first: nothing to resolve, so skip the import.
        if not reasoning_parser_name:
            return None

        from vllm.reasoning import ReasoningParserManager

        try:
            parser = ReasoningParserManager.get_reasoning_parser(reasoning_parser_name)
        except Exception as e:
            raise TypeError(f"{reasoning_parser_name=} has not been registered") from e

        # Explicit check instead of `assert`, which is stripped under -O.
        if parser is None:
            raise TypeError(f"{reasoning_parser_name=} has not been registered")
        return parser

    @classmethod
    def get_parser(
        cls,
        tool_parser_name: str | None = None,
        reasoning_parser_name: str | None = None,
        enable_auto_tools: bool = False,
        model_name: str | None = None,
    ) -> type[Parser] | None:
        """
        Get a unified Parser that handles both reasoning and tool parsing.

        Resolution order:
        1. Both names are given and equal: use the unified parser
           registered under that name, if there is one.
        2. Exactly one name is given: use the unified parser registered
           under that name, if there is one.
        3. Otherwise resolve the reasoning and tool parsers individually
           and wrap them in a fresh `_WrappedParser` subclass.

        A unified parser is never chosen when two different names are
        given, because that would silently replace one of the explicitly
        requested parsers with the unified parser's own.

        NOTE(review): a unified parser picked in steps 1-2 is returned
        without consulting `enable_auto_tools` -- confirm that is intended.

        Args:
            tool_parser_name: The name of the tool parser.
            reasoning_parser_name: The name of the reasoning parser.
            enable_auto_tools: Whether auto tool choice is enabled.
            model_name: The model name for parser-specific warnings.

        Returns:
            A Parser class, or None if neither parser is specified or
            neither resolves to a parser class.

        Raises:
            TypeError: If an individual parser name cannot be resolved.
        """
        if not tool_parser_name and not reasoning_parser_name:
            return None

        # Strategy 1: If both names match, check for a unified parser with that name
        if tool_parser_name and tool_parser_name == reasoning_parser_name:
            try:
                parser = cls.get_parser_internal(tool_parser_name)
            except KeyError:
                pass  # No unified parser with this name
            else:
                logger.info(
                    "Using unified parser '%s' for both reasoning and tool parsing.",
                    tool_parser_name,
                )
                return parser

        # Strategy 2: Only one name was given - check for a unified parser
        # under it. Skipped when both names are set: equal names were
        # already tried above, and different names must both be honoured.
        sole_name = tool_parser_name or reasoning_parser_name
        if sole_name and not (tool_parser_name and reasoning_parser_name):
            try:
                parser = cls.get_parser_internal(sole_name)
            except KeyError:
                pass  # No unified parser with this name
            else:
                logger.info(
                    "Using unified parser '%s' for reasoning and tool parsing.",
                    sole_name,
                )
                return parser

        # Strategy 3: Create a DelegatingParser with the individual parser classes
        reasoning_parser_cls = cls.get_reasoning_parser(reasoning_parser_name)
        tool_parser_cls = cls.get_tool_parser(
            tool_parser_name, enable_auto_tools, model_name
        )

        if reasoning_parser_cls is None and tool_parser_cls is None:
            return None

        from vllm.parser.abstract_parser import _WrappedParser

        # Build a fresh subclass per call instead of mutating the shared
        # _WrappedParser: class attributes are global state, so assigning
        # them in place would rewire every class returned by earlier calls.
        return type(
            _WrappedParser.__name__,
            (_WrappedParser,),
            {
                "__module__": _WrappedParser.__module__,
                "__doc__": _WrappedParser.__doc__,
                "reasoning_parser_cls": reasoning_parser_cls,
                "tool_parser_cls": tool_parser_cls,
            },
        )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment