# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Iterable, Sequence from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ) from vllm.entrypoints.openai.engine.protocol import DeltaMessage from vllm.entrypoints.openai.responses.protocol import ResponsesRequest from vllm.logger import init_logger from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser from vllm.tokenizers import TokenizerLike logger = init_logger(__name__) class HYV3ReasoningParser(BaseThinkingReasoningParser): """ HYV3 parser that delegates to either HYV3ReasoningParser or IdentityReasoningParser based on `reasoning_effort`. The HYV3 model uses ... tokens to denote reasoning text. This parser extracts the reasoning content from the model output. """ def __init__(self, tokenizer: TokenizerLike, *args, **kwargs): super().__init__(tokenizer, *args, **kwargs) # First, If there is reasoning_effort in chat_kwargs, # prioritize using chat_kwargs.reasoning_effort. # If it's not present, use the "reasoning_effort" field # at the outer level of the chat message. # Otherwise, If both are empty, assign "no_think". chat_kwargs = kwargs.get("chat_template_kwargs", {}) or {} reasoning_effort = ( chat_kwargs.get("reasoning_effort") or kwargs.get("reasoning_effort") or "no_think" ) logger.debug("reasoning_effort for choosing parser: %s", reasoning_effort) self._identity_parser: IdentityReasoningParser | None if reasoning_effort == "no_think": self._identity_parser = IdentityReasoningParser(tokenizer, *args, **kwargs) else: self._identity_parser = None @property def start_token(self) -> str: """The token that starts reasoning content.""" return "" @property def end_token(self) -> str: """The token that ends reasoning content.""" return "" def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: if self._identity_parser is not None: return self._identity_parser.is_reasoning_end(input_ids) return super().is_reasoning_end(input_ids) def is_reasoning_end_streaming( self, input_ids: Sequence[int], delta_ids: Iterable[int] ) -> bool: if self._identity_parser is not None: return self._identity_parser.is_reasoning_end_streaming( input_ids, delta_ids ) return super().is_reasoning_end_streaming(input_ids, delta_ids) def extract_content_ids(self, input_ids: list[int]) -> list[int]: if self._identity_parser is not None: return self._identity_parser.extract_content_ids(input_ids) return super().extract_content_ids(input_ids) def extract_reasoning( self, model_output: str, request: "ChatCompletionRequest | ResponsesRequest" ) -> tuple[str | None, str | None]: if self._identity_parser is not None: return self._identity_parser.extract_reasoning(model_output, request) return super().extract_reasoning(model_output, request) def extract_reasoning_streaming( self, previous_text: str, current_text: str, delta_text: str, previous_token_ids: Sequence[int], current_token_ids: Sequence[int], delta_token_ids: Sequence[int], ) -> DeltaMessage | None: if self._identity_parser is not None: return self._identity_parser.extract_reasoning_streaming( previous_text, current_text, delta_text, previous_token_ids, current_token_ids, delta_token_ids, ) ret = super().extract_reasoning_streaming( previous_text, current_text, delta_text, previous_token_ids, current_token_ids, delta_token_ids, ) if ( ret is not None and self.start_token_id not in previous_token_ids and self.start_token_id not in delta_token_ids ): if self.end_token_id in delta_token_ids: # end token in delta with more tokens, # extract reasoning content and content end_index = delta_text.find(self.end_token) reasoning = delta_text[:end_index] content = delta_text[end_index + len(self.end_token) :] return DeltaMessage( reasoning=reasoning, content=content if content else None, ) elif self.end_token_id in previous_token_ids: # end token in previous, thinking content ends return DeltaMessage(content=delta_text) else: # no end token in previous or delta, reasoning content continues return DeltaMessage(reasoning=delta_text) return ret