# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Sequence from typing import TYPE_CHECKING, Any from transformers import PreTrainedTokenizerBase from vllm.entrypoints.openai.engine.protocol import DeltaMessage from vllm.logger import init_logger from vllm.reasoning import ReasoningParser from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser from .identity_reasoning_parser import IdentityReasoningParser if TYPE_CHECKING: from vllm.entrypoints.openai.chat_completion.protocol import ( ChatCompletionRequest, ) else: ChatCompletionRequest = Any logger = init_logger(__name__) class KimiK2ReasoningParser(ReasoningParser): """ Kimi K2 parser that delegates to either DeepSeekR1ReasoningParser or IdentityReasoningParser based on `thinking` and `separate_reasoning`. Unlike DeepSeekV3ReasoningParser which defaults to NOT thinking, KimiK2ReasoningParser defaults to thinking mode (uses DeepSeekR1ReasoningParser). """ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): super().__init__(tokenizer, *args, **kwargs) chat_kwargs = kwargs.pop("chat_template_kwargs", {}) or {} # Key difference: default to True instead of False thinking = bool(chat_kwargs.pop("thinking", True)) if thinking: self._parser = DeepSeekR1ReasoningParser(tokenizer, *args, **kwargs) else: self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs) def is_reasoning_end(self, input_ids: Sequence[int]) -> bool: return self._parser.is_reasoning_end(input_ids) def is_reasoning_end_streaming( self, input_ids: list[int], delta_ids: list[int] ) -> bool: return self._parser.is_reasoning_end_streaming(input_ids, delta_ids) def extract_content_ids(self, input_ids: list[int]) -> list[int]: return self._parser.extract_content_ids(input_ids) def extract_reasoning( self, model_output: str, request: "ChatCompletionRequest" ) -> tuple[str | None, str | None]: return self._parser.extract_reasoning(model_output, request) def extract_reasoning_streaming( self, previous_text: str, current_text: str, delta_text: str, previous_token_ids: Sequence[int], current_token_ids: Sequence[int], delta_token_ids: Sequence[int], ) -> DeltaMessage | None: return self._parser.extract_reasoning_streaming( previous_text, current_text, delta_text, previous_token_ids, current_token_ids, delta_token_ids, )