identity_reasoning_parser.py 2.14 KB
Newer Older
1
2
3
4
5
6
7
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from collections.abc import Sequence

from transformers import PreTrainedTokenizerBase

8
9
10
11
from vllm.entrypoints.openai.chat_completion.protocol import (
    ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser

logger = init_logger(__name__)


class IdentityReasoningParser(ReasoningParser):
    """
    Identity reasoning parser.

    This parser does not attempt to parse or strip out reasoning tokens.
    It treats the entire model output as content and ignores reasoning.
    """

    def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
        super().__init__(tokenizer, *args, **kwargs)
        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ReasoningParser "
                "constructor during construction."
            )

    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        # Always return True, since we never treat reasoning specially
        return True

38
39
40
41
42
    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        return True

43
44
45
46
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        # Identity: return all tokens as content
        return input_ids

47
    def extract_reasoning_streaming(
48
49
50
51
52
53
54
55
56
57
58
59
60
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        # Just wrap delta_text as content, ignore reasoning
        if delta_text:
            return DeltaMessage(content=delta_text)
        return None

61
    def extract_reasoning(
62
63
        self, model_output: str, request: ChatCompletionRequest
    ) -> tuple[str | None, str | None]:
64
        # No reasoning separation: return None for reasoning,
65
66
        # and full model_output as content
        return None, model_output