"vllm/tokenizers/deepseek_v32_encoding.py" did not exist on "f5d3d93c40417c296c20dc301100e55708a17f3f"
deepseek_r1_reasoning_parser.py 2.5 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from collections.abc import Sequence
5
from typing import Union
6

7
8
9
from vllm.entrypoints.openai.protocol import DeltaMessage
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
10
11
12


@ReasoningParserManager.register_module("deepseek_r1")
class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
    """
    Reasoning parser for DeepSeek R1 model.

    The DeepSeek R1 model uses <think>...</think> tokens to denote reasoning
    text. This parser extracts the reasoning content from the model output.

    The streaming override below additionally handles output in which the
    opening ``<think>`` token never appears: text is then classified using
    the closing ``</think>`` token alone.
    """

    @property
    def start_token(self) -> str:
        """The token that starts reasoning content."""
        return "<think>"

    @property
    def end_token(self) -> str:
        """The token that ends reasoning content."""
        return "</think>"

    def extract_reasoning_content_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> Union[DeltaMessage, None]:
        """Classify one streamed delta as reasoning text, content, or both.

        The base-class implementation is consulted first and its result is
        returned unchanged unless both of the following hold:

        * the result is not ``None``, and
        * the start-token id occurs in neither ``previous_token_ids`` nor
          ``delta_token_ids``.

        In that case the delta is re-classified from the end token alone:

        * end-token id in ``delta_token_ids``: text before the first
          ``end_token`` is reasoning content, text after it is content
          (``None`` when empty);
        * end-token id already in ``previous_token_ids``: the whole delta
          is content;
        * otherwise: the whole delta is reasoning content.

        Args:
            previous_text: Text generated before this delta.
            current_text: Text generated so far, including this delta.
            delta_text: Newly generated text.
            previous_token_ids: Token ids generated before this delta.
            current_token_ids: Token ids generated so far.
            delta_token_ids: Newly generated token ids.

        Returns:
            A ``DeltaMessage`` describing the delta, or ``None`` when the
            base-class implementation returns ``None``.
        """
        # NOTE(review): start_token_id / end_token_id are not defined in this
        # class; presumably BaseThinkingReasoningParser provides them.
        ret = super().extract_reasoning_content_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
        )
        if ret is None:
            return None

        if (
            self.start_token_id in previous_token_ids
            or self.start_token_id in delta_token_ids
        ):
            # A start token has been seen: the base-class result stands.
            return ret

        if self.end_token_id in delta_token_ids:
            # End token arrives in this delta, possibly with more tokens:
            # split into reasoning content and content around the marker.
            # str.partition is used instead of find() + slicing so that a
            # delta whose text lacks the marker (find() == -1) is kept
            # intact as reasoning content rather than being sliced with a
            # negative index, which dropped/duplicated characters.
            reasoning_content, _, content = delta_text.partition(self.end_token)
            return DeltaMessage(
                reasoning_content=reasoning_content,
                content=content if content else None,
            )

        if self.end_token_id in previous_token_ids:
            # End token already emitted earlier: thinking is over.
            return DeltaMessage(content=delta_text)

        # No end token in previous or delta: reasoning content continues.
        return DeltaMessage(reasoning_content=delta_text)