# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from typing import Optional, Union

from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                              ResponsesRequest)
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser


@ReasoningParserManager.register_module("qwen3")
class Qwen3ReasoningParser(BaseThinkingReasoningParser):
    """
    Reasoning parser for the Qwen3 model.

    The Qwen3 model uses <think>...</think> tokens to denote reasoning text
    within its output. The model provides a strict switch to disable reasoning
    output via the 'enable_thinking=False' parameter. This parser extracts the
    reasoning content enclosed by <think> and </think> tokens from the model's
    output.
    """

    @property
    def start_token(self) -> str:
        """The token that starts reasoning content."""
        return "<think>"

    @property
    def end_token(self) -> str:
        """The token that ends reasoning content."""
        return "</think>"

    def extract_reasoning_content(
        self, model_output: str, request: Union[ChatCompletionRequest,
                                                ResponsesRequest]
    ) -> tuple[Optional[str], Optional[str]]:
        """
        Split a complete model output into reasoning content and content.

        Qwen3 has stricter requirements - it needs both start and end tokens
        to be present, unlike other models that work with just the end token.

        For text <think>abc</think>xyz:
        - 'abc' goes to reasoning_content
        - 'xyz' goes to content

        Any text that precedes the first <think> is discarded. If either
        token is missing, the output is returned unchanged as content with
        no reasoning content.

        Args:
            model_output: The full text generated by the model.
            request: The originating request. Not used by this
                implementation.

        Returns:
            tuple[Optional[str], Optional[str]]: reasoning content and
            content. Reasoning content is None when no complete
            <think>...</think> pair is found; content is None when nothing
            follows </think>.
        """
        # Both delimiters must appear somewhere in the output; otherwise the
        # whole output is plain content.
        if (self.start_token not in model_output
                or self.end_token not in model_output):
            return None, model_output

        # The start token is guaranteed to be present here, so keep only
        # what follows its first occurrence.
        _, _, after_start = model_output.partition(self.start_token)

        # The end token may have appeared only before the start token
        # (e.g. "</think>a<think>b"); in that case there is no closed
        # reasoning span and the remaining text is returned as content.
        if self.end_token not in after_start:
            return None, after_start

        # Everything up to the first end token is reasoning; the rest is the
        # user-visible content (None when empty).
        reasoning_content, _, content = after_start.partition(self.end_token)
        return reasoning_content, content or None