# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from functools import cached_property

from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer

logger = init_logger(__name__)


@ReasoningParserManager.register_module("mistral")
class MistralReasoningParser(DeepSeekR1ReasoningParser):
    """
    Reasoning parser for Mistral models.

    Mistral models use [THINK]...[/THINK] tokens to denote reasoning
    text. This parser extracts the reasoning content from the model output.
    """

    def __init__(self, tokenizer: MistralTokenizer, *args, **kwargs):
        """Set up the parser and resolve the think start/end token ids.

        Args:
            tokenizer: The Mistral tokenizer; its inner ``tokenizer``
                attribute is queried for the control-token ids.
            *args: Forwarded unchanged to ``ReasoningParser.__init__``.
            **kwargs: Forwarded unchanged to ``ReasoningParser.__init__``.

        Raises:
            ValueError: If ``tokenizer`` is not a ``MistralTokenizer``, or
                if ``self.model_tokenizer`` is falsy after base
                initialization.
            RuntimeError: If either the start or the end token id resolves
                to ``None``.
        """
        if not isinstance(tokenizer, MistralTokenizer):
            raise ValueError("The tokenizer must be an instance of MistralTokenizer.")

        # The base ``ReasoningParser`` initializer is invoked explicitly, so
        # ``DeepSeekR1ReasoningParser.__init__`` is bypassed.
        # NOTE(review): presumably because the parent resolves token ids
        # differently than the control-token lookup below -- confirm.
        ReasoningParser.__init__(self, tokenizer, *args, **kwargs)

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ReasoningParser "
                "constructor during construction."
            )

        # Look the ids up as control tokens on the wrapped tokenizer.
        self.start_token_id = tokenizer.tokenizer.get_control_token(self.start_token)
        self.end_token_id = tokenizer.tokenizer.get_control_token(self.end_token)

        if self.start_token_id is None or self.end_token_id is None:
            raise RuntimeError(
                "Mistral reasoning parser could not locate think start/end "
                "tokens in the tokenizer!"
            )

    @cached_property
    def start_token(self) -> str:
        """The token that starts reasoning content."""
        # Imported at call time, so ``mistral_common`` is only loaded on the
        # first access; the result is then cached on the instance.
        from mistral_common.tokens.tokenizers.base import SpecialTokens

        return SpecialTokens.begin_think

    @cached_property
    def end_token(self) -> str:
        """The token that ends reasoning content."""
        # Imported at call time, so ``mistral_common`` is only loaded on the
        # first access; the result is then cached on the instance.
        from mistral_common.tokens.tokenizers.base import SpecialTokens

        return SpecialTokens.end_think