# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from functools import cached_property

from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer

# Logger for this module, created from the module's __name__.
logger = init_logger(__name__)


class MistralReasoningParser(DeepSeekR1ReasoningParser):
    """
    Reasoning parser for Mistral models.

    Mistral models use [THINK]...[/THINK] tokens to denote reasoning
    text. This parser extracts the reasoning content from the model output.
    """

    def __init__(self, tokenizer: MistralTokenizer, *args, **kwargs):
        """Set up the parser and resolve the think start/end token ids.

        Args:
            tokenizer: The tokenizer of the served model; must be a
                ``MistralTokenizer``.
            *args: Forwarded unchanged to ``ReasoningParser.__init__``.
            **kwargs: Forwarded unchanged to ``ReasoningParser.__init__``.

        Raises:
            ValueError: If ``tokenizer`` is not a ``MistralTokenizer``, or if
                ``self.model_tokenizer`` is falsy after base initialization.
            RuntimeError: If the start or end think token id resolves to
                ``None``.
        """
        if not isinstance(tokenizer, MistralTokenizer):
            raise ValueError("The tokenizer must be an instance of MistralTokenizer.")

        # The base ReasoningParser initializer is invoked explicitly, so
        # DeepSeekR1ReasoningParser.__init__ is not run.
        # NOTE(review): presumably because the parent resolves the think
        # tokens differently from Mistral tokenizers -- confirm.
        ReasoningParser.__init__(self, tokenizer, *args, **kwargs)

        if not self.model_tokenizer:
            raise ValueError(
                "The model tokenizer must be passed to the ReasoningParser "
                "constructor during construction."
            )

        # Look the think markers up as control tokens on the wrapped
        # tokenizer object exposed by MistralTokenizer.
        self.start_token_id = tokenizer.tokenizer.get_control_token(self.start_token)
        self.end_token_id = tokenizer.tokenizer.get_control_token(self.end_token)

        if self.start_token_id is None or self.end_token_id is None:
            raise RuntimeError(
                "Mistral reasoning parser could not locate think start/end "
                "tokens in the tokenizer!"
            )

    @cached_property
    def start_token(self) -> str:
        """The token that starts reasoning content."""
        # Imported inside the property rather than at module level, so
        # mistral_common is only imported on first access.
        from mistral_common.tokens.tokenizers.base import SpecialTokens

        return SpecialTokens.begin_think

    @cached_property
    def end_token(self) -> str:
        """The token that ends reasoning content."""
        # Imported inside the property rather than at module level, so
        # mistral_common is only imported on first access.
        from mistral_common.tokens.tokenizers.base import SpecialTokens

        return SpecialTokens.end_think