deepseek_v32.py 3.29 KB
Newer Older
1
2
3
4
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any

5
from vllm.config import VllmConfig
6
7
8
9
10
11
12
13
14
15
from vllm.entrypoints.chat_utils import (
    ChatCompletionMessageParam,
    ConversationMessage,
    parse_chat_messages,
    parse_chat_messages_async,
)
from vllm.logger import init_logger
from vllm.tokenizers import cached_get_tokenizer
from vllm.tokenizers.deepseek_v32 import DeepseekV32Tokenizer

16
from ..tokenizers.hf import HfTokenizer
17
from .base import BaseRenderer
18
19
from .inputs import DictPrompt
from .inputs.preprocess import parse_dec_only_prompt
20
from .params import ChatParams
21
22
23
24

# Module-level logger created via vLLM's `init_logger`, keyed by this module's name.
logger = init_logger(__name__)


25
class DeepseekV32Renderer(BaseRenderer):
    """Renderer that turns chat messages into a prompt via ``DeepseekV32Tokenizer``.

    The tokenizer is loaded with ``cached_get_tokenizer`` unless the model
    config sets ``skip_tokenizer_init``, in which case no tokenizer is held and
    any attempt to render messages raises ``ValueError``.
    """

    @classmethod
    def from_config(
        cls,
        config: VllmConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> "BaseRenderer":
        """Build a renderer from ``config`` and tokenizer keyword arguments.

        Args:
            config: The vLLM configuration forwarded to ``__init__``.
            tokenizer_kwargs: Keyword arguments forwarded to
                ``cached_get_tokenizer`` when the tokenizer is initialized.

        Returns:
            A new instance of ``cls``.
        """
        return cls(config, tokenizer_kwargs)

    def __init__(
        self,
        config: VllmConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> None:
        """Initialize the base renderer and, unless skipped, load the tokenizer.

        Args:
            config: The vLLM configuration passed to ``BaseRenderer.__init__``.
            tokenizer_kwargs: Keyword arguments expanded into
                ``cached_get_tokenizer``; unused when tokenizer init is skipped.
        """
        super().__init__(config)

        # NOTE(review): `self.model_config` is presumably set by
        # `BaseRenderer.__init__` -- confirm against the base class.
        model_config = self.model_config
        if model_config.skip_tokenizer_init:
            tokenizer = None
        else:
            tokenizer = cached_get_tokenizer(
                tokenizer_cls=DeepseekV32Tokenizer,
                **tokenizer_kwargs,
            )

        self._tokenizer = tokenizer

    @property
    def tokenizer(self) -> HfTokenizer | None:
        """The loaded tokenizer, or ``None`` when tokenizer init was skipped."""
        return self._tokenizer

    def get_tokenizer(self) -> HfTokenizer:
        """Return the tokenizer, failing loudly if none was loaded.

        Raises:
            ValueError: If the renderer holds no tokenizer.
        """
        tokenizer = self.tokenizer
        if tokenizer is None:
            raise ValueError("Tokenizer not available when `skip_tokenizer_init=True`")

        return tokenizer

    @staticmethod
    def _build_dict_prompt(
        tokenizer: HfTokenizer,
        messages: list[ChatCompletionMessageParam],
        conversation: list[ConversationMessage],
        mm_data: Any,
        mm_uuids: Any,
        params: ChatParams,
    ) -> DictPrompt:
        """Apply the chat template and attach any multimodal payloads.

        Shared by the sync and async render paths so that both produce the
        prompt in exactly the same way once the messages have been parsed.

        Args:
            tokenizer: Tokenizer whose ``apply_chat_template`` is invoked.
            messages: The original, unparsed chat messages.
            conversation: The parsed conversation for ``messages``.
            mm_data: Multimodal data from message parsing, or ``None``.
            mm_uuids: Multimodal UUIDs from message parsing, or ``None``.
            params: Chat parameters supplying the chat-template kwargs.

        Returns:
            The prompt produced by ``parse_dec_only_prompt``, with
            ``multi_modal_data`` / ``multi_modal_uuids`` set when present.
        """
        prompt_raw = tokenizer.apply_chat_template(
            conversation=conversation,
            messages=messages,
            **params.get_apply_chat_template_kwargs(),
        )

        prompt = parse_dec_only_prompt(prompt_raw)
        # Only add the multimodal keys when parsing actually produced values.
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return prompt

    def render_messages(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        """Parse ``messages`` and render them into a prompt.

        Args:
            messages: Chat messages to render.
            params: Chat parameters supplying the chat-template kwargs.

        Returns:
            A ``(conversation, prompt)`` tuple: the parsed conversation and the
            prompt built from it.

        Raises:
            ValueError: If the renderer holds no tokenizer.
        """
        # Resolve the tokenizer first so a missing tokenizer fails before any
        # message parsing is attempted.
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = parse_chat_messages(
            messages,
            self.model_config,
            content_format="string",
        )

        prompt = self._build_dict_prompt(
            tokenizer, messages, conversation, mm_data, mm_uuids, params
        )
        return conversation, prompt

    async def render_messages_async(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        """Async counterpart of ``render_messages``.

        Identical to the synchronous path except that the messages are parsed
        by awaiting ``parse_chat_messages_async``.

        Args:
            messages: Chat messages to render.
            params: Chat parameters supplying the chat-template kwargs.

        Returns:
            A ``(conversation, prompt)`` tuple: the parsed conversation and the
            prompt built from it.

        Raises:
            ValueError: If the renderer holds no tokenizer.
        """
        # Resolve the tokenizer first so a missing tokenizer fails before any
        # message parsing is attempted.
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
            messages,
            self.model_config,
            content_format="string",
        )

        prompt = self._build_dict_prompt(
            tokenizer, messages, conversation, mm_data, mm_uuids, params
        )
        return conversation, prompt