# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any

5
from vllm.config import VllmConfig
6
7
8
9
10
11
12
13
14
15
from vllm.entrypoints.chat_utils import (
    ChatCompletionMessageParam,
    ConversationMessage,
    parse_chat_messages,
    parse_chat_messages_async,
)
from vllm.logger import init_logger
from vllm.tokenizers import cached_get_tokenizer
from vllm.tokenizers.grok2 import Grok2Tokenizer

16
from .base import BaseRenderer
17
18
from .inputs import DictPrompt
from .inputs.preprocess import parse_dec_only_prompt
19
from .params import ChatParams
20
21
22
23

logger = init_logger(__name__)


24
class Grok2Renderer(BaseRenderer[Grok2Tokenizer]):
    """Renderer that builds prompts with a ``Grok2Tokenizer`` chat template."""

    @classmethod
    def from_config(  # type: ignore[override]
        cls,
        config: VllmConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> "Grok2Renderer":
        """Create a renderer from ``config``.

        Args:
            config: The vLLM configuration; its ``model_config`` decides
                whether a tokenizer is loaded.
            tokenizer_kwargs: Keyword arguments forwarded to
                ``cached_get_tokenizer``.

        Returns:
            A renderer constructed with ``config`` and either a
            ``Grok2Tokenizer`` or ``None`` (when
            ``model_config.skip_tokenizer_init`` is set).
        """
        # Guard clause: no tokenizer is created when initialization is skipped.
        if config.model_config.skip_tokenizer_init:
            return cls(config, None)

        grok2_tokenizer = cached_get_tokenizer(
            tokenizer_cls=Grok2Tokenizer,
            **tokenizer_kwargs,
        )
        return cls(config, grok2_tokenizer)

    @staticmethod
    def _finish_grok2_prompt(
        tokenizer: Grok2Tokenizer,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
        conversation: list[ConversationMessage],
        mm_data: Any,
        mm_uuids: Any,
    ) -> DictPrompt:
        """Apply the chat template and attach multi-modal inputs, if any.

        Shared by the synchronous and asynchronous rendering paths so that
        both assemble the prompt in exactly the same way.
        """
        template_output = tokenizer.apply_chat_template(
            conversation=conversation,
            messages=messages,
            **params.get_apply_chat_template_kwargs(),
        )
        rendered = parse_dec_only_prompt(template_output)

        # Multi-modal entries are only added when the parser produced them.
        if mm_data is not None:
            rendered["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            rendered["multi_modal_uuids"] = mm_uuids

        return rendered

    def render_messages(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        """Render chat ``messages`` into a conversation and a prompt.

        Args:
            messages: The chat messages to render.
            params: Chat parameters supplying ``media_io_kwargs``,
                ``mm_processor_kwargs`` and the chat-template keyword
                arguments.

        Returns:
            A ``(conversation, prompt)`` tuple: the parsed conversation and
            the prompt produced from the tokenizer's chat template.
        """
        # The tokenizer is fetched before parsing, as in the original flow.
        grok2_tokenizer = self.get_tokenizer()

        parsed = parse_chat_messages(
            messages,
            self.model_config,
            content_format="string",
            media_io_kwargs=params.media_io_kwargs,
            mm_processor_kwargs=params.mm_processor_kwargs,
        )
        conversation, mm_data, mm_uuids = parsed

        rendered = self._finish_grok2_prompt(
            grok2_tokenizer,
            messages,
            params,
            conversation,
            mm_data,
            mm_uuids,
        )
        return conversation, rendered

    async def render_messages_async(
        self,
        messages: list[ChatCompletionMessageParam],
        params: ChatParams,
    ) -> tuple[list[ConversationMessage], DictPrompt]:
        """Asynchronous counterpart of ``render_messages``.

        Only message parsing is awaited (``parse_chat_messages_async``);
        the chat template is applied with a regular, non-awaited call.

        Args:
            messages: The chat messages to render.
            params: Chat parameters supplying ``media_io_kwargs``,
                ``mm_processor_kwargs`` and the chat-template keyword
                arguments.

        Returns:
            A ``(conversation, prompt)`` tuple: the parsed conversation and
            the prompt produced from the tokenizer's chat template.
        """
        # The tokenizer is fetched before parsing, as in the original flow.
        grok2_tokenizer = self.get_tokenizer()

        parsed = await parse_chat_messages_async(
            messages,
            self.model_config,
            content_format="string",
            media_io_kwargs=params.media_io_kwargs,
            mm_processor_kwargs=params.mm_processor_kwargs,
        )
        conversation, mm_data, mm_uuids = parsed

        rendered = self._finish_grok2_prompt(
            grok2_tokenizer,
            messages,
            params,
            conversation,
            mm_data,
            mm_uuids,
        )
        return conversation, rendered