# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from concurrent.futures import ThreadPoolExecutor from typing import Any from vllm.config import ModelConfig from vllm.entrypoints.chat_utils import ( ChatCompletionMessageParam, ConversationMessage, parse_chat_messages, parse_chat_messages_async, ) from vllm.logger import init_logger from vllm.tokenizers import cached_get_tokenizer from vllm.tokenizers.mistral import MistralTokenizer from vllm.utils.async_utils import make_async from .inputs import DictPrompt from .inputs.preprocess import parse_dec_only_prompt from .params import ChatParams from .protocol import BaseRenderer logger = init_logger(__name__) def safe_apply_chat_template( tokenizer: MistralTokenizer, messages: list[ChatCompletionMessageParam], **kwargs, ) -> str | list[int]: from mistral_common.exceptions import MistralCommonException try: return tokenizer.apply_chat_template(messages, **kwargs) # mistral-common uses assert statements to stop processing of input # if input does not comply with the expected format. # We convert those assertion errors to ValueErrors so they can be # properly caught in the preprocessing_input step except (AssertionError, MistralCommonException) as e: raise ValueError(str(e)) from e # External library exceptions can sometimes occur despite the framework's # internal exception management capabilities. except Exception as e: # Log and report any library-related exceptions for further # investigation. logger.exception( "An error occurred in `mistral_common` while applying chat template" ) raise ValueError(str(e)) from e class MistralRenderer(BaseRenderer): @classmethod def from_config( cls, config: ModelConfig, tokenizer_kwargs: dict[str, Any], ) -> "BaseRenderer": return cls(config, tokenizer_kwargs) def __init__( self, config: ModelConfig, tokenizer_kwargs: dict[str, Any], ) -> None: super().__init__(config) if config.skip_tokenizer_init: tokenizer = None else: tokenizer = cached_get_tokenizer( tokenizer_cls=MistralTokenizer, **tokenizer_kwargs, ) self._tokenizer = tokenizer self._apply_chat_template_executor = ThreadPoolExecutor(max_workers=1) self._apply_chat_template_async = make_async( safe_apply_chat_template, executor=self._apply_chat_template_executor ) @property def tokenizer(self) -> MistralTokenizer | None: return self._tokenizer def get_tokenizer(self) -> MistralTokenizer: tokenizer = self.tokenizer if tokenizer is None: raise ValueError("Tokenizer not available when `skip_tokenizer_init=True`") return tokenizer def render_messages( self, messages: list[ChatCompletionMessageParam], params: ChatParams, ) -> tuple[list[ConversationMessage], DictPrompt]: tokenizer = self.get_tokenizer() conversation, mm_data, mm_uuids = parse_chat_messages( messages, self.config, content_format="string", ) prompt_raw = safe_apply_chat_template( tokenizer, messages, **params.get_apply_chat_template_kwargs(), ) prompt = parse_dec_only_prompt(prompt_raw) if mm_data is not None: prompt["multi_modal_data"] = mm_data if mm_uuids is not None: prompt["multi_modal_uuids"] = mm_uuids return conversation, prompt async def render_messages_async( self, messages: list[ChatCompletionMessageParam], params: ChatParams, ) -> tuple[list[ConversationMessage], DictPrompt]: tokenizer = self.get_tokenizer() conversation, mm_data, mm_uuids = await parse_chat_messages_async( messages, self.config, content_format="string", ) prompt_raw = await self._apply_chat_template_async( tokenizer, messages, **params.get_apply_chat_template_kwargs(), ) prompt = parse_dec_only_prompt(prompt_raw) if mm_data is not None: prompt["multi_modal_data"] = mm_data if mm_uuids is not None: prompt["multi_modal_uuids"] = mm_uuids return conversation, prompt