Unverified commit f0d52517, authored by Patrick von Platen, committed by GitHub
Browse files

[Voxtral models] Skip warm-up to skip confusing error message in warm-up (#33576)


Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent 5c4f2dd6
...@@ -138,6 +138,9 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -138,6 +138,9 @@ class OpenAISpeechToText(OpenAIServing):
if not supports_transcription(self.model_cls): if not supports_transcription(self.model_cls):
return return
if getattr(self.model_cls, "skip_warmup_audio_preprocessing", False):
return
try: try:
warmup_start = time.perf_counter() warmup_start = time.perf_counter()
logger.info("Warming up audio preprocessing libraries...") logger.info("Warming up audio preprocessing libraries...")
...@@ -150,9 +153,7 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -150,9 +153,7 @@ class OpenAISpeechToText(OpenAIServing):
_ = librosa.get_duration(y=dummy_audio, sr=self.asr_config.sample_rate) _ = librosa.get_duration(y=dummy_audio, sr=self.asr_config.sample_rate)
# Warm up mel-spectrogram computation with model-specific parameters # Warm up mel-spectrogram computation with model-specific parameters
from vllm.transformers_utils.processor import ( from vllm.transformers_utils.processor import cached_processor_from_config
cached_processor_from_config,
)
processor = cached_processor_from_config(self.model_config) processor = cached_processor_from_config(self.model_config)
feature_extractor = None feature_extractor = None
......
...@@ -335,6 +335,9 @@ class VoxtralForConditionalGeneration( ...@@ -335,6 +335,9 @@ class VoxtralForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription
): ):
supported_languages = ISO639_1_SUPPORTED_LANGS supported_languages = ISO639_1_SUPPORTED_LANGS
# transformers currently has limited support for the MistralCommon backend
# and cached_get_processor, so skip the audio warm-up until that is fixed.
skip_warmup_audio_preprocessing = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"], "qkv_proj": ["q_proj", "k_proj", "v_proj"],
......
...@@ -218,6 +218,9 @@ class VoxtralRealtimeBuffer: ...@@ -218,6 +218,9 @@ class VoxtralRealtimeBuffer:
@support_torch_compile @support_torch_compile
class VoxtralRealtimeGeneration(VoxtralForConditionalGeneration, SupportsRealtime): class VoxtralRealtimeGeneration(VoxtralForConditionalGeneration, SupportsRealtime):
requires_raw_input_tokens = True requires_raw_input_tokens = True
# transformers currently has limited support for the MistralCommon backend
# and cached_get_processor, so skip the audio warm-up until that is fixed.
skip_warmup_audio_preprocessing = True
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__(vllm_config=vllm_config, prefix=prefix) super().__init__(vllm_config=vllm_config, prefix=prefix)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment