[Misc] Raise error when using encoder/decoder model with cpu backend (#8355)

295c4730 · Kevin Lin · GitHub · 1bf2dd9d · 295c4730 · 295c4730
Unverified Commit 295c4730 authored Sep 12, 2024 by Kevin Lin Committed by GitHub Sep 12, 2024
Show whitespace changes
Inline Side-by-side

Showing with 9 additions and 1 deletion

vllm/utils.py vllm/utils.py +4 -0

vllm/worker/cpu_model_runner.py vllm/worker/cpu_model_runner.py +5 -1

No files found.
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -82,6 +82,9 @@ STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER = ("Prompt adapters are not "
                                       "currently supported with encoder/"
                                       "decoder models.")

+STR_NOT_IMPL_ENC_DEC_CPU = ("CPU is not currently supported with "
+                            "encoder/decoder models.")
+
 # Efficiently import all enc/dec error strings
 # rather than having to import all of the above
 STR_NOT_IMPL_ENC_DEC_ERR_STRS = {
@@ -97,6 +100,7 @@ STR_NOT_IMPL_ENC_DEC_ERR_STRS = {
    "STR_NOT_IMPL_ENC_DEC_CUDA_GRAPH": STR_NOT_IMPL_ENC_DEC_CUDAGRAPH,
    "STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND,
    "STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER": STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER,
+    "STR_NOT_IMPL_ENC_DEC_CPU": STR_NOT_IMPL_ENC_DEC_CPU
 }

 # Constants related to forcing the attention backend selection

--- a/vllm/worker/cpu_model_runner.py
+++ b/vllm/worker/cpu_model_runner.py
@@ -15,7 +15,7 @@ from vllm.model_executor.model_loader import get_model
 from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
                             MultiModalInputs)
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
-from vllm.utils import make_tensor_with_pad
+from vllm.utils import STR_NOT_IMPL_ENC_DEC_ERR_STRS, make_tensor_with_pad
 from vllm.worker.model_runner_base import (
    ModelRunnerBase, ModelRunnerInputBase,
    _add_attn_metadata_broadcastable_dict,
@@ -121,6 +121,10 @@ class CPUModelRunner(ModelRunnerBase[CPUModelInput]):
        # Lazy initialization.
        self.model: nn.Module  # Set after init_Model

+        if self.model_config.is_encoder_decoder_model:
+            raise NotImplementedError(
+                STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_CPU'])
+
    def load_model(self) -> None:
        self.model = get_model(model_config=self.model_config,
                               load_config=self.load_config,