[BugFix] Prevent `LLM.encode` for non-generation Models (#5184)

Co-authored-by: mgoin <michael@neuralmagic.com>

[BugFix] Prevent `LLM.encode` for non-generation Models (#5184)
Co-authored-by: mgoin <michael@neuralmagic.com>
044793d8 · Robert Shaw · GitHub · c2d6d2f9 · 044793d8
Unverified commit 044793d8, authored Jun 01, 2024 by Robert Shaw; committed by GitHub on Jun 01, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 10 additions and 0 deletions

vllm/entrypoints/llm.py vllm/entrypoints/llm.py +10 -0

No files found.
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -276,6 +276,11 @@ class LLM:
            considered legacy and may be deprecated in the future. You should
            instead pass them via the ``inputs`` parameter.
        """
+        if self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.generate() is only supported for generation models "
+                "(XForCausalLM).")
+
        if prompt_token_ids is not None or multi_modal_data is not None:
            inputs = self._convert_v1_inputs(
                prompts=cast(Optional[Union[str, List[str]]], prompts),
@@ -420,6 +425,11 @@ class LLM:
            considered legacy and may be deprecated in the future. You should
            instead pass them via the ``inputs`` parameter.
        """
+        if not self.llm_engine.model_config.embedding_mode:
+            raise ValueError(
+                "LLM.encode() is only supported for embedding models (XModel)."
+            )
+
        if prompt_token_ids is not None or multi_modal_data is not None:
            inputs = self._convert_v1_inputs(
                prompts=cast(Optional[Union[str, List[str]]], prompts),