Unverified commit 46ad7395, authored by yyzxw, committed by GitHub
Browse files

[FIX] Throwing an exception when the model does not support pool tasks (#25840) (#25855)


Signed-off-by: zxw <1020938856@qq.com>
Co-authored-by: wang.yuqi <noooop@126.com>
parent 41f38844
......@@ -399,6 +399,9 @@ def as_reward_model(cls: _T) -> _T:
# Lazy import
from vllm.model_executor.layers.pooler import DispatchPooler, Pooler
from .interfaces_base import default_pooling_type
@default_pooling_type("ALL")
class ModelForReward(_create_pooling_model_cls(cls)):
def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
pooler_config = vllm_config.model_config.pooler_config
......
......@@ -3622,8 +3622,28 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
hidden_states: torch.Tensor,
) -> PoolerOutput:
# Find the task that has the largest output for subsequent steps
supported_pooling_tasks = self.get_supported_pooling_tasks()
if not supported_pooling_tasks:
if self.scheduler_config.chunked_prefill_enabled:
raise RuntimeError(
f"Model {self.model_config.model} does not support "
"any pooling tasks with chunked prefill enabled. "
"Please add --no-enable-chunked-prefill to your "
"config or CLI args. See "
"https://docs.vllm.ai/en/latest/models/pooling_models.html "
"to learn more."
)
else:
raise RuntimeError(
f"Model {self.model_config.model} does not support "
"any pooling tasks. See "
"https://docs.vllm.ai/en/latest/models/pooling_models.html "
"to learn more."
)
output_size = dict[PoolingTask, float]()
for task in self.get_supported_pooling_tasks():
for task in supported_pooling_tasks:
# Run a full batch with each task to ensure none of them OOMs
output = self._dummy_pooler_run_task(hidden_states, task)
output_size[task] = sum(o.nbytes for o in output)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment