Commit f54f8512 (unverified), authored by wang.yuqi, committed by GitHub
Browse files

[Model][2/N] Improve all pooling task | Support multi-vector retrieval (#25370)


Signed-off-by: wang.yuqi <noooop@126.com>
parent d4d1a602
@@ -1926,15 +1926,16 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
supported_tasks = list(model.pooler.get_supported_tasks())
if (
self.scheduler_config.chunked_prefill_enabled
and "encode" in supported_tasks
):
supported_tasks.remove("encode")
if self.scheduler_config.chunked_prefill_enabled:
if "token_embed" in supported_tasks:
supported_tasks.remove("token_embed")
if "token_classify" in supported_tasks:
supported_tasks.remove("token_classify")
logger.debug_once(
"Chunked prefill is not supported with "
"encode task which using ALL pooling. "
"token_embed and token_classify tasks "
"which using ALL pooling. "
"Please turn off chunked prefill by "
"`--no-enable-chunked-prefill` before using it."
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment