Unverified commit 66c079ae, authored by wang.yuqi and committed by GitHub
Browse files

[Frontend][4/n] Improve pooling entrypoints | pooling. (#39153)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent b6c9be50
......@@ -12,6 +12,23 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
logger = logging.getLogger(__name__)
def has_io_processor(
    vllm_config: VllmConfig,
    plugin_from_init: str | None = None,
):
    """Report whether an IO processor plugin name has been specified.

    The plugin name is looked up in two places, in order:

    1. ``plugin_from_init`` -- used whenever it is truthy.
    2. The ``"io_processor_plugin"`` entry of the model's HF config
       (read from ``vllm_config.model_config.hf_config.to_dict()``), which
       is a custom field a model may carry to request a specific plugin.

    Args:
        vllm_config: Configuration object whose ``model_config.hf_config``
            is consulted when no plugin name is passed explicitly.
        plugin_from_init: Optional plugin name supplied by the caller.

    Returns:
        ``True`` if a plugin name was found, ``False`` otherwise.
    """
    # A truthy, explicitly supplied name settles the question immediately;
    # the model config is not consulted at all in that case.
    if plugin_from_init:
        return True

    # Otherwise fall back to the model-specific plugin, if the HF config
    # declares one. Only a missing / ``None`` entry counts as "no plugin".
    hf_config_fields = vllm_config.model_config.hf_config.to_dict()
    return hf_config_fields.get("io_processor_plugin") is not None
def get_io_processor(
vllm_config: VllmConfig,
renderer: BaseRenderer,
......
......@@ -26,7 +26,6 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.outputs import STREAM_FINISHED, PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import get_io_processor
from vllm.pooling_params import PoolingParams
from vllm.renderers import renderer_from_config
from vllm.renderers.inputs.preprocess import extract_prompt_components
......@@ -133,11 +132,6 @@ class AsyncLLM(EngineClient):
)
self.renderer = renderer = renderer_from_config(self.vllm_config)
self.io_processor = get_io_processor(
self.vllm_config,
self.renderer,
self.model_config.io_processor_plugin,
)
# Convert EngineInput --> EngineCoreRequest.
self.input_processor = InputProcessor(self.vllm_config, renderer)
......
......@@ -19,7 +19,6 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.outputs import PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import get_io_processor
from vllm.pooling_params import PoolingParams
from vllm.renderers import renderer_from_config
from vllm.renderers.inputs.preprocess import extract_prompt_components
......@@ -90,11 +89,6 @@ class LLMEngine:
self.should_execute_dummy_batch = False
self.renderer = renderer = renderer_from_config(self.vllm_config)
self.io_processor = get_io_processor(
self.vllm_config,
self.renderer,
self.model_config.io_processor_plugin,
)
# Convert EngineInput --> EngineCoreRequest.
self.input_processor = InputProcessor(self.vllm_config, renderer)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment