"examples/backends/trtllm/vscode:/vscode.git/clone" did not exist on "47c4bd46f75cc6deb587b8add25f5dbf601bbb6a"
Unverified commit 66c079ae, authored by wang.yuqi, committed by GitHub
Browse files

[Frontend][4/n] Improve pooling entrypoints | pooling. (#39153)


Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
parent b6c9be50
...@@ -12,6 +12,23 @@ from vllm.utils.import_utils import resolve_obj_by_qualname ...@@ -12,6 +12,23 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def has_io_processor(
    vllm_config: VllmConfig,
    plugin_from_init: str | None = None,
):
    """Report whether an IO processor plugin has been named for the model.

    Two sources are consulted, in order:

    1. ``plugin_from_init`` -- when it is truthy it wins outright and the
       model configuration is not inspected at all.
    2. The custom ``io_processor_plugin`` field of the model's ``hf_config``
       (read through ``hf_config.to_dict()``).

    Args:
        vllm_config: Configuration object; only
            ``vllm_config.model_config.hf_config`` is accessed, and only
            when ``plugin_from_init`` is falsy.
        plugin_from_init: Plugin name supplied explicitly by the caller.
            ``None`` or an empty string defers to the model config.

    Returns:
        ``True`` if a plugin is named by either source, ``False`` otherwise.
        The model-config value only has to be non-``None`` to count.
    """
    # An explicitly supplied (non-empty) plugin name settles the question.
    if plugin_from_init:
        return True

    # Otherwise fall back to the model-specific plugin, which models
    # advertise through a custom field in their hf_config.
    hf_settings = vllm_config.model_config.hf_config.to_dict()
    return hf_settings.get("io_processor_plugin") is not None
def get_io_processor( def get_io_processor(
vllm_config: VllmConfig, vllm_config: VllmConfig,
renderer: BaseRenderer, renderer: BaseRenderer,
......
...@@ -26,7 +26,6 @@ from vllm.logger import init_logger ...@@ -26,7 +26,6 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.outputs import STREAM_FINISHED, PoolingRequestOutput, RequestOutput from vllm.outputs import STREAM_FINISHED, PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import get_io_processor
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.renderers import renderer_from_config from vllm.renderers import renderer_from_config
from vllm.renderers.inputs.preprocess import extract_prompt_components from vllm.renderers.inputs.preprocess import extract_prompt_components
...@@ -133,11 +132,6 @@ class AsyncLLM(EngineClient): ...@@ -133,11 +132,6 @@ class AsyncLLM(EngineClient):
) )
self.renderer = renderer = renderer_from_config(self.vllm_config) self.renderer = renderer = renderer_from_config(self.vllm_config)
self.io_processor = get_io_processor(
self.vllm_config,
self.renderer,
self.model_config.io_processor_plugin,
)
# Convert EngineInput --> EngineCoreRequest. # Convert EngineInput --> EngineCoreRequest.
self.input_processor = InputProcessor(self.vllm_config, renderer) self.input_processor = InputProcessor(self.vllm_config, renderer)
......
...@@ -19,7 +19,6 @@ from vllm.logger import init_logger ...@@ -19,7 +19,6 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.outputs import PoolingRequestOutput, RequestOutput from vllm.outputs import PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import get_io_processor
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.renderers import renderer_from_config from vllm.renderers import renderer_from_config
from vllm.renderers.inputs.preprocess import extract_prompt_components from vllm.renderers.inputs.preprocess import extract_prompt_components
...@@ -90,11 +89,6 @@ class LLMEngine: ...@@ -90,11 +89,6 @@ class LLMEngine:
self.should_execute_dummy_batch = False self.should_execute_dummy_batch = False
self.renderer = renderer = renderer_from_config(self.vllm_config) self.renderer = renderer = renderer_from_config(self.vllm_config)
self.io_processor = get_io_processor(
self.vllm_config,
self.renderer,
self.model_config.io_processor_plugin,
)
# Convert EngineInput --> EngineCoreRequest. # Convert EngineInput --> EngineCoreRequest.
self.input_processor = InputProcessor(self.vllm_config, renderer) self.input_processor = InputProcessor(self.vllm_config, renderer)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment