Commit 1c2bc7ea (unverified), authored by Gabriel Marinho, committed by GitHub
Browse files

Truncation control for embedding models (#14776)


Signed-off-by: Gabriel Marinho <gmarinho@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
parent 4055130a
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import time import time
from collections.abc import Mapping, Sequence from collections.abc import Mapping, Sequence
from typing import Literal, Optional, Union from typing import Any, Literal, Optional, Union
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.inputs import ProcessorInputs, PromptType, SingletonInputs from vllm.inputs import ProcessorInputs, PromptType, SingletonInputs
...@@ -198,6 +198,7 @@ class Processor: ...@@ -198,6 +198,7 @@ class Processor:
params: Union[SamplingParams, PoolingParams], params: Union[SamplingParams, PoolingParams],
arrival_time: Optional[float] = None, arrival_time: Optional[float] = None,
lora_request: Optional[LoRARequest] = None, lora_request: Optional[LoRARequest] = None,
tokenization_kwargs: Optional[dict[str, Any]] = None,
trace_headers: Optional[Mapping[str, str]] = None, trace_headers: Optional[Mapping[str, str]] = None,
prompt_adapter_request: Optional[PromptAdapterRequest] = None, prompt_adapter_request: Optional[PromptAdapterRequest] = None,
priority: int = 0, priority: int = 0,
...@@ -224,6 +225,7 @@ class Processor: ...@@ -224,6 +225,7 @@ class Processor:
# 3. Apply prompt adapter to prompt token ids if one exists. # 3. Apply prompt adapter to prompt token ids if one exists.
processed_inputs: ProcessorInputs = self.input_preprocessor.preprocess( processed_inputs: ProcessorInputs = self.input_preprocessor.preprocess(
prompt, prompt,
tokenization_kwargs=tokenization_kwargs,
lora_request=lora_request, lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request, prompt_adapter_request=prompt_adapter_request,
return_mm_hashes=self.use_hash, return_mm_hashes=self.use_hash,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment