Unverified commit 1501a407, authored by Jeffrey Wang, committed by GitHub
Browse files

[Bugfix] Read truncate_prompt_tokens from pooling_params in AsyncLLM.encode() (#31013)


Signed-off-by: Jeffrey Wang <jeffreywang@anyscale.com>
parent ff2168bc
...@@ -71,7 +71,11 @@ class EngineClient(ABC): ...@@ -71,7 +71,11 @@ class EngineClient(ABC):
truncate_prompt_tokens: int | None = None, truncate_prompt_tokens: int | None = None,
tokenization_kwargs: dict[str, Any] | None = None, tokenization_kwargs: dict[str, Any] | None = None,
) -> AsyncGenerator[PoolingRequestOutput, None]: ) -> AsyncGenerator[PoolingRequestOutput, None]:
"""Generate outputs for a request from a pooling model.""" """Generate outputs for a request from a pooling model.
NOTE: truncate_prompt_tokens is deprecated in v0.14.
TODO: Remove this argument in v0.15.
"""
... ...
@abstractmethod @abstractmethod
......
...@@ -4,6 +4,7 @@ import asyncio ...@@ -4,6 +4,7 @@ import asyncio
import os import os
import socket import socket
import time import time
import warnings
from collections.abc import AsyncGenerator, Iterable, Mapping from collections.abc import AsyncGenerator, Iterable, Mapping
from copy import copy from copy import copy
from typing import Any, cast from typing import Any, cast
...@@ -627,6 +628,9 @@ class AsyncLLM(EngineClient): ...@@ -627,6 +628,9 @@ class AsyncLLM(EngineClient):
The caller of generate() iterates the returned AsyncGenerator, The caller of generate() iterates the returned AsyncGenerator,
returning the RequestOutput back to the caller. returning the RequestOutput back to the caller.
NOTE: truncate_prompt_tokens is deprecated in v0.14.
TODO: Remove truncate_prompt_tokens in v0.15.
""" """
try: try:
...@@ -641,9 +645,19 @@ class AsyncLLM(EngineClient): ...@@ -641,9 +645,19 @@ class AsyncLLM(EngineClient):
if tokenization_kwargs is None: if tokenization_kwargs is None:
tokenization_kwargs = {} tokenization_kwargs = {}
if truncate_prompt_tokens is not None:
warnings.warn(
"The `truncate_prompt_tokens` parameter in `AsyncLLM.encode()` "
"is deprecated and will be removed in v0.15. "
"Please use `pooling_params.truncate_prompt_tokens` instead.",
DeprecationWarning,
stacklevel=2,
)
_validate_truncation_size( _validate_truncation_size(
self.model_config.max_model_len, self.model_config.max_model_len,
truncate_prompt_tokens, pooling_params.truncate_prompt_tokens,
tokenization_kwargs, tokenization_kwargs,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment