Unverified Commit e113a301 authored by Wentao Ye, committed by GitHub
Browse files

[Deprecation] Deprecate code in 0.17 as scheduled (#35441)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent 1dafb29f
...@@ -683,13 +683,13 @@ async def test_params_not_supported( ...@@ -683,13 +683,13 @@ async def test_params_not_supported(
@pytest.mark.asyncio @pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_normalize(server: RemoteOpenAIServer, model_name: str): async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
async def get_outputs(normalize): async def get_outputs(use_activation):
request_args = { request_args = {
"model": MODEL_NAME, "model": MODEL_NAME,
"input": input_text, "input": input_text,
"encoding_format": "float", "encoding_format": "float",
"normalize": normalize, "use_activation": use_activation,
} }
response = requests.post(server.url_for("v1/embeddings"), json=request_args) response = requests.post(server.url_for("v1/embeddings"), json=request_args)
...@@ -697,9 +697,9 @@ async def test_normalize(server: RemoteOpenAIServer, model_name: str): ...@@ -697,9 +697,9 @@ async def test_normalize(server: RemoteOpenAIServer, model_name: str):
return torch.tensor([x["embedding"] for x in outputs["data"]]) return torch.tensor([x["embedding"] for x in outputs["data"]])
default = await get_outputs(normalize=None) default = await get_outputs(use_activation=None)
w_normal = await get_outputs(normalize=True) w_normal = await get_outputs(use_activation=True)
wo_normal = await get_outputs(normalize=False) wo_normal = await get_outputs(use_activation=False)
assert torch.allclose(default, w_normal, atol=1e-2), "Default should use normal." assert torch.allclose(default, w_normal, atol=1e-2), "Default should use normal."
assert not torch.allclose(w_normal, wo_normal, atol=1e-2), ( assert not torch.allclose(w_normal, wo_normal, atol=1e-2), (
......
...@@ -101,11 +101,15 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer): ...@@ -101,11 +101,15 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer):
sampling_params = self._sampling_params_from_proto( sampling_params = self._sampling_params_from_proto(
request.sampling_params, stream=request.stream request.sampling_params, stream=request.stream
) )
tokenization_kwargs = self._tokenization_kwargs_from_proto(
request.sampling_params
)
async for output in self.async_llm.generate( async for output in self.async_llm.generate(
prompt=prompt, prompt=prompt,
sampling_params=sampling_params, sampling_params=sampling_params,
request_id=request_id, request_id=request_id,
tokenization_kwargs=tokenization_kwargs,
): ):
# Convert vLLM output to protobuf # Convert vLLM output to protobuf
# For streaming, always send chunks # For streaming, always send chunks
...@@ -308,9 +312,6 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer): ...@@ -308,9 +312,6 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer):
seed=params.seed if params.HasField("seed") else None, seed=params.seed if params.HasField("seed") else None,
include_stop_str_in_output=params.include_stop_str_in_output, include_stop_str_in_output=params.include_stop_str_in_output,
logit_bias=dict(params.logit_bias) if params.logit_bias else None, logit_bias=dict(params.logit_bias) if params.logit_bias else None,
truncate_prompt_tokens=params.truncate_prompt_tokens
if params.HasField("truncate_prompt_tokens")
else None,
structured_outputs=structured_outputs, structured_outputs=structured_outputs,
# detokenize must be True if stop strings are used # detokenize must be True if stop strings are used
detokenize=bool(stop), detokenize=bool(stop),
...@@ -319,6 +320,14 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer): ...@@ -319,6 +320,14 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer):
else RequestOutputKind.FINAL_ONLY, else RequestOutputKind.FINAL_ONLY,
) )
@staticmethod
def _tokenization_kwargs_from_proto(
params: vllm_engine_pb2.SamplingParams,
) -> dict[str, int] | None:
if params.HasField("truncate_prompt_tokens"):
return {"truncate_prompt_tokens": params.truncate_prompt_tokens}
return None
@staticmethod @staticmethod
def _chunk_response(output: RequestOutput) -> vllm_engine_pb2.GenerateResponse: def _chunk_response(output: RequestOutput) -> vllm_engine_pb2.GenerateResponse:
""" """
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import itertools import itertools
import warnings
from collections.abc import Callable, Iterable, Sequence from collections.abc import Callable, Iterable, Sequence
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
...@@ -1030,7 +1029,6 @@ class LLM: ...@@ -1030,7 +1029,6 @@ class LLM:
prompts: PromptType | Sequence[PromptType] | DataPrompt, prompts: PromptType | Sequence[PromptType] | DataPrompt,
pooling_params: PoolingParams | Sequence[PoolingParams] | None = None, pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
*, *,
truncate_prompt_tokens: int | None = None,
use_tqdm: bool | Callable[..., tqdm] = True, use_tqdm: bool | Callable[..., tqdm] = True,
lora_request: list[LoRARequest] | LoRARequest | None = None, lora_request: list[LoRARequest] | LoRARequest | None = None,
pooling_task: PoolingTask | None = None, pooling_task: PoolingTask | None = None,
...@@ -1088,20 +1086,6 @@ class LLM: ...@@ -1088,20 +1086,6 @@ class LLM:
"pooling model." "pooling model."
) )
if truncate_prompt_tokens is not None:
warnings.warn(
"The `truncate_prompt_tokens` parameter in `LLM.encode()` "
"is deprecated and will be removed in v0.16. "
"Please pass it via `tokenization_kwargs` instead.",
DeprecationWarning,
stacklevel=2,
)
tokenization_kwargs = merge_kwargs(
tokenization_kwargs,
dict(truncate_prompt_tokens=truncate_prompt_tokens),
)
if use_io_processor := (isinstance(prompts, dict) and "data" in prompts): if use_io_processor := (isinstance(prompts, dict) and "data" in prompts):
if self.io_processor is None: if self.io_processor is None:
raise ValueError( raise ValueError(
...@@ -1185,7 +1169,6 @@ class LLM: ...@@ -1185,7 +1169,6 @@ class LLM:
self, self,
prompts: PromptType | Sequence[PromptType], prompts: PromptType | Sequence[PromptType],
*, *,
truncate_prompt_tokens: int | None = None,
use_tqdm: bool | Callable[..., tqdm] = True, use_tqdm: bool | Callable[..., tqdm] = True,
pooling_params: PoolingParams | Sequence[PoolingParams] | None = None, pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
lora_request: list[LoRARequest] | LoRARequest | None = None, lora_request: list[LoRARequest] | LoRARequest | None = None,
...@@ -1221,12 +1204,6 @@ class LLM: ...@@ -1221,12 +1204,6 @@ class LLM:
"Try converting the model using `--convert embed`." "Try converting the model using `--convert embed`."
) )
if truncate_prompt_tokens is not None:
tokenization_kwargs = merge_kwargs(
tokenization_kwargs,
dict(truncate_prompt_tokens=truncate_prompt_tokens),
)
items = self.encode( items = self.encode(
prompts, prompts,
use_tqdm=use_tqdm, use_tqdm=use_tqdm,
...@@ -1294,7 +1271,6 @@ class LLM: ...@@ -1294,7 +1271,6 @@ class LLM:
/, /,
*, *,
pooling_params: PoolingParams | Sequence[PoolingParams] | None = None, pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
truncate_prompt_tokens: int | None = None,
use_tqdm: bool | Callable[..., tqdm] = True, use_tqdm: bool | Callable[..., tqdm] = True,
lora_request: list[LoRARequest] | LoRARequest | None = None, lora_request: list[LoRARequest] | LoRARequest | None = None,
tokenization_kwargs: dict[str, Any] | None = None, tokenization_kwargs: dict[str, Any] | None = None,
...@@ -1319,13 +1295,11 @@ class LLM: ...@@ -1319,13 +1295,11 @@ class LLM:
A list of `PoolingRequestOutput` objects containing the A list of `PoolingRequestOutput` objects containing the
pooled hidden states in the same order as the input prompts. pooled hidden states in the same order as the input prompts.
""" """
return self.encode( return self.encode(
prompts, prompts,
use_tqdm=use_tqdm, use_tqdm=use_tqdm,
lora_request=lora_request, lora_request=lora_request,
pooling_params=pooling_params, pooling_params=pooling_params,
truncate_prompt_tokens=truncate_prompt_tokens,
pooling_task="token_classify", pooling_task="token_classify",
tokenization_kwargs=tokenization_kwargs, tokenization_kwargs=tokenization_kwargs,
) )
...@@ -1771,23 +1745,15 @@ class LLM: ...@@ -1771,23 +1745,15 @@ class LLM:
seq_prompts = prompt_to_seq(prompts) seq_prompts = prompt_to_seq(prompts)
seq_params = self._params_to_seq(params, len(seq_prompts)) seq_params = self._params_to_seq(params, len(seq_prompts))
seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_prompts)) seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_prompts))
seq_tok_kwargs = [
merge_kwargs(
tokenization_kwargs,
dict(truncate_prompt_tokens=param.truncate_prompt_tokens),
)
for param in seq_params
]
seq_priority = self._priority_to_seq(priority, len(prompts)) seq_priority = self._priority_to_seq(priority, len(prompts))
return self._render_and_add_requests( return self._render_and_add_requests(
prompts=( prompts=(
self._preprocess_cmpl_one(prompt, tok_kwargs) self._preprocess_cmpl_one(prompt, tokenization_kwargs)
for prompt, tok_kwargs in zip( for prompt in maybe_tqdm(
maybe_tqdm( seq_prompts,
seq_prompts, use_tqdm=use_tqdm, desc="Rendering prompts" use_tqdm=use_tqdm,
), desc="Rendering prompts",
seq_tok_kwargs,
) )
), ),
params=seq_params, params=seq_params,
...@@ -1841,13 +1807,6 @@ class LLM: ...@@ -1841,13 +1807,6 @@ class LLM:
seq_convs = conversation_to_seq(messages) seq_convs = conversation_to_seq(messages)
seq_params = self._params_to_seq(params, len(seq_convs)) seq_params = self._params_to_seq(params, len(seq_convs))
seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_convs)) seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_convs))
seq_tok_kwargs = [
merge_kwargs(
tokenization_kwargs,
dict(truncate_prompt_tokens=param.truncate_prompt_tokens),
)
for param in seq_params
]
return self._render_and_run_requests( return self._render_and_run_requests(
prompts=( prompts=(
...@@ -1859,16 +1818,13 @@ class LLM: ...@@ -1859,16 +1818,13 @@ class LLM:
add_generation_prompt=add_generation_prompt, add_generation_prompt=add_generation_prompt,
continue_final_message=continue_final_message, continue_final_message=continue_final_message,
tools=tools, tools=tools,
tokenization_kwargs=tok_kwargs, tokenization_kwargs=tokenization_kwargs,
mm_processor_kwargs=mm_processor_kwargs, mm_processor_kwargs=mm_processor_kwargs,
) )
for conversation, tok_kwargs in zip( for conversation in maybe_tqdm(
maybe_tqdm( seq_convs,
seq_convs, use_tqdm=use_tqdm,
use_tqdm=use_tqdm, desc="Rendering conversations",
desc="Rendering conversations",
),
seq_tok_kwargs,
) )
), ),
params=seq_params, params=seq_params,
......
...@@ -490,7 +490,6 @@ class ChatCompletionRequest(OpenAIBaseModel): ...@@ -490,7 +490,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
skip_special_tokens=self.skip_special_tokens, skip_special_tokens=self.skip_special_tokens,
spaces_between_special_tokens=self.spaces_between_special_tokens, spaces_between_special_tokens=self.spaces_between_special_tokens,
include_stop_str_in_output=self.include_stop_str_in_output, include_stop_str_in_output=self.include_stop_str_in_output,
truncate_prompt_tokens=self.truncate_prompt_tokens,
output_kind=RequestOutputKind.DELTA output_kind=RequestOutputKind.DELTA
if self.stream if self.stream
else RequestOutputKind.FINAL_ONLY, else RequestOutputKind.FINAL_ONLY,
......
...@@ -302,7 +302,6 @@ class CompletionRequest(OpenAIBaseModel): ...@@ -302,7 +302,6 @@ class CompletionRequest(OpenAIBaseModel):
skip_special_tokens=self.skip_special_tokens, skip_special_tokens=self.skip_special_tokens,
spaces_between_special_tokens=self.spaces_between_special_tokens, spaces_between_special_tokens=self.spaces_between_special_tokens,
include_stop_str_in_output=self.include_stop_str_in_output, include_stop_str_in_output=self.include_stop_str_in_output,
truncate_prompt_tokens=self.truncate_prompt_tokens,
output_kind=RequestOutputKind.DELTA output_kind=RequestOutputKind.DELTA
if self.stream if self.stream
else RequestOutputKind.FINAL_ONLY, else RequestOutputKind.FINAL_ONLY,
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import warnings
warnings.warn(
"The 'vllm.entrypoints.openai.translations' module has been renamed to "
"'vllm.entrypoints.openai.speech_to_text'. Please update your imports. "
"This backward-compatible alias will be removed in version 0.17+.",
DeprecationWarning,
stacklevel=2,
)
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import warnings
warnings.warn(
"'vllm.entrypoints.openai.translations.api_router' has been moved to "
"'vllm.entrypoints.openai.speech_to_text.api_router'. Please update your "
"imports. This backward-compatible alias will be removed in version 0.17+.",
DeprecationWarning,
stacklevel=2,
)
from vllm.entrypoints.openai.speech_to_text.api_router import * # noqa: F401,F403,E402
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import warnings
warnings.warn(
"'vllm.entrypoints.openai.translations.protocol' has been moved to "
"'vllm.entrypoints.openai.speech_to_text.protocol'. Please update your "
"imports. This backward-compatible alias will be removed in version 0.17+.",
DeprecationWarning,
stacklevel=2,
)
from vllm.entrypoints.openai.speech_to_text.protocol import * # noqa: F401,F403,E402
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import warnings
warnings.warn(
"'vllm.entrypoints.openai.translations.serving' has been moved to "
"'vllm.entrypoints.openai.speech_to_text.serving'. Please update your "
"imports. This backward-compatible alias will be removed in version 0.17+.",
DeprecationWarning,
stacklevel=2,
)
from vllm.entrypoints.openai.speech_to_text.serving import * # noqa: F401,F403,E402
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import warnings
warnings.warn(
"'vllm.entrypoints.openai.translations.speech_to_text' has been moved to "
"'vllm.entrypoints.openai.speech_to_text.speech_to_text'. Please update "
"your imports. This backward-compatible alias will be removed in version "
"0.17+.",
DeprecationWarning,
stacklevel=2,
)
from vllm.entrypoints.openai.speech_to_text.speech_to_text import * # noqa: F401,F403,E402
...@@ -190,10 +190,6 @@ class EmbedRequestMixin(EncodingRequestMixin): ...@@ -190,10 +190,6 @@ class EmbedRequestMixin(EncodingRequestMixin):
description="Whether to use activation for the pooler outputs. " description="Whether to use activation for the pooler outputs. "
"`None` uses the pooler's default, which is `True` in most cases.", "`None` uses the pooler's default, which is `True` in most cases.",
) )
normalize: bool | None = Field(
default=None,
description="Deprecated; please pass `use_activation` instead",
)
# --8<-- [end:embed-extra-params] # --8<-- [end:embed-extra-params]
......
...@@ -40,7 +40,6 @@ class ClassificationCompletionRequest( ...@@ -40,7 +40,6 @@ class ClassificationCompletionRequest(
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams( return PoolingParams(
task="classify", task="classify",
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation, use_activation=self.use_activation,
) )
...@@ -63,7 +62,6 @@ class ClassificationChatRequest( ...@@ -63,7 +62,6 @@ class ClassificationChatRequest(
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams( return PoolingParams(
task="classify", task="classify",
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation, use_activation=self.use_activation,
) )
......
...@@ -14,12 +14,9 @@ from vllm.entrypoints.pooling.base.protocol import ( ...@@ -14,12 +14,9 @@ from vllm.entrypoints.pooling.base.protocol import (
EmbedRequestMixin, EmbedRequestMixin,
PoolingBasicRequestMixin, PoolingBasicRequestMixin,
) )
from vllm.logger import init_logger
from vllm.renderers import TokenizeParams from vllm.renderers import TokenizeParams
from vllm.utils import random_uuid from vllm.utils import random_uuid
logger = init_logger(__name__)
def _get_max_total_output_tokens( def _get_max_total_output_tokens(
model_config: ModelConfig, model_config: ModelConfig,
...@@ -60,18 +57,10 @@ class EmbeddingCompletionRequest( ...@@ -60,18 +57,10 @@ class EmbeddingCompletionRequest(
) )
def to_pooling_params(self): def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams( return PoolingParams(
task="embed", task="embed",
dimensions=self.dimensions, dimensions=self.dimensions,
use_activation=self.use_activation, use_activation=self.use_activation,
truncate_prompt_tokens=self.truncate_prompt_tokens,
) )
...@@ -97,18 +86,10 @@ class EmbeddingChatRequest( ...@@ -97,18 +86,10 @@ class EmbeddingChatRequest(
) )
def to_pooling_params(self): def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams( return PoolingParams(
task="embed", task="embed",
dimensions=self.dimensions, dimensions=self.dimensions,
use_activation=self.use_activation, use_activation=self.use_activation,
truncate_prompt_tokens=self.truncate_prompt_tokens,
) )
......
...@@ -16,13 +16,10 @@ from vllm.entrypoints.pooling.base.protocol import ( ...@@ -16,13 +16,10 @@ from vllm.entrypoints.pooling.base.protocol import (
EncodingRequestMixin, EncodingRequestMixin,
PoolingBasicRequestMixin, PoolingBasicRequestMixin,
) )
from vllm.logger import init_logger
from vllm.renderers import TokenizeParams from vllm.renderers import TokenizeParams
from vllm.tasks import PoolingTask from vllm.tasks import PoolingTask
from vllm.utils import random_uuid from vllm.utils import random_uuid
logger = init_logger(__name__)
class PoolingCompletionRequest( class PoolingCompletionRequest(
PoolingBasicRequestMixin, PoolingBasicRequestMixin,
...@@ -45,16 +42,8 @@ class PoolingCompletionRequest( ...@@ -45,16 +42,8 @@ class PoolingCompletionRequest(
) )
def to_pooling_params(self): def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams( return PoolingParams(
task=self.task, task=self.task,
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation, use_activation=self.use_activation,
dimensions=self.dimensions, dimensions=self.dimensions,
) )
...@@ -78,16 +67,8 @@ class PoolingChatRequest( ...@@ -78,16 +67,8 @@ class PoolingChatRequest(
) )
def to_pooling_params(self): def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams( return PoolingParams(
task=self.task, task=self.task,
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation, use_activation=self.use_activation,
dimensions=self.dimensions, dimensions=self.dimensions,
) )
......
...@@ -37,7 +37,6 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin): ...@@ -37,7 +37,6 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
def to_pooling_params(self, task: PoolingTask = "score"): def to_pooling_params(self, task: PoolingTask = "score"):
return PoolingParams( return PoolingParams(
task=task, task=task,
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation, use_activation=self.use_activation,
) )
...@@ -113,7 +112,6 @@ class RerankRequest(PoolingBasicRequestMixin, ClassifyRequestMixin): ...@@ -113,7 +112,6 @@ class RerankRequest(PoolingBasicRequestMixin, ClassifyRequestMixin):
def to_pooling_params(self, task: PoolingTask = "score"): def to_pooling_params(self, task: PoolingTask = "score"):
return PoolingParams( return PoolingParams(
task=task, task=task,
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation, use_activation=self.use_activation,
) )
......
...@@ -289,9 +289,6 @@ def get_temporal_copy_spec( ...@@ -289,9 +289,6 @@ def get_temporal_copy_spec(
) )
get_full_copy_spec = get_temporal_copy_spec
class MambaStateCopyFuncCalculator: class MambaStateCopyFuncCalculator:
@classmethod @classmethod
def linear_attention_state_copy_func(cls): def linear_attention_state_copy_func(cls):
......
...@@ -43,12 +43,9 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape ...@@ -43,12 +43,9 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape
from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
IMAGE_TOKEN = "<image>" IMAGE_TOKEN = "<image>"
IMAGE_PLACEHOLDER_ID = 151669
VIDEO_TOKEN = "<video>" VIDEO_TOKEN = "<video>"
VIDEO_PLACEHOLDER_ID = 151670
INDICATOR_IDS = [151672, 151673, 151674, 151675] INDICATOR_IDS = [151672, 151673, 151674, 151675]
IMAGE_PAD_TOKEN_ID = 151655 IMAGE_PAD_TOKEN_ID = 151655
THINK_END_TOKEN_ID = 151668
class Ovis2_5ImagePatchInputs(TensorSchema): class Ovis2_5ImagePatchInputs(TensorSchema):
......
...@@ -17,7 +17,7 @@ from typing import ( ...@@ -17,7 +17,7 @@ from typing import (
import regex as re import regex as re
import torch import torch
from typing_extensions import TypeVar, assert_never, deprecated from typing_extensions import TypeVar, assert_never
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
...@@ -996,16 +996,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]): ...@@ -996,16 +996,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
self.data_parser = self.info.get_data_parser() self.data_parser = self.info.get_data_parser()
@property
@deprecated("Will be removed in v0.17. Use `info.supported_mm_limits` instead.")
def supported_mm_limits(self):
return self.info.supported_mm_limits
@property
@deprecated("Will be removed in v0.17. Use `info.allowed_mm_limits` instead.")
def allowed_mm_limits(self):
return self.info.allowed_mm_limits
def __call__( def __call__(
self, self,
prompt: str, prompt: str,
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import mimetypes import mimetypes
import warnings
from collections import defaultdict from collections import defaultdict
from collections.abc import Generator, Sequence from collections.abc import Generator, Sequence
from itertools import groupby from itertools import groupby
...@@ -30,23 +29,6 @@ else: ...@@ -30,23 +29,6 @@ else:
torch = LazyLoader("torch", globals(), "torch") torch = LazyLoader("torch", globals(), "torch")
def __getattr__(name: str):
if name == "MEDIA_CONNECTOR_REGISTRY":
from .media import MEDIA_CONNECTOR_REGISTRY
warnings.warn(
"`vllm.multimodal.utils.MEDIA_CONNECTOR_REGISTRY` "
"has been moved to `vllm.multimodal.media.MEDIA_CONNECTOR_REGISTRY`. "
"The old name will be removed in v0.17.",
DeprecationWarning,
stacklevel=2,
)
return MEDIA_CONNECTOR_REGISTRY
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
def encode_audio_base64( def encode_audio_base64(
audio: np.ndarray, audio: np.ndarray,
sampling_rate: int, sampling_rate: int,
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from copy import deepcopy from copy import deepcopy
from typing import Annotated, Any from typing import Any
import msgspec import msgspec
...@@ -19,10 +19,6 @@ class PoolingParams( ...@@ -19,10 +19,6 @@ class PoolingParams(
"""API parameters for pooling models. """API parameters for pooling models.
Attributes: Attributes:
truncate_prompt_tokens: Controls prompt truncation.
Set to -1 to use the model's default truncation size.
Set to k to keep only the last k tokens (left truncation).
Set to None to disable truncation.
use_activation: Whether to apply activation function to the pooler outputs. use_activation: Whether to apply activation function to the pooler outputs.
`None` uses the pooler's default, which is `True` in most cases. `None` uses the pooler's default, which is `True` in most cases.
dimensions: Reduce the dimensions of embeddings dimensions: Reduce the dimensions of embeddings
...@@ -30,7 +26,6 @@ class PoolingParams( ...@@ -30,7 +26,6 @@ class PoolingParams(
""" """
# --8<-- [start:common-pooling-params] # --8<-- [start:common-pooling-params]
truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None
use_activation: bool | None = None use_activation: bool | None = None
# --8<-- [end:common-pooling-params] # --8<-- [end:common-pooling-params]
...@@ -198,7 +193,6 @@ class PoolingParams( ...@@ -198,7 +193,6 @@ class PoolingParams(
f"returned_token_ids={self.returned_token_ids}, " f"returned_token_ids={self.returned_token_ids}, "
f"requires_token_ids={self.requires_token_ids}, " f"requires_token_ids={self.requires_token_ids}, "
f"skip_reading_prefix_cache={self.skip_reading_prefix_cache}, " f"skip_reading_prefix_cache={self.skip_reading_prefix_cache}, "
f"truncate_prompt_tokens={self.truncate_prompt_tokens}, "
f"extra_kwargs={self.extra_kwargs})" f"extra_kwargs={self.extra_kwargs})"
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment