Unverified commit 92924b2d, authored by Cyrus Leung, committed by GitHub
Browse files

[Deprecation] Remove deprecated items related to pooling (#33477)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 27cb2f67
......@@ -352,15 +352,6 @@ We have split the `encode` task into two more specific token-wise tasks: `token_
- `token_embed` is the same as `embed`, using normalization as the activation.
- `token_classify` is the same as `classify`, by default using softmax as the activation.
### Remove softmax from PoolingParams
We are going to remove `softmax` and `activation` from `PoolingParams` in v0.15. Instead, use `use_activation`, since we allow `classify` and `token_classify` to use any activation function.
### as_reward_model
!!! warning
We are going to remove `--convert reward` in v0.15, use `--convert embed` instead.
Pooling models now support all pooling by default; you can use it without any settings.
- For extracting hidden states, prefer the `token_embed` task.
......
......@@ -75,7 +75,7 @@ else:
logger = init_logger(__name__)
RunnerOption = Literal["auto", RunnerType]
ConvertType = Literal["none", "embed", "classify", "reward", "mm_encoder_only"]
ConvertType = Literal["none", "embed", "classify"]
ConvertOption = Literal["auto", ConvertType]
TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
......@@ -499,15 +499,6 @@ class ModelConfig:
)
self.model_arch_config = self.get_model_arch_config()
if self.convert == "mm_encoder_only":
logger.warning_once(
"`--convert mm_encoder_only` is deprecated and "
"will be removed in v0.15. "
"Please use --mm-encoder-only` instead."
)
mm_encoder_only = True
self.convert = "none"
architectures = self.architectures
registry = self.registry
is_generative_model = registry.is_text_generation_model(architectures, self)
......@@ -855,13 +846,6 @@ class ModelConfig:
runner_type: RunnerType,
convert: ConvertOption,
) -> ConvertType:
if convert == "reward":
logger.warning(
"`--convert reward` is deprecated and will be removed in v0.15. "
"Please use `--convert embed` instead."
)
return "embed"
if convert != "auto":
return convert
......
......@@ -45,11 +45,13 @@ class PoolerConfig:
The pooling method used for tokenwise pooling.
"""
## for embeddings models
normalize: bool | None = None
use_activation: bool | None = None
"""
DEPRECATED: please use `use_activation` instead.
Whether to apply activation function to the pooler outputs.
`None` uses the pooler's default, which is `True` in most cases.
"""
## for embedding models
dimensions: int | None = None
"""
Reduce the dimensions of embeddings if model
......@@ -73,19 +75,6 @@ class PoolerConfig:
"""
## for classification models
softmax: float | None = None
"""
DEPRECATED: please use `use_activation` instead.
"""
activation: float | None = None
"""
DEPRECATED: please use `use_activation` instead.
"""
use_activation: bool | None = None
"""
Whether to apply activation function to the classification outputs.
Defaults to True.
"""
logit_bias: float | None = None
"""
If provided, apply classification logit biases. Defaults to None.
......@@ -105,10 +94,7 @@ class PoolerConfig:
`math-shepherd-mistral-7b-prm` model.
"""
def __post_init__(self):
# raise deprecated warning for softmax and activation
self.use_activation = get_use_activation(self)
def __post_init__(self) -> None:
if pooling_type := self.pooling_type:
if self.seq_pooling_type is not None:
raise ValueError(
......@@ -161,28 +147,3 @@ class PoolerConfig:
factors: list[Any] = []
hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
return hash_str
def get_use_activation(o: object):
    """Resolve the effective ``use_activation`` value of *o*.

    The deprecated attributes ``normalize``, ``softmax`` and ``activation``
    are looked up on *o* in that order. The first one whose value is not
    ``None`` wins: its deprecation notice is passed to
    ``logger.warning_once`` and its value is returned unchanged.

    If none of the deprecated attributes is set, the ``use_activation``
    attribute of *o* is returned, or ``None`` when *o* has no such attribute.
    """
    # Insertion order of this dict defines the precedence of the legacy names.
    deprecation_notices = {
        "normalize": (
            "`normalize` is deprecated and will be removed in v0.15. "
            "Please use `use_activation` instead."
        ),
        "softmax": (
            "`softmax` is deprecated and will be removed in v0.15. "
            "Please use `use_activation` instead."
        ),
        "activation": (
            "`activation` is deprecated and will be removed in v0.15. "
            "Please use `use_activation` instead."
        ),
    }

    for legacy_name, notice in deprecation_notices.items():
        legacy_value = getattr(o, legacy_name, None)
        if legacy_value is None:
            # Attribute missing or unset: fall through to the next candidate.
            continue
        logger.warning_once(notice)
        return legacy_value

    # No deprecated attribute was set; use the current attribute, if any.
    return getattr(o, "use_activation", None)
......@@ -7,16 +7,18 @@ from typing import Annotated, Any
from pydantic import Field, model_validator
from vllm import PoolingParams
from vllm.config.pooler import get_use_activation
from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam,
ChatTemplateContentFormatOption,
)
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel
from vllm.logger import init_logger
from vllm.renderers import ChatParams, merge_kwargs
from vllm.utils import random_uuid
from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness
logger = init_logger(__name__)
class PoolingBasicRequestMixin(OpenAIBaseModel):
# --8<-- [start:pooling-common-params]
......@@ -172,39 +174,43 @@ class EmbedRequestMixin(EncodingRequestMixin):
# --8<-- [end:embed-params]
# --8<-- [start:embed-extra-params]
use_activation: bool | None = Field(
default=None,
description="Whether to use activation for the pooler outputs. "
"`None` uses the pooler's default, which is `True` in most cases.",
)
normalize: bool | None = Field(
default=None,
description="Whether to normalize the embeddings outputs. Default is True.",
description="Deprecated; please pass `use_activation` instead",
)
# --8<-- [end:embed-extra-params]
def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams(
dimensions=self.dimensions,
use_activation=self.normalize,
use_activation=self.use_activation,
truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
)
class ClassifyRequestMixin(OpenAIBaseModel):
# --8<-- [start:classify-extra-params]
softmax: bool | None = Field(
default=None,
description="softmax will be deprecated, please use use_activation instead.",
)
activation: bool | None = Field(
default=None,
description="activation will be deprecated, please use use_activation instead.",
)
use_activation: bool | None = Field(
default=None,
description="Whether to use activation for classification outputs. "
"Default is True.",
description="Whether to use activation for the pooler outputs. "
"`None` uses the pooler's default, which is `True` in most cases.",
)
# --8<-- [end:classify-extra-params]
def to_pooling_params(self):
return PoolingParams(
use_activation=get_use_activation(self),
use_activation=self.use_activation,
truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
)
......@@ -7,7 +7,6 @@ from pydantic import Field
from vllm import PoolingParams
from vllm.config import ModelConfig
from vllm.config.pooler import get_use_activation
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
from vllm.entrypoints.pooling.base.protocol import (
ChatRequestMixin,
......@@ -17,10 +16,13 @@ from vllm.entrypoints.pooling.base.protocol import (
EncodingRequestMixin,
PoolingBasicRequestMixin,
)
from vllm.logger import init_logger
from vllm.renderers import TokenizeParams
from vllm.tasks import PoolingTask
from vllm.utils import random_uuid
logger = init_logger(__name__)
class PoolingCompletionRequest(
PoolingBasicRequestMixin,
......@@ -43,10 +45,17 @@ class PoolingCompletionRequest(
)
def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation,
dimensions=self.dimensions,
use_activation=get_use_activation(self),
)
......@@ -73,10 +82,17 @@ class PoolingChatRequest(
)
def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation,
dimensions=self.dimensions,
use_activation=get_use_activation(self),
)
......
......@@ -7,7 +7,6 @@ from pydantic import BaseModel, Field
from vllm import PoolingParams
from vllm.config import ModelConfig
from vllm.config.pooler import get_use_activation
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
from vllm.entrypoints.pooling.base.protocol import (
ClassifyRequestMixin,
......@@ -43,7 +42,7 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
def to_pooling_params(self):
return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=get_use_activation(self),
use_activation=self.use_activation,
)
......
......@@ -233,8 +233,8 @@ class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig):
def verify_and_update_model_config(model_config: "ModelConfig") -> None:
pooler_config = model_config.pooler_config
if pooler_config.softmax is None:
pooler_config.softmax = False
if pooler_config.use_activation is None:
pooler_config.use_activation = False
class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig):
......
......@@ -7,7 +7,6 @@ from typing import Annotated, Any
import msgspec
from vllm.config import ModelConfig, PoolerConfig
from vllm.config.pooler import get_use_activation
from vllm.sampling_params import RequestOutputKind
from vllm.tasks import PoolingTask
......@@ -24,30 +23,24 @@ class PoolingParams(
Set to -1 to use the model's default truncation size.
Set to k to keep only the last k tokens (left truncation).
Set to None to disable truncation.
use_activation: Whether to apply activation function to the pooler outputs.
`None` uses the pooler's default, which is `True` in most cases.
dimensions: Reduce the dimensions of embeddings
if model support matryoshka representation.
normalize: Deprecated, please use use_activation instead.
softmax: Deprecated, please use use_activation instead.
activation: Deprecated, please use use_activation instead.
use_activation: Whether to apply activation function to
the classification outputs.
"""
# --8<-- [start:common-pooling-params]
truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None
use_activation: bool | None = None
# --8<-- [end:common-pooling-params]
## for embeddings models
# --8<-- [start:embed-pooling-params]
dimensions: int | None = None
normalize: bool | None = None
# --8<-- [end:embed-pooling-params]
## for classification, scoring and rerank
# --8<-- [start:classify-pooling-params]
softmax: bool | None = None
activation: bool | None = None
use_activation: bool | None = None
# --8<-- [end:classify-pooling-params]
## for step pooling models
......@@ -88,9 +81,6 @@ class PoolingParams(
msg = f"You cannot overwrite {self.task=!r} with {task=!r}!"
raise ValueError(msg)
# raise deprecated warning for softmax and activation
self.use_activation = get_use_activation(self)
# plugin task uses io_processor.parse_request to verify inputs,
# skipping PoolingParams verify
if self.task == "plugin":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment