Unverified commit 92924b2d, authored by Cyrus Leung, committed by GitHub
Browse files

[Deprecation] Remove deprecated items related to pooling (#33477)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 27cb2f67
...@@ -352,15 +352,6 @@ We have split the `encode` task into two more specific token-wise tasks: `token_ ...@@ -352,15 +352,6 @@ We have split the `encode` task into two more specific token-wise tasks: `token_
- `token_embed` is the same as `embed`, using normalization as the activation. - `token_embed` is the same as `embed`, using normalization as the activation.
- `token_classify` is the same as `classify`, by default using softmax as the activation. - `token_classify` is the same as `classify`, by default using softmax as the activation.
### Remove softmax from PoolingParams
We are going to remove `softmax` and `activation` from `PoolingParams` in v0.15. Instead, use `use_activation`, since we allow `classify` and `token_classify` to use any activation function.
### as_reward_model
!!! warning
We are going to remove `--convert reward` in v0.15, use `--convert embed` instead.
Pooling models now default support all pooling, you can use it without any settings. Pooling models now default support all pooling, you can use it without any settings.
- Extracting hidden states prefers using `token_embed` task. - Extracting hidden states prefers using `token_embed` task.
......
...@@ -75,7 +75,7 @@ else: ...@@ -75,7 +75,7 @@ else:
logger = init_logger(__name__) logger = init_logger(__name__)
RunnerOption = Literal["auto", RunnerType] RunnerOption = Literal["auto", RunnerType]
ConvertType = Literal["none", "embed", "classify", "reward", "mm_encoder_only"] ConvertType = Literal["none", "embed", "classify"]
ConvertOption = Literal["auto", ConvertType] ConvertOption = Literal["auto", ConvertType]
TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"] TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"] ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
...@@ -499,15 +499,6 @@ class ModelConfig: ...@@ -499,15 +499,6 @@ class ModelConfig:
) )
self.model_arch_config = self.get_model_arch_config() self.model_arch_config = self.get_model_arch_config()
if self.convert == "mm_encoder_only":
logger.warning_once(
"`--convert mm_encoder_only` is deprecated and "
"will be removed in v0.15. "
"Please use --mm-encoder-only` instead."
)
mm_encoder_only = True
self.convert = "none"
architectures = self.architectures architectures = self.architectures
registry = self.registry registry = self.registry
is_generative_model = registry.is_text_generation_model(architectures, self) is_generative_model = registry.is_text_generation_model(architectures, self)
...@@ -855,13 +846,6 @@ class ModelConfig: ...@@ -855,13 +846,6 @@ class ModelConfig:
runner_type: RunnerType, runner_type: RunnerType,
convert: ConvertOption, convert: ConvertOption,
) -> ConvertType: ) -> ConvertType:
if convert == "reward":
logger.warning(
"`--convert reward` is deprecated and will be removed in v0.15. "
"Please use `--convert embed` instead."
)
return "embed"
if convert != "auto": if convert != "auto":
return convert return convert
......
...@@ -45,11 +45,13 @@ class PoolerConfig: ...@@ -45,11 +45,13 @@ class PoolerConfig:
The pooling method used for tokenwise pooling. The pooling method used for tokenwise pooling.
""" """
## for embeddings models use_activation: bool | None = None
normalize: bool | None = None
""" """
DEPRECATED: please use `use_activation` instead. Whether to apply activation function to the pooler outputs.
`None` uses the pooler's default, which is `True` in most cases.
""" """
## for embedding models
dimensions: int | None = None dimensions: int | None = None
""" """
Reduce the dimensions of embeddings if model Reduce the dimensions of embeddings if model
...@@ -73,19 +75,6 @@ class PoolerConfig: ...@@ -73,19 +75,6 @@ class PoolerConfig:
""" """
## for classification models ## for classification models
softmax: float | None = None
"""
DEPRECATED: please use `use_activation` instead.
"""
activation: float | None = None
"""
DEPRECATED: please use `use_activation` instead.
"""
use_activation: bool | None = None
"""
Whether to apply activation function to the classification outputs.
Defaults to True.
"""
logit_bias: float | None = None logit_bias: float | None = None
""" """
If provided, apply classification logit biases. Defaults to None. If provided, apply classification logit biases. Defaults to None.
...@@ -105,10 +94,7 @@ class PoolerConfig: ...@@ -105,10 +94,7 @@ class PoolerConfig:
`math-shepherd-mistral-7b-prm` model. `math-shepherd-mistral-7b-prm` model.
""" """
def __post_init__(self): def __post_init__(self) -> None:
# raise deprecated warning for softmax and activation
self.use_activation = get_use_activation(self)
if pooling_type := self.pooling_type: if pooling_type := self.pooling_type:
if self.seq_pooling_type is not None: if self.seq_pooling_type is not None:
raise ValueError( raise ValueError(
...@@ -161,28 +147,3 @@ class PoolerConfig: ...@@ -161,28 +147,3 @@ class PoolerConfig:
factors: list[Any] = [] factors: list[Any] = []
hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest() hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
return hash_str return hash_str
def get_use_activation(o: object):
if (normalize := getattr(o, "normalize", None)) is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.15. "
"Please use `use_activation` instead."
)
return normalize
if (softmax := getattr(o, "softmax", None)) is not None:
logger.warning_once(
"`softmax` is deprecated and will be removed in v0.15. "
"Please use `use_activation` instead."
)
return softmax
if (activation := getattr(o, "activation", None)) is not None:
logger.warning_once(
"`activation` is deprecated and will be removed in v0.15. "
"Please use `use_activation` instead."
)
return activation
return getattr(o, "use_activation", None)
...@@ -7,16 +7,18 @@ from typing import Annotated, Any ...@@ -7,16 +7,18 @@ from typing import Annotated, Any
from pydantic import Field, model_validator from pydantic import Field, model_validator
from vllm import PoolingParams from vllm import PoolingParams
from vllm.config.pooler import get_use_activation
from vllm.entrypoints.chat_utils import ( from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam, ChatCompletionMessageParam,
ChatTemplateContentFormatOption, ChatTemplateContentFormatOption,
) )
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel
from vllm.logger import init_logger
from vllm.renderers import ChatParams, merge_kwargs from vllm.renderers import ChatParams, merge_kwargs
from vllm.utils import random_uuid from vllm.utils import random_uuid
from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness
logger = init_logger(__name__)
class PoolingBasicRequestMixin(OpenAIBaseModel): class PoolingBasicRequestMixin(OpenAIBaseModel):
# --8<-- [start:pooling-common-params] # --8<-- [start:pooling-common-params]
...@@ -172,39 +174,43 @@ class EmbedRequestMixin(EncodingRequestMixin): ...@@ -172,39 +174,43 @@ class EmbedRequestMixin(EncodingRequestMixin):
# --8<-- [end:embed-params] # --8<-- [end:embed-params]
# --8<-- [start:embed-extra-params] # --8<-- [start:embed-extra-params]
use_activation: bool | None = Field(
default=None,
description="Whether to use activation for the pooler outputs. "
"`None` uses the pooler's default, which is `True` in most cases.",
)
normalize: bool | None = Field( normalize: bool | None = Field(
default=None, default=None,
description="Whether to normalize the embeddings outputs. Default is True.", description="Deprecated; please pass `use_activation` instead",
) )
# --8<-- [end:embed-extra-params] # --8<-- [end:embed-extra-params]
def to_pooling_params(self): def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams( return PoolingParams(
dimensions=self.dimensions, dimensions=self.dimensions,
use_activation=self.normalize, use_activation=self.use_activation,
truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None), truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
) )
class ClassifyRequestMixin(OpenAIBaseModel): class ClassifyRequestMixin(OpenAIBaseModel):
# --8<-- [start:classify-extra-params] # --8<-- [start:classify-extra-params]
softmax: bool | None = Field(
default=None,
description="softmax will be deprecated, please use use_activation instead.",
)
activation: bool | None = Field(
default=None,
description="activation will be deprecated, please use use_activation instead.",
)
use_activation: bool | None = Field( use_activation: bool | None = Field(
default=None, default=None,
description="Whether to use activation for classification outputs. " description="Whether to use activation for the pooler outputs. "
"Default is True.", "`None` uses the pooler's default, which is `True` in most cases.",
) )
# --8<-- [end:classify-extra-params] # --8<-- [end:classify-extra-params]
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams( return PoolingParams(
use_activation=get_use_activation(self), use_activation=self.use_activation,
truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None), truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
) )
...@@ -7,7 +7,6 @@ from pydantic import Field ...@@ -7,7 +7,6 @@ from pydantic import Field
from vllm import PoolingParams from vllm import PoolingParams
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.config.pooler import get_use_activation
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
from vllm.entrypoints.pooling.base.protocol import ( from vllm.entrypoints.pooling.base.protocol import (
ChatRequestMixin, ChatRequestMixin,
...@@ -17,10 +16,13 @@ from vllm.entrypoints.pooling.base.protocol import ( ...@@ -17,10 +16,13 @@ from vllm.entrypoints.pooling.base.protocol import (
EncodingRequestMixin, EncodingRequestMixin,
PoolingBasicRequestMixin, PoolingBasicRequestMixin,
) )
from vllm.logger import init_logger
from vllm.renderers import TokenizeParams from vllm.renderers import TokenizeParams
from vllm.tasks import PoolingTask from vllm.tasks import PoolingTask
from vllm.utils import random_uuid from vllm.utils import random_uuid
logger = init_logger(__name__)
class PoolingCompletionRequest( class PoolingCompletionRequest(
PoolingBasicRequestMixin, PoolingBasicRequestMixin,
...@@ -43,10 +45,17 @@ class PoolingCompletionRequest( ...@@ -43,10 +45,17 @@ class PoolingCompletionRequest(
) )
def to_pooling_params(self): def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams( return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens, truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation,
dimensions=self.dimensions, dimensions=self.dimensions,
use_activation=get_use_activation(self),
) )
...@@ -73,10 +82,17 @@ class PoolingChatRequest( ...@@ -73,10 +82,17 @@ class PoolingChatRequest(
) )
def to_pooling_params(self): def to_pooling_params(self):
if self.normalize is not None:
logger.warning_once(
"`normalize` is deprecated and will be removed in v0.17. "
"Please pass `use_activation` instead."
)
self.use_activation = self.normalize
return PoolingParams( return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens, truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=self.use_activation,
dimensions=self.dimensions, dimensions=self.dimensions,
use_activation=get_use_activation(self),
) )
......
...@@ -7,7 +7,6 @@ from pydantic import BaseModel, Field ...@@ -7,7 +7,6 @@ from pydantic import BaseModel, Field
from vllm import PoolingParams from vllm import PoolingParams
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.config.pooler import get_use_activation
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
from vllm.entrypoints.pooling.base.protocol import ( from vllm.entrypoints.pooling.base.protocol import (
ClassifyRequestMixin, ClassifyRequestMixin,
...@@ -43,7 +42,7 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin): ...@@ -43,7 +42,7 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams( return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens, truncate_prompt_tokens=self.truncate_prompt_tokens,
use_activation=get_use_activation(self), use_activation=self.use_activation,
) )
......
...@@ -233,8 +233,8 @@ class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig): ...@@ -233,8 +233,8 @@ class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig):
def verify_and_update_model_config(model_config: "ModelConfig") -> None: def verify_and_update_model_config(model_config: "ModelConfig") -> None:
pooler_config = model_config.pooler_config pooler_config = model_config.pooler_config
if pooler_config.softmax is None: if pooler_config.use_activation is None:
pooler_config.softmax = False pooler_config.use_activation = False
class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig): class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig):
......
...@@ -7,7 +7,6 @@ from typing import Annotated, Any ...@@ -7,7 +7,6 @@ from typing import Annotated, Any
import msgspec import msgspec
from vllm.config import ModelConfig, PoolerConfig from vllm.config import ModelConfig, PoolerConfig
from vllm.config.pooler import get_use_activation
from vllm.sampling_params import RequestOutputKind from vllm.sampling_params import RequestOutputKind
from vllm.tasks import PoolingTask from vllm.tasks import PoolingTask
...@@ -24,30 +23,24 @@ class PoolingParams( ...@@ -24,30 +23,24 @@ class PoolingParams(
Set to -1 to use the model's default truncation size. Set to -1 to use the model's default truncation size.
Set to k to keep only the last k tokens (left truncation). Set to k to keep only the last k tokens (left truncation).
Set to None to disable truncation. Set to None to disable truncation.
use_activation: Whether to apply activation function to the pooler outputs.
`None` uses the pooler's default, which is `True` in most cases.
dimensions: Reduce the dimensions of embeddings dimensions: Reduce the dimensions of embeddings
if model support matryoshka representation. if model support matryoshka representation.
normalize: Deprecated, please use use_activation instead.
softmax: Deprecated, please use use_activation instead.
activation: Deprecated, please use use_activation instead.
use_activation: Whether to apply activation function to
the classification outputs.
""" """
# --8<-- [start:common-pooling-params] # --8<-- [start:common-pooling-params]
truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None
use_activation: bool | None = None
# --8<-- [end:common-pooling-params] # --8<-- [end:common-pooling-params]
## for embeddings models ## for embeddings models
# --8<-- [start:embed-pooling-params] # --8<-- [start:embed-pooling-params]
dimensions: int | None = None dimensions: int | None = None
normalize: bool | None = None
# --8<-- [end:embed-pooling-params] # --8<-- [end:embed-pooling-params]
## for classification, scoring and rerank ## for classification, scoring and rerank
# --8<-- [start:classify-pooling-params] # --8<-- [start:classify-pooling-params]
softmax: bool | None = None
activation: bool | None = None
use_activation: bool | None = None
# --8<-- [end:classify-pooling-params] # --8<-- [end:classify-pooling-params]
## for step pooling models ## for step pooling models
...@@ -88,9 +81,6 @@ class PoolingParams( ...@@ -88,9 +81,6 @@ class PoolingParams(
msg = f"You cannot overwrite {self.task=!r} with {task=!r}!" msg = f"You cannot overwrite {self.task=!r} with {task=!r}!"
raise ValueError(msg) raise ValueError(msg)
# raise deprecated warning for softmax and activation
self.use_activation = get_use_activation(self)
# plugin task uses io_processor.parse_request to verify inputs, # plugin task uses io_processor.parse_request to verify inputs,
# skipping PoolingParams verify # skipping PoolingParams verify
if self.task == "plugin": if self.task == "plugin":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment