Unverified Commit 4464723f authored by wang.yuqi's avatar wang.yuqi Committed by GitHub
Browse files

[Frontend][Doc][5/N] Improve all pooling task | Polish encode (pooling) api & Document. (#25524)


Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
parent 74374386
...@@ -7,6 +7,9 @@ from typing import Any ...@@ -7,6 +7,9 @@ from typing import Any
from pydantic.dataclasses import dataclass from pydantic.dataclasses import dataclass
from vllm.config.utils import config from vllm.config.utils import config
from vllm.logger import init_logger
logger = init_logger(__name__)
@config @config
...@@ -48,7 +51,15 @@ class PoolerConfig: ...@@ -48,7 +51,15 @@ class PoolerConfig:
""" """
## for classification models ## for classification models
activation: bool | None = None softmax: float | None = None
"""
softmax will be deprecated, please use use_activation instead.
"""
activation: float | None = None
"""
activation will be deprecated, please use use_activation instead.
"""
use_activation: bool | None = None
""" """
Whether to apply activation function to the classification outputs. Whether to apply activation function to the classification outputs.
Defaults to True. Defaults to True.
...@@ -59,11 +70,6 @@ class PoolerConfig: ...@@ -59,11 +70,6 @@ class PoolerConfig:
""" """
## for reward models ## for reward models
softmax: bool | None = None
"""
Whether to apply softmax to the reward outputs.
Defaults to True.
"""
step_tag_id: int | None = None step_tag_id: int | None = None
""" """
If set, only the score corresponding to the `step_tag_id` in the If set, only the score corresponding to the `step_tag_id` in the
...@@ -77,6 +83,10 @@ class PoolerConfig: ...@@ -77,6 +83,10 @@ class PoolerConfig:
`math-shepherd-mistral-7b-prm` model. `math-shepherd-mistral-7b-prm` model.
""" """
def __post_init__(self):
    """Fold the deprecated ``softmax``/``activation`` fields into ``use_activation``."""
    # get_use_activation logs a deprecation warning when one of the legacy
    # fields is set and returns the value that should take effect.
    resolved = get_use_activation(self)
    self.use_activation = resolved
def compute_hash(self) -> str: def compute_hash(self) -> str:
""" """
WARNING: Whenever a new field is added to this config, WARNING: Whenever a new field is added to this config,
...@@ -94,3 +104,19 @@ class PoolerConfig: ...@@ -94,3 +104,19 @@ class PoolerConfig:
factors: list[Any] = [] factors: list[Any] = []
hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest() hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
return hash_str return hash_str
def get_use_activation(o: object):
    """Resolve the effective ``use_activation`` setting of ``o``.

    The deprecated ``softmax`` and ``activation`` attributes are checked
    first, in that order. The first one that is set (not ``None``) is
    returned after a deprecation warning is logged through
    ``logger.warning_once``. Otherwise the ``use_activation`` attribute is
    returned.

    Args:
        o: Any object. Attributes that are missing are treated as ``None``.

    Returns:
        The value of the first set attribute among ``softmax``,
        ``activation`` and ``use_activation``, or ``None`` if none is set.
    """
    # The parentheses are required: `name := expr is not None` binds the
    # result of the comparison, so the branch would always return True and
    # an explicit False on a deprecated field would be lost.
    if (softmax := getattr(o, "softmax", None)) is not None:
        logger.warning_once(
            "softmax will be deprecated, please use use_activation instead."
        )
        return softmax

    if (activation := getattr(o, "activation", None)) is not None:
        logger.warning_once(
            "activation will be deprecated, please use use_activation instead."
        )
        return activation

    return getattr(o, "use_activation", None)
...@@ -107,6 +107,7 @@ from vllm.entrypoints.utils import ( ...@@ -107,6 +107,7 @@ from vllm.entrypoints.utils import (
) )
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager from vllm.reasoning import ReasoningParserManager
from vllm.tasks import POOLING_TASKS
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.network_utils import is_valid_ipv6_address from vllm.utils.network_utils import is_valid_ipv6_address
...@@ -1748,12 +1749,7 @@ async def init_app_state( ...@@ -1748,12 +1749,7 @@ async def init_app_state(
log_error_stack=args.log_error_stack, log_error_stack=args.log_error_stack,
) )
) )
if ( if any(task in POOLING_TASKS for task in supported_tasks)
any(
task in supported_tasks
for task in ["token_embed", "token_classify", "plugin"]
)
)
else None else None
) )
state.openai_serving_embedding = ( state.openai_serving_embedding = (
......
...@@ -49,6 +49,8 @@ from openai.types.responses.response_reasoning_item import ( ...@@ -49,6 +49,8 @@ from openai.types.responses.response_reasoning_item import (
) )
from openai_harmony import Message as OpenAIHarmonyMessage from openai_harmony import Message as OpenAIHarmonyMessage
from vllm.config.pooler import get_use_activation
from vllm.tasks import PoolingTask
from vllm.utils.serial_utils import ( from vllm.utils.serial_utils import (
EmbedDType, EmbedDType,
EncodingFormat, EncodingFormat,
...@@ -1669,8 +1671,58 @@ class EmbeddingChatRequest(OpenAIBaseModel): ...@@ -1669,8 +1671,58 @@ class EmbeddingChatRequest(OpenAIBaseModel):
EmbeddingRequest: TypeAlias = EmbeddingCompletionRequest | EmbeddingChatRequest EmbeddingRequest: TypeAlias = EmbeddingCompletionRequest | EmbeddingChatRequest
PoolingCompletionRequest = EmbeddingCompletionRequest
PoolingChatRequest = EmbeddingChatRequest class PoolingCompletionRequest(EmbeddingCompletionRequest):
task: PoolingTask | None = None
softmax: bool | None = Field(
default=None,
description="softmax will be deprecated, please use use_activation instead.",
)
activation: bool | None = Field(
default=None,
description="activation will be deprecated, please use use_activation instead.",
)
use_activation: bool | None = Field(
default=None,
description="Whether to use activation for classification outputs. "
"If it is a classify or token_classify task, the default is True; "
"for other tasks, this value should be None.",
)
def to_pooling_params(self):
return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens,
dimensions=self.dimensions,
normalize=self.normalize,
use_activation=get_use_activation(self),
)
class PoolingChatRequest(EmbeddingChatRequest):
    # Chat-style pooling request: the embedding chat request extended with an
    # optional task selector and the activation switches below.

    # Requested pooling task; when None the serving layer picks one from the
    # tasks the model supports.
    task: PoolingTask | None = None

    # Deprecated aliases of use_activation, still accepted on input.
    softmax: bool | None = Field(
        default=None,
        description=(
            "softmax will be deprecated, please use use_activation instead."
        ),
    )
    activation: bool | None = Field(
        default=None,
        description=(
            "activation will be deprecated, please use use_activation instead."
        ),
    )

    use_activation: bool | None = Field(
        default=None,
        description=(
            "Whether to use activation for classification outputs. "
            "If it is a classify or token_classify task, the default is True; "
            "for other tasks, this value should be None."
        ),
    )

    def to_pooling_params(self):
        """Build the ``PoolingParams`` that correspond to this request.

        ``use_activation`` is resolved through ``get_use_activation`` so the
        deprecated ``softmax`` / ``activation`` fields are honoured.
        """
        params = {
            "truncate_prompt_tokens": self.truncate_prompt_tokens,
            "dimensions": self.dimensions,
            "normalize": self.normalize,
            "use_activation": get_use_activation(self),
        }
        return PoolingParams(**params)
T = TypeVar("T") T = TypeVar("T")
...@@ -1686,6 +1738,7 @@ class IOProcessorRequest(OpenAIBaseModel, Generic[T]): ...@@ -1686,6 +1738,7 @@ class IOProcessorRequest(OpenAIBaseModel, Generic[T]):
""" """
data: T data: T
task: PoolingTask = "plugin"
encoding_format: EncodingFormat = "float" encoding_format: EncodingFormat = "float"
embed_dtype: EmbedDType = Field( embed_dtype: EmbedDType = Field(
default="float32", default="float32",
...@@ -1749,14 +1802,27 @@ class ScoreRequest(OpenAIBaseModel): ...@@ -1749,14 +1802,27 @@ class ScoreRequest(OpenAIBaseModel):
), ),
) )
activation: bool | None = None softmax: bool | None = Field(
default=None,
description="softmax will be deprecated, please use use_activation instead.",
)
activation: bool | None = Field(
default=None,
description="activation will be deprecated, please use use_activation instead.",
)
use_activation: bool | None = Field(
default=None,
description="Whether to use activation for classification outputs. "
"Default is True.",
)
# --8<-- [end:score-extra-params] # --8<-- [end:score-extra-params]
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams( return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens, truncate_prompt_tokens=self.truncate_prompt_tokens,
activation=self.activation, use_activation=get_use_activation(self),
) )
...@@ -1783,14 +1849,27 @@ class RerankRequest(OpenAIBaseModel): ...@@ -1783,14 +1849,27 @@ class RerankRequest(OpenAIBaseModel):
), ),
) )
activation: bool | None = None softmax: bool | None = Field(
default=None,
description="softmax will be deprecated, please use use_activation instead.",
)
activation: bool | None = Field(
default=None,
description="activation will be deprecated, please use use_activation instead.",
)
use_activation: bool | None = Field(
default=None,
description="Whether to use activation for classification outputs. "
"Default is True.",
)
# --8<-- [end:rerank-extra-params] # --8<-- [end:rerank-extra-params]
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams( return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens, truncate_prompt_tokens=self.truncate_prompt_tokens,
activation=self.activation, use_activation=get_use_activation(self),
) )
...@@ -1958,14 +2037,27 @@ class ClassificationRequest(OpenAIBaseModel): ...@@ -1958,14 +2037,27 @@ class ClassificationRequest(OpenAIBaseModel):
), ),
) )
activation: bool | None = None softmax: bool | None = Field(
default=None,
description="softmax will be deprecated, please use use_activation instead.",
)
activation: bool | None = Field(
default=None,
description="activation will be deprecated, please use use_activation instead.",
)
use_activation: bool | None = Field(
default=None,
description="Whether to use activation for classification outputs. "
"Default is True.",
)
# --8<-- [end:classification-extra-params] # --8<-- [end:classification-extra-params]
def to_pooling_params(self): def to_pooling_params(self):
return PoolingParams( return PoolingParams(
truncate_prompt_tokens=self.truncate_prompt_tokens, truncate_prompt_tokens=self.truncate_prompt_tokens,
activation=self.activation, use_activation=get_use_activation(self),
) )
......
...@@ -170,6 +170,7 @@ class OpenAIServingPooling(OpenAIServing): ...@@ -170,6 +170,7 @@ class OpenAIServingPooling(OpenAIServing):
pooling_params = request.to_pooling_params() pooling_params = request.to_pooling_params()
pooling_task: PoolingTask pooling_task: PoolingTask
if request.task is None:
if "token_embed" in self.supported_tasks: if "token_embed" in self.supported_tasks:
pooling_task = "token_embed" pooling_task = "token_embed"
elif "token_classify" in self.supported_tasks: elif "token_classify" in self.supported_tasks:
...@@ -180,6 +181,14 @@ class OpenAIServingPooling(OpenAIServing): ...@@ -180,6 +181,14 @@ class OpenAIServingPooling(OpenAIServing):
return self.create_error_response( return self.create_error_response(
f"pooling_task must be one of {self.supported_tasks}." f"pooling_task must be one of {self.supported_tasks}."
) )
else:
pooling_task = request.task
if pooling_task not in self.supported_tasks:
return self.create_error_response(
f"Task {pooling_task} is not supported, it"
f" must be one of {self.supported_tasks}."
)
try: try:
pooling_params.verify(pooling_task, self.model_config) pooling_params.verify(pooling_task, self.model_config)
......
...@@ -607,7 +607,7 @@ class ClassifierPooler(Pooler): ...@@ -607,7 +607,7 @@ class ClassifierPooler(Pooler):
pooled_data -= self.logit_bias pooled_data -= self.logit_bias
pooling_params = get_pooling_params(pooling_metadata) pooling_params = get_pooling_params(pooling_metadata)
flags = [p.activation for p in pooling_params] flags = [p.use_activation for p in pooling_params]
if len(set(flags)) == 1: if len(set(flags)) == 1:
scores = self.act_fn(pooled_data) if flags[0] else pooled_data scores = self.act_fn(pooled_data) if flags[0] else pooled_data
...@@ -681,7 +681,7 @@ class TokenClassifierPoolerHead(nn.Module): ...@@ -681,7 +681,7 @@ class TokenClassifierPoolerHead(nn.Module):
if self.logit_bias is not None: if self.logit_bias is not None:
scores -= self.logit_bias scores -= self.logit_bias
if pooling_param.activation: if pooling_param.use_activation:
scores = self.act_fn(scores) scores = self.act_fn(scores)
# scores shape: [n_token, num_labels] # scores shape: [n_token, num_labels]
......
...@@ -53,8 +53,8 @@ class JambaForSequenceClassificationConfig(VerifyAndUpdateConfig): ...@@ -53,8 +53,8 @@ class JambaForSequenceClassificationConfig(VerifyAndUpdateConfig):
@staticmethod @staticmethod
def verify_and_update_config(vllm_config: "VllmConfig") -> None: def verify_and_update_config(vllm_config: "VllmConfig") -> None:
pooler_config = vllm_config.model_config.pooler_config pooler_config = vllm_config.model_config.pooler_config
if pooler_config.activation is None: if pooler_config.use_activation is None:
pooler_config.activation = False pooler_config.use_activation = False
class JinaRobertaModelConfig(VerifyAndUpdateConfig): class JinaRobertaModelConfig(VerifyAndUpdateConfig):
......
...@@ -2,16 +2,15 @@ ...@@ -2,16 +2,15 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from copy import deepcopy from copy import deepcopy
from typing import TYPE_CHECKING, Annotated, Any, Optional from typing import Annotated, Any, Optional
import msgspec import msgspec
from vllm.config import ModelConfig, PoolerConfig
from vllm.config.pooler import get_use_activation
from vllm.sampling_params import RequestOutputKind from vllm.sampling_params import RequestOutputKind
from vllm.tasks import PoolingTask from vllm.tasks import PoolingTask
if TYPE_CHECKING:
from vllm.config import ModelConfig, PoolerConfig
class PoolingParams( class PoolingParams(
msgspec.Struct, msgspec.Struct,
...@@ -25,10 +24,12 @@ class PoolingParams( ...@@ -25,10 +24,12 @@ class PoolingParams(
Set to -1 to use the model's default truncation size. Set to -1 to use the model's default truncation size.
Set to k to keep only the last k tokens (left truncation). Set to k to keep only the last k tokens (left truncation).
Set to None to disable truncation. Set to None to disable truncation.
normalize: Whether to normalize the embeddings outputs.
dimensions: Reduce the dimensions of embeddings dimensions: Reduce the dimensions of embeddings
if model support matryoshka representation. if model support matryoshka representation.
activation: Whether to apply activation function to normalize: Whether to normalize the embeddings outputs.
softmax: softmax will be deprecated, please use use_activation instead.
activation: activation will be deprecated, please use use_activation instead.
use_activation: Whether to apply activation function to
the classification outputs. the classification outputs.
""" """
...@@ -44,7 +45,9 @@ class PoolingParams( ...@@ -44,7 +45,9 @@ class PoolingParams(
## for classification, scoring and rerank ## for classification, scoring and rerank
# --8<-- [start:classification-pooling-params] # --8<-- [start:classification-pooling-params]
softmax: bool | None = None
activation: bool | None = None activation: bool | None = None
use_activation: bool | None = None
# --8<-- [end:classification-pooling-params] # --8<-- [end:classification-pooling-params]
## for step pooling models ## for step pooling models
...@@ -59,16 +62,16 @@ class PoolingParams( ...@@ -59,16 +62,16 @@ class PoolingParams(
@property @property
def all_parameters(self) -> list[str]: def all_parameters(self) -> list[str]:
return ["dimensions", "normalize", "activation"] return ["dimensions", "normalize", "use_activation"]
@property @property
def valid_parameters(self): def valid_parameters(self):
return { return {
"embed": ["dimensions", "normalize"], "embed": ["dimensions", "normalize"],
"classify": ["activation"], "classify": ["use_activation"],
"score": ["activation"], "score": ["use_activation"],
"token_embed": ["dimensions", "normalize"], "token_embed": ["dimensions", "normalize"],
"token_classify": ["activation"], "token_classify": ["use_activation"],
} }
def clone(self) -> "PoolingParams": def clone(self) -> "PoolingParams":
...@@ -84,6 +87,9 @@ class PoolingParams( ...@@ -84,6 +87,9 @@ class PoolingParams(
msg = f"You cannot overwrite {self.task=!r} with {task=!r}!" msg = f"You cannot overwrite {self.task=!r} with {task=!r}!"
raise ValueError(msg) raise ValueError(msg)
# resolve the deprecated softmax/activation aliases into use_activation (logs a deprecation warning when one is set)
self.use_activation = get_use_activation(self)
# plugin task uses io_processor.parse_request to verify inputs, # plugin task uses io_processor.parse_request to verify inputs,
# skipping PoolingParams verify # skipping PoolingParams verify
if self.task == "plugin": if self.task == "plugin":
...@@ -168,8 +174,8 @@ class PoolingParams( ...@@ -168,8 +174,8 @@ class PoolingParams(
raise ValueError("Dimensions must be greater than 0") raise ValueError("Dimensions must be greater than 0")
elif self.task in ["classify", "score", "token_classify"]: elif self.task in ["classify", "score", "token_classify"]:
if self.activation is None: if self.use_activation is None:
self.activation = True self.use_activation = True
else: else:
raise ValueError(f"Unknown pooling task: {self.task}") raise ValueError(f"Unknown pooling task: {self.task}")
...@@ -197,7 +203,7 @@ class PoolingParams( ...@@ -197,7 +203,7 @@ class PoolingParams(
f"task={self.task}, " f"task={self.task}, "
f"normalize={self.normalize}, " f"normalize={self.normalize}, "
f"dimensions={self.dimensions}, " f"dimensions={self.dimensions}, "
f"activation={self.activation}, " f"use_activation={self.use_activation}, "
f"step_tag_id={self.step_tag_id}, " f"step_tag_id={self.step_tag_id}, "
f"returned_token_ids={self.returned_token_ids}, " f"returned_token_ids={self.returned_token_ids}, "
f"requires_token_ids={self.requires_token_ids}, " f"requires_token_ids={self.requires_token_ids}, "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment