[Frontend][Doc][5/N] Improve all pooling task | Polish encode (pooling) api & Document. (#25524)

Signed-off-by: wang.yuqi <noooop@126.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>

[Frontend][Doc][5/N] Improve all pooling task | Polish encode (pooling) api & Document. (#25524)
Signed-off-by: wang.yuqi <noooop@126.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
4464723f · wang.yuqi · GitHub · 74374386 · 4464723f · 4464723f
Unverified Commit 4464723f authored Oct 30, 2025 by wang.yuqi Committed by GitHub Oct 30, 2025
7 changed files
--- a/vllm/config/pooler.py
+++ b/vllm/config/pooler.py
@@ -7,6 +7,9 @@ from typing import Any
 from pydantic.dataclasses import dataclass
 from vllm.config.utils import config
+from vllm.logger import init_logger
+logger = init_logger(__name__)
 @config
@@ -48,7 +51,15 @@ class PoolerConfig:
    """
    ## for classification models
-    activation: bool | None = None
+    softmax: float | None = None
+    """
+    softmax will be deprecated, please use use_activation instead.
+    """
+    activation: float | None = None
+    """
+    activation will be deprecated, please use use_activation instead.
+    """
+    use_activation: bool | None = None
    """
    Whether to apply activation function to the classification outputs.
    Defaults to True.
@@ -59,11 +70,6 @@ class PoolerConfig:
    """
    ## for reward models
-    softmax: bool | None = None
-    """
-    Whether to apply softmax to the reward outputs.
-    Defaults to True.
-    """
    step_tag_id: int | None = None
    """
    If set, only the score corresponding to the `step_tag_id` in the
@@ -77,6 +83,10 @@ class PoolerConfig:
    `math-shepherd-mistral-7b-prm` model.
    """
+    def __post_init__(self):
+        # Resolve `use_activation` through get_use_activation, which checks
+        # the legacy `softmax` and `activation` fields (in that order) before
+        # `use_activation` itself and logs a deprecation warning when one of
+        # the legacy fields is set.
+        self.use_activation = get_use_activation(self)
    def compute_hash(self) -> str:
        """
        WARNING: Whenever a new field is added to this config,
@@ -94,3 +104,19 @@ class PoolerConfig:
        factors: list[Any] = []
        hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
        return hash_str
+def get_use_activation(o: object) -> bool | None:
+    """Resolve the effective `use_activation` flag of a config or request.
+
+    The deprecated `softmax` and `activation` attributes take precedence, in
+    that order, over `use_activation`; using either one logs a deprecation
+    warning.
+
+    Args:
+        o: Any object that may carry `softmax`, `activation` and/or
+            `use_activation` attributes. Missing attributes count as None.
+
+    Returns:
+        The first legacy flag that is not None (normalized to bool),
+        otherwise the value of `use_activation` (None when it is unset).
+    """
+    # The walrus must be parenthesized: `:=` binds looser than `is not`, so
+    # `x := getattr(...) is not None` would store the comparison result
+    # (always True inside the branch) and turn an explicit False into True.
+    if (softmax := getattr(o, "softmax", None)) is not None:
+        logger.warning_once(
+            "softmax will be deprecated, please use use_activation instead."
+        )
+        # PoolerConfig declares the legacy fields as `float | None`, so
+        # normalize to a real bool before it is stored as use_activation.
+        return bool(softmax)
+    if (activation := getattr(o, "activation", None)) is not None:
+        logger.warning_once(
+            "activation will be deprecated, please use use_activation instead."
+        )
+        return bool(activation)
+    return getattr(o, "use_activation", None)
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -107,6 +107,7 @@ from vllm.entrypoints.utils import (
 )
 from vllm.logger import init_logger
 from vllm.reasoning import ReasoningParserManager
+from vllm.tasks import POOLING_TASKS
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.utils.network_utils import is_valid_ipv6_address
@@ -1748,12 +1749,7 @@ async def init_app_state(
                log_error_stack=args.log_error_stack,
            )
        )
-        if (
+        if any(task in POOLING_TASKS for task in supported_tasks)
-            any(
-                task in supported_tasks
-                for task in ["token_embed", "token_classify", "plugin"]
-            )
-        )
        else None
    )
    state.openai_serving_embedding = (

--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -49,6 +49,8 @@ from openai.types.responses.response_reasoning_item import (
 )
 from openai_harmony import Message as OpenAIHarmonyMessage
+from vllm.config.pooler import get_use_activation
+from vllm.tasks import PoolingTask
 from vllm.utils.serial_utils import (
    EmbedDType,
    EncodingFormat,
@@ -1669,8 +1671,58 @@ class EmbeddingChatRequest(OpenAIBaseModel):
 EmbeddingRequest: TypeAlias = EmbeddingCompletionRequest | EmbeddingChatRequest
-PoolingCompletionRequest = EmbeddingCompletionRequest
-PoolingChatRequest = EmbeddingChatRequest
+# Extends EmbeddingCompletionRequest with an optional pooling task and the
+# activation flags (`softmax` / `activation` / `use_activation`).
+class PoolingCompletionRequest(EmbeddingCompletionRequest):
+    # Pooling task requested by the client; None when no task was given.
+    task: PoolingTask | None = None
+    # Legacy flag slated for deprecation; get_use_activation checks it first.
+    softmax: bool | None = Field(
+        default=None,
+        description="softmax will be deprecated, please use use_activation instead.",
+    )
+    # Legacy flag slated for deprecation; get_use_activation checks it second.
+    activation: bool | None = Field(
+        default=None,
+        description="activation will be deprecated, please use use_activation instead.",
+    )
+    # Preferred flag; only used when both legacy flags above are None.
+    use_activation: bool | None = Field(
+        default=None,
+        description="Whether to use activation for classification outputs. "
+        "If it is a classify or token_classify task, the default is True; "
+        "for other tasks, this value should be None.",
+    )
+    def to_pooling_params(self):
+        # Build PoolingParams from this request. `use_activation` is resolved
+        # by get_use_activation so the legacy `softmax` / `activation` fields
+        # are consulted before `use_activation`.
+        return PoolingParams(
+            truncate_prompt_tokens=self.truncate_prompt_tokens,
+            dimensions=self.dimensions,
+            normalize=self.normalize,
+            use_activation=get_use_activation(self),
+        )
+# Extends EmbeddingChatRequest with an optional pooling task and the
+# activation flags (`softmax` / `activation` / `use_activation`).
+class PoolingChatRequest(EmbeddingChatRequest):
+    # Pooling task requested by the client; None when no task was given.
+    task: PoolingTask | None = None
+    # Legacy flag slated for deprecation; get_use_activation checks it first.
+    softmax: bool | None = Field(
+        default=None,
+        description="softmax will be deprecated, please use use_activation instead.",
+    )
+    # Legacy flag slated for deprecation; get_use_activation checks it second.
+    activation: bool | None = Field(
+        default=None,
+        description="activation will be deprecated, please use use_activation instead.",
+    )
+    # Preferred flag; only used when both legacy flags above are None.
+    use_activation: bool | None = Field(
+        default=None,
+        description="Whether to use activation for classification outputs. "
+        "If it is a classify or token_classify task, the default is True; "
+        "for other tasks, this value should be None.",
+    )
+    def to_pooling_params(self):
+        # Build PoolingParams from this request. `use_activation` is resolved
+        # by get_use_activation so the legacy `softmax` / `activation` fields
+        # are consulted before `use_activation`.
+        return PoolingParams(
+            truncate_prompt_tokens=self.truncate_prompt_tokens,
+            dimensions=self.dimensions,
+            normalize=self.normalize,
+            use_activation=get_use_activation(self),
+        )
 T = TypeVar("T")
@@ -1686,6 +1738,7 @@ class IOProcessorRequest(OpenAIBaseModel, Generic[T]):
    """
    data: T
+    task: PoolingTask = "plugin"
    encoding_format: EncodingFormat = "float"
    embed_dtype: EmbedDType = Field(
        default="float32",
@@ -1749,14 +1802,27 @@ class ScoreRequest(OpenAIBaseModel):
        ),
    )
-    activation: bool | None = None
+    softmax: bool | None = Field(
+        default=None,
+        description="softmax will be deprecated, please use use_activation instead.",
+    )
+    activation: bool | None = Field(
+        default=None,
+        description="activation will be deprecated, please use use_activation instead.",
+    )
+    use_activation: bool | None = Field(
+        default=None,
+        description="Whether to use activation for classification outputs. "
+        "Default is True.",
+    )
    # --8<-- [end:score-extra-params]
    def to_pooling_params(self):
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            activation=self.activation,
+            use_activation=get_use_activation(self),
        )
@@ -1783,14 +1849,27 @@ class RerankRequest(OpenAIBaseModel):
        ),
    )
-    activation: bool | None = None
+    softmax: bool | None = Field(
+        default=None,
+        description="softmax will be deprecated, please use use_activation instead.",
+    )
+    activation: bool | None = Field(
+        default=None,
+        description="activation will be deprecated, please use use_activation instead.",
+    )
+    use_activation: bool | None = Field(
+        default=None,
+        description="Whether to use activation for classification outputs. "
+        "Default is True.",
+    )
    # --8<-- [end:rerank-extra-params]
    def to_pooling_params(self):
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            activation=self.activation,
+            use_activation=get_use_activation(self),
        )
@@ -1958,14 +2037,27 @@ class ClassificationRequest(OpenAIBaseModel):
        ),
    )
-    activation: bool | None = None
+    softmax: bool | None = Field(
+        default=None,
+        description="softmax will be deprecated, please use use_activation instead.",
+    )
+    activation: bool | None = Field(
+        default=None,
+        description="activation will be deprecated, please use use_activation instead.",
+    )
+    use_activation: bool | None = Field(
+        default=None,
+        description="Whether to use activation for classification outputs. "
+        "Default is True.",
+    )
    # --8<-- [end:classification-extra-params]
    def to_pooling_params(self):
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            activation=self.activation,
+            use_activation=get_use_activation(self),
        )

--- a/vllm/entrypoints/openai/serving_pooling.py
+++ b/vllm/entrypoints/openai/serving_pooling.py
@@ -170,6 +170,7 @@ class OpenAIServingPooling(OpenAIServing):
                pooling_params = request.to_pooling_params()
            pooling_task: PoolingTask
+            if request.task is None:
                if "token_embed" in self.supported_tasks:
                    pooling_task = "token_embed"
                elif "token_classify" in self.supported_tasks:
@@ -180,6 +181,14 @@ class OpenAIServingPooling(OpenAIServing):
                    return self.create_error_response(
                        f"pooling_task must be one of {self.supported_tasks}."
                    )
+            else:
+                pooling_task = request.task
+            if pooling_task not in self.supported_tasks:
+                return self.create_error_response(
+                    f"Task {pooling_task} is not supported, it"
+                    f" must be one of {self.supported_tasks}."
+                )
            try:
                pooling_params.verify(pooling_task, self.model_config)

--- a/vllm/model_executor/layers/pooler.py
+++ b/vllm/model_executor/layers/pooler.py
@@ -607,7 +607,7 @@ class ClassifierPooler(Pooler):
            pooled_data -= self.logit_bias
        pooling_params = get_pooling_params(pooling_metadata)
-        flags = [p.activation for p in pooling_params]
+        flags = [p.use_activation for p in pooling_params]
        if len(set(flags)) == 1:
            scores = self.act_fn(pooled_data) if flags[0] else pooled_data
@@ -681,7 +681,7 @@ class TokenClassifierPoolerHead(nn.Module):
        if self.logit_bias is not None:
            scores -= self.logit_bias
-        if pooling_param.activation:
+        if pooling_param.use_activation:
            scores = self.act_fn(scores)
        # scores shape: [n_token, num_labels]

--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -53,8 +53,8 @@ class JambaForSequenceClassificationConfig(VerifyAndUpdateConfig):
    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        pooler_config = vllm_config.model_config.pooler_config
-        if pooler_config.activation is None:
+        if pooler_config.use_activation is None:
-            pooler_config.activation = False
+            pooler_config.use_activation = False
 class JinaRobertaModelConfig(VerifyAndUpdateConfig):

--- a/vllm/pooling_params.py
+++ b/vllm/pooling_params.py
@@ -2,16 +2,15 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from copy import deepcopy
-from typing import TYPE_CHECKING, Annotated, Any, Optional
+from typing import Annotated, Any, Optional
 import msgspec
+from vllm.config import ModelConfig, PoolerConfig
+from vllm.config.pooler import get_use_activation
 from vllm.sampling_params import RequestOutputKind
 from vllm.tasks import PoolingTask
-if TYPE_CHECKING:
-    from vllm.config import ModelConfig, PoolerConfig
 class PoolingParams(
    msgspec.Struct,
@@ -25,10 +24,12 @@ class PoolingParams(
            Set to -1 to use the model's default truncation size.
            Set to k to keep only the last k tokens (left truncation).
            Set to None to disable truncation.
-        normalize: Whether to normalize the embeddings outputs.
        dimensions: Reduce the dimensions of embeddings
            if model support matryoshka representation.
-        activation: Whether to apply activation function to
+        normalize: Whether to normalize the embeddings outputs.
+        softmax: softmax will be deprecated, please use use_activation instead.
+        activation: activation will be deprecated, please use use_activation instead.
+        use_activation: Whether to apply activation function to
            the classification outputs.
    """
@@ -44,7 +45,9 @@ class PoolingParams(
    ## for classification, scoring and rerank
    # --8<-- [start:classification-pooling-params]
+    softmax: bool | None = None
    activation: bool | None = None
+    use_activation: bool | None = None
    # --8<-- [end:classification-pooling-params]
    ## for step pooling models
@@ -59,16 +62,16 @@ class PoolingParams(
    @property
    def all_parameters(self) -> list[str]:
-        return ["dimensions", "normalize", "activation"]
+        return ["dimensions", "normalize", "use_activation"]
    @property
    def valid_parameters(self):
        return {
            "embed": ["dimensions", "normalize"],
-            "classify": ["activation"],
+            "classify": ["use_activation"],
-            "score": ["activation"],
+            "score": ["use_activation"],
            "token_embed": ["dimensions", "normalize"],
-            "token_classify": ["activation"],
+            "token_classify": ["use_activation"],
        }
    def clone(self) -> "PoolingParams":
@@ -84,6 +87,9 @@ class PoolingParams(
            msg = f"You cannot overwrite {self.task=!r} with {task=!r}!"
            raise ValueError(msg)
+        # raise deprecated warning for softmax and activation
+        self.use_activation = get_use_activation(self)
        # plugin task uses io_processor.parse_request to verify inputs,
        # skipping PoolingParams verify
        if self.task == "plugin":
@@ -168,8 +174,8 @@ class PoolingParams(
                    raise ValueError("Dimensions must be greater than 0")
        elif self.task in ["classify", "score", "token_classify"]:
-            if self.activation is None:
+            if self.use_activation is None:
-                self.activation = True
+                self.use_activation = True
        else:
            raise ValueError(f"Unknown pooling task: {self.task}")
@@ -197,7 +203,7 @@ class PoolingParams(
            f"task={self.task}, "
            f"normalize={self.normalize}, "
            f"dimensions={self.dimensions}, "
-            f"activation={self.activation}, "
+            f"use_activation={self.use_activation}, "
            f"step_tag_id={self.step_tag_id}, "
            f"returned_token_ids={self.returned_token_ids}, "
            f"requires_token_ids={self.requires_token_ids}, "