Unverified commit 586f2867, authored by wang.yuqi, committed by GitHub
Browse files

[Model] Pooling model activation supports per request control by PoolingParams (#20538)


Signed-off-by: wang.yuqi <noooop@126.com>
parent 811ac13d
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from copy import deepcopy
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Optional
import msgspec import msgspec
...@@ -19,13 +20,25 @@ class PoolingParams( ...@@ -19,13 +20,25 @@ class PoolingParams(
"""API parameters for pooling models. """API parameters for pooling models.
Attributes: Attributes:
normalize: Whether to normalize the embeddings outputs.
dimensions: Reduce the dimensions of embeddings dimensions: Reduce the dimensions of embeddings
if model support matryoshka representation. if model support matryoshka representation.
activation: Whether to apply activation function to
the classification outputs.
softmax: Whether to apply softmax to the reward outputs.
""" """
## for embeddings models
dimensions: Optional[int] = None dimensions: Optional[int] = None
normalize: Optional[bool] = None
output_kind: RequestOutputKind = RequestOutputKind.FINAL_ONLY ## for classification models
activation: Optional[bool] = None
## for reward models
softmax: Optional[bool] = None
step_tag_id: Optional[int] = None
returned_token_ids: Optional[list[int]] = None
task: Optional[PoolingTask] = None task: Optional[PoolingTask] = None
"""Internal use only.""" """Internal use only."""
...@@ -33,15 +46,32 @@ class PoolingParams( ...@@ -33,15 +46,32 @@ class PoolingParams(
requires_token_ids: bool = False requires_token_ids: bool = False
"""Internal use only.""" """Internal use only."""
output_kind: RequestOutputKind = RequestOutputKind.FINAL_ONLY
@property
def all_parameters(self) -> list[str]:
    """Return the names of the request-level pooling parameter fields.

    A new list is built on every access, so callers may mutate the
    result without affecting later reads.
    """
    names = (
        "dimensions",
        "normalize",
        "activation",
        "softmax",
        "step_tag_id",
        "returned_token_ids",
    )
    return list(names)
@property
def valid_parameters(self):
    """Map each pooling task name to the parameter names it accepts.

    A new dict (with new lists) is built on every access.
    """
    per_task = {}
    # for embeddings models
    per_task["embed"] = ["dimensions", "normalize"]
    # for classification models; "score" takes the same parameter
    per_task["classify"] = ["activation"]
    per_task["score"] = ["activation"]
    # for reward models
    per_task["encode"] = ["softmax", "step_tag_id", "returned_token_ids"]
    return per_task
def clone(self) -> "PoolingParams": def clone(self) -> "PoolingParams":
"""Returns a deep copy of the PoolingParams instance.""" """Returns a deep copy of the PoolingParams instance."""
return PoolingParams( return deepcopy(self)
dimensions=self.dimensions,
task=self.task, def verify(self,
requires_token_ids=self.requires_token_ids, task: PoolingTask,
) model_config: Optional["ModelConfig"] = None) -> None:
def verify(self, task: PoolingTask, model_config: "ModelConfig") -> None:
if self.task is None: if self.task is None:
self.task = task self.task = task
elif self.task != task: elif self.task != task:
...@@ -52,28 +82,91 @@ class PoolingParams( ...@@ -52,28 +82,91 @@ class PoolingParams(
# which is not available in model config. So, it's not included # which is not available in model config. So, it's not included
# in this method # in this method
if self.dimensions is not None: self._merge_default_parameters(model_config)
if not model_config.is_matryoshka: self._set_default_parameters(model_config)
raise ValueError( self._verify_valid_parameters()
f'Model "{model_config.served_model_name}" does not '
f'support matryoshka representation, ' def _merge_default_parameters(self,
f'changing output dimensions will lead to poor results.') model_config: Optional["ModelConfig"] = None
) -> None:
if model_config is None:
return
mds = model_config.matryoshka_dimensions pooler_config = model_config.pooler_config
if mds is not None: if pooler_config is None:
if self.dimensions not in mds: return
assert self.task is not None, "task must be set"
valid_parameters = self.valid_parameters[self.task]
for k in valid_parameters:
if getattr(pooler_config, k, None) is None:
continue
if getattr(self, k, None) is None:
setattr(self, k, getattr(pooler_config, k))
def _set_default_parameters(self, model_config: Optional["ModelConfig"]):
if self.task == "embed":
if self.normalize is None:
self.normalize = True
if self.dimensions is not None and model_config is not None:
if not model_config.is_matryoshka:
raise ValueError( raise ValueError(
f'Model "{model_config.served_model_name}" ' f'Model "{model_config.served_model_name}" does not '
f'only supports {str(mds)} matryoshka dimensions, ' f'support matryoshka representation, '
f'use other output dimensions will ' f'changing output dimensions will lead to poor results.'
f'lead to poor results.') )
elif self.dimensions < 1:
raise ValueError("Dimensions must be greater than 0") mds = model_config.matryoshka_dimensions
if mds is not None:
if self.dimensions not in mds:
raise ValueError(
f'Model "{model_config.served_model_name}" '
f'only supports {str(mds)} matryoshka dimensions, '
f'use other output dimensions will '
f'lead to poor results.')
elif self.dimensions < 1:
raise ValueError("Dimensions must be greater than 0")
elif self.task in ["classify", "score"]:
if self.activation is None:
self.activation = True
elif self.task == "encode":
if self.softmax is None:
self.softmax = True
else:
raise ValueError(f"Unknown pooling task: {self.task}")
def _verify_valid_parameters(self):
    """Reject parameters that are set but not accepted by the current task.

    Every name in ``all_parameters`` that is missing from the current
    task's entry in ``valid_parameters`` must still be ``None``.

    Raises:
        ValueError: if one or more such parameters have a non-None value.
    """
    assert self.task is not None, "task must be set"
    allowed = self.valid_parameters[self.task]

    rejected = [
        name for name in self.all_parameters
        if name not in allowed and getattr(self, name, None) is not None
    ]
    if not rejected:
        return

    raise ValueError(
        f"Task {self.task} only supports {allowed} "
        f"parameters, does not support "
        f"{rejected} parameters")
def __repr__(self) -> str: def __repr__(self) -> str:
return (f"PoolingParams(" return (f"PoolingParams("
f"dimensions={self.dimensions}, "
f"task={self.task}, " f"task={self.task}, "
f"normalize={self.normalize}, "
f"dimensions={self.dimensions}, "
f"activation={self.activation}, "
f"softmax={self.softmax}, "
f"step_tag_id={self.step_tag_id}, "
f"returned_token_ids={self.returned_token_ids}, "
f"requires_token_ids={self.requires_token_ids})") f"requires_token_ids={self.requires_token_ids})")
def __post_init__(self) -> None: def __post_init__(self) -> None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment