pooler.py 3.2 KB
Newer Older
1
2
3
4
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import hashlib
5
from typing import Any
6
7
8
9
10
11
12
13
14
15
16

from pydantic.dataclasses import dataclass

from vllm.config.utils import config


@config
@dataclass
class PoolerConfig:
    """Controls the behavior of output pooling in pooling models."""

    pooling_type: str | None = None
    """
    The pooling method of the pooling model. This should be a key in
    [`vllm.model_executor.layers.pooler.PoolingType`][].
    """

    ## for embeddings models
    normalize: bool | None = None
    """
    Whether to normalize the embeddings outputs. Defaults to True.
    """
    dimensions: int | None = None
    """
    Reduce the dimensions of embeddings if the model
    supports matryoshka representation. Defaults to None.
    """
    enable_chunked_processing: bool | None = None
    """
    Whether to enable chunked processing for long inputs that exceed the model's
    maximum position embeddings. When enabled, long inputs will be split into
    chunks, processed separately, and then aggregated using weighted averaging.
    This allows embedding models to handle arbitrarily long text without CUDA
    errors. Defaults to False.
    """
    max_embed_len: int | None = None
    """
    Maximum input length allowed for embedding generation. When set, allows
    inputs longer than max_embed_len to be accepted for embedding models.
    When an input exceeds max_embed_len, it will be handled according to
    the original max_model_len validation logic.
    Defaults to None (i.e. set to max_model_len).
    """

    ## for classification models
    activation: bool | None = None
    """
    Whether to apply activation function to the classification outputs.
    Defaults to True.
    """
    logit_bias: float | None = None
    """
    If provided, apply classification logit biases. Defaults to None.
    """

    ## for reward models
    softmax: bool | None = None
    """
    Whether to apply softmax to the reward outputs.
    Defaults to True.
    """
    step_tag_id: int | None = None
    """
    If set, only the score corresponding to the ``step_tag_id`` in the
    generated sentence should be returned. Otherwise, the scores for all tokens
    are returned.
    """
    returned_token_ids: list[int] | None = None
    """
    A list of indices for the vocabulary dimensions to be extracted,
    such as the token IDs of ``good_token`` and ``bad_token`` in the
    ``math-shepherd-mistral-7b-prm`` model.
    """

    def compute_hash(self) -> str:
        """
        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.

        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Returns:
            The hex MD5 digest of the stringified factors list. The list is
            currently empty, so every instance yields the same digest.
        """
        # no factors to consider.
        # this config will not affect the computation graph.
        factors: list[Any] = []
        # MD5 is used only as a fingerprint, hence usedforsecurity=False.
        return hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()