# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Utility functions for vLLM config dataclasses."""
4

5
6
7
import ast
import inspect
import textwrap
8
from collections.abc import Iterable
9
from dataclasses import MISSING, Field, field, fields, is_dataclass, replace
10
from itertools import pairwise
11
from typing import TYPE_CHECKING, Any, Protocol, TypeVar
12
13

import regex as re
14
from pydantic.fields import FieldInfo
15
from typing_extensions import runtime_checkable
16
17
18
19

if TYPE_CHECKING:
    from _typeshed import DataclassInstance
else:
    # `_typeshed` is only importable by type checkers, so fall back to `Any`
    # when the module is actually executed.
    DataclassInstance = Any

# The *type* of a dataclass (the class object, not an instance of it).
ConfigType = type[DataclassInstance]
# Type variable bound to `ConfigType`, used so helpers such as `config` can
# declare that they return the same type they were given.
ConfigT = TypeVar("ConfigT", bound=ConfigType)


def config(cls: ConfigT) -> ConfigT:
    """
    A decorator marking a dataclass as a config class, whose fields are all
    expected to have default values and a docstring.

    If a `ConfigT` is used as a CLI argument itself, the `type` keyword argument
    provided by `get_kwargs` will be
    `pydantic.TypeAdapter(ConfigT).validate_json(cli_arg)` which treats the
    `cli_arg` as a JSON string which gets validated by `pydantic`.

    Config validation is performed by the tools/pre_commit/validate_config.py
    script, which is invoked during the pre-commit checks. The decorator
    itself performs no checks at runtime: it returns `cls` unchanged.
    """
    return cls
40
41
42
43
44
45
46
47
48
49
50
51
52
53


def get_field(cls: ConfigType, name: str) -> Field:
    """Get the default factory field of a dataclass by name. Used for getting
    default factory fields in `EngineArgs`."""
    if not is_dataclass(cls):
        raise TypeError("The given class is not a dataclass.")
    cls_fields = {f.name: f for f in fields(cls)}
    if name not in cls_fields:
        raise ValueError(f"Field '{name}' not found in {cls.__name__}.")
    named_field: Field = cls_fields[name]
    if (default_factory := named_field.default_factory) is not MISSING:
        return field(default_factory=default_factory)
    if (default := named_field.default) is not MISSING:
54
55
56
57
58
59
        if isinstance(default, FieldInfo):
            # Handle pydantic.Field defaults
            if default.default_factory is not None:
                return field(default_factory=default.default_factory)
            else:
                default = default.default
60
        return field(default=default)
61

62
    raise ValueError(
63
64
        f"{cls.__name__}.{name} must have a default value or default factory."
    )
65
66


67
68
69
70
71
72
73
74
75
76
77
78
def getattr_iter(object: object, names: Iterable[str], default: Any) -> Any:
    """
    A helper function that retrieves an attribute from an object which may
    have multiple possible names. This is useful when fetching attributes from
    arbitrary `transformers.PretrainedConfig` instances.

    Args:
        object: The object to read the attribute from.
        names: Candidate attribute names, tried in iteration order.
        default: Value returned when none of the names resolve.

    Returns:
        The value of the first name in `names` that resolves on `object`
        (even if that value is `None`), otherwise `default`.
    """
    for name in names:
        # Resolve each candidate exactly once. `hasattr` followed by `getattr`
        # would evaluate properties / `__getattr__` twice.
        try:
            return getattr(object, name)
        except AttributeError:
            continue
    return default


79
80
81
82
83
84
85
86
87
88
89
90
91
def contains_object_print(text: str) -> bool:
    """
    Check if the text looks like a printed Python object, e.g.
    contains any substring matching the pattern: "at 0xFFFFFFF>"
    We match against 0x followed by 2-16 hex chars (there's
    a max of 16 on a 64-bit system).

    Args:
        text (str): The text to check

    Returns:
        result (bool): `True` if a match is found, `False` otherwise.
    """
    pattern = r"at 0x[a-fA-F0-9]{2,16}>"
    return re.search(pattern, text) is not None


def assert_hashable(text: str) -> bool:
    """Check that `text` does not contain a printed Python object.

    Args:
        text: The text that is about to be hashed.

    Returns:
        `True` if `contains_object_print(text)` finds no match.

    Raises:
        AssertionError: If `text` looks like it contains a printed Python
            object (see `contains_object_print`).
    """
    if not contains_object_print(text):
        return True
    raise AssertionError(
        "vLLM tried to hash some configs that may have Python objects ids "
        "in them. This is a bug, please file an issue. "
        f"Text being hashed: {text}"
    )
105
106
107
108
109
110
111
112
113


def get_attr_docs(cls: type[Any]) -> dict[str, str]:
    """
    Get any docstrings placed after attribute assignments in a class body.

    https://davidism.com/mit-license/
    """

114
    cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0]
115
116
117
118
119
120
121
122
123

    if not isinstance(cls_node, ast.ClassDef):
        raise TypeError("Given object was not a class.")

    out = {}

    # Consider each pair of nodes.
    for a, b in pairwise(cls_node.body):
        # Must be an assignment then a constant string.
124
125
126
127
128
129
        if (
            not isinstance(a, (ast.Assign, ast.AnnAssign))
            or not isinstance(b, ast.Expr)
            or not isinstance(b.value, ast.Constant)
            or not isinstance(b.value.value, str)
        ):
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
            continue

        doc = inspect.cleandoc(b.value.value)

        # An assignment can have multiple targets (a = b = v), but an
        # annotated assignment only has one target.
        targets = a.targets if isinstance(a, ast.Assign) else [a.target]

        for target in targets:
            # Must be assigning to a plain name.
            if not isinstance(target, ast.Name):
                continue

            out[target.id] = doc

    return out


def is_init_field(cls: ConfigType, name: str) -> bool:
    return next(f for f in fields(cls) if f.name == name).init
150
151
152
153


@runtime_checkable
class SupportsHash(Protocol):
    """Structural type for objects that provide a `compute_hash()` method.

    The protocol is runtime-checkable, so `isinstance(obj, SupportsHash)`
    works; such a check only verifies that the method exists, not its
    signature or return type.
    """

    def compute_hash(self) -> str: ...
155
156
157


class SupportsMetricsInfo(Protocol):
    """Structural type for objects that provide a `metrics_info()` method
    returning a `dict[str, str]`.

    Unlike `SupportsHash`, this protocol is not runtime-checkable, so it is
    meant for static type checking only.
    """

    def metrics_info(self) -> dict[str, str]: ...
159
160
161
162
163


def update_config(config: ConfigT, overrides: dict[str, Any]) -> ConfigT:
    """Return a copy of `config` with `overrides` applied.

    Args:
        config: The dataclass instance to update. It is not modified.
        overrides: Mapping from field name to new value. When the current
            value of a field is a dataclass and the override is a `dict`,
            the dict is applied recursively to that nested dataclass.

    Returns:
        A new object created with `dataclasses.replace`.

    Raises:
        AssertionError: If `config` has no attribute for an override key, or
            if a dataclass-valued field is overridden with something that is
            neither a dataclass nor a `dict`.
    """
    processed_overrides = {}
    for field_name, value in overrides.items():
        # Raised explicitly (rather than via `assert`) so the validation is
        # not stripped when Python runs with `-O`.
        if not hasattr(config, field_name):
            raise AssertionError(f"{type(config)} has no field `{field_name}`")
        current_value = getattr(config, field_name)
        if is_dataclass(current_value) and not is_dataclass(value):
            if not isinstance(value, dict):
                raise AssertionError(
                    f"Overrides to {type(config)}.{field_name} must be a dict"
                    f"  or {type(current_value)}, but got {type(value)}"
                )
            # Merge the dict into the existing nested dataclass.
            value = update_config(
                current_value,  # type: ignore[type-var]
                value,
            )
        processed_overrides[field_name] = value
    return replace(config, **processed_overrides)