processor.py 10.5 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from functools import lru_cache
5
from typing import TYPE_CHECKING, Any, cast
6

7
8
9
10
11
12
from transformers import (
    AutoFeatureExtractor,
    AutoImageProcessor,
    AutoProcessor,
    AutoVideoProcessor,
)
13
14
from transformers.feature_extraction_utils import FeatureExtractionMixin
from transformers.image_processing_utils import BaseImageProcessor
15
from transformers.processing_utils import ProcessorMixin
16
from transformers.video_processing_utils import BaseVideoProcessor
17
18
from typing_extensions import TypeVar

19
from vllm.transformers_utils.utils import convert_model_repo_to_path
20
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
21

22
23
24
25
if TYPE_CHECKING:
    from vllm.config import ModelConfig

# Type variable for processor classes: bounded by, and defaulting to,
# `ProcessorMixin` (the `default=` argument comes from `typing_extensions`).
_P = TypeVar("_P", bound=ProcessorMixin, default=ProcessorMixin)
26
# Type variable for video processor classes: bounded by, and defaulting to,
# `BaseVideoProcessor`.
_V = TypeVar("_V", bound=BaseVideoProcessor, default=BaseVideoProcessor)
27
28
29
30
31
32
33
34
35
36
37
38
39


class HashableDict(dict):
    """
    A dictionary that can be hashed by lru_cache.

    The hash is computed from the current items. Values that are themselves
    plain containers (``dict``, ``list``, ``tuple``, ``set``) are converted
    to immutable equivalents first, so nested keyword arguments such as
    ``{"size": {"height": 1, "width": 2}}`` no longer raise ``TypeError``.
    For dictionaries whose values are already hashable, the hash equals
    ``hash(frozenset(self.items()))``.

    NOTE: the hash follows the contents, so an instance must not be mutated
    while it is used as a cache key.
    """

    @staticmethod
    def _freeze(value: Any) -> Any:
        """Return a hashable stand-in for ``value``.

        Dicts become a frozenset of ``(key, frozen value)`` pairs, lists and
        tuples become tuples of frozen items, sets become frozensets. Any
        other value is returned unchanged.
        """
        if isinstance(value, dict):
            return frozenset(
                (key, HashableDict._freeze(item)) for key, item in value.items()
            )
        if isinstance(value, (list, tuple)):
            return tuple(HashableDict._freeze(item) for item in value)
        if isinstance(value, set):
            # Set members are hashable by construction; no recursion needed.
            return frozenset(value)
        return value

    # NOTE: pythonic dict is not hashable,
    # we override on it directly for simplicity
    def __hash__(self) -> int:  # type: ignore[override]
        return hash(self._freeze(self))


40
41
42
43
44
45
46
47
48
class HashableList(list):
    """
    A list that can be hashed by lru_cache.

    The hash is computed from the current elements. Elements that are
    themselves plain containers (``dict``, ``list``, ``tuple``, ``set``) are
    converted to immutable equivalents first, so a list of dicts or a list
    of lists no longer raises ``TypeError``. For lists whose elements are
    already hashable, the hash equals ``hash(tuple(self))``.

    NOTE: the hash follows the contents, so an instance must not be mutated
    while it is used as a cache key.
    """

    @staticmethod
    def _freeze(value: Any) -> Any:
        """Return a hashable stand-in for ``value``.

        Dicts become a frozenset of ``(key, frozen value)`` pairs, lists and
        tuples become tuples of frozen items, sets become frozensets. Any
        other value is returned unchanged.
        """
        if isinstance(value, dict):
            return frozenset(
                (key, HashableList._freeze(item)) for key, item in value.items()
            )
        if isinstance(value, (list, tuple)):
            return tuple(HashableList._freeze(item) for item in value)
        if isinstance(value, set):
            # Set members are hashable by construction; no recursion needed.
            return frozenset(value)
        return value

    def __hash__(self) -> int:  # type: ignore[override]
        return hash(self._freeze(self))


49
def _get_processor_factory_fn(processor_cls: type | tuple[type, ...]):
    """Pick the callable used to construct a processor of ``processor_cls``.

    Returns ``AutoProcessor.from_pretrained`` when ``processor_cls`` is a
    tuple of classes or is ``ProcessorMixin`` itself. Otherwise returns the
    class's own ``from_pretrained`` attribute if it has one, or the class
    itself if it does not.
    """
    wants_auto = (
        isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin
    )
    if wants_auto:
        return AutoProcessor.from_pretrained

    # Classes without `from_pretrained` are used as their own factory.
    return getattr(processor_cls, "from_pretrained", processor_cls)

57

58
59
def _merge_mm_kwargs(
    model_config: "ModelConfig",
    processor_cls: type | tuple[type, ...],
    /,
    **kwargs,
):
    """Build the hashable keyword arguments to pass to a cached loader.

    ``kwargs`` are first merged through the model's multimodal config
    (``merge_mm_processor_kwargs``), then filtered by
    ``get_allowed_kwarg_only_overrides`` against the factory callable
    selected for ``processor_cls``. Finally, top-level ``dict`` and ``list``
    values are wrapped so the result can be used as part of an ``lru_cache``
    key.

    Returns:
        The filtered mapping, with ``dict`` values replaced by
        ``HashableDict`` and ``list`` values replaced by ``HashableList``.
    """
    merged = model_config.get_multimodal_config().merge_mm_processor_kwargs(
        kwargs
    )
    allowed = get_allowed_kwarg_only_overrides(
        _get_processor_factory_fn(processor_cls),
        merged,
        requires_kw_only=False,
        allow_var_kwargs=True,
    )

    # NOTE: plain dicts/lists are unhashable and would make the
    # `lru_cache`-wrapped loaders raise, so swap them for hashable wrappers.
    # Only existing keys are reassigned, which is safe during iteration.
    for name, current in allowed.items():
        if isinstance(current, dict):
            allowed[name] = HashableDict(current)
        elif isinstance(current, list):
            allowed[name] = HashableList(current)

    return allowed
85

86
87
88

def get_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls: type[_P] | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P:
    """Load a processor for the given model name via HuggingFace.

    Args:
        processor_name: Model name or path. It is passed through
            ``convert_model_repo_to_path`` before loading.
        *args: Extra positional arguments forwarded to the loader.
        revision: Revision to load; ``None`` is treated as ``"main"``.
        trust_remote_code: Forwarded to ``from_pretrained``. Also selects
            how a ``ValueError`` raised while loading is reported.
        processor_cls: Expected processor class, or a tuple of classes.
            A tuple or ``ProcessorMixin`` itself loads via ``AutoProcessor``.
            A ``ProcessorMixin`` subclass loads via its own
            ``from_pretrained``. Any other class is instantiated directly
            with ``*args`` and ``**kwargs``.
        **kwargs: Extra keyword arguments forwarded to the loader.

    Returns:
        The loaded processor, verified to be an instance of
        ``processor_cls``.

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false.
        ValueError: Re-raised as-is when ``trust_remote_code`` is true.
        TypeError: If the loaded object is not an instance of
            ``processor_cls``.
    """
    effective_revision = "main" if revision is None else revision

    try:
        processor_name = convert_model_repo_to_path(processor_name)

        use_auto = (
            isinstance(processor_cls, tuple) or processor_cls == ProcessorMixin
        )
        # `issubclass` is only reached for a single class (never a tuple).
        if use_auto or issubclass(processor_cls, ProcessorMixin):
            loader = AutoProcessor if use_auto else processor_cls
            processor = loader.from_pretrained(
                processor_name,
                *args,
                revision=effective_revision,
                trust_remote_code=trust_remote_code,
                **kwargs,
            )
        else:
            # Processors that are standalone classes unrelated to HF
            processor = processor_cls(*args, **kwargs)
    except ValueError as e:
        # AutoProcessor does not distinguish "processor class missing or not
        # importable" from other ValueErrors (unlike AutoTokenizer), so when
        # remote code is disabled, hint at the --trust-remote-code flag.
        if trust_remote_code:
            raise e
        err_msg = (
            "Failed to load the processor. If the processor is "
            "a custom processor not yet available in the HuggingFace "
            "transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        )
        raise RuntimeError(err_msg) from e

    if isinstance(processor, processor_cls):
        return processor

    raise TypeError(
        "Invalid type of HuggingFace processor. "
        f"Expected type: {processor_cls}, but "
        f"found type: {type(processor)}"
    )
143
144


145
146
147
# Memoized variant of `get_processor`. Every argument becomes part of the
# `lru_cache` key and therefore must be hashable.
cached_get_processor = lru_cache(get_processor)


148
149
def cached_processor_from_config(
    model_config: "ModelConfig",
    processor_cls: type[_P] | tuple[type[_P], ...] = ProcessorMixin,
    **kwargs: Any,
) -> _P:
    """Return the cached processor for the model described by ``model_config``.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``kwargs`` are merged and made hashable by
    ``_merge_mm_kwargs`` before being handed to ``cached_get_processor``.
    """
    model_name = model_config.model
    model_revision = model_config.revision
    allow_remote_code = model_config.trust_remote_code
    hashable_kwargs = _merge_mm_kwargs(model_config, processor_cls, **kwargs)

    return cached_get_processor(
        model_name,
        revision=model_revision,
        trust_remote_code=allow_remote_code,
        processor_cls=processor_cls,  # type: ignore[arg-type]
        **hashable_kwargs,
    )


162
163
164
def get_feature_extractor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    **kwargs: Any,
):
    """Load an audio feature extractor for the given model name
    via HuggingFace.

    ``processor_name`` is passed through ``convert_model_repo_to_path`` and
    the result is loaded with ``AutoFeatureExtractor.from_pretrained``,
    forwarding ``*args``, ``revision``, ``trust_remote_code`` and
    ``**kwargs``.

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false.
        ValueError: Re-raised as-is when ``trust_remote_code`` is true.
    """
    try:
        resolved_name = convert_model_repo_to_path(processor_name)
        extractor = AutoFeatureExtractor.from_pretrained(
            resolved_name,
            *args,
            revision=revision,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as e:
        # AutoFeatureExtractor does not distinguish "class missing or not
        # importable" from other ValueErrors (unlike AutoTokenizer), so when
        # remote code is disabled, hint at the --trust-remote-code flag.
        if trust_remote_code:
            raise e
        err_msg = (
            "Failed to load the feature extractor. If the feature "
            "extractor is a custom extractor not yet available in the "
            "HuggingFace transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        )
        raise RuntimeError(err_msg) from e

    return cast(FeatureExtractionMixin, extractor)


# Memoized variant of `get_feature_extractor`. Every argument becomes part
# of the `lru_cache` key and therefore must be hashable.
cached_get_feature_extractor = lru_cache(get_feature_extractor)


def cached_feature_extractor_from_config(
    model_config: "ModelConfig",
    **kwargs: Any,
):
    """Return the cached feature extractor for ``model_config``'s model.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``kwargs`` are merged and made hashable by
    ``_merge_mm_kwargs`` (checked against ``AutoFeatureExtractor``) before
    being handed to ``cached_get_feature_extractor``.
    """
    model_name = model_config.model
    model_revision = model_config.revision
    allow_remote_code = model_config.trust_remote_code
    hashable_kwargs = _merge_mm_kwargs(
        model_config, AutoFeatureExtractor, **kwargs
    )

    return cached_get_feature_extractor(
        model_name,
        revision=model_revision,
        trust_remote_code=allow_remote_code,
        **hashable_kwargs,
    )


213
214
215
def get_image_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    **kwargs: Any,
):
    """Load an image processor for the given model name via HuggingFace.

    ``processor_name`` is passed through ``convert_model_repo_to_path`` and
    the result is loaded with ``AutoImageProcessor.from_pretrained``,
    forwarding ``*args``, ``revision``, ``trust_remote_code`` and
    ``**kwargs``.

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false.
        ValueError: Re-raised as-is when ``trust_remote_code`` is true.
    """
    try:
        resolved_name = convert_model_repo_to_path(processor_name)
        image_processor = AutoImageProcessor.from_pretrained(
            resolved_name,
            *args,
            revision=revision,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as e:
        # AutoImageProcessor does not distinguish "class missing or not
        # importable" from other ValueErrors (unlike AutoTokenizer), so when
        # remote code is disabled, hint at the --trust-remote-code flag.
        if trust_remote_code:
            raise e
        err_msg = (
            "Failed to load the image processor. If the image processor is "
            "a custom processor not yet available in the HuggingFace "
            "transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        )
        raise RuntimeError(err_msg) from e

    return cast(BaseImageProcessor, image_processor)


249
250
251
252
253
254
255
256
257
# Memoized variant of `get_image_processor`. Every argument becomes part of
# the `lru_cache` key and therefore must be hashable.
cached_get_image_processor = lru_cache(get_image_processor)


def cached_image_processor_from_config(
    model_config: "ModelConfig",
    **kwargs: Any,
):
    """Return the cached image processor for ``model_config``'s model.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``kwargs`` are merged and made hashable by
    ``_merge_mm_kwargs`` (checked against ``AutoImageProcessor``) before
    being handed to ``cached_get_image_processor``.
    """
    model_name = model_config.model
    model_revision = model_config.revision
    allow_remote_code = model_config.trust_remote_code
    hashable_kwargs = _merge_mm_kwargs(
        model_config, AutoImageProcessor, **kwargs
    )

    return cached_get_image_processor(
        model_name,
        revision=model_revision,
        trust_remote_code=allow_remote_code,
        **hashable_kwargs,
    )
262
263
264
265
266


def get_video_processor(
    processor_name: str,
    *args: Any,
    revision: str | None = None,
    trust_remote_code: bool = False,
    processor_cls_overrides: type[_V] | None = None,
    **kwargs: Any,
):
    """Load a video processor for the given model name via HuggingFace.

    ``processor_name`` is passed through ``convert_model_repo_to_path`` and
    the result is loaded with ``from_pretrained`` of
    ``processor_cls_overrides`` when one is given, or of
    ``AutoVideoProcessor`` otherwise. ``*args``, ``revision``,
    ``trust_remote_code`` and ``**kwargs`` are forwarded to that call.

    Raises:
        RuntimeError: If loading raises ``ValueError`` while
            ``trust_remote_code`` is false.
        ValueError: Re-raised as-is when ``trust_remote_code`` is true.
    """
    try:
        resolved_name = convert_model_repo_to_path(processor_name)
        loader_cls = processor_cls_overrides or AutoVideoProcessor
        video_processor = loader_cls.from_pretrained(
            resolved_name,
            *args,
            revision=revision,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as e:
        # AutoVideoProcessor does not distinguish "class missing or not
        # importable" from other ValueErrors (unlike AutoTokenizer), so when
        # remote code is disabled, hint at the --trust-remote-code flag.
        if trust_remote_code:
            raise e
        err_msg = (
            "Failed to load the video processor. If the video processor is "
            "a custom processor not yet available in the HuggingFace "
            "transformers library, consider setting "
            "`trust_remote_code=True` in LLM or using the "
            "`--trust-remote-code` flag in the CLI."
        )
        raise RuntimeError(err_msg) from e

    return cast(BaseVideoProcessor, video_processor)


# Memoized variant of `get_video_processor`. Every argument becomes part of
# the `lru_cache` key and therefore must be hashable.
cached_get_video_processor = lru_cache(get_video_processor)


def cached_video_processor_from_config(
    model_config: "ModelConfig",
    processor_cls: type[_V] | None = None,
    **kwargs: Any,
):
    """Return the cached video processor for ``model_config``'s model.

    The model name, revision and ``trust_remote_code`` flag are read from
    ``model_config``. ``processor_cls`` is forwarded as
    ``processor_cls_overrides``. ``kwargs`` are merged and made hashable by
    ``_merge_mm_kwargs`` (checked against ``AutoVideoProcessor``) before
    being handed to ``cached_get_video_processor``.
    """
    model_name = model_config.model
    model_revision = model_config.revision
    allow_remote_code = model_config.trust_remote_code
    hashable_kwargs = _merge_mm_kwargs(
        model_config, AutoVideoProcessor, **kwargs
    )

    return cached_get_video_processor(
        model_name,
        revision=model_revision,
        trust_remote_code=allow_remote_code,
        processor_cls_overrides=processor_cls,  # type: ignore[arg-type]
        **hashable_kwargs,
    )