# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import json
import os
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

from huggingface_hub import model_info

from ..configuration_utils import PretrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer
from ..utils import (
    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
    is_kenlm_available,
    is_offline_mode,
    is_pyctcdecode_available,
    is_tf_available,
    is_torch_available,
    logging,
)
from .audio_classification import AudioClassificationPipeline
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
from .base import (
    ArgumentHandler,
    CsvPipelineDataFormat,
    JsonPipelineDataFormat,
    PipedPipelineDataFormat,
    Pipeline,
    PipelineDataFormat,
    PipelineException,
    PipelineRegistry,
    get_default_model_and_revision,
    infer_framework_load_model,
)
from .conversational import Conversation, ConversationalPipeline
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
from .image_segmentation import ImageSegmentationPipeline
from .image_to_text import ImageToTextPipeline
from .object_detection import ObjectDetectionPipeline
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline
from .text_classification import TextClassificationPipeline
from .text_generation import TextGenerationPipeline
from .token_classification import (
    AggregationStrategy,
    NerPipeline,
    TokenClassificationArgumentHandler,
    TokenClassificationPipeline,
)
from .video_classification import VideoClassificationPipeline
from .visual_question_answering import VisualQuestionAnsweringPipeline
from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline
from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline
from .zero_shot_image_classification import ZeroShotImageClassificationPipeline
from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline


if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import (
        TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TF_MODEL_WITH_LM_HEAD_MAPPING,
        TFAutoModel,
        TFAutoModelForCausalLM,
        TFAutoModelForImageClassification,
        TFAutoModelForMaskedLM,
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSequenceClassification,
        TFAutoModelForTableQuestionAnswering,
        TFAutoModelForTokenClassification,
        TFAutoModelForVision2Seq,
        TFAutoModelForZeroShotImageClassification,
    )

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        MODEL_FOR_MASKED_LM_MAPPING,
        MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING,
        AutoModel,
        AutoModelForAudioClassification,
        AutoModelForCausalLM,
        AutoModelForCTC,
        AutoModelForDocumentQuestionAnswering,
        AutoModelForImageClassification,
        AutoModelForImageSegmentation,
        AutoModelForMaskedLM,
        AutoModelForObjectDetection,
        AutoModelForQuestionAnswering,
        AutoModelForSemanticSegmentation,
        AutoModelForSeq2SeqLM,
        AutoModelForSequenceClassification,
        AutoModelForSpeechSeq2Seq,
        AutoModelForTableQuestionAnswering,
        AutoModelForTokenClassification,
        AutoModelForVideoClassification,
        AutoModelForVision2Seq,
        AutoModelForVisualQuestionAnswering,
        AutoModelForZeroShotImageClassification,
        AutoModelForZeroShotObjectDetection,
    )


if TYPE_CHECKING:
    from ..modeling_tf_utils import TFPreTrainedModel
    from ..modeling_utils import PreTrainedModel
    from ..tokenization_utils_fast import PreTrainedTokenizerFast


logger = logging.get_logger(__name__)


# Register all the supported tasks here
TASK_ALIASES = {
    "sentiment-analysis": "text-classification",
    "ner": "token-classification",
    "vqa": "visual-question-answering",
}
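# Aliases are resolved by `PipelineRegistry.check_task`, so e.g. `pipeline("ner")`
# behaves exactly like `pipeline("token-classification")`.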
SUPPORTED_TASKS = {
    "audio-classification": {
        "impl": AudioClassificationPipeline,
        "tf": (),
        "pt": (AutoModelForAudioClassification,) if is_torch_available() else (),
        "default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}},
        "type": "audio",
    },
    "automatic-speech-recognition": {
        "impl": AutomaticSpeechRecognitionPipeline,
        "tf": (),
        "pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}},
        "type": "multimodal",
    },
    "feature-extraction": {
        "impl": FeatureExtractionPipeline,
        "tf": (TFAutoModel,) if is_tf_available() else (),
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {"model": {"pt": ("distilbert-base-cased", "935ac13"), "tf": ("distilbert-base-cased", "935ac13")}},
        "type": "multimodal",
    },
    "text-classification": {
        "impl": TextClassificationPipeline,
        "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
                "tf": ("distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
            },
        },
        "type": "text",
    },
    "token-classification": {
        "impl": TokenClassificationPipeline,
        "tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (),
        "pt": (AutoModelForTokenClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
                "tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
            },
        },
        "type": "text",
    },
    "question-answering": {
        "impl": QuestionAnsweringPipeline,
        "tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (),
        "pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("distilbert-base-cased-distilled-squad", "626af31"),
                "tf": ("distilbert-base-cased-distilled-squad", "626af31"),
            },
        },
        "type": "text",
    },
    "table-question-answering": {
        "impl": TableQuestionAnsweringPipeline,
        "pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (),
        "tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (),
        "default": {
            "model": {
                "pt": ("google/tapas-base-finetuned-wtq", "69ceee2"),
                "tf": ("google/tapas-base-finetuned-wtq", "69ceee2"),
            },
        },
        "type": "text",
    },
    "visual-question-answering": {
        "impl": VisualQuestionAnsweringPipeline,
        "pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available() else (),
        "tf": (),
        "default": {
            "model": {"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59")},
        },
        "type": "multimodal",
    },
    "document-question-answering": {
        "impl": DocumentQuestionAnsweringPipeline,
        "pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (),
        "tf": (),
        "default": {
            "model": {"pt": ("impira/layoutlm-document-qa", "52e01b3")},
        },
        "type": "multimodal",
    },
    "fill-mask": {
        "impl": FillMaskPipeline,
        "tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),
        "pt": (AutoModelForMaskedLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("distilroberta-base", "ec58a5b"), "tf": ("distilroberta-base", "ec58a5b")}},
        "type": "text",
    },
    "summarization": {
        "impl": SummarizationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"), "tf": ("t5-small", "d769bba")}},
        "type": "text",
    },
    # This task is a special case as it's parametrized by SRC, TGT languages.
    "translation": {
        "impl": TranslationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {
            ("en", "fr"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
            ("en", "de"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
            ("en", "ro"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
        },
        "type": "text",
    },
    "text2text-generation": {
        "impl": Text2TextGenerationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
        "type": "text",
    },
    "text-generation": {
        "impl": TextGenerationPipeline,
        "tf": (TFAutoModelForCausalLM,) if is_tf_available() else (),
        "pt": (AutoModelForCausalLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("gpt2", "6c0e608"), "tf": ("gpt2", "6c0e608")}},
        "type": "text",
    },
    "zero-shot-classification": {
        "impl": ZeroShotClassificationPipeline,
        "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {
            "model": {"pt": ("facebook/bart-large-mnli", "c626438"), "tf": ("roberta-large-mnli", "130fb28")},
            "config": {"pt": ("facebook/bart-large-mnli", "c626438"), "tf": ("roberta-large-mnli", "130fb28")},
        },
        "type": "text",
    },
    "zero-shot-image-classification": {
        "impl": ZeroShotImageClassificationPipeline,
        "tf": (TFAutoModelForZeroShotImageClassification,) if is_tf_available() else (),
        "pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("openai/clip-vit-base-patch32", "f4881ba"),
                "tf": ("openai/clip-vit-base-patch32", "f4881ba"),
            }
        },
        "type": "multimodal",
    },
    "zero-shot-audio-classification": {
        "impl": ZeroShotAudioClassificationPipeline,
        "tf": (),
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("laion/clap-htsat-fused", "973b6e5"),
            }
        },
        "type": "multimodal",
    },
    "conversational": {
        "impl": ConversationalPipeline,
        "tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (),
        "default": {
            "model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")}
        },
        "type": "text",
    },
    "image-classification": {
        "impl": ImageClassificationPipeline,
        "tf": (TFAutoModelForImageClassification,) if is_tf_available() else (),
        "pt": (AutoModelForImageClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("google/vit-base-patch16-224", "5dca96d"),
                "tf": ("google/vit-base-patch16-224", "5dca96d"),
            }
        },
        "type": "image",
    },
    "image-segmentation": {
        "impl": ImageSegmentationPipeline,
        "tf": (),
        "pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}},
        "type": "multimodal",
    },
    "image-to-text": {
        "impl": ImageToTextPipeline,
        "tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (),
        "pt": (AutoModelForVision2Seq,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("ydshieh/vit-gpt2-coco-en", "65636df"),
                "tf": ("ydshieh/vit-gpt2-coco-en", "65636df"),
            }
        },
        "type": "multimodal",
    },
    "object-detection": {
        "impl": ObjectDetectionPipeline,
        "tf": (),
        "pt": (AutoModelForObjectDetection,) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}},
        "type": "multimodal",
    },
    "zero-shot-object-detection": {
        "impl": ZeroShotObjectDetectionPipeline,
        "tf": (),
        "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (),
        "default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}},
        "type": "multimodal",
    },
    "depth-estimation": {
        "impl": DepthEstimationPipeline,
        "tf": (),
        "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (),
        "default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}},
        "type": "image",
    },
    "video-classification": {
        "impl": VideoClassificationPipeline,
        "tf": (),
        "pt": (AutoModelForVideoClassification,) if is_torch_available() else (),
        "default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}},
        "type": "video",
    },
}
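# Each SUPPORTED_TASKS entry follows the same schema: "impl" is the Pipeline subclass,
# "tf"/"pt" are tuples of candidate auto-model classes per framework, "default" pins a
# default checkpoint as a (model id, commit hash) pair per framework, and "type" drives
# the NO_*_TASKS bookkeeping below. A sketch of a hypothetical entry (names are
# illustrative, not a real task):
#
#     "my-task": {
#         "impl": MyTaskPipeline,  # hypothetical Pipeline subclass
#         "tf": (),  # no TensorFlow support
#         "pt": (AutoModel,) if is_torch_available() else (),
#         "default": {"model": {"pt": ("org/checkpoint", "abc1234")}},  # pinned revision
#         "type": "text",
#     },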

NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set()

# Those model configs are special: they are generic over their task, meaning
# any tokenizer/feature_extractor might be used for a given model, so we cannot
# use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to
# see if the model defines such objects or not.
MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"}
for task, values in SUPPORTED_TASKS.items():
    if values["type"] == "text":
        NO_FEATURE_EXTRACTOR_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] in {"image", "video"}:
        NO_TOKENIZER_TASKS.add(task)
    elif values["type"] in {"audio"}:
        NO_TOKENIZER_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] != "multimodal":
        raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['type']}")

PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)
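# The registry is the single lookup point used by `pipeline()` below. As a rough sketch,
# a custom task could also be added at runtime through the registry; here `MyPipeline`
# is a hypothetical `Pipeline` subclass, and the exact keyword arguments should be
# checked against `PipelineRegistry.register_pipeline`:
#
#     PIPELINE_REGISTRY.register_pipeline(
#         "my-task",
#         pipeline_class=MyPipeline,
#         pt_model=AutoModel,
#         default={"pt": ("org/checkpoint", "abc1234")},
#         type="text",
#     )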


def get_supported_tasks() -> List[str]:
    """
    Returns a list of supported task strings.
    """
    return PIPELINE_REGISTRY.get_supported_tasks()


def get_task(model: str, use_auth_token: Optional[str] = None) -> str:
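    # Infer the task from the `pipeline_tag` metadata of `model` on the Hub. A rough
    # sketch of the expected behaviour (requires network access; the model id and the
    # returned tag are illustrative):
    #
    #     get_task("distilbert-base-uncased-finetuned-sst-2-english")
    #     # -> "text-classification"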
    if is_offline_mode():
        raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
    try:
        info = model_info(model, token=use_auth_token)
    except Exception as e:
        raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}")
    if not info.pipeline_tag:
        raise RuntimeError(
            f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
        )
    if getattr(info, "library_name", "transformers") != "transformers":
        raise RuntimeError(f"This model is meant to be used with {info.library_name}, not with transformers")
    task = info.pipeline_tag
    return task


def check_task(task: str) -> Tuple[str, Dict, Any]:
    """
    Checks an incoming task string, to validate it's correct and return the default Pipeline and Model classes, and
    default models if they exist.

    Args:
        task (`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - `"audio-classification"`
            - `"automatic-speech-recognition"`
            - `"conversational"`
            - `"depth-estimation"`
            - `"document-question-answering"`
            - `"feature-extraction"`
            - `"fill-mask"`
            - `"image-classification"`
            - `"image-segmentation"`
            - `"image-to-text"`
            - `"object-detection"`
            - `"question-answering"`
            - `"summarization"`
            - `"table-question-answering"`
            - `"text2text-generation"`
            - `"text-classification"` (alias `"sentiment-analysis"` available)
            - `"text-generation"`
            - `"token-classification"` (alias `"ner"` available)
            - `"translation"`
            - `"translation_xx_to_yy"`
            - `"video-classification"`
            - `"visual-question-answering"`
            - `"zero-shot-classification"`
            - `"zero-shot-image-classification"`
            - `"zero-shot-audio-classification"`
            - `"zero-shot-object-detection"`

    Returns:
        (normalized_task: `str`, task_defaults: `dict`, task_options: (`tuple`, None)) The normalized task name
        (alias and options removed), the actual dictionary required to initialize the pipeline, and some extra task
        options for parametrized tasks like "translation_XX_to_YY".
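
    Example (a sketch; the defaults dictionary mirrors `SUPPORTED_TASKS` above):

    ```python
    >>> normalized_task, targeted_task, task_options = check_task("ner")
    >>> normalized_task
    'token-classification'
    >>> task_options is None
    True
    ```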


    """
    return PIPELINE_REGISTRY.check_task(task)


def clean_custom_task(task_info):
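    # Resolve the framework class names of a `custom_pipelines` entry into real classes.
    # A sketch of the transformation (module and class names are illustrative):
    #
    #     clean_custom_task({"impl": "my_pipeline.MyPipeline", "pt": "AutoModel"})
    #     # -> ({"impl": "my_pipeline.MyPipeline", "pt": (transformers.AutoModel,), "tf": ()}, None)
    #
    # "impl" stays a "module.ClassName" string that `pipeline()` resolves later with
    # `get_class_from_dynamic_module`.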
    import transformers

    if "impl" not in task_info:
        raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.")
    pt_class_names = task_info.get("pt", ())
    if isinstance(pt_class_names, str):
        pt_class_names = [pt_class_names]
    task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names)
    tf_class_names = task_info.get("tf", ())
    if isinstance(tf_class_names, str):
        tf_class_names = [tf_class_names]
    task_info["tf"] = tuple(getattr(transformers, c) for c in tf_class_names)
    return task_info, None


def pipeline(
    task: Optional[str] = None,
    model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None,
    config: Optional[Union[str, PretrainedConfig]] = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None,
    feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
    image_processor: Optional[Union[str, BaseImageProcessor]] = None,
    framework: Optional[str] = None,
    revision: Optional[str] = None,
    use_fast: bool = True,
    use_auth_token: Optional[Union[str, bool]] = None,
    device: Optional[Union[int, str, "torch.device"]] = None,
    device_map=None,
    torch_dtype=None,
    trust_remote_code: Optional[bool] = None,
    model_kwargs: Optional[Dict[str, Any]] = None,
    pipeline_class: Optional[Any] = None,
    **kwargs,
) -> Pipeline:
    """
    Utility factory method to build a [`Pipeline`].

    Pipelines are made of:

        - A [tokenizer](tokenizer) in charge of mapping raw textual input to tokens.
        - A [model](model) to make predictions from the inputs.
        - Some (optional) post processing for enhancing the model's output.

    Args:
        task (`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - `"audio-classification"`: will return an [`AudioClassificationPipeline`].
            - `"automatic-speech-recognition"`: will return an [`AutomaticSpeechRecognitionPipeline`].
            - `"conversational"`: will return a [`ConversationalPipeline`].
            - `"depth-estimation"`: will return a [`DepthEstimationPipeline`].
            - `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`].
            - `"feature-extraction"`: will return a [`FeatureExtractionPipeline`].
            - `"fill-mask"`: will return a [`FillMaskPipeline`].
            - `"image-classification"`: will return an [`ImageClassificationPipeline`].
            - `"image-segmentation"`: will return an [`ImageSegmentationPipeline`].
            - `"image-to-text"`: will return an [`ImageToTextPipeline`].
            - `"object-detection"`: will return an [`ObjectDetectionPipeline`].
            - `"question-answering"`: will return a [`QuestionAnsweringPipeline`].
            - `"summarization"`: will return a [`SummarizationPipeline`].
            - `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`].
            - `"text2text-generation"`: will return a [`Text2TextGenerationPipeline`].
            - `"text-classification"` (alias `"sentiment-analysis"` available): will return a
              [`TextClassificationPipeline`].
            - `"text-generation"`: will return a [`TextGenerationPipeline`].
            - `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`].
            - `"translation"`: will return a [`TranslationPipeline`].
            - `"translation_xx_to_yy"`: will return a [`TranslationPipeline`].
            - `"video-classification"`: will return a [`VideoClassificationPipeline`].
            - `"visual-question-answering"`: will return a [`VisualQuestionAnsweringPipeline`].
            - `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`].
            - `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`].
            - `"zero-shot-audio-classification"`: will return a [`ZeroShotAudioClassificationPipeline`].
            - `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`].

        model (`str` or [`PreTrainedModel`] or [`TFPreTrainedModel`], *optional*):
            The model that will be used by the pipeline to make predictions. This can be a model identifier or an
            actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch) or
            [`TFPreTrainedModel`] (for TensorFlow).

            If not provided, the default for the `task` will be loaded.
        config (`str` or [`PretrainedConfig`], *optional*):
            The configuration that will be used by the pipeline to instantiate the model. This can be a model
            identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`].

            If not provided, the default configuration file for the requested model will be used. That means that if
            `model` is given, its default configuration will be used. However, if `model` is not supplied, this
            `task`'s default model's config is used instead.
        tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].

            If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
            is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
            However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
            will be loaded.
        feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*):
            The feature extractor that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`].

            Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal
            models. Multi-modal models will also require a tokenizer to be passed.

            If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If
            `model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it
            is a string). However, if `config` is also not given or not a string, then the default feature extractor
            for the given `task` will be loaded.
        framework (`str`, *optional*):
            The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified and
            both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is
            provided.
        revision (`str`, *optional*, defaults to `"main"`):
            When passing a task name or a string model identifier: The specific model version to use. It can be a
            branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
            artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
        use_fast (`bool`, *optional*, defaults to `True`):
            Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
        use_auth_token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        device (`int` or `str` or `torch.device`):
            Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
            pipeline will be allocated.
        device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut). When the `accelerate` library is present, set
            `device_map="auto"` to compute the most optimized `device_map` automatically (see
            [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
            for more information).

            <Tip warning={true}>

            Do not use `device_map` AND `device` at the same time, as they will conflict.

            </Tip>

        torch_dtype (`str` or `torch.dtype`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
            (`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
            tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
            and in which you have read the code, as it will execute code present on the Hub on your local machine.
        model_kwargs (`Dict[str, Any]`, *optional*):
            Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
            **model_kwargs)` function.
        kwargs:
            Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
            corresponding pipeline class for possible values).

    Returns:
        [`Pipeline`]: A suitable pipeline for the task.

    Examples:

    ```python
    >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

    >>> # Sentiment analysis pipeline
    >>> analyzer = pipeline("sentiment-analysis")

    >>> # Question answering pipeline, specifying the checkpoint identifier
    >>> oracle = pipeline(
    ...     "question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="bert-base-cased"
    ... )

    >>> # Named entity recognition pipeline, passing in a specific model and tokenizer
    >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
    >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
    ```"""
    if model_kwargs is None:
        model_kwargs = {}
    # Make sure we only pass use_auth_token once as a kwarg (it used to be possible to pass it in model_kwargs,
    # this is to keep BC).
    use_auth_token = model_kwargs.pop("use_auth_token", use_auth_token)
    hub_kwargs = {
        "revision": revision,
        "use_auth_token": use_auth_token,
        "trust_remote_code": trust_remote_code,
        "_commit_hash": None,
    }

    if task is None and model is None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline without either a task or a model "
            "being specified. "
            "Please provide a task or a model."
        )

    if model is None and tokenizer is not None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer"
            " may not be compatible with the default model. Please provide a PreTrainedModel class or a"
            " path/identifier to a pretrained model when providing tokenizer."
        )
    if model is None and feature_extractor is not None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided"
            " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class"
            " or a path/identifier to a pretrained model when providing feature_extractor."
        )
    if isinstance(model, Path):
        model = str(model)

    # The config is the primary source of information about the model.
    # Instantiate the config if needed.
    if isinstance(config, str):
        config = AutoConfig.from_pretrained(config, _from_pipeline=task, **hub_kwargs, **model_kwargs)
        hub_kwargs["_commit_hash"] = config._commit_hash
    elif config is None and isinstance(model, str):
        config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
        hub_kwargs["_commit_hash"] = config._commit_hash

    custom_tasks = {}
    if config is not None and len(getattr(config, "custom_pipelines", {})) > 0:
        custom_tasks = config.custom_pipelines
        if task is None and trust_remote_code is not False:
            if len(custom_tasks) == 1:
                task = list(custom_tasks.keys())[0]
            else:
                raise RuntimeError(
                    "We can't infer the task automatically for this model as there are multiple tasks available. Pick "
                    f"one in {', '.join(custom_tasks.keys())}"
                )

    if task is None and model is not None:
        if not isinstance(model, str):
            raise RuntimeError(
                "Inferring the task automatically requires to check the hub with a model_id defined as a `str`."
                f"{model} is not a valid model_id."
            )
        task = get_task(model, use_auth_token)

    # Retrieve the task
    if task in custom_tasks:
        normalized_task = task
        targeted_task, task_options = clean_custom_task(custom_tasks[task])
        if pipeline_class is None:
            if not trust_remote_code:
                raise ValueError(
                    "Loading this pipeline requires you to execute the code in the pipeline file in that"
                    " repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
                    " set the option `trust_remote_code=True` to remove this error."
                )
            class_ref = targeted_task["impl"]
            module_file, class_name = class_ref.split(".")
            pipeline_class = get_class_from_dynamic_module(
                model, module_file + ".py", class_name, revision=revision, use_auth_token=use_auth_token
            )
    else:
        normalized_task, targeted_task, task_options = check_task(task)
        if pipeline_class is None:
            pipeline_class = targeted_task["impl"]

    # Use default model/config/tokenizer for the task if no model is provided
    if model is None:
        # At that point framework might still be undetermined
        model, default_revision = get_default_model_and_revision(targeted_task, framework, task_options)
        revision = revision if revision is not None else default_revision
        logger.warning(
            f"No model was supplied, defaulted to {model} and revision"
            f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
            "Using a pipeline without specifying a model name and revision in production is not recommended."
        )
        if config is None and isinstance(model, str):
            config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
            hub_kwargs["_commit_hash"] = config._commit_hash

    if device_map is not None:
        if "device_map" in model_kwargs:
            raise ValueError(
                'You cannot use both `pipeline(..., device_map=...)` and `model_kwargs={"device_map": ...}`, as those'
                " arguments might conflict; use only one."
            )
        if device is not None:
            logger.warning(
                "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
                " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`."
            )
        model_kwargs["device_map"] = device_map
    if torch_dtype is not None:
        if "torch_dtype" in model_kwargs:
            raise ValueError(
                'You cannot use both `pipeline(..., torch_dtype=...)` and `model_kwargs={"torch_dtype": ...}`, as those'
                " arguments might conflict; use only one."
            )
        model_kwargs["torch_dtype"] = torch_dtype

    model_name = model if isinstance(model, str) else None

    # Infer the framework from the model
    # Forced if framework already defined, inferred if it's None
    # Will load the correct model if possible
    model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]}
    framework, model = infer_framework_load_model(
        model,
        model_classes=model_classes,
        config=config,
        framework=framework,
        task=task,
        **hub_kwargs,
        **model_kwargs,
    )

    model_config = model.config
    hub_kwargs["_commit_hash"] = model.config._commit_hash
    load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
    load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
    load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None

    # If `model` (an instance of `PreTrainedModel` rather than `str`) is passed (and/or the same for `config`), while
    # `image_processor` or `feature_extractor` is `None`, the loading will fail. This happens particularly for some
    # vision tasks when calling `pipeline()` with a `model` and only one of `image_processor` and `feature_extractor`.
    # TODO: we need to make `NO_IMAGE_PROCESSOR_TASKS` and `NO_FEATURE_EXTRACTOR_TASKS` more robust to avoid such issues.
    # This block is only temporary, to make CI green.
    if load_image_processor and load_feature_extractor:
        load_feature_extractor = False

    if (
        tokenizer is None
        and not load_tokenizer
        and normalized_task not in NO_TOKENIZER_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
    ):
        # This is a special category of models that are fusions of multiple models,
        # so the model_config might not define a tokenizer, but one seems to be
        # necessary for the task, so we're force-trying to load it.
        load_tokenizer = True
    if (
        image_processor is None
        and not load_image_processor
        and normalized_task not in NO_IMAGE_PROCESSOR_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
        and normalized_task != "automatic-speech-recognition"
    ):
        # This is a special category of models that are fusions of multiple models,
        # so the model_config might not define an image processor, but one seems to be
        # necessary for the task, so we're force-trying to load it.
        load_image_processor = True
    if (
        feature_extractor is None
        and not load_feature_extractor
        and normalized_task not in NO_FEATURE_EXTRACTOR_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
    ):
        # This is a special category of models that are fusions of multiple models,
        # so the model_config might not define a feature extractor, but one seems to be
        # necessary for the task, so we're force-trying to load it.
        load_feature_extractor = True

    if task in NO_TOKENIZER_TASKS:
        # These will never require a tokenizer.
        # The model, on the other hand, might have a tokenizer, but
        # its files could be missing from the hub; instead of failing
        # on such repos, we just force it not to be loaded.
        load_tokenizer = False

    if task in NO_FEATURE_EXTRACTOR_TASKS:
        load_feature_extractor = False
    if task in NO_IMAGE_PROCESSOR_TASKS:
        load_image_processor = False

    if load_tokenizer:
        # Try to infer tokenizer from model or config name (if provided as str)
        if tokenizer is None:
            if isinstance(model_name, str):
                tokenizer = model_name
            elif isinstance(config, str):
                tokenizer = config
            else:
                # Impossible to guess what is the right tokenizer here
                raise Exception(
                    "Impossible to guess which tokenizer to use. "
                    "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
                )

        # Instantiate tokenizer if needed
        if isinstance(tokenizer, (str, tuple)):
            if isinstance(tokenizer, tuple):
                # For tuple we have (tokenizer name, {kwargs})
                use_fast = tokenizer[1].pop("use_fast", use_fast)
                tokenizer_identifier = tokenizer[0]
                tokenizer_kwargs = tokenizer[1]
            else:
                tokenizer_identifier = tokenizer
                tokenizer_kwargs = model_kwargs.copy()
                tokenizer_kwargs.pop("torch_dtype", None)

            tokenizer = AutoTokenizer.from_pretrained(
                tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
            )

    if load_image_processor:
        # Try to infer image processor from model or config name (if provided as str)
        if image_processor is None:
            if isinstance(model_name, str):
                image_processor = model_name
            elif isinstance(config, str):
                image_processor = config
            # Backward compatibility, as `feature_extractor` used to be the name
            # for `ImageProcessor`.
            elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor):
                image_processor = feature_extractor
            else:
                # Impossible to guess what is the right image_processor here
                raise Exception(
                    "Impossible to guess which image processor to use. "
                    "Please provide a PreTrainedImageProcessor class or a path/identifier "
                    "to a pretrained image processor."
                )

        # Instantiate image_processor if needed
        if isinstance(image_processor, (str, tuple)):
            image_processor = AutoImageProcessor.from_pretrained(
                image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
            )

    if load_feature_extractor:
        # Try to infer feature extractor from model or config name (if provided as str)
        if feature_extractor is None:
            if isinstance(model_name, str):
                feature_extractor = model_name
            elif isinstance(config, str):
                feature_extractor = config
            else:
                # Impossible to guess what is the right feature_extractor here
                raise Exception(
                    "Impossible to guess which feature extractor to use. "
                    "Please provide a PreTrainedFeatureExtractor class or a path/identifier "
                    "to a pretrained feature extractor."
                )

        # Instantiate feature_extractor if needed
        if isinstance(feature_extractor, (str, tuple)):
            feature_extractor = AutoFeatureExtractor.from_pretrained(
                feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs
            )

            if (
                feature_extractor._processor_class
                and feature_extractor._processor_class.endswith("WithLM")
                and isinstance(model_name, str)
            ):
                try:
                    import kenlm  # to trigger `ImportError` if not installed
                    from pyctcdecode import BeamSearchDecoderCTC

                    if os.path.isdir(model_name) or os.path.isfile(model_name):
                        decoder = BeamSearchDecoderCTC.load_from_dir(model_name)
                    else:
                        language_model_glob = os.path.join(
                            BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*"
                        )
                        alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME
                        allow_patterns = [language_model_glob, alphabet_filename]
                        decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns)

                    kwargs["decoder"] = decoder
                except ImportError as e:
                    logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}")
                    if not is_kenlm_available():
                        logger.warning("Try to install `kenlm`: `pip install kenlm")

                    if not is_pyctcdecode_available():
                        logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode")

    if task == "translation" and model.config.task_specific_params:
        for key in model.config.task_specific_params:
            if key.startswith("translation"):
                task = key
                warnings.warn(
                    f'"translation" task was used, instead of "translation_XX_to_YY", defaulting to "{task}"',
                    UserWarning,
                )
                break

    if tokenizer is not None:
        kwargs["tokenizer"] = tokenizer

    if feature_extractor is not None:
        kwargs["feature_extractor"] = feature_extractor

    if torch_dtype is not None:
        kwargs["torch_dtype"] = torch_dtype

    if image_processor is not None:
        kwargs["image_processor"] = image_processor

    if device is not None:
        kwargs["device"] = device

    return pipeline_class(model=model, framework=framework, task=task, **kwargs)
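

# A short usage sketch for the keyword arguments documented above (the checkpoint is
# illustrative, and `accelerate` must be installed for `device_map="auto"`):
#
#     import torch
#     from transformers import pipeline
#
#     generator = pipeline(
#         "text-generation",
#         model="gpt2",
#         torch_dtype=torch.float16,  # forwarded to `from_pretrained` via model_kwargs
#         device_map="auto",  # let accelerate place the weights
#     )
#     print(generator("Hello, I'm a language model,", max_length=20))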