# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import json
import os
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

from huggingface_hub import model_info

from ..configuration_utils import PretrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer
from ..tokenization_utils_fast import PreTrainedTokenizerFast
from ..utils import (
    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
    is_kenlm_available,
    is_offline_mode,
    is_pyctcdecode_available,
    is_tf_available,
    is_torch_available,
    logging,
)
from .audio_classification import AudioClassificationPipeline
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
from .base import (
    ArgumentHandler,
    CsvPipelineDataFormat,
    JsonPipelineDataFormat,
    PipedPipelineDataFormat,
    Pipeline,
    PipelineDataFormat,
    PipelineException,
    PipelineRegistry,
    get_default_model_and_revision,
    infer_framework_load_model,
)
from .conversational import Conversation, ConversationalPipeline
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
from .image_segmentation import ImageSegmentationPipeline
from .image_to_text import ImageToTextPipeline
from .object_detection import ObjectDetectionPipeline
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline
from .text_classification import TextClassificationPipeline
from .text_generation import TextGenerationPipeline
from .token_classification import (
    AggregationStrategy,
    NerPipeline,
    TokenClassificationArgumentHandler,
    TokenClassificationPipeline,
)
from .video_classification import VideoClassificationPipeline
from .visual_question_answering import VisualQuestionAnsweringPipeline
from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline
from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline
from .zero_shot_image_classification import ZeroShotImageClassificationPipeline
from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline


if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import (
        TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TF_MODEL_WITH_LM_HEAD_MAPPING,
        TFAutoModel,
        TFAutoModelForCausalLM,
        TFAutoModelForImageClassification,
        TFAutoModelForMaskedLM,
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSequenceClassification,
        TFAutoModelForTableQuestionAnswering,
        TFAutoModelForTokenClassification,
        TFAutoModelForVision2Seq,
    )

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        MODEL_FOR_MASKED_LM_MAPPING,
        MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING,
        AutoModel,
        AutoModelForAudioClassification,
        AutoModelForCausalLM,
        AutoModelForCTC,
        AutoModelForDocumentQuestionAnswering,
        AutoModelForImageClassification,
        AutoModelForImageSegmentation,
        AutoModelForMaskedLM,
        AutoModelForObjectDetection,
        AutoModelForQuestionAnswering,
        AutoModelForSemanticSegmentation,
        AutoModelForSeq2SeqLM,
        AutoModelForSequenceClassification,
        AutoModelForSpeechSeq2Seq,
        AutoModelForTableQuestionAnswering,
        AutoModelForTokenClassification,
        AutoModelForVideoClassification,
        AutoModelForVision2Seq,
        AutoModelForVisualQuestionAnswering,
        AutoModelForZeroShotObjectDetection,
    )

if TYPE_CHECKING:
    from ..modeling_tf_utils import TFPreTrainedModel
    from ..modeling_utils import PreTrainedModel

logger = logging.get_logger(__name__)


# Register all the supported tasks here
TASK_ALIASES = {
    "sentiment-analysis": "text-classification",
    "ner": "token-classification",
    "vqa": "visual-question-answering",
}
SUPPORTED_TASKS = {
    "audio-classification": {
        "impl": AudioClassificationPipeline,
        "tf": (),
        "pt": (AutoModelForAudioClassification,) if is_torch_available() else (),
        "default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}},
        "type": "audio",
    },
    "automatic-speech-recognition": {
        "impl": AutomaticSpeechRecognitionPipeline,
        "tf": (),
        "pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}},
        "type": "multimodal",
    },
    "feature-extraction": {
        "impl": FeatureExtractionPipeline,
        "tf": (TFAutoModel,) if is_tf_available() else (),
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {"model": {"pt": ("distilbert-base-cased", "935ac13"), "tf": ("distilbert-base-cased", "935ac13")}},
        "type": "multimodal",
    },
    "text-classification": {
        "impl": TextClassificationPipeline,
        "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
                "tf": ("distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
            },
        },
        "type": "text",
    },
    "token-classification": {
        "impl": TokenClassificationPipeline,
        "tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (),
        "pt": (AutoModelForTokenClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
                "tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
            },
        },
        "type": "text",
    },
    "question-answering": {
        "impl": QuestionAnsweringPipeline,
        "tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (),
        "pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("distilbert-base-cased-distilled-squad", "626af31"),
                "tf": ("distilbert-base-cased-distilled-squad", "626af31"),
            },
        },
        "type": "text",
    },
    "table-question-answering": {
        "impl": TableQuestionAnsweringPipeline,
        "pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (),
        "tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (),
        "default": {
            "model": {
                "pt": ("google/tapas-base-finetuned-wtq", "69ceee2"),
                "tf": ("google/tapas-base-finetuned-wtq", "69ceee2"),
            },
        },
        "type": "text",
    },
    "visual-question-answering": {
        "impl": VisualQuestionAnsweringPipeline,
        "pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available() else (),
        "tf": (),
        "default": {
            "model": {"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59")},
        },
        "type": "multimodal",
    },
    "document-question-answering": {
        "impl": DocumentQuestionAnsweringPipeline,
        "pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (),
        "tf": (),
        "default": {
            "model": {"pt": ("impira/layoutlm-document-qa", "52e01b3")},
        },
        "type": "multimodal",
    },
    "fill-mask": {
        "impl": FillMaskPipeline,
        "tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),
        "pt": (AutoModelForMaskedLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("distilroberta-base", "ec58a5b"), "tf": ("distilroberta-base", "ec58a5b")}},
        "type": "text",
    },
    "summarization": {
        "impl": SummarizationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"), "tf": ("t5-small", "d769bba")}},
        "type": "text",
    },
    # This task is a special case as it's parametrized by SRC, TGT languages.
    "translation": {
        "impl": TranslationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {
            ("en", "fr"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
            ("en", "de"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
            ("en", "ro"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
        },
        "type": "text",
    },
    "text2text-generation": {
        "impl": Text2TextGenerationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
        "type": "text",
    },
    "text-generation": {
        "impl": TextGenerationPipeline,
        "tf": (TFAutoModelForCausalLM,) if is_tf_available() else (),
        "pt": (AutoModelForCausalLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("gpt2", "6c0e608"), "tf": ("gpt2", "6c0e608")}},
        "type": "text",
    },
    "zero-shot-classification": {
        "impl": ZeroShotClassificationPipeline,
        "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {
            "model": {"pt": ("facebook/bart-large-mnli", "c626438"), "tf": ("roberta-large-mnli", "130fb28")},
            "config": {"pt": ("facebook/bart-large-mnli", "c626438"), "tf": ("roberta-large-mnli", "130fb28")},
        },
        "type": "text",
    },
    "zero-shot-image-classification": {
        "impl": ZeroShotImageClassificationPipeline,
        "tf": (TFAutoModel,) if is_tf_available() else (),
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("openai/clip-vit-base-patch32", "f4881ba"),
                "tf": ("openai/clip-vit-base-patch32", "f4881ba"),
            }
        },
        "type": "multimodal",
    },
    "zero-shot-audio-classification": {
        "impl": ZeroShotAudioClassificationPipeline,
        "tf": (),
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("laion/clap-htsat-fused", "f39917b"),
            }
        },
        "type": "multimodal",
    },
    "conversational": {
        "impl": ConversationalPipeline,
        "tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (),
        "default": {
            "model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")}
        },
        "type": "text",
    },
    "image-classification": {
        "impl": ImageClassificationPipeline,
        "tf": (TFAutoModelForImageClassification,) if is_tf_available() else (),
        "pt": (AutoModelForImageClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("google/vit-base-patch16-224", "5dca96d"),
                "tf": ("google/vit-base-patch16-224", "5dca96d"),
            }
        },
        "type": "image",
    },
    "image-segmentation": {
        "impl": ImageSegmentationPipeline,
        "tf": (),
        "pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}},
        "type": "multimodal",
    },
    "image-to-text": {
        "impl": ImageToTextPipeline,
        "tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (),
        "pt": (AutoModelForVision2Seq,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("ydshieh/vit-gpt2-coco-en", "65636df"),
                "tf": ("ydshieh/vit-gpt2-coco-en", "65636df"),
            }
        },
        "type": "multimodal",
    },
    "object-detection": {
        "impl": ObjectDetectionPipeline,
        "tf": (),
        "pt": (AutoModelForObjectDetection,) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}},
        "type": "multimodal",
    },
    "zero-shot-object-detection": {
        "impl": ZeroShotObjectDetectionPipeline,
        "tf": (),
        "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (),
        "default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}},
        "type": "multimodal",
    },
    "depth-estimation": {
        "impl": DepthEstimationPipeline,
        "tf": (),
        "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (),
        "default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}},
        "type": "image",
    },
    "video-classification": {
        "impl": VideoClassificationPipeline,
        "tf": (),
        "pt": (AutoModelForVideoClassification,) if is_torch_available() else (),
        "default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}},
        "type": "video",
    },
}
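
# Illustrative sketch (comments only, not executed): how an entry of the registry
# above is meant to be consumed. Resolving the default PyTorch checkpoint for a
# task boils down to a couple of dictionary lookups:
#
#     task_def = SUPPORTED_TASKS["text-classification"]
#     pipeline_cls = task_def["impl"]  # TextClassificationPipeline
#     model_id, revision = task_def["default"]["model"]["pt"]
#     # -> ("distilbert-base-uncased-finetuned-sst-2-english", "af0f99b")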

NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set()
# Those model configs are special, they are generic over their task, meaning
# any tokenizer/feature_extractor might be used for a given model, so we cannot
# use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to
# see if the model defines such objects or not.
MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"}
for task, values in SUPPORTED_TASKS.items():
    if values["type"] == "text":
        NO_FEATURE_EXTRACTOR_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] in {"image", "video"}:
        NO_TOKENIZER_TASKS.add(task)
    elif values["type"] in {"audio"}:
        NO_TOKENIZER_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] != "multimodal":
        raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['type']}")
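
# Illustrative (comments only): given the table above, a pure-"text" task skips the
# vision/audio preprocessors while an "image" task skips the tokenizer, e.g.
#
#     assert "text-classification" in NO_FEATURE_EXTRACTOR_TASKS
#     assert "text-classification" in NO_IMAGE_PROCESSOR_TASKS
#     assert "depth-estimation" in NO_TOKENIZER_TASKS
#
# "multimodal" tasks are deliberately left out of all three sets.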

PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)


def get_supported_tasks() -> List[str]:
    """
    Returns a list of supported task strings.
    """
    return PIPELINE_REGISTRY.get_supported_tasks()
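
# Example usage (illustrative):
#
#     >>> from transformers.pipelines import get_supported_tasks
#     >>> "text-classification" in get_supported_tasks()
#     True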


def get_task(model: str, use_auth_token: Optional[str] = None) -> str:
    if is_offline_mode():
        raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
    try:
        info = model_info(model, token=use_auth_token)
    except Exception as e:
        raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}")
    if not info.pipeline_tag:
        raise RuntimeError(
            f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
        )
    if getattr(info, "library_name", "transformers") != "transformers":
        raise RuntimeError(f"This model is meant to be used with {info.library_name}, not with transformers")
    task = info.pipeline_tag
    return task
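
# Example usage (illustrative; requires network access and a Hub model whose card
# defines a `pipeline_tag`):
#
#     >>> get_task("gpt2")
#     'text-generation'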


def check_task(task: str) -> Tuple[str, Dict, Any]:
    """
    Checks an incoming task string to validate that it's correct, and returns the default Pipeline and Model classes
    and default models if they exist.

    Args:
        task (`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - `"audio-classification"`
            - `"automatic-speech-recognition"`
            - `"conversational"`
            - `"depth-estimation"`
            - `"document-question-answering"`
            - `"feature-extraction"`
            - `"fill-mask"`
            - `"image-classification"`
            - `"image-segmentation"`
            - `"image-to-text"`
            - `"object-detection"`
            - `"question-answering"`
            - `"summarization"`
            - `"table-question-answering"`
            - `"text2text-generation"`
            - `"text-classification"` (alias `"sentiment-analysis"` available)
            - `"text-generation"`
            - `"token-classification"` (alias `"ner"` available)
            - `"translation"`
            - `"translation_xx_to_yy"`
            - `"video-classification"`
            - `"visual-question-answering"`
            - `"zero-shot-classification"`
            - `"zero-shot-image-classification"`
            - `"zero-shot-object-detection"`

    Returns:
        (normalized_task: `str`, task_defaults: `dict`, task_options: (`tuple`, None)) The normalized task name
        (alias and options removed), the actual dictionary required to initialize the pipeline, and some extra task
        options for parametrized tasks like "translation_XX_to_YY".


    """
    return PIPELINE_REGISTRY.check_task(task)
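
# Example usage (illustrative): aliases are normalized, and parametrized tasks such
# as translation also return their options:
#
#     >>> check_task("sentiment-analysis")[0]
#     'text-classification'
#     >>> check_task("translation_en_to_fr")[2]
#     ('en', 'fr')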


def clean_custom_task(task_info):
    import transformers

    if "impl" not in task_info:
        raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.")
    pt_class_names = task_info.get("pt", ())
    if isinstance(pt_class_names, str):
        pt_class_names = [pt_class_names]
    task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names)
    tf_class_names = task_info.get("tf", ())
    if isinstance(tf_class_names, str):
        tf_class_names = [tf_class_names]
    task_info["tf"] = tuple(getattr(transformers, c) for c in tf_class_names)
    return task_info, None
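
# Illustrative sketch (hypothetical `task_info`): an entry of a config's
# `custom_pipelines` stores its classes as strings, and `clean_custom_task`
# resolves them to the actual `transformers` classes:
#
#     info = {"impl": "my_module.MyPipeline", "pt": "AutoModelForSequenceClassification", "tf": ()}
#     info, _ = clean_custom_task(info)
#     info["pt"]  # -> (transformers.AutoModelForSequenceClassification,)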


def pipeline(
    task: Optional[str] = None,
    model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None,
    config: Optional[Union[str, PretrainedConfig]] = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
    feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
    image_processor: Optional[Union[str, BaseImageProcessor]] = None,
    framework: Optional[str] = None,
    revision: Optional[str] = None,
    use_fast: bool = True,
    use_auth_token: Optional[Union[str, bool]] = None,
    device: Optional[Union[int, str, "torch.device"]] = None,
    device_map=None,
    torch_dtype=None,
    trust_remote_code: Optional[bool] = None,
    model_kwargs: Optional[Dict[str, Any]] = None,
    pipeline_class: Optional[Any] = None,
    **kwargs,
) -> Pipeline:
    """
    Utility factory method to build a [`Pipeline`].

    Pipelines are made of:

        - A [tokenizer](tokenizer) in charge of mapping raw textual input to tokens.
        - A [model](model) to make predictions from the inputs.
        - Some (optional) post processing for enhancing the model's output.

    Args:
        task (`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - `"audio-classification"`: will return a [`AudioClassificationPipeline`].
            - `"automatic-speech-recognition"`: will return a [`AutomaticSpeechRecognitionPipeline`].
            - `"conversational"`: will return a [`ConversationalPipeline`].
            - `"depth-estimation"`: will return a [`DepthEstimationPipeline`].
            - `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`].
            - `"feature-extraction"`: will return a [`FeatureExtractionPipeline`].
            - `"fill-mask"`: will return a [`FillMaskPipeline`].
            - `"image-classification"`: will return a [`ImageClassificationPipeline`].
            - `"image-segmentation"`: will return a [`ImageSegmentationPipeline`].
            - `"image-to-text"`: will return a [`ImageToTextPipeline`].
            - `"object-detection"`: will return a [`ObjectDetectionPipeline`].
            - `"question-answering"`: will return a [`QuestionAnsweringPipeline`].
            - `"summarization"`: will return a [`SummarizationPipeline`].
            - `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`].
            - `"text2text-generation"`: will return a [`Text2TextGenerationPipeline`].
            - `"text-classification"` (alias `"sentiment-analysis"` available): will return a
              [`TextClassificationPipeline`].
            - `"text-generation"`: will return a [`TextGenerationPipeline`].
            - `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`].
            - `"translation"`: will return a [`TranslationPipeline`].
            - `"translation_xx_to_yy"`: will return a [`TranslationPipeline`].
            - `"video-classification"`: will return a [`VideoClassificationPipeline`].
            - `"visual-question-answering"`: will return a [`VisualQuestionAnsweringPipeline`].
            - `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`].
            - `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`].
            - `"zero-shot-audio-classification"`: will return a [`ZeroShotAudioClassificationPipeline`].
            - `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`].

        model (`str` or [`PreTrainedModel`] or [`TFPreTrainedModel`], *optional*):
            The model that will be used by the pipeline to make predictions. This can be a model identifier or an
            actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch) or
            [`TFPreTrainedModel`] (for TensorFlow).

            If not provided, the default for the `task` will be loaded.
        config (`str` or [`PretrainedConfig`], *optional*):
            The configuration that will be used by the pipeline to instantiate the model. This can be a model
            identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`].

            If not provided, the default configuration file for the requested model will be used. That means that if
            `model` is given, its default configuration will be used. However, if `model` is not supplied, this
            `task`'s default model's config is used instead.
        tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].

            If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
            is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
            However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
            will be loaded.
        feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*):
            The feature extractor that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`].

            Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal
            models. Multi-modal models will also require a tokenizer to be passed.

            If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If
            `model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it
            is a string). However, if `config` is also not given or not a string, then the default feature extractor
            for the given `task` will be loaded.
        framework (`str`, *optional*):
            The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified and
            both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is
            provided.
        revision (`str`, *optional*, defaults to `"main"`):
            When passing a task name or a string model identifier: The specific model version to use. It can be a
            branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
            artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
        use_fast (`bool`, *optional*, defaults to `True`):
            Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
        use_auth_token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        device (`int` or `str` or `torch.device`):
            Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
            pipeline will be allocated.
        device_map (`str` or `Dict[str, Union[int, str, torch.device]`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set
            `device_map="auto"` to compute the most optimized `device_map` automatically (see
            [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
            for more information).

            <Tip warning={true}>

            Do not use `device_map` AND `device` at the same time as they will conflict

            </Tip>

        torch_dtype (`str` or `torch.dtype`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
            (`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
            tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
            and in which you have read the code, as it will execute code present on the Hub on your local machine.
        model_kwargs (`Dict[str, Any]`, *optional*):
            Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
            **model_kwargs)` function.
        kwargs:
            Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
            corresponding pipeline class for possible values).

    Returns:
        [`Pipeline`]: A suitable pipeline for the task.

    Examples:

    ```python
    >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

    >>> # Sentiment analysis pipeline
    >>> analyzer = pipeline("sentiment-analysis")

    >>> # Question answering pipeline, specifying the checkpoint identifier
    >>> oracle = pipeline(
    ...     "question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="bert-base-cased"
    ... )

    >>> # Named entity recognition pipeline, passing in a specific model and tokenizer
    >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
    >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
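
    >>> # Illustrative only (commented out; assumes a CUDA device and a PyTorch install)
    >>> # generator = pipeline("text-generation", model="gpt2", device=0, torch_dtype="auto")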
    ```"""
    if model_kwargs is None:
        model_kwargs = {}
    # Make sure we only pass use_auth_token once as a kwarg (it used to be possible to pass it in model_kwargs,
    # this is to keep BC).
    use_auth_token = model_kwargs.pop("use_auth_token", use_auth_token)
    hub_kwargs = {
        "revision": revision,
        "use_auth_token": use_auth_token,
        "trust_remote_code": trust_remote_code,
        "_commit_hash": None,
    }

    if task is None and model is None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline without either a task or a model being specified. "
            "Please provide a task name or a model"
        )

    if model is None and tokenizer is not None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer"
            " may not be compatible with the default model. Please provide a PreTrainedModel class or a"
            " path/identifier to a pretrained model when providing tokenizer."
        )
    if model is None and feature_extractor is not None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided"
            " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class"
            " or a path/identifier to a pretrained model when providing feature_extractor."
        )
    if isinstance(model, Path):
        model = str(model)

    # Config is the primordial information item.
    # Instantiate config if needed
    if isinstance(config, str):
        config = AutoConfig.from_pretrained(config, _from_pipeline=task, **hub_kwargs, **model_kwargs)
        hub_kwargs["_commit_hash"] = config._commit_hash
    elif config is None and isinstance(model, str):
        config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
        hub_kwargs["_commit_hash"] = config._commit_hash

    custom_tasks = {}
    if config is not None and len(getattr(config, "custom_pipelines", {})) > 0:
        custom_tasks = config.custom_pipelines
        if task is None and trust_remote_code is not False:
            if len(custom_tasks) == 1:
                task = list(custom_tasks.keys())[0]
            else:
                raise RuntimeError(
                    "We can't infer the task automatically for this model as there are multiple tasks available. Pick "
                    f"one in {', '.join(custom_tasks.keys())}"
                )

    if task is None and model is not None:
        if not isinstance(model, str):
            raise RuntimeError(
                "Inferring the task automatically requires checking the hub with a model_id defined as a `str`. "
                f"{model} is not a valid model_id."
            )
        task = get_task(model, use_auth_token)

    # Retrieve the task
    if task in custom_tasks:
        normalized_task = task
        targeted_task, task_options = clean_custom_task(custom_tasks[task])
        if pipeline_class is None:
            if not trust_remote_code:
                raise ValueError(
                    "Loading this pipeline requires you to execute the code in the pipeline file in that"
                    " repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
                    " set the option `trust_remote_code=True` to remove this error."
                )
            class_ref = targeted_task["impl"]
            module_file, class_name = class_ref.split(".")
            pipeline_class = get_class_from_dynamic_module(
                model, module_file + ".py", class_name, revision=revision, use_auth_token=use_auth_token
            )
    else:
        normalized_task, targeted_task, task_options = check_task(task)
        if pipeline_class is None:
            pipeline_class = targeted_task["impl"]

    # Use default model/config/tokenizer for the task if no model is provided
    if model is None:
        # At that point framework might still be undetermined
        model, default_revision = get_default_model_and_revision(targeted_task, framework, task_options)
        revision = revision if revision is not None else default_revision
        logger.warning(
            f"No model was supplied, defaulted to {model} and revision"
            f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
            "Using a pipeline without specifying a model name and revision in production is not recommended."
        )
        if config is None and isinstance(model, str):
            config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
            hub_kwargs["_commit_hash"] = config._commit_hash

    if device_map is not None:
        if "device_map" in model_kwargs:
            raise ValueError(
                'You cannot use both `pipeline(..., device_map=...)` and `model_kwargs={"device_map": ...}` as those'
                " arguments might conflict; use only one."
            )
        if device is not None:
            logger.warning(
                "Both `device` and `device_map` are specified. `device` will override `device_map`. You"
                " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`."
            )
        model_kwargs["device_map"] = device_map
    if torch_dtype is not None:
        if "torch_dtype" in model_kwargs:
            raise ValueError(
                'You cannot use both `pipeline(..., torch_dtype=...)` and `model_kwargs={"torch_dtype": ...}` as those'
                " arguments might conflict; use only one."
            )
        model_kwargs["torch_dtype"] = torch_dtype
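
    # Illustrative call shapes (comments only): `device_map` and `torch_dtype` may be
    # passed either as top-level arguments or inside `model_kwargs`, never both; and
    # `device_map="auto"` additionally assumes `accelerate` is installed:
    #
    #     pipeline("text-generation", model="gpt2", device_map="auto", torch_dtype="auto")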

    model_name = model if isinstance(model, str) else None

    # Infer the framework from the model
    # Forced if framework already defined, inferred if it's None
    # Will load the correct model if possible
    model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]}
    framework, model = infer_framework_load_model(
        model,
        model_classes=model_classes,
        config=config,
        framework=framework,
        task=task,
        **hub_kwargs,
        **model_kwargs,
    )

    model_config = model.config
    hub_kwargs["_commit_hash"] = model.config._commit_hash

    load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
    load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
    load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None

    # If `model` (instance of `PretrainedModel` instead of `str`) is passed (and/or same for config), while
    # `image_processor` or `feature_extractor` is `None`, the loading will fail. This happens particularly for some
    # vision tasks when calling `pipeline()` with `model` and only one of the `image_processor` and `feature_extractor`.
    # TODO: we need to make `NO_IMAGE_PROCESSOR_TASKS` and `NO_FEATURE_EXTRACTOR_TASKS` more robust to avoid such issues.
    # This block is only a temporary fix to keep CI green.
    if load_image_processor and load_feature_extractor:
        load_feature_extractor = False

    if (
        tokenizer is None
        and not load_tokenizer
        and normalized_task not in NO_TOKENIZER_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
    ):
        # This is a special category of models that are fusions of multiple models,
        # so the model_config might not define a tokenizer, but it seems to be
        # necessary for the task, so we're force-trying to load it.
        load_tokenizer = True
    if (
        image_processor is None
        and not load_image_processor
        and normalized_task not in NO_IMAGE_PROCESSOR_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
        and normalized_task != "automatic-speech-recognition"
    ):
        # This is a special category of models that are fusions of multiple models,
        # so the model_config might not define an image processor, but it seems to be
        # necessary for the task, so we're force-trying to load it.
        load_image_processor = True
    if (
        feature_extractor is None
        and not load_feature_extractor
        and normalized_task not in NO_FEATURE_EXTRACTOR_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
    ):
        # This is a special category of models that are fusions of multiple models,
        # so the model_config might not define a feature extractor, but it seems to be
        # necessary for the task, so we're force-trying to load it.
        load_feature_extractor = True

    if task in NO_TOKENIZER_TASKS:
        # These will never require a tokenizer.
        # the model on the other hand might have a tokenizer, but
        # the files could be missing from the hub, so instead of failing
        # on such repos, we just don't load the tokenizer.
        load_tokenizer = False

    if task in NO_FEATURE_EXTRACTOR_TASKS:
        load_feature_extractor = False
    if task in NO_IMAGE_PROCESSOR_TASKS:
        load_image_processor = False

    if load_tokenizer:
        # Try to infer tokenizer from model or config name (if provided as str)
        if tokenizer is None:
            if isinstance(model_name, str):
                tokenizer = model_name
            elif isinstance(config, str):
                tokenizer = config
            else:
                # Impossible to guess what is the right tokenizer here
                raise Exception(
                    "Impossible to guess which tokenizer to use. "
                    "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
                )

        # Instantiate tokenizer if needed
        if isinstance(tokenizer, (str, tuple)):
            if isinstance(tokenizer, tuple):
                # For tuple we have (tokenizer name, {kwargs})
                use_fast = tokenizer[1].pop("use_fast", use_fast)
                tokenizer_identifier = tokenizer[0]
                tokenizer_kwargs = tokenizer[1]
            else:
                tokenizer_identifier = tokenizer
                tokenizer_kwargs = model_kwargs

            tokenizer = AutoTokenizer.from_pretrained(
                tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
            )

    if load_image_processor:
        # Try to infer image processor from model or config name (if provided as str)
        if image_processor is None:
            if isinstance(model_name, str):
                image_processor = model_name
            elif isinstance(config, str):
                image_processor = config
            # Backward compatibility, as `feature_extractor` used to be the name
            # for `ImageProcessor`.
            elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor):
                image_processor = feature_extractor
            else:
                # Impossible to guess what is the right image_processor here
                raise Exception(
                    "Impossible to guess which image processor to use. "
                    "Please provide a PreTrainedImageProcessor class or a path/identifier "
                    "to a pretrained image processor."
                )

        # Instantiate image_processor if needed
        if isinstance(image_processor, (str, tuple)):
            image_processor = AutoImageProcessor.from_pretrained(
                image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
            )

    if load_feature_extractor:
        # Try to infer feature extractor from model or config name (if provided as str)
        if feature_extractor is None:
            if isinstance(model_name, str):
                feature_extractor = model_name
            elif isinstance(config, str):
                feature_extractor = config
            else:
                # Impossible to guess what is the right feature_extractor here
                raise Exception(
                    "Impossible to guess which feature extractor to use. "
                    "Please provide a PreTrainedFeatureExtractor class or a path/identifier "
                    "to a pretrained feature extractor."
                )

        # Instantiate feature_extractor if needed
        if isinstance(feature_extractor, (str, tuple)):
            feature_extractor = AutoFeatureExtractor.from_pretrained(
                feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs
            )

            if (
                feature_extractor._processor_class
                and feature_extractor._processor_class.endswith("WithLM")
                and isinstance(model_name, str)
            ):
                try:
                    import kenlm  # to trigger `ImportError` if not installed
                    from pyctcdecode import BeamSearchDecoderCTC

                    if os.path.isdir(model_name) or os.path.isfile(model_name):
                        decoder = BeamSearchDecoderCTC.load_from_dir(model_name)
                    else:
                        language_model_glob = os.path.join(
                            BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*"
                        )
                        alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME
                        allow_patterns = [language_model_glob, alphabet_filename]
                        decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns)

                    kwargs["decoder"] = decoder
                except ImportError as e:
                    logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}")
                    if not is_kenlm_available():
                        logger.warning("Try to install `kenlm`: `pip install kenlm`")

                    if not is_pyctcdecode_available():
                        logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode`")

    if task == "translation" and model.config.task_specific_params:
        for key in model.config.task_specific_params:
            if key.startswith("translation"):
                task = key
                warnings.warn(
                    f'"translation" task was used, instead of "translation_XX_to_YY", defaulting to "{task}"',
                    UserWarning,
                )
                break
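
    # Illustrative (comments only): for a checkpoint such as t5-base, whose config
    # defines `task_specific_params` keys like "translation_en_to_de", a bare
    # task="translation" is normalized by the loop above to the first matching key.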

    if tokenizer is not None:
        kwargs["tokenizer"] = tokenizer

    if feature_extractor is not None:
        kwargs["feature_extractor"] = feature_extractor

    if torch_dtype is not None:
        kwargs["torch_dtype"] = torch_dtype

    if image_processor is not None:
        kwargs["image_processor"] = image_processor

    if device is not None:
        kwargs["device"] = device

    return pipeline_class(model=model, framework=framework, task=task, **kwargs)