"...lm-evaluation-harness.git" did not exist on "0a6a9b7ee13805cfff1a7a47c97668d6e337eb73"
__init__.py 43.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import json
import os
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

from huggingface_hub import model_info

from ..configuration_utils import PretrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer
from ..tokenization_utils_fast import PreTrainedTokenizerFast
from ..utils import (
    HUGGINGFACE_CO_RESOLVE_ENDPOINT,
    is_kenlm_available,
    is_offline_mode,
    is_pyctcdecode_available,
    is_tf_available,
    is_torch_available,
    logging,
)
from .audio_classification import AudioClassificationPipeline
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
from .base import (
    ArgumentHandler,
    CsvPipelineDataFormat,
    JsonPipelineDataFormat,
    PipedPipelineDataFormat,
    Pipeline,
    PipelineDataFormat,
    PipelineException,
    PipelineRegistry,
    get_default_model_and_revision,
    infer_framework_load_model,
)
from .conversational import Conversation, ConversationalPipeline
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
from .image_segmentation import ImageSegmentationPipeline
from .image_to_text import ImageToTextPipeline
from .object_detection import ObjectDetectionPipeline
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline
from .text_classification import TextClassificationPipeline
from .text_generation import TextGenerationPipeline
from .token_classification import (
    AggregationStrategy,
    NerPipeline,
    TokenClassificationArgumentHandler,
    TokenClassificationPipeline,
)
from .video_classification import VideoClassificationPipeline
from .visual_question_answering import VisualQuestionAnsweringPipeline
from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline
from .zero_shot_image_classification import ZeroShotImageClassificationPipeline
from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline


if is_tf_available():
    import tensorflow as tf

    from ..models.auto.modeling_tf_auto import (
        TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TF_MODEL_WITH_LM_HEAD_MAPPING,
        TFAutoModel,
        TFAutoModelForCausalLM,
        TFAutoModelForImageClassification,
        TFAutoModelForMaskedLM,
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSequenceClassification,
        TFAutoModelForTableQuestionAnswering,
        TFAutoModelForTokenClassification,
        TFAutoModelForVision2Seq,
    )

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        MODEL_FOR_MASKED_LM_MAPPING,
        MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING,
        AutoModel,
        AutoModelForAudioClassification,
        AutoModelForCausalLM,
        AutoModelForCTC,
        AutoModelForDocumentQuestionAnswering,
        AutoModelForImageClassification,
        AutoModelForImageSegmentation,
        AutoModelForMaskedLM,
        AutoModelForObjectDetection,
        AutoModelForQuestionAnswering,
        AutoModelForSemanticSegmentation,
        AutoModelForSeq2SeqLM,
        AutoModelForSequenceClassification,
        AutoModelForSpeechSeq2Seq,
        AutoModelForTableQuestionAnswering,
        AutoModelForTokenClassification,
        AutoModelForVideoClassification,
        AutoModelForVision2Seq,
        AutoModelForVisualQuestionAnswering,
        AutoModelForZeroShotObjectDetection,
    )

if TYPE_CHECKING:
    from ..modeling_tf_utils import TFPreTrainedModel
    from ..modeling_utils import PreTrainedModel

logger = logging.get_logger(__name__)


# Register all the supported tasks here
TASK_ALIASES = {
    "sentiment-analysis": "text-classification",
    "ner": "token-classification",
    "vqa": "visual-question-answering",
}
SUPPORTED_TASKS = {
    "audio-classification": {
        "impl": AudioClassificationPipeline,
        "tf": (),
        "pt": (AutoModelForAudioClassification,) if is_torch_available() else (),
        "default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}},
        "type": "audio",
    },
    "automatic-speech-recognition": {
        "impl": AutomaticSpeechRecognitionPipeline,
        "tf": (),
        "pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}},
        "type": "multimodal",
    },
    "feature-extraction": {
        "impl": FeatureExtractionPipeline,
        "tf": (TFAutoModel,) if is_tf_available() else (),
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {"model": {"pt": ("distilbert-base-cased", "935ac13"), "tf": ("distilbert-base-cased", "935ac13")}},
        "type": "multimodal",
    },
    "text-classification": {
        "impl": TextClassificationPipeline,
        "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
                "tf": ("distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
            },
        },
        "type": "text",
    },
    "token-classification": {
        "impl": TokenClassificationPipeline,
        "tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (),
        "pt": (AutoModelForTokenClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
                "tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
            },
        },
        "type": "text",
    },
    "question-answering": {
        "impl": QuestionAnsweringPipeline,
        "tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (),
        "pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("distilbert-base-cased-distilled-squad", "626af31"),
                "tf": ("distilbert-base-cased-distilled-squad", "626af31"),
            },
        },
        "type": "text",
    },
    "table-question-answering": {
        "impl": TableQuestionAnsweringPipeline,
        "pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (),
        "tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (),
        "default": {
            "model": {
                "pt": ("google/tapas-base-finetuned-wtq", "69ceee2"),
                "tf": ("google/tapas-base-finetuned-wtq", "69ceee2"),
            },
        },
        "type": "text",
    },
    "visual-question-answering": {
        "impl": VisualQuestionAnsweringPipeline,
        "pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available() else (),
        "tf": (),
        "default": {
            "model": {"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59")},
        },
        "type": "multimodal",
    },
    "document-question-answering": {
        "impl": DocumentQuestionAnsweringPipeline,
        "pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (),
        "tf": (),
        "default": {
            "model": {"pt": ("impira/layoutlm-document-qa", "52e01b3")},
        },
        "type": "multimodal",
    },
    "fill-mask": {
        "impl": FillMaskPipeline,
        "tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),
        "pt": (AutoModelForMaskedLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("distilroberta-base", "ec58a5b"), "tf": ("distilroberta-base", "ec58a5b")}},
        "type": "text",
    },
    "summarization": {
        "impl": SummarizationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"), "tf": ("t5-small", "d769bba")}},
        "type": "text",
    },
    # This task is a special case as it's parametrized by SRC, TGT languages.
    "translation": {
        "impl": TranslationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {
            ("en", "fr"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
            ("en", "de"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
            ("en", "ro"): {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
        },
        "type": "text",
    },
    "text2text-generation": {
        "impl": Text2TextGenerationPipeline,
        "tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("t5-base", "686f1db"), "tf": ("t5-base", "686f1db")}},
        "type": "text",
    },
    "text-generation": {
        "impl": TextGenerationPipeline,
        "tf": (TFAutoModelForCausalLM,) if is_tf_available() else (),
        "pt": (AutoModelForCausalLM,) if is_torch_available() else (),
        "default": {"model": {"pt": ("gpt2", "6c0e608"), "tf": ("gpt2", "6c0e608")}},
        "type": "text",
    },
    "zero-shot-classification": {
        "impl": ZeroShotClassificationPipeline,
        "tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
        "pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
        "default": {
            "model": {"pt": ("facebook/bart-large-mnli", "c626438"), "tf": ("roberta-large-mnli", "130fb28")},
            "config": {"pt": ("facebook/bart-large-mnli", "c626438"), "tf": ("roberta-large-mnli", "130fb28")},
        },
        "type": "text",
    },
    "zero-shot-image-classification": {
        "impl": ZeroShotImageClassificationPipeline,
        "tf": (TFAutoModel,) if is_tf_available() else (),
        "pt": (AutoModel,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("openai/clip-vit-base-patch32", "f4881ba"),
                "tf": ("openai/clip-vit-base-patch32", "f4881ba"),
            }
        },
        "type": "multimodal",
    },
    "conversational": {
        "impl": ConversationalPipeline,
        "tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (),
        "pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (),
        "default": {
            "model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")}
        },
        "type": "text",
    },
    "image-classification": {
        "impl": ImageClassificationPipeline,
        "tf": (TFAutoModelForImageClassification,) if is_tf_available() else (),
        "pt": (AutoModelForImageClassification,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("google/vit-base-patch16-224", "5dca96d"),
                "tf": ("google/vit-base-patch16-224", "5dca96d"),
            }
        },
        "type": "image",
    },
    "image-segmentation": {
        "impl": ImageSegmentationPipeline,
        "tf": (),
        "pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}},
        "type": "multimodal",
    },
    "image-to-text": {
        "impl": ImageToTextPipeline,
        "tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (),
        "pt": (AutoModelForVision2Seq,) if is_torch_available() else (),
        "default": {
            "model": {
                "pt": ("ydshieh/vit-gpt2-coco-en", "65636df"),
                "tf": ("ydshieh/vit-gpt2-coco-en", "65636df"),
            }
        },
        "type": "multimodal",
    },
    "object-detection": {
        "impl": ObjectDetectionPipeline,
        "tf": (),
        "pt": (AutoModelForObjectDetection,) if is_torch_available() else (),
        "default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}},
        "type": "multimodal",
    },
    "zero-shot-object-detection": {
        "impl": ZeroShotObjectDetectionPipeline,
        "tf": (),
        "pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (),
        "default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}},
        "type": "multimodal",
    },
    "depth-estimation": {
        "impl": DepthEstimationPipeline,
        "tf": (),
        "pt": (AutoModelForDepthEstimation,) if is_torch_available() else (),
        "default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}},
        "type": "image",
    },
    "video-classification": {
        "impl": VideoClassificationPipeline,
        "tf": (),
        "pt": (AutoModelForVideoClassification,) if is_torch_available() else (),
        "default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}},
        "type": "video",
    },
}

NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set()

# These model configs are special: they are generic over their task, meaning
# any tokenizer/feature_extractor might be used for a given model, so we cannot
# use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to
# see if the model defines such objects or not.
MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"}

for task, values in SUPPORTED_TASKS.items():
    if values["type"] == "text":
        NO_FEATURE_EXTRACTOR_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] in {"image", "video"}:
        NO_TOKENIZER_TASKS.add(task)
    elif values["type"] in {"audio"}:
        NO_TOKENIZER_TASKS.add(task)
        NO_IMAGE_PROCESSOR_TASKS.add(task)
    elif values["type"] != "multimodal":
        raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['type']}")

PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)
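
# Custom tasks can also be registered at runtime through this registry. An illustrative
# sketch (hypothetical task and pipeline class, not part of this module):
#
#     PIPELINE_REGISTRY.register_pipeline(
#         "my-new-task",
#         pipeline_class=MyPipeline,  # a subclass of `Pipeline`
#         pt_model=AutoModelForSequenceClassification,
#         default={"model": {"pt": ("user/my-model", "main")}},
#         type="text",
#     )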


def get_supported_tasks() -> List[str]:
    """
    Returns a list of supported task strings.
    """
    return PIPELINE_REGISTRY.get_supported_tasks()


def get_task(model: str, use_auth_token: Optional[str] = None) -> str:
    if is_offline_mode():
        raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
    try:
        info = model_info(model, token=use_auth_token)
    except Exception as e:
        raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}")
    if not info.pipeline_tag:
        raise RuntimeError(
            f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
        )
    if getattr(info, "library_name", "transformers") != "transformers":
        raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers")
    task = info.pipeline_tag
    return task
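
# Illustrative, network-dependent example: for a checkpoint whose Hub card declares
# `pipeline_tag: text-generation` (e.g. "gpt2"), `get_task("gpt2")` returns "text-generation".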


def check_task(task: str) -> Tuple[str, Dict, Any]:
    """
    Checks an incoming task string to validate that it's correct and returns the default Pipeline and Model classes,
    and default models if they exist.

    Args:
        task (`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - `"audio-classification"`
            - `"automatic-speech-recognition"`
            - `"conversational"`
            - `"depth-estimation"`
            - `"document-question-answering"`
            - `"feature-extraction"`
            - `"fill-mask"`
            - `"image-classification"`
            - `"image-segmentation"`
            - `"image-to-text"`
            - `"object-detection"`
            - `"question-answering"`
            - `"summarization"`
            - `"table-question-answering"`
            - `"text2text-generation"`
            - `"text-classification"` (alias `"sentiment-analysis"` available)
            - `"text-generation"`
            - `"token-classification"` (alias `"ner"` available)
            - `"translation"`
            - `"translation_xx_to_yy"`
            - `"video-classification"`
            - `"visual-question-answering"`
            - `"zero-shot-classification"`
            - `"zero-shot-image-classification"`
            - `"zero-shot-object-detection"`

    Returns:
        (normalized_task: `str`, task_defaults: `dict`, task_options: (`tuple`, None)) The normalized task name
        (with aliases and options removed), the actual dictionary required to initialize the pipeline, and some extra
        task options for parametrized tasks like "translation_XX_to_YY".

    """
    return PIPELINE_REGISTRY.check_task(task)
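
# Illustrative examples (resolved against SUPPORTED_TASKS above):
# `check_task("sentiment-analysis")` normalizes the alias and returns
# ("text-classification", SUPPORTED_TASKS["text-classification"], None), while
# `check_task("translation_en_to_fr")` returns the "translation" defaults with
# task options ("en", "fr").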


def clean_custom_task(task_info):
    import transformers

    if "impl" not in task_info:
        raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.")
    pt_class_names = task_info.get("pt", ())
    if isinstance(pt_class_names, str):
        pt_class_names = [pt_class_names]
    task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names)
    tf_class_names = task_info.get("tf", ())
    if isinstance(tf_class_names, str):
        tf_class_names = [tf_class_names]
    task_info["tf"] = tuple(getattr(transformers, c) for c in tf_class_names)
    return task_info, None
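
# For reference, the `custom_pipelines` entry in a repo's config.json that this function
# consumes looks roughly like this (hypothetical repo and class names):
#
#     "custom_pipelines": {
#         "my-new-task": {
#             "impl": "my_pipeline.MyPipeline",
#             "pt": ["AutoModelForSequenceClassification"],
#             "tf": [],
#         }
#     }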


def pipeline(
    task: Optional[str] = None,
    model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None,
    config: Optional[Union[str, PretrainedConfig]] = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None,
    feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
    image_processor: Optional[Union[str, BaseImageProcessor]] = None,
    framework: Optional[str] = None,
    revision: Optional[str] = None,
    use_fast: bool = True,
    use_auth_token: Optional[Union[str, bool]] = None,
    device: Optional[Union[int, str, "torch.device"]] = None,
    device_map=None,
    torch_dtype=None,
    trust_remote_code: Optional[bool] = None,
    model_kwargs: Optional[Dict[str, Any]] = None,
    pipeline_class: Optional[Any] = None,
    **kwargs,
) -> Pipeline:
    """
    Utility factory method to build a [`Pipeline`].

    Pipelines are made of:

        - A [tokenizer](tokenizer) in charge of mapping raw textual input to tokens.
        - A [model](model) to make predictions from the inputs.
        - Some (optional) post processing for enhancing the model's output.

    Args:
        task (`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - `"audio-classification"`: will return an [`AudioClassificationPipeline`].
            - `"automatic-speech-recognition"`: will return an [`AutomaticSpeechRecognitionPipeline`].
            - `"conversational"`: will return a [`ConversationalPipeline`].
            - `"depth-estimation"`: will return a [`DepthEstimationPipeline`].
            - `"document-question-answering"`: will return a [`DocumentQuestionAnsweringPipeline`].
            - `"feature-extraction"`: will return a [`FeatureExtractionPipeline`].
            - `"fill-mask"`: will return a [`FillMaskPipeline`].
            - `"image-classification"`: will return an [`ImageClassificationPipeline`].
            - `"image-segmentation"`: will return an [`ImageSegmentationPipeline`].
            - `"image-to-text"`: will return an [`ImageToTextPipeline`].
            - `"object-detection"`: will return an [`ObjectDetectionPipeline`].
            - `"question-answering"`: will return a [`QuestionAnsweringPipeline`].
            - `"summarization"`: will return a [`SummarizationPipeline`].
            - `"table-question-answering"`: will return a [`TableQuestionAnsweringPipeline`].
            - `"text2text-generation"`: will return a [`Text2TextGenerationPipeline`].
            - `"text-classification"` (alias `"sentiment-analysis"` available): will return a
              [`TextClassificationPipeline`].
            - `"text-generation"`: will return a [`TextGenerationPipeline`].
            - `"token-classification"` (alias `"ner"` available): will return a [`TokenClassificationPipeline`].
            - `"translation"`: will return a [`TranslationPipeline`].
            - `"translation_xx_to_yy"`: will return a [`TranslationPipeline`].
            - `"video-classification"`: will return a [`VideoClassificationPipeline`].
            - `"visual-question-answering"`: will return a [`VisualQuestionAnsweringPipeline`].
            - `"zero-shot-classification"`: will return a [`ZeroShotClassificationPipeline`].
            - `"zero-shot-image-classification"`: will return a [`ZeroShotImageClassificationPipeline`].
            - `"zero-shot-object-detection"`: will return a [`ZeroShotObjectDetectionPipeline`].

        model (`str` or [`PreTrainedModel`] or [`TFPreTrainedModel`], *optional*):
            The model that will be used by the pipeline to make predictions. This can be a model identifier or an
            actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch) or
            [`TFPreTrainedModel`] (for TensorFlow).

            If not provided, the default for the `task` will be loaded.
        config (`str` or [`PretrainedConfig`], *optional*):
            The configuration that will be used by the pipeline to instantiate the model. This can be a model
            identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`].

            If not provided, the default configuration file for the requested model will be used. That means that if
            `model` is given, its default configuration will be used. However, if `model` is not supplied, this
            `task`'s default model's config is used instead.
        tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].

            If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
            is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
            However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
            will be loaded.
        feature_extractor (`str` or [`PreTrainedFeatureExtractor`], *optional*):
            The feature extractor that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained feature extractor inheriting from [`PreTrainedFeatureExtractor`].

            Feature extractors are used for non-NLP models, such as Speech or Vision models as well as multi-modal
            models. Multi-modal models will also require a tokenizer to be passed.

            If not provided, the default feature extractor for the given `model` will be loaded (if it is a string). If
            `model` is not specified or not a string, then the default feature extractor for `config` is loaded (if it
            is a string). However, if `config` is also not given or not a string, then the default feature extractor
            for the given `task` will be loaded.
        framework (`str`, *optional*):
            The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must be
            installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified and
            both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is
            provided.
        revision (`str`, *optional*, defaults to `"main"`):
            When passing a task name or a string model identifier: The specific model version to use. It can be a
            branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
            artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
        use_fast (`bool`, *optional*, defaults to `True`):
            Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
        use_auth_token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        device (`int` or `str` or `torch.device`):
            Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this
            pipeline will be allocated.
        device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set
            `device_map="auto"` to compute the most optimized `device_map` automatically (see
            [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload)
            for more information).

            <Tip warning={true}>

            Do not use `device_map` AND `device` at the same time as they will conflict

            </Tip>

        torch_dtype (`str` or `torch.dtype`, *optional*):
            Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
            (`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether or not to allow for custom code defined on the Hub in their own modeling, configuration,
            tokenization or even pipeline files. This option should only be set to `True` for repositories you trust
            and in which you have read the code, as it will execute code present on the Hub on your local machine.
        model_kwargs (`Dict[str, Any]`, *optional*):
            Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
            **model_kwargs)` function.
        kwargs (`Dict[str, Any]`, *optional*):
            Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
            corresponding pipeline class for possible values).

    Returns:
        [`Pipeline`]: A suitable pipeline for the task.

    Examples:

    ```python
    >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

    >>> # Sentiment analysis pipeline
    >>> analyzer = pipeline("sentiment-analysis")

    >>> # Question answering pipeline, specifying the checkpoint identifier
    >>> oracle = pipeline(
    ...     "question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="bert-base-cased"
    ... )

    >>> # Named entity recognition pipeline, passing in a specific model and tokenizer
    >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
    >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
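
    >>> # Optionally place the pipeline on GPU 0 with half precision (requires a CUDA device):
    >>> # recognizer = pipeline("ner", model=model, tokenizer=tokenizer, device=0, torch_dtype="auto")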
    ```"""
    if model_kwargs is None:
        model_kwargs = {}
    # Make sure we only pass use_auth_token once as a kwarg (it used to be possible to pass it in model_kwargs,
    # this is to keep BC).
    use_auth_token = model_kwargs.pop("use_auth_token", use_auth_token)
    hub_kwargs = {
        "revision": revision,
        "use_auth_token": use_auth_token,
        "trust_remote_code": trust_remote_code,
        "_commit_hash": None,
    }

    if task is None and model is None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline without either a task or a model "
            "being specified. "
            "Please provide a task class or a model"
        )

    if model is None and tokenizer is not None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer"
            " may not be compatible with the default model. Please provide a PreTrainedModel class or a"
            " path/identifier to a pretrained model when providing tokenizer."
        )
    if model is None and feature_extractor is not None:
        raise RuntimeError(
            "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided"
            " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class"
            " or a path/identifier to a pretrained model when providing feature_extractor."
        )
    if isinstance(model, Path):
        model = str(model)

    # Config is the primordial information item.
    # Instantiate config if needed
    if isinstance(config, str):
        config = AutoConfig.from_pretrained(config, _from_pipeline=task, **hub_kwargs, **model_kwargs)
        hub_kwargs["_commit_hash"] = config._commit_hash
    elif config is None and isinstance(model, str):
        config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
        hub_kwargs["_commit_hash"] = config._commit_hash

    custom_tasks = {}
    if config is not None and len(getattr(config, "custom_pipelines", {})) > 0:
        custom_tasks = config.custom_pipelines
        if task is None and trust_remote_code is not False:
            if len(custom_tasks) == 1:
                task = list(custom_tasks.keys())[0]
            else:
                raise RuntimeError(
                    "We can't infer the task automatically for this model as there are multiple tasks available. Pick "
                    f"one in {', '.join(custom_tasks.keys())}"
                )

    if task is None and model is not None:
        if not isinstance(model, str):
            raise RuntimeError(
                "Inferring the task automatically requires checking the hub with a model_id defined as a `str`. "
                f"{model} is not a valid model_id."
            )
        task = get_task(model, use_auth_token)

    # Retrieve the task
    if task in custom_tasks:
        normalized_task = task
        targeted_task, task_options = clean_custom_task(custom_tasks[task])
        if pipeline_class is None:
            if not trust_remote_code:
                raise ValueError(
                    "Loading this pipeline requires you to execute the code in the pipeline file in that"
                    " repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
                    " set the option `trust_remote_code=True` to remove this error."
                )
            class_ref = targeted_task["impl"]
            module_file, class_name = class_ref.split(".")
            pipeline_class = get_class_from_dynamic_module(
                model, module_file + ".py", class_name, revision=revision, use_auth_token=use_auth_token
            )
    else:
        normalized_task, targeted_task, task_options = check_task(task)
        if pipeline_class is None:
            pipeline_class = targeted_task["impl"]

    # Use default model/config/tokenizer for the task if no model is provided
    if model is None:
        # At that point framework might still be undetermined
        model, default_revision = get_default_model_and_revision(targeted_task, framework, task_options)
        revision = revision if revision is not None else default_revision
        logger.warning(
            f"No model was supplied, defaulted to {model} and revision"
            f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
            "Using a pipeline without specifying a model name and revision in production is not recommended."
        )
        if config is None and isinstance(model, str):
            config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
            hub_kwargs["_commit_hash"] = config._commit_hash

    if device_map is not None:
        if "device_map" in model_kwargs:
            raise ValueError(
                'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
                " arguments might conflict, use only one."
            )
        model_kwargs["device_map"] = device_map
    if torch_dtype is not None:
        if "torch_dtype" in model_kwargs:
            raise ValueError(
                'You cannot use both `pipeline(... torch_dtype=..., model_kwargs={"torch_dtype":...})` as those'
                " arguments might conflict, use only one."
            )
        model_kwargs["torch_dtype"] = torch_dtype

    model_name = model if isinstance(model, str) else None

    # Infer the framework from the model
    # Forced if framework already defined, inferred if it's None
    # Will load the correct model if possible
    model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]}
    framework, model = infer_framework_load_model(
        model,
        model_classes=model_classes,
        config=config,
        framework=framework,
        task=task,
        **hub_kwargs,
        **model_kwargs,
    )

    model_config = model.config
    hub_kwargs["_commit_hash"] = model.config._commit_hash

    load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
    load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
    load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None

    # If `model` (instance of `PretrainedModel` instead of `str`) is passed (and/or same for config), while
    # `image_processor` or `feature_extractor` is `None`, the loading will fail. This happens particularly for some
    # vision tasks when calling `pipeline()` with `model` and only one of the `image_processor` and `feature_extractor`.
    # TODO: we need to make `NO_IMAGE_PROCESSOR_TASKS` and `NO_FEATURE_EXTRACTOR_TASKS` more robust to avoid such issues.
    # This block is only a temporary fix to make CI green.
    if load_image_processor and load_feature_extractor:
        load_feature_extractor = False

    if (
        tokenizer is None
        and not load_tokenizer
        and normalized_task not in NO_TOKENIZER_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
    ):
        # This is a special category of models, that are fusions of multiple models
        # so the model_config might not define a tokenizer, but it seems to be
        # necessary for the task, so we're force-trying to load it.
        load_tokenizer = True
    if (
        image_processor is None
        and not load_image_processor
        and normalized_task not in NO_IMAGE_PROCESSOR_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
        and normalized_task != "automatic-speech-recognition"
    ):
        # This is a special category of models, that are fusions of multiple models
        # so the model_config might not define an image processor, but it seems to be
        # necessary for the task, so we're force-trying to load it.
        load_image_processor = True
    if (
        feature_extractor is None
        and not load_feature_extractor
        and normalized_task not in NO_FEATURE_EXTRACTOR_TASKS
        # Using class name to avoid importing the real class.
        and model_config.__class__.__name__ in MULTI_MODEL_CONFIGS
    ):
        # This is a special category of models, that are fusions of multiple models
        # so the model_config might not define a feature extractor, but it seems to be
        # necessary for the task, so we're force-trying to load it.
        load_feature_extractor = True

    if task in NO_TOKENIZER_TASKS:
        # These will never require a tokenizer.
        # the model on the other hand might have a tokenizer, but
        # the files could be missing from the hub, instead of failing
        # on such repos, we just force to not load it.
        load_tokenizer = False

    if task in NO_FEATURE_EXTRACTOR_TASKS:
        load_feature_extractor = False
    if task in NO_IMAGE_PROCESSOR_TASKS:
        load_image_processor = False

    if load_tokenizer:
        # Try to infer tokenizer from model or config name (if provided as str)
        if tokenizer is None:
            if isinstance(model_name, str):
                tokenizer = model_name
            elif isinstance(config, str):
                tokenizer = config
            else:
                # Impossible to guess what is the right tokenizer here
                raise Exception(
                    "Impossible to guess which tokenizer to use. "
                    "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
                )

        # Instantiate tokenizer if needed
        if isinstance(tokenizer, (str, tuple)):
            if isinstance(tokenizer, tuple):
                # For tuple we have (tokenizer name, {kwargs})
                use_fast = tokenizer[1].pop("use_fast", use_fast)
                tokenizer_identifier = tokenizer[0]
                tokenizer_kwargs = tokenizer[1]
            else:
                tokenizer_identifier = tokenizer
                tokenizer_kwargs = model_kwargs

            tokenizer = AutoTokenizer.from_pretrained(
                tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
            )

    if load_image_processor:
        # Try to infer image processor from model or config name (if provided as str)
        if image_processor is None:
            if isinstance(model_name, str):
                image_processor = model_name
            elif isinstance(config, str):
                image_processor = config
            # Backward compatibility, as `feature_extractor` used to be the name
            # for `ImageProcessor`.
            elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor):
                image_processor = feature_extractor
            else:
                # Impossible to guess what is the right image_processor here
                raise Exception(
                    "Impossible to guess which image processor to use. "
                    "Please provide a PreTrainedImageProcessor class or a path/identifier "
                    "to a pretrained image processor."
                )

        # Instantiate image_processor if needed
        if isinstance(image_processor, (str, tuple)):
            image_processor = AutoImageProcessor.from_pretrained(
                image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
            )

    if load_feature_extractor:
        # Try to infer feature extractor from model or config name (if provided as str)
        if feature_extractor is None:
            if isinstance(model_name, str):
                feature_extractor = model_name
            elif isinstance(config, str):
                feature_extractor = config
            else:
                # Impossible to guess what is the right feature_extractor here
                raise Exception(
                    "Impossible to guess which feature extractor to use. "
                    "Please provide a PreTrainedFeatureExtractor class or a path/identifier "
                    "to a pretrained feature extractor."
                )

        # Instantiate feature_extractor if needed
        if isinstance(feature_extractor, (str, tuple)):
            feature_extractor = AutoFeatureExtractor.from_pretrained(
                feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs
            )

            if (
                feature_extractor._processor_class
                and feature_extractor._processor_class.endswith("WithLM")
                and isinstance(model_name, str)
            ):
                try:
                    import kenlm  # to trigger `ImportError` if not installed
                    from pyctcdecode import BeamSearchDecoderCTC

                    if os.path.isdir(model_name) or os.path.isfile(model_name):
                        decoder = BeamSearchDecoderCTC.load_from_dir(model_name)
                    else:
                        language_model_glob = os.path.join(
                            BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*"
                        )
                        alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME
                        allow_patterns = [language_model_glob, alphabet_filename]
                        decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns)

                    kwargs["decoder"] = decoder
                except ImportError as e:
                    logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}")
                    if not is_kenlm_available():
                        logger.warning("Try to install `kenlm`: `pip install kenlm")

                    if not is_pyctcdecode_available():
                        logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode")

    if task == "translation" and model.config.task_specific_params:
        for key in model.config.task_specific_params:
            if key.startswith("translation"):
                task = key
                warnings.warn(
                    f'"translation" task was used, instead of "translation_XX_to_YY", defaulting to "{task}"',
                    UserWarning,
                )
                break

    if tokenizer is not None:
        kwargs["tokenizer"] = tokenizer

    if feature_extractor is not None:
        kwargs["feature_extractor"] = feature_extractor

    if torch_dtype is not None:
        kwargs["torch_dtype"] = torch_dtype

    if image_processor is not None:
        kwargs["image_processor"] = image_processor

    if device is not None:
        kwargs["device"] = device

    return pipeline_class(model=model, framework=framework, task=task, **kwargs)