Update doc for {Summarization,Translation}Pipeline and other tweaks

83703cd0 · Julien Chaumond · a1b3b416 · 83703cd0 · 83703cd0 · 83703cd0
Commit 83703cd0 authored Apr 07, 2020 by Julien Chaumond
9 changed files
--- a/README.md
+++ b/README.md
@@ -538,6 +538,8 @@ You can create `Pipeline` objects for the following down-stream tasks:
 - `text-classification`: Initialize a `TextClassificationPipeline` directly, or see `sentiment-analysis` for an example.
 - `question-answering`: Provided some context and a question referring to the context, it will extract the answer to the question in the context.
 - `fill-mask`: Takes an input sequence containing a masked token (e.g. `<mask>`) and returns a list of the most probable filled sequences, with their probabilities.
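+ - `summarization`: Takes a text sequence and generates a shorter summary of it.
+ - `translation_xx_to_yy`: Translates a text sequence from language `xx` to language `yy` (e.g. `translation_en_to_fr`).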
 ```python
 from transformers import pipeline

--- a/model_cards/bart-large-cnn/README.md
+++ b/model_cards/bart-large-cnn/README.md
+---
+tags:
+- summarization
+---
--- a/model_cards/bart-large-xsum/README.md
+++ b/model_cards/bart-large-xsum/README.md
+---
+tags:
+- summarization
+---
--- a/model_cards/t5-11b-README.md
+++ b/model_cards/t5-11b-README.md
+---
+tags:
+- summarization
+- translation
+---
--- a/model_cards/t5-3b-README.md
+++ b/model_cards/t5-3b-README.md
+---
+tags:
+- summarization
+- translation
+---
--- a/model_cards/t5-base-README.md
+++ b/model_cards/t5-base-README.md
+---
+tags:
+- summarization
+- translation
+---
--- a/model_cards/t5-large-README.md
+++ b/model_cards/t5-large-README.md
+---
+tags:
+- summarization
+- translation
+---
--- a/model_cards/t5-small-README.md
+++ b/model_cards/t5-small-README.md
+---
+tags:
+- summarization
+- translation
+---
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -564,8 +564,8 @@ class TextClassificationPipeline(Pipeline):
    - "sentiment-analysis", for classifying sequences according to positive or negative sentiments.
    The models that this pipeline can use are models that have been fine-tuned on a sequence classification task.
-    See the list of available community models fine-tuned on such a task on
+    See the up-to-date list of available models on
-    `huggingface.co/models <https://huggingface.co/models?search=&filter=text-classification>`__.
+    `huggingface.co/models <https://huggingface.co/models?filter=text-classification>`__.
    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
@@ -608,8 +608,8 @@ class FillMaskPipeline(Pipeline):
    The models that this pipeline can use are models that have been trained with a masked language modeling objective,
    which includes the bi-directional models in the library.
-    See the list of available community models on
+    See the up-to-date list of available models on
-    `huggingface.co/models <https://huggingface.co/models?search=&filter=lm-head>`__.
+    `huggingface.co/models <https://huggingface.co/models?filter=lm-head>`__.
    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
@@ -707,8 +707,8 @@ class NerPipeline(Pipeline):
    - "ner", for predicting the classes of tokens in a sequence: person, organisation, location or miscellaneous.
    The models that this pipeline can use are models that have been fine-tuned on a token classification task.
-    See the list of available community models fine-tuned on such a task on
+    See the up-to-date list of available models on
-    `huggingface.co/models <https://huggingface.co/models?search=&filter=token-classification>`__.
+    `huggingface.co/models <https://huggingface.co/models?filter=token-classification>`__.
    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
@@ -883,8 +883,8 @@ class QuestionAnsweringPipeline(Pipeline):
    - "question-answering", for answering questions given a context.
    The models that this pipeline can use are models that have been fine-tuned on a question answering task.
-    See the list of available community models fine-tuned on such a task on
+    See the up-to-date list of available models on
-    `huggingface.co/models <https://huggingface.co/models?search=&filter=question-answering>`__.
+    `huggingface.co/models <https://huggingface.co/models?filter=question-answering>`__.
    Arguments:
        model (:obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`):
@@ -1147,8 +1147,10 @@ class SummarizationPipeline(Pipeline):
        summarizer = pipeline("summarization", model="t5-base", tokenizer="t5-base", framework="tf")
        summarizer("Sam Shleifer writes the best docstring examples in the whole world.", min_length=5, max_length=20)
-    Supported Models:
+    The models that this pipeline can use are models that have been fine-tuned on a summarization task,
-        The models that this pipeline can use are models that have been fine-tuned on a summarization task, which is currently, '`bart-large-cnn`', '`t5-small`', '`t5-base`', '`t5-large`', '`t5-3b`', '`t5-11b`'.
+    currently: '`bart-large-cnn`', '`t5-small`', '`t5-base`', '`t5-large`', '`t5-3b`', '`t5-11b`'.
+    See the up-to-date list of available models on
+    `huggingface.co/models <https://huggingface.co/models?filter=summarization>`__.
    Arguments:
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
@@ -1276,7 +1278,10 @@ class TranslationPipeline(Pipeline):
        en_fr_translator = pipeline("translation_en_to_fr")
        en_fr_translator("How old are you?")
-    Supported Models: "t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b"
+    The models that this pipeline can use are models that have been fine-tuned on a translation task,
+    currently: "t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b".
+    See the up-to-date list of available models on
+    `huggingface.co/models <https://huggingface.co/models?filter=translation>`__.
    Arguments:
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
@@ -1505,25 +1510,27 @@ def pipeline(
            - "ner": will return a :class:`~transformers.NerPipeline`
            - "question-answering": will return a :class:`~transformers.QuestionAnsweringPipeline`
            - "fill-mask": will return a :class:`~transformers.FillMaskPipeline`
+            - "summarization": will return a :class:`~transformers.SummarizationPipeline`
+            - "translation_xx_to_yy": will return a :class:`~transformers.TranslationPipeline`
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`, defaults to :obj:`None`):
-            The model that will be used by the pipeline to make predictions. This can be :obj:`None`, a string
+            The model that will be used by the pipeline to make predictions. This can be :obj:`None`,
-            checkpoint identifier or an actual pre-trained model inheriting from
+            a model identifier or an actual pre-trained model inheriting from
            :class:`~transformers.PreTrainedModel` for PyTorch and :class:`~transformers.TFPreTrainedModel` for
            TensorFlow.
-            If :obj:`None`, the default of the pipeline will be loaded.
+            If :obj:`None`, the default for this pipeline will be loaded.
        config (:obj:`str` or :obj:`~transformers.PretrainedConfig`, `optional`, defaults to :obj:`None`):
            The configuration that will be used by the pipeline to instantiate the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained model configuration inheriting from
+            a model identifier or an actual pre-trained model configuration inheriting from
            :class:`~transformers.PretrainedConfig`.
-            If :obj:`None`, the default of the pipeline will be loaded.
+            If :obj:`None`, the default for this pipeline will be loaded.
        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`, defaults to :obj:`None`):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be :obj:`None`,
-            a string checkpoint identifier or an actual pre-trained tokenizer inheriting from
+            a model identifier or an actual pre-trained tokenizer inheriting from
            :class:`~transformers.PreTrainedTokenizer`.
-            If :obj:`None`, the default of the pipeline will be loaded.
+            If :obj:`None`, the default for this pipeline will be loaded.
        framework (:obj:`str`, `optional`, defaults to :obj:`None`):
            The framework to use, either "pt" for PyTorch or "tf" for TensorFlow. The specified framework must be
            installed.
@@ -1549,11 +1556,6 @@ def pipeline(
        model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
        tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
        pipeline('ner', model=model, tokenizer=tokenizer)
-        # Named entity recognition pipeline, passing a model and configuration with a HTTPS URL.
-        model_url = "https://s3.amazonaws.com/models.huggingface.co/bert/dbmdz/bert-large-cased-finetuned-conll03-english/pytorch_model.bin"
-        config_url = "https://s3.amazonaws.com/models.huggingface.co/bert/dbmdz/bert-large-cased-finetuned-conll03-english/config.json"
-        pipeline('ner', model=model_url, config=config_url, tokenizer='bert-base-cased')
    """
    # Retrieve the task
    if task not in SUPPORTED_TASKS:
@@ -1579,7 +1581,7 @@ def pipeline(
            # Impossible to guess what is the right tokenizer here
            raise Exception(
                "Impossible to guess which tokenizer to use. "
-                "Please provided a PretrainedTokenizer class or a path/url/shortcut name to a pretrained tokenizer."
+                "Please provided a PretrainedTokenizer class or a path/identifier to a pretrained tokenizer."
            )
    modelcard = None
@@ -1623,4 +1625,4 @@ def pipeline(
            )
        model = model_class.from_pretrained(model, config=config, **model_kwargs)
-    return task_class(model=model, tokenizer=tokenizer, modelcard=modelcard, framework=framework, task=task, **kwargs,)
+    return task_class(model=model, tokenizer=tokenizer, modelcard=modelcard, framework=framework, task=task, **kwargs)