Unverified commit 969859d5, authored by Santiago Castro, committed by GitHub

Fix doc errors and typos across the board (#8139)

* Fix doc errors and typos across the board

* Fix a typo

* Fix the CI

* Fix more typos

* Fix CI

* More fixes

* Fix CI

* More fixes

* More fixes
parent 4731a00c
@@ -57,7 +57,7 @@ class LxmertConfig(PretrainedConfig):
             The non-linear activation function (function or string) in the encoder and pooler. If string,
             :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
@@ -95,10 +95,9 @@ class LxmertConfig(PretrainedConfig):
             Whether or not to add masked language modeling (as used in pretraining models such as BERT) to the loss
             objective.
         task_obj_predict (:obj:`bool`, `optional`, defaults to :obj:`True`):
-            Whether or not to add object predicition, attribute predicition and feature regression to the loss
-            objective.
+            Whether or not to add object prediction, attribute prediction and feature regression to the loss objective.
         task_qa (:obj:`bool`, `optional`, defaults to :obj:`True`):
-            Whether or not to add the question-asnwering loss to the objective
+            Whether or not to add the question-answering loss to the objective
         visual_obj_loss (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether or not to calculate the object-prediction loss objective
         visual_attr_loss (:obj:`bool`, `optional`, defaults to :obj:`True`):
@@ -106,10 +105,10 @@ class LxmertConfig(PretrainedConfig):
         visual_feat_loss (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether or not to calculate the feature-regression loss objective
         output_attentions (:obj:`bool`, `optional`, defaults to :obj:`False`):
-            Whether or not the model should return the attentions from the vision, langauge, and cross-modality layers
+            Whether or not the model should return the attentions from the vision, language, and cross-modality layers
             should be returned.
         output_hidden_states (:obj:`bool`, `optional`, defaults to :obj:`False`):
-            Whether or not the model should return the hidden states from the vision, langauge, and cross-modality
+            Whether or not the model should return the hidden states from the vision, language, and cross-modality
             layers should be returned.
     """
...
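As a quick sanity check on the parameters this hunk documents, here is a minimal sketch of overriding them when building a config; all names come from the docstring above, the values are arbitrary:

```python
from transformers import LxmertConfig

config = LxmertConfig(
    hidden_dropout_prob=0.1,           # dropout for fully connected layers
    attention_probs_dropout_prob=0.1,  # dropout on attention probabilities
    task_obj_predict=True,             # object/attribute prediction + feature regression losses
    task_qa=True,                      # question-answering loss
    visual_obj_loss=True,              # object-prediction loss objective
)
```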
@@ -52,7 +52,7 @@ class MarianConfig(BartConfig):
             The non-linear activation function (function or string) in the encoder and pooler. If string,
             :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         dropout (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_dropout (:obj:`float`, `optional`, defaults to 0.0):
             The dropout ratio for the attention probabilities.
         activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
...
@@ -57,7 +57,7 @@ class MBartConfig(BartConfig):
             The non-linear activation function (function or string) in the encoder and pooler. If string,
             :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         dropout (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_dropout (:obj:`float`, `optional`, defaults to 0.0):
             The dropout ratio for the attention probabilities.
         activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
...
@@ -96,7 +96,7 @@ class PegasusConfig(BartConfig):
             The non-linear activation function (function or string) in the encoder and pooler. If string,
             :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         dropout (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_dropout (:obj:`float`, `optional`, defaults to 0.0):
             The dropout ratio for the attention probabilities.
         activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
...
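The same three dropout parameters recur in the Marian, MBart, and Pegasus hunks above because all three configs inherit from `BartConfig`. A hedged sketch of setting them (class and parameter names are taken from the diff; the values are arbitrary):

```python
from transformers import PegasusConfig

config = PegasusConfig(
    dropout=0.1,             # fully connected layers in the embeddings, encoder, and pooler
    attention_dropout=0.0,   # attention probabilities
    activation_dropout=0.0,  # dropout applied inside the feed-forward layers
)
```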
@@ -60,7 +60,7 @@ class ProphetNetConfig(PretrainedConfig):
         attention_dropout (:obj:`float`, `optional`, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         dropout (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
...
@@ -30,7 +30,7 @@ RAG_CONFIG_DOC = r"""
             Separator inserted between the title and the text of the retrieved document when calling
             :class:`~transformers.RagRetriever`.
         doc_sep (:obj:`str`, `optional`, defaults to ``" // "``):
-            Separator inserted between the the text of the retrieved document and the original input when calliang
+            Separator inserted between the the text of the retrieved document and the original input when calling
             :class:`~transformers.RagRetriever`.
         n_docs (:obj:`int`, `optional`, defaults to 5):
             Number of documents to retrieve.
@@ -39,7 +39,7 @@ RAG_CONFIG_DOC = r"""
         retrieval_vector_size (:obj:`int`, `optional`, defaults to 768):
             Dimensionality of the document embeddings indexed by :class:`~transformers.RagRetriever`.
         retrieval_batch_size (:obj:`int`, `optional`, defaults to 8):
-            Retrieval batch size, defined as the number of queries issues concurrently to the faiss index excapsulated
+            Retrieval batch size, defined as the number of queries issues concurrently to the faiss index encapsulated
             :class:`~transformers.RagRetriever`.
         dataset (:obj:`str`, `optional`, defaults to :obj:`"wiki_dpr"`):
             A dataset identifier of the indexed dataset in HuggingFace Datasets (list all available datasets and ids
...
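These retrieval knobs all live on `RagConfig` and are consumed by `RagRetriever`; a minimal sketch of overriding them (the `facebook/rag-token-nq` checkpoint name is an assumption, not something this diff states):

```python
from transformers import RagConfig

config = RagConfig.from_pretrained(
    "facebook/rag-token-nq",  # assumed public checkpoint; swap in your own
    n_docs=5,                 # number of documents to retrieve
    retrieval_batch_size=8,   # concurrent queries against the faiss index
    doc_sep=" // ",           # separator between retrieved document text and the input
)
```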
@@ -82,7 +82,7 @@ class ReformerConfig(PretrainedConfig):
             The non-linear activation function (function or string) in the feed forward layer in the residual attention
             block. If string, :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.05):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         hidden_size (:obj:`int`, `optional`, defaults to 256):
             Dimensionality of the output hidden states of the residual attention blocks.
         initializer_range (:obj:`float`, `optional`, defaults to 0.02):
...
@@ -20,7 +20,7 @@ from .utils import logging
 logger = logging.get_logger(__name__)

-# TODO: uploadto AWS
+# TODO: upload to AWS
 RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     "retribert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-config.json",
 }
@@ -51,7 +51,7 @@ class RetriBertConfig(PretrainedConfig):
             The non-linear activation function (function or string) in the encoder and pooler. If string,
             :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
...
@@ -52,7 +52,7 @@ class SqueezeBertConfig(PretrainedConfig):
             The non-linear activation function (function or string) in the encoder and pooler. If string,
             :obj:`"gelu"`, :obj:`"relu"`, :obj:`"swish"` and :obj:`"gelu_new"` are supported.
         hidden_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         attention_probs_dropout_prob (:obj:`float`, `optional`, defaults to 0.1):
             The dropout ratio for the attention probabilities.
         max_position_embeddings (:obj:`int`, `optional`, defaults to 512):
...
@@ -77,7 +77,7 @@ class TransfoXLConfig(PretrainedConfig):
         adaptive (:obj:`boolean`, `optional`, defaults to :obj:`True`):
             Whether or not to use adaptive softmax.
         dropout (:obj:`float`, `optional`, defaults to 0.1):
-            The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
         dropatt (:obj:`float`, `optional`, defaults to 0):
             The dropout ratio for the attention probabilities.
         untie_r (:obj:`boolean`, `optional`, defaults to :obj:`True`):
...
@@ -83,7 +83,7 @@ def generate_identified_filename(filename: Path, identifier: str) -> Path:
         filename: pathlib.Path The actual path object we would like to add an identifier suffix
         identifier: The suffix to add
-    Returns: String with concatenated indentifier at the end of the filename
+    Returns: String with concatenated identifier at the end of the filename
     """
     return filename.parent.joinpath(filename.stem + identifier).with_suffix(filename.suffix)
...
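The one-line body shown in this hunk makes the behavior easy to verify; given the code above, the following self-contained check should hold:

```python
from pathlib import Path

def generate_identified_filename(filename: Path, identifier: str) -> Path:
    # Copied from the hunk above: insert the identifier before the file suffix.
    return filename.parent.joinpath(filename.stem + identifier).with_suffix(filename.suffix)

assert generate_identified_filename(Path("model.onnx"), "-optimized") == Path("model-optimized.onnx")
```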
@@ -30,7 +30,7 @@ class LightningModel(pl.LightningModule):
         self.num_labels = 2
         self.qa_outputs = torch.nn.Linear(self.model.config.hidden_size, self.num_labels)

-    # implement only because lighning requires to do so
+    # implement only because lightning requires to do so
     def forward(self):
         pass
@@ -57,7 +57,7 @@ def convert_longformer_qa_checkpoint_to_pytorch(
     # save model
     longformer_for_qa.save_pretrained(pytorch_dump_folder_path)

-    print("Conversion succesful. Model saved under {}".format(pytorch_dump_folder_path))
+    print("Conversion successful. Model saved under {}".format(pytorch_dump_folder_path))

 if __name__ == "__main__":
@@ -75,7 +75,7 @@ if __name__ == "__main__":
         default=None,
         type=str,
         required=True,
-        help="Path the official PyTorch Lighning Checkpoint.",
+        help="Path the official PyTorch Lightning Checkpoint.",
     )
     parser.add_argument(
         "--pytorch_dump_folder_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
...
@@ -34,7 +34,7 @@ class TatoebaConverter:
     1. convert numpy state dict to hf format (same code as OPUS-MT-Train conversion).
     2. rename opus model to huggingface format. This means replace each alpha3 code with an alpha2 code if a unique
-       one existes. e.g. aav-eng -> aav-en, heb-eng -> he-en
+       one exists. e.g. aav-eng -> aav-en, heb-eng -> he-en
     3. write a model card containing the original Tatoeba-Challenge/README.md and extra info about alpha3 group
        members.
     """
...
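Step 2's renaming rule is simple enough to sketch. The helper and its tiny code table below are illustrative only (the real converter consults a full ISO 639 mapping), but they reproduce the two examples in the docstring:

```python
# Hypothetical mini-table: "aav" has no unique alpha2 code, so it stays alpha3.
ALPHA3_TO_ALPHA2 = {"eng": "en", "heb": "he"}

def rename_pair(pair: str) -> str:
    # Replace each alpha3 code with its alpha2 code when one uniquely exists.
    src, tgt = pair.split("-")
    return f"{ALPHA3_TO_ALPHA2.get(src, src)}-{ALPHA3_TO_ALPHA2.get(tgt, tgt)}"

assert rename_pair("aav-eng") == "aav-en"
assert rename_pair("heb-eng") == "he-en"
```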
@@ -123,7 +123,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--force_download",
         action="store_true",
-        help="Re-dowload checkpoints.",
+        help="Re-download checkpoints.",
     )
     args = parser.parse_args()
...
@@ -104,7 +104,7 @@ if __name__ == "__main__":
         "--finetuning_task",
         default=None,
         type=str,
-        help="Name of a task on which the XLNet TensorFloaw model was fine-tuned",
+        help="Name of a task on which the XLNet TensorFlow model was fine-tuned",
     )
     args = parser.parse_args()
     print(args)
...
@@ -330,7 +330,7 @@ class DataCollatorForSOP(DataCollatorForLanguageModeling):
         input_ids, labels, attention_mask = self.mask_tokens(input_ids)
         token_type_ids = [example["token_type_ids"] for example in examples]
-        # size of segment_ids varied because randomness, padding zero to the end as the orignal implementation
+        # size of segment_ids varied because randomness, padding zero to the end as the original implementation
         token_type_ids = pad_sequence(token_type_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id)
         sop_label_list = [example["sentence_order_label"] for example in examples]
...
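The `pad_sequence` call in this hunk right-pads variable-length tensors into a single batch; a self-contained sketch of that behavior, with pad value 0 standing in for `tokenizer.pad_token_id`:

```python
import torch
from torch.nn.utils.rnn import pad_sequence

# Segment-id tensors of different lengths, as the randomness comment describes.
seqs = [torch.tensor([0, 0, 1]), torch.tensor([0, 1])]
batch = pad_sequence(seqs, batch_first=True, padding_value=0)
print(batch)  # tensor([[0, 0, 1], [0, 1, 0]]) -- the shorter row is padded at the end
```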
@@ -71,7 +71,7 @@ class TextDataset(Dataset):
                 tokenizer.build_inputs_with_special_tokens(tokenized_text[i : i + block_size])
             )
         # Note that we are losing the last truncated example here for the sake of simplicity (no padding)
-        # If your dataset is small, first you should loook for a bigger one :-) and second you
+        # If your dataset is small, first you should look for a bigger one :-) and second you
         # can change this behavior by adding (model specific) padding.

         start = time.time()
...
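The dropped-tail behavior the comment describes is easy to see in isolation; a sketch with a stand-in token list (no tokenizer needed):

```python
block_size = 4
tokenized_text = list(range(10))  # stand-in for token ids

# Mirror the loop above: fixed-size blocks, final partial block discarded.
blocks = [
    tokenized_text[i : i + block_size]
    for i in range(0, len(tokenized_text) - block_size + 1, block_size)
]
print(blocks)  # [[0, 1, 2, 3], [4, 5, 6, 7]] -- tokens 8 and 9 are lost
```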
@@ -327,7 +327,7 @@ def squad_convert_examples_to_features(
         padding_strategy: Default to "max_length". Which padding strategy to use
         return_dataset: Default False. Either 'pt' or 'tf'.
             if 'pt': returns a torch.data.TensorDataset, if 'tf': returns a tf.data.Dataset
-        threads: multiple processing threadsa-smi
+        threads: multiple processing threads.

     Returns:
@@ -527,7 +527,7 @@ def squad_convert_examples_to_features(
 class SquadProcessor(DataProcessor):
     """
-    Processor for the SQuAD data set. Overriden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
+    Processor for the SQuAD data set. overridden by SquadV1Processor and SquadV2Processor, used by the version 1.1 and
     version 2.0 of SQuAD, respectively.
     """
...
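For context, a hedged sketch of how these two pieces fit together in the library this diff touches (the data directory is an assumption):

```python
from transformers import AutoTokenizer
from transformers.data.processors.squad import SquadV2Processor, squad_convert_examples_to_features

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
examples = SquadV2Processor().get_dev_examples("squad_data")  # assumed data dir

features, dataset = squad_convert_examples_to_features(
    examples=examples,
    tokenizer=tokenizer,
    max_seq_length=384,
    doc_stride=128,
    max_query_length=64,
    is_training=False,
    return_dataset="pt",  # torch TensorDataset, per the docstring above
    threads=4,            # multiple processing threads
)
```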
@@ -245,9 +245,6 @@ class SingleSentenceClassificationProcessor(DataProcessor):
         Args:
             tokenizer: Instance of a tokenizer that will tokenize the examples
             max_length: Maximum example length
-            task: GLUE task
-            label_list: List of labels. Can be obtained from the processor using the ``processor.get_labels()`` method
-            output_mode: String indicating the output mode. Either ``regression`` or ``classification``
             pad_on_left: If set to ``True``, the examples will be padded on the left rather than on the right (default)
             pad_token: Padding token
             mask_padding_with_zero: If set to ``True``, the attention mask will be filled by ``1`` for actual values
...
@@ -89,7 +89,7 @@ try:
     # Check we're not importing a "datasets" directory somewhere
     _datasets_available = hasattr(datasets, "__version__") and hasattr(datasets, "load_dataset")
     if _datasets_available:
-        logger.debug(f"Succesfully imported datasets version {datasets.__version__}")
+        logger.debug(f"Successfully imported datasets version {datasets.__version__}")
     else:
         logger.debug("Imported a datasets object but this doesn't seem to be the 🤗 datasets library.")
@@ -147,7 +147,7 @@ try:
     import faiss  # noqa: F401

     _faiss_available = True
-    logger.debug(f"Succesfully imported faiss version {faiss.__version__}")
+    logger.debug(f"Successfully imported faiss version {faiss.__version__}")
 except ImportError:
     _faiss_available = False
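The try/except blocks in these hunks are the file's standard idiom for probing optional dependencies; a generic, self-contained sketch of the same pattern (the module name is a placeholder):

```python
import logging

logger = logging.getLogger(__name__)

try:
    import some_optional_lib  # placeholder for faiss, datasets, etc.

    _lib_available = True
    logger.debug(f"Successfully imported some_optional_lib version {some_optional_lib.__version__}")
except ImportError:
    _lib_available = False

def is_lib_available() -> bool:
    # Callers check this flag instead of importing the library themselves.
    return _lib_available
```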
@@ -290,7 +290,7 @@ def torch_only_method(fn):
 # docstyle-ignore
 DATASETS_IMPORT_ERROR = """
-{0} requires the 🤗 Datasets library but it was not found in your enviromnent. You can install it with:
+{0} requires the 🤗 Datasets library but it was not found in your environment. You can install it with:
 ```
 pip install datasets
 ```
@@ -308,7 +308,7 @@ that python file if that's the case.
 # docstyle-ignore
 TOKENIZERS_IMPORT_ERROR = """
-{0} requires the 🤗 Tokenizers library but it was not found in your enviromnent. You can install it with:
+{0} requires the 🤗 Tokenizers library but it was not found in your environment. You can install it with:
 ```
 pip install tokenizers
 ```
@@ -321,30 +321,30 @@ In a notebook or a colab, you can install it by executing a cell with
 # docstyle-ignore
 SENTENCEPIECE_IMPORT_ERROR = """
-{0} requires the SentencePiece library but it was not found in your enviromnent. Checkout the instructions on the
+{0} requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
 installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
-that match your enviromnent.
+that match your environment.
 """

 # docstyle-ignore
 FAISS_IMPORT_ERROR = """
-{0} requires the faiss library but it was not found in your enviromnent. Checkout the instructions on the
+{0} requires the faiss library but it was not found in your environment. Checkout the instructions on the
 installation page of its repo: https://github.com/facebookresearch/faiss/blob/master/INSTALL.md and follow the ones
-that match your enviromnent.
+that match your environment.
 """

 # docstyle-ignore
 PYTORCH_IMPORT_ERROR = """
-{0} requires the PyTorch library but it was not found in your enviromnent. Checkout the instructions on the
-installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your enviromnent.
+{0} requires the PyTorch library but it was not found in your environment. Checkout the instructions on the
+installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
 """

 # docstyle-ignore
 SKLEARN_IMPORT_ERROR = """
-{0} requires the scikit-learn library but it was not found in your enviromnent. You can install it with:
+{0} requires the scikit-learn library but it was not found in your environment. You can install it with:
 ```
 pip install -U scikit-learn
 ```
@@ -357,15 +357,15 @@ In a notebook or a colab, you can install it by executing a cell with
 # docstyle-ignore
 TENSORFLOW_IMPORT_ERROR = """
-{0} requires the TensorFlow library but it was not found in your enviromnent. Checkout the instructions on the
-installation page: https://www.tensorflow.org/install and follow the ones that match your enviromnent.
+{0} requires the TensorFlow library but it was not found in your environment. Checkout the instructions on the
+installation page: https://www.tensorflow.org/install and follow the ones that match your environment.
 """

 # docstyle-ignore
 FLAX_IMPORT_ERROR = """
-{0} requires the FLAX library but it was not found in your enviromnent. Checkout the instructions on the
-installation page: https://github.com/google/flax and follow the ones that match your enviromnent.
+{0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the
+installation page: https://github.com/google/flax and follow the ones that match your environment.
 """
@@ -918,13 +918,13 @@ def cached_path(
     Args:
         cache_dir: specify a cache directory to save the file to (overwrite the default cache dir).
-        force_download: if True, re-dowload the file even if it's already cached in the cache dir.
-        resume_download: if True, resume the download if incompletly recieved file is found.
+        force_download: if True, re-download the file even if it's already cached in the cache dir.
+        resume_download: if True, resume the download if incompletely received file is found.
         user_agent: Optional string or dict that will be appended to the user-agent on remote requests.
         extract_compressed_file: if True and the path point to a zip or tar file, extract the compressed
             file in a folder along the archive.
         force_extract: if True when extract_compressed_file is True and the archive was already extracted,
-            re-extract the archive and overide the folder where it was extracted.
+            re-extract the archive and override the folder where it was extracted.

     Return:
         None in case of non-recoverable file (non-existent or inaccessible url + no cache on disk). Local path (string)
...
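A minimal usage sketch for the arguments documented above; the URL is a placeholder, and `cached_path` lived in `transformers.file_utils` in the version this commit targets:

```python
from transformers.file_utils import cached_path

# Download (or reuse the cached copy of) a remote file and return its local path.
local_path = cached_path(
    "https://example.com/some-config.json",  # placeholder URL
    force_download=False,  # reuse the cache when possible
    resume_download=True,  # pick up an incompletely received file
)
print(local_path)
```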