"git@developer.sourcefind.cn:chenpangpang/ComfyUI.git" did not exist on "cd4fc77d5f83867cdfb806f0c96c65ce8a84322c"
Unverified Commit 7e73c128 authored by Tiger, committed by GitHub

fixed lots of typos. (#7758)

parent 8cb4ecca
@@ -12,7 +12,7 @@ subclass :class:`~transformers.Trainer` and override the methods you need (see :
 By default a :class:`~transformers.Trainer` will use the following callbacks:
-- :class:`~transformers.DefaultFlowCallback` which handles the default beahvior for logging, saving and evaluation.
+- :class:`~transformers.DefaultFlowCallback` which handles the default behavior for logging, saving and evaluation.
 - :class:`~transformers.PrinterCallback` or :class:`~transformers.ProrgressCallback` to display progress and print the
   logs (the first one is used if you deactivate tqdm through the :class:`~transformers.TrainingArguments`, otherwise
   it's the second one).
......
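For context, a minimal sketch of the callback mechanism this hunk documents. The callback class, `model`, and `train_dataset` are illustrative placeholders, not part of the commit:

```python
from transformers import Trainer, TrainerCallback, TrainingArguments

class LossPrinterCallback(TrainerCallback):
    """Hypothetical callback that prints the logs at each logging step."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            print(f"step {state.global_step}: {logs}")

# disable_tqdm=True makes Trainer fall back to PrinterCallback instead of
# ProgressCallback; extra callbacks are appended to the defaults.
args = TrainingArguments(output_dir="out", disable_tqdm=True)
trainer = Trainer(
    model=model,                    # assumed: a pretrained model built elsewhere
    args=args,
    train_dataset=train_dataset,    # assumed: a torch Dataset built elsewhere
    callbacks=[LossPrinterCallback],
)
```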
@@ -15,7 +15,7 @@ Both :class:`~transformers.Trainer` and :class:`~transformers.TFTrainer` contain
 previous features. To inject custom behavior you can subclass them and override the following methods:
 - **get_train_dataloader**/**get_train_tfdataset** -- Creates the training DataLoader (PyTorch) or TF Dataset.
-- **get_eval_dataloader**/**get_eval_tfdataset** -- Creates the evaulation DataLoader (PyTorch) or TF Dataset.
+- **get_eval_dataloader**/**get_eval_tfdataset** -- Creates the evaluation DataLoader (PyTorch) or TF Dataset.
 - **get_test_dataloader**/**get_test_tfdataset** -- Creates the test DataLoader (PyTorch) or TF Dataset.
 - **log** -- Logs information on the various objects watching training.
 - **create_optimizer_and_scheduler** -- Setups the optimizer and learning rate scheduler if they were not passed at
......
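A sketch of the subclassing pattern described above, overriding one of the listed methods. The specific DataLoader settings are just an example:

```python
from torch.utils.data import DataLoader
from transformers import Trainer

class MyTrainer(Trainer):
    def get_train_dataloader(self) -> DataLoader:
        # Custom behavior: a plain sequential DataLoader instead of the
        # default sampler logic; reuses the trainer's own collator.
        return DataLoader(
            self.train_dataset,
            batch_size=self.args.train_batch_size,
            shuffle=False,
            collate_fn=self.data_collator,
        )
```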
@@ -66,7 +66,7 @@ The library is built around three types of classes for each model:
 All these classes can be instantiated from pretrained instances and saved locally using two methods:
 - :obj:`from_pretrained()` lets you instantiate a model/configuration/tokenizer from a pretrained version either
-  provided by the library itself (the suported models are provided in the list :doc:`here <pretrained_models>`
+  provided by the library itself (the supported models are provided in the list :doc:`here <pretrained_models>`
   or stored locally (or on a server) by the user,
 - :obj:`save_pretrained()` lets you save a model/configuration/tokenizer locally so that it can be reloaded using
   :obj:`from_pretrained()`.
......
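The round trip the two methods describe, as a short sketch (the local directory name is arbitrary):

```python
from transformers import AutoModel, AutoTokenizer

# Download from the hub (or use a cached copy)...
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# ...save locally, then reload from the local directory.
model.save_pretrained("./my-bert")
tokenizer.save_pretrained("./my-bert")
model = AutoModel.from_pretrained("./my-bert")
```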
@@ -39,7 +39,7 @@ python run_summarization.py \
     --compute_rouge true
 ```
-The scripts executes on GPU if one is available and if `no_cuda` is not set to `true`. Inference on multiple GPUs is not suported yet. The ROUGE scores will be displayed in the console at the end of evaluation and written in a `rouge_scores.txt` file. The script takes 30 hours to compute with a single Tesla V100 GPU and a batch size of 10 (300,000 texts to summarize).
+The scripts executes on GPU if one is available and if `no_cuda` is not set to `true`. Inference on multiple GPUs is not supported yet. The ROUGE scores will be displayed in the console at the end of evaluation and written in a `rouge_scores.txt` file. The script takes 30 hours to compute with a single Tesla V100 GPU and a batch size of 10 (300,000 texts to summarize).
 ## Summarize any text
......
@@ -31,7 +31,7 @@ class MMBTConfig(object):
         Config of the underlying Transformer models. Its values are copied over to use a single config.
     num_labels (:obj:`int`, `optional`):
         Size of final Linear layer for classification.
-    modal_hidden_size (:obj:`int`, `optional`, defautls to 2048):
+    modal_hidden_size (:obj:`int`, `optional`, defaults to 2048):
         Embedding dimension of the non-text modality encoder.
     """
......
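A sketch of how this config might be built; the constructor signature is assumed from the docstring above, so treat the argument order as illustrative:

```python
from transformers import BertConfig, MMBTConfig

# The transformer config's values are copied into the single MMBT config;
# modal_hidden_size matches the output dimension of the non-text encoder
# (assumed here to be a 2048-dim image encoder).
transformer_config = BertConfig()
mmbt_config = MMBTConfig(transformer_config, num_labels=2, modal_hidden_size=2048)
```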
@@ -274,7 +274,7 @@ class PretrainedConfig(object):
         Path to a directory in which a downloaded pretrained model configuration should be cached if the
         standard cache should not be used.
     force_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Wheter or not to force to (re-)download the configuration files and override the cached versions if they
+        Whether or not to force to (re-)download the configuration files and override the cached versions if they
         exist.
     resume_download (:obj:`bool`, `optional`, defaults to :obj:`False`):
         Whether or not to delete incompletely received file. Attempts to resume the download if such a file
......
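The documented download kwargs in use, as a minimal sketch (the cache path is arbitrary):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "bert-base-uncased",
    cache_dir="./hf-cache",   # non-standard cache location
    force_download=False,     # reuse cached files if present
    resume_download=True,     # pick up a partially downloaded file
)
```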
@@ -211,7 +211,7 @@ def load_graph_from_args(pipeline_name: str, framework: str, model: str, tokeniz
     pipeline_name: The kind of pipeline to use (ner, question-answering, etc.)
     framework: The actual model to convert the pipeline from ("pt" or "tf")
     model: The model name which will be loaded by the pipeline
-    tokenizer: The tokenizer name which will be loaded by the pipeline, defaut to the model's value
+    tokenizer: The tokenizer name which will be loaded by the pipeline, default to the model's value
 Returns: Pipeline object
......
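A hedged sketch of calling this helper; the import path is assumed to be the ONNX conversion script of this era of the library:

```python
# Assumed import path: src/transformers/convert_graph_to_onnx.py
from transformers.convert_graph_to_onnx import load_graph_from_args

# tokenizer is omitted, so it defaults to the model's value as documented above.
nlp = load_graph_from_args(
    "question-answering",
    framework="pt",
    model="distilbert-base-cased-distilled-squad",
)
```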
@@ -560,7 +560,7 @@ class SquadProcessor(DataProcessor):
     Args:
         dataset: The tfds dataset loaded from `tensorflow_datasets.load("squad")`
-        evaluate: boolean specifying if in evaluation mode or in training mode
+        evaluate: Boolean specifying if in evaluation mode or in training mode
     Returns:
         List of SquadExample
......
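A sketch of the documented usage, assuming `tensorflow_datasets` is installed:

```python
import tensorflow_datasets as tfds
from transformers.data.processors.squad import SquadV1Processor

# tfds.load("squad") returns a dict of splits; the processor picks the
# right split based on the evaluate flag.
tfds_examples = tfds.load("squad")
examples = SquadV1Processor().get_examples_from_dataset(tfds_examples, evaluate=True)
print(len(examples))  # a list of SquadExample objects
```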
@@ -1093,7 +1093,7 @@ def is_tensor(x):
 class ModelOutput(OrderedDict):
     """
     Base class for all model outputs as dataclass. Has a ``__getitem__`` that allows indexing by integer or slice (like
-    a tuple) or strings (like a dictionnary) that will ignore the ``None`` attributes. Otherwise behaves like a
+    a tuple) or strings (like a dictionary) that will ignore the ``None`` attributes. Otherwise behaves like a
     regular python dictionary.
     .. warning::
......
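The three access styles the docstring describes, as a sketch; `model` and `inputs` are assumed to be built elsewhere:

```python
# Any model returning a ModelOutput subclass behaves this way.
outputs = model(**inputs, return_dict=True)

logits = outputs.logits        # plain attribute access
same = outputs["logits"]       # string key, like a dictionary
also = outputs[0]              # integer index over the non-None fields, like a tuple
keys = list(outputs.keys())    # None-valued attributes are skipped entirely
```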
@@ -197,7 +197,7 @@ class TensorBoardCallback(TrainerCallback):
     Args:
         tb_writer (:obj:`SummaryWriter`, `optional`):
-            The writer to use. Will instatiate one if not set.
+            The writer to use. Will instantiate one if not set.
     """
     def __init__(self, tb_writer=None):
......
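A sketch of supplying your own writer; the import path for the callback is an assumption about where it lives in this era of the library:

```python
from torch.utils.tensorboard import SummaryWriter
from transformers.integrations import TensorBoardCallback  # assumed import path

# Provide a preconfigured writer; if tb_writer is omitted, the callback
# instantiates its own SummaryWriter, as documented above.
tb_callback = TensorBoardCallback(tb_writer=SummaryWriter(log_dir="runs/exp1"))
# then: Trainer(..., callbacks=[tb_callback])
```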
@@ -507,7 +507,7 @@ AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
         :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
         request.
     output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+        Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
         messages.
     local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
         Whether or not to only look at local files (e.g., not try doanloading the model).
......
@@ -390,7 +390,7 @@ TF_AUTO_MODEL_PRETRAINED_DOCSTRING = r"""
         :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
         request.
     output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+        Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
         messages.
     local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
         Whether or not to only look at local files (e.g., not try doanloading the model).
......
@@ -569,7 +569,7 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin):
         :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
         request.
     output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+        Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
         messages.
     local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
         Whether or not to only look at local files (e.g., not try doanloading the model).
......
@@ -802,7 +802,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
         :obj:`{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each
         request.
     output_loading_info(:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Whether ot not to also return a dictionnary containing missing keys, unexpected keys and error
+        Whether ot not to also return a dictionary containing missing keys, unexpected keys and error
         messages.
     local_files_only(:obj:`bool`, `optional`, defaults to :obj:`False`):
         Whether or not to only look at local files (e.g., not try doanloading the model).
......
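The `output_loading_info` flag documented in the four hunks above, as a sketch:

```python
from transformers import AutoModelForSequenceClassification

model, loading_info = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    output_loading_info=True,   # also return missing/unexpected keys and error messages
)
# The classification head is freshly initialized, so its weights show up here:
print(loading_info["missing_keys"])
print(loading_info["unexpected_keys"])
```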
@@ -169,7 +169,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
     epsilon (:obj:`float`, `optional`, defaults to 1e-7):
         The epsilon paramenter in Adam, which is a small constant for numerical stability.
     amsgrad (:obj:`bool`, `optional`, default to `False`):
-        Wheter to apply AMSGrad varient of this algorithm or not, see
+        Whether to apply AMSGrad varient of this algorithm or not, see
         `On the Convergence of Adam and Beyond <https://arxiv.org/abs/1904.09237>`__.
     weight_decay_rate (:obj:`float`, `optional`, defaults to 0):
         The weight decay to apply.
......
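Construction with the documented arguments, as a sketch (the hyperparameter values are illustrative):

```python
from transformers import AdamWeightDecay  # the TF optimizer documented above

optimizer = AdamWeightDecay(
    learning_rate=3e-5,
    epsilon=1e-7,            # small constant for numerical stability
    amsgrad=False,           # the AMSGrad variant is off by default
    weight_decay_rate=0.01,  # decoupled weight decay
)
```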
@@ -1766,7 +1766,7 @@ class QuestionAnsweringPipeline(Pipeline):
     def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int) -> Tuple:
         """
-        Take the output of any :obj:`ModelForQuestionAnswering` and will generate probalities for each span to be
+        Take the output of any :obj:`ModelForQuestionAnswering` and will generate probabilities for each span to be
         the actual answer.
         In addition, it filters out some unwanted/impossible cases like answer len being greater than
@@ -1807,7 +1807,7 @@ class QuestionAnsweringPipeline(Pipeline):
     def span_to_answer(self, text: str, start: int, end: int) -> Dict[str, Union[str, int]]:
         """
-        When decoding from token probalities, this method maps token indexes to actual word in
+        When decoding from token probabilities, this method maps token indexes to actual word in
         the initial context.
         Args:
......
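Both methods are internal; they run when the pipeline is called. A sketch from the user's side:

```python
from transformers import pipeline

qa = pipeline("question-answering")
result = qa(
    question="What does the pipeline return?",
    context="The question-answering pipeline returns the most probable answer span.",
)
# decode() turns the start/end logits into span probabilities, and
# span_to_answer() maps the chosen token span back to words in the context.
print(result["answer"], result["score"], result["start"], result["end"])
```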
@@ -682,7 +682,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
     token_ids_1 (:obj:`List[int]`, `optional`):
         List of ids of the second sequence.
     already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Wheter or not the token list is already formated with special tokens for the model.
+        Whether or not the token list is already formated with special tokens for the model.
 Returns:
     A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
@@ -815,7 +815,7 @@ class PreTrainedTokenizer(PreTrainedTokenizerBase):
     you want to reload it using the :meth:`~transformers.PreTrainedTokenizer.from_pretrained` class method.
     Args:
-        save_directory (:obj:`str`): The path to adirectory where the tokenizer will be saved.
+        save_directory (:obj:`str`): The path to a directory where the tokenizer will be saved.
     Returns:
         A tuple of :obj:`str`: The files saved.
......
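The two methods touched above, as a sketch (the save directory is arbitrary):

```python
from transformers import BertTokenizer

tok = BertTokenizer.from_pretrained("bert-base-uncased")
ids = tok.encode("hello world")  # [CLS] hello world [SEP]
mask = tok.get_special_tokens_mask(ids, already_has_special_tokens=True)
print(mask)  # [1, 0, 0, 1] -- 1 marks [CLS] and [SEP], per the docstring above

tok.save_pretrained("./my-tokenizer")  # writes the vocab/config files, returns their paths
```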
@@ -15,7 +15,7 @@
 """ Base classes common to both the slow and the fast tokenization classes:
     PreTrainedTokenizerBase (host all the user fronting encoding methodes)
     Special token mixing (host the special tokens logic) and
-    BatchEncoding (wrap the dictionnary of output with special method for the Fast tokenizers)
+    BatchEncoding (wrap the dictionary of output with special method for the Fast tokenizers)
 """
 import copy
@@ -249,7 +249,7 @@ class BatchEncoding(UserDict):
     def tokens(self, batch_index: int = 0) -> List[str]:
         """
-        Return the list of tokens (sub-parts of the input strings after word/subword splitting and before converstion
+        Return the list of tokens (sub-parts of the input strings after word/subword splitting and before conversion
         to integer indices) at a given batch index (only works for the output of a fast tokenizer).
         Args:
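`BatchEncoding.tokens()` in use, as a sketch:

```python
from transformers import BertTokenizerFast

tok = BertTokenizerFast.from_pretrained("bert-base-uncased")
enc = tok(["Hello world!", "tokenization"], padding=True)
print(enc.tokens(0))  # tokens of the first sequence, before conversion to ids
print(enc.tokens(1))  # only available on fast-tokenizer outputs
```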
@@ -1121,7 +1121,7 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
     return_overflowing_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
         Whether or not to return overflowing token sequences.
     return_special_tokens_mask (:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Wheter or not to return special tokens mask information.
+        Whether or not to return special tokens mask information.
     return_offsets_mapping (:obj:`bool`, `optional`, defaults to :obj:`False`):
         Whether or not to return :obj:`(char_start, char_end)` for each token.
@@ -1153,13 +1153,13 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
     - **num_truncated_tokens** -- Number of tokens truncated (when a :obj:`max_length` is specified and
       :obj:`return_overflowing_tokens=True`).
     - **special_tokens_mask** -- List of 0s and 1s, with 0 specifying added special tokens and 1 specifying
-      regual sequence tokens (when :obj:`add_special_tokens=True` and :obj:`return_special_tokens_mask=True`).
+      regular sequence tokens (when :obj:`add_special_tokens=True` and :obj:`return_special_tokens_mask=True`).
     - **length** -- The length of the inputs (when :obj:`return_length=True`)
 """
 INIT_TOKENIZER_DOCSTRING = r"""
     Class attributes (overridden by derived classes)
-    - **vocab_files_names** (:obj:`Dict[str, str]`) -- A ditionary with, as keys, the ``__init__`` keyword name of
+    - **vocab_files_names** (:obj:`Dict[str, str]`) -- A dictionary with, as keys, the ``__init__`` keyword name of
       each vocabulary file required by the model, and as associated values, the filename for saving the associated
       file (string).
     - **pretrained_vocab_files_map** (:obj:`Dict[str, Dict[str, str]]`) -- A dictionary of dictionaries, with the
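The `return_special_tokens_mask` kwarg documented above, as a sketch:

```python
from transformers import BertTokenizerFast

tok = BertTokenizerFast.from_pretrained("bert-base-uncased")
enc = tok("Hello world!", add_special_tokens=True, return_special_tokens_mask=True)
# Flags which positions are the added [CLS]/[SEP] tokens vs. regular tokens:
print(enc["special_tokens_mask"])
```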
@@ -1170,7 +1170,7 @@ INIT_TOKENIZER_DOCSTRING = r"""
       :obj:`short-cut-names` of the pretrained models, and as associated values, the maximum length of the sequence
       inputs of this model, or :obj:`None` if the model has no maximum input size.
     - **pretrained_init_configuration** (:obj:`Dict[str, Dict[str, Any]]`) -- A dictionary with, as keys, the
-      :obj:`short-cut-names` of the pretrained models, and as associated values, a dictionnary of specific
+      :obj:`short-cut-names` of the pretrained models, and as associated values, a dictionary of specific
       arguments to pass to the ``__init__`` method of the tokenizer class for this pretrained model when loading the
       tokenizer with the :meth:`~transformers.tokenization_utils_base.PreTrainedTokenizerBase.from_pretrained`
       method.
@@ -1688,7 +1688,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
     modifying :obj:`tokenizer.do_lower_case` after creation).
     Args:
-        save_directory (:obj:`str`): The path to adirectory where the tokenizer will be saved.
+        save_directory (:obj:`str`): The path to a directory where the tokenizer will be saved.
     Returns:
         A tuple of :obj:`str`: The files saved.
@@ -2383,7 +2383,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
         batch_size = len(encoded_inputs["input_ids"])
         assert all(
             len(v) == batch_size for v in encoded_inputs.values()
-        ), "Some items in the output dictionnary have a different batch size than others."
+        ), "Some items in the output dictionary have a different batch size than others."
         if padding_strategy == PaddingStrategy.LONGEST:
             max_length = max(len(inputs) for inputs in encoded_inputs["input_ids"])
@@ -2547,7 +2547,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
         sequence = ids + pair_ids if pair else ids
         token_type_ids = [0] * len(ids) + ([0] * len(pair_ids) if pair else [])
-        # Build output dictionnary
+        # Build output dictionary
         encoded_inputs["input_ids"] = sequence
         if return_token_type_ids:
             encoded_inputs["token_type_ids"] = token_type_ids
@@ -2819,7 +2819,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
     token_ids_1 (:obj:`List[int]`, `optional`):
         List of ids of the second sequence.
     already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
-        Wheter or not the token list is already formated with special tokens for the model.
+        Whether or not the token list is already formated with special tokens for the model.
     Returns:
         A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
......
@@ -552,7 +552,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
     you want to reload it using the :meth:`~transformers.PreTrainedTokenizerFast.from_pretrained` class method.
     Args:
-        save_directory (:obj:`str`): The path to adirectory where the tokenizer will be saved.
+        save_directory (:obj:`str`): The path to a directory where the tokenizer will be saved.
     Returns:
         A tuple of :obj:`str`: The files saved.
......
@@ -895,7 +895,7 @@ class Trainer:
     - the documentation of `tune.run <https://docs.ray.io/en/latest/tune/api_docs/execution.html#tune-run>`__
     Returns:
-        :class:`transformers.trainer_utils.BestRun`: All the informations about the best run.
+        :class:`transformers.trainer_utils.BestRun`: All the information about the best run.
     """
     if backend is None:
         backend = default_hp_search_backend()
......
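A sketch of the search whose return type was fixed above, assuming the optuna backend is installed and `trainer` was created with a `model_init` function (so each trial gets a fresh model):

```python
def hp_space(trial):
    # Illustrative search space; trial is an optuna Trial object.
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 1, 4),
    }

best_run = trainer.hyperparameter_search(hp_space=hp_space, n_trials=10, direction="minimize")
# BestRun bundles everything about the winning trial:
print(best_run.run_id, best_run.objective, best_run.hyperparameters)
```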