Unverified commit 721ee783 authored by Klaus Hipp, committed by GitHub

[Docs] Fix spelling and grammar mistakes (#28825)

* Fix typos and grammar mistakes in docs and examples

* Fix typos in docstrings and comments

* Fix spelling of `tokenizer` in model tests

* Remove erroneous spaces in decorators

* Remove extra spaces in Markdown link texts
parent 2418c64a
@@ -66,7 +66,7 @@ class Wav2Vec2BertConfig(PretrainedConfig):
     attention_dropout (`float`, *optional*, defaults to 0.0):
         The dropout ratio for the attention probabilities.
     feat_proj_dropout (`float`, *optional*, defaults to 0.0):
-        The dropout probabilitiy for the feature projection.
+        The dropout probability for the feature projection.
     final_dropout (`float`, *optional*, defaults to 0.1):
         The dropout probability for the final projection layer of [`Wav2Vec2BertForCTC`].
     layerdrop (`float`, *optional*, defaults to 0.1):
......
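For readers skimming the corrected docstring above, here is a minimal, hedged sketch of how the parameters named in this hunk are set when building the config. It assumes a transformers version that ships `Wav2Vec2BertConfig`; the values are arbitrary and only illustrate the two dropout arguments described in the diff.

```python
# Minimal sketch, not taken from the patched file: instantiate the config and
# set the two dropout parameters whose descriptions appear in the hunk above.
from transformers import Wav2Vec2BertConfig

config = Wav2Vec2BertConfig(
    attention_dropout=0.0,  # dropout ratio for the attention probabilities
    feat_proj_dropout=0.1,  # dropout probability for the feature projection
)
print(config.feat_proj_dropout)  # -> 0.1
```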
@@ -84,7 +84,7 @@ class Wav2Vec2ConformerConfig(PretrainedConfig):
         The non-linear activation function (function or string) in the 1D convolutional layers of the feature
         extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
     feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
-        The dropout probabilitiy for quantized feature encoder states.
+        The dropout probability for quantized feature encoder states.
     conv_dim (`Tuple[int]` or `List[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
         A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
         feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
@@ -138,7 +138,7 @@ class Wav2Vec2ConformerConfig(PretrainedConfig):
     contrastive_logits_temperature (`float`, *optional*, defaults to 0.1):
         The temperature *kappa* in the contrastive loss.
     feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
-        The dropout probabilitiy for the output of the feature encoder that's used by the quantizer.
+        The dropout probability for the output of the feature encoder that's used by the quantizer.
     num_negatives (`int`, *optional*, defaults to 100):
         Number of negative samples for the contrastive loss.
     codevector_dim (`int`, *optional*, defaults to 256):
......
@@ -55,7 +55,7 @@ class YolosConfig(PretrainedConfig):
         The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
         `"relu"`, `"selu"` and `"gelu_new"` are supported.
     hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
-        The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
     attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
         The dropout ratio for the attention probabilities.
     initializer_range (`float`, *optional*, defaults to 0.02):
......
@@ -53,7 +53,7 @@ class YosoConfig(PretrainedConfig):
         The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
         `"relu"`, `"selu"` and `"gelu_new"` are supported.
     hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
-        The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
     attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
         The dropout ratio for the attention probabilities.
     max_position_embeddings (`int`, *optional*, defaults to 512):
......
@@ -580,7 +580,7 @@ def parse_args():
         default=128,
         help=(
             "The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
-            " sequences shorter will be padded if `--pad_to_max_lengh` is passed."
+            " sequences shorter will be padded if `--pad_to_max_length` is passed."
         ),
     )
     parser.add_argument(
......
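For context on the corrected help text, the sketch below shows how such a max-length / `--pad_to_max_length` pair of arguments is typically declared in the example scripts. The argument name `--max_length` and the surrounding structure are assumptions for illustration; only `default=128` and the help string come from the hunk above.

```python
# Hedged sketch of the argparse pattern referenced above; "--max_length" is an
# assumed name, while the help text mirrors the corrected string in the diff.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="Illustration of the padding/truncation options.")
    parser.add_argument(
        "--max_length",
        type=int,
        default=128,
        help=(
            "The maximum total input sequence length after tokenization. Sequences longer than this will be"
            " truncated, sequences shorter will be padded if `--pad_to_max_length` is passed."
        ),
    )
    parser.add_argument(
        "--pad_to_max_length",
        action="store_true",
        help="If passed, pad all samples to `max_length` instead of using dynamic padding.",
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    print(args.max_length, args.pad_to_max_length)
```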
@@ -217,7 +217,7 @@ Next the questionnaire will ask
 Should we add # Copied from statements when creating the new modeling file?
 ```
-This is the intenal mechanism used in the library to make sure code copied from various modeling files stay consistent.
+This is the internal mechanism used in the library to make sure code copied from various modeling files stay consistent.
 If you plan to completely rewrite the modeling file, you should answer no, whereas if you just want to tweak one part
 of the model, you should answer yes.
......
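For readers who have not seen the mechanism mentioned in this hunk: a `# Copied from` comment marks a block that the library's consistency checks keep identical to the referenced source, optionally applying a rename pattern. The sketch below is illustrative only; the class name and body are hypothetical, not actual library code.

```python
# Hypothetical example of the "# Copied from" convention; the target class and
# its body are placeholders, not the real BertSelfOutput implementation.
import torch.nn as nn


# Copied from transformers.models.bert.modeling_bert.BertSelfOutput with Bert->MyNewModel
class MyNewModelSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        # The consistency check would require this body to match the referenced
        # source (with "Bert" renamed to "MyNewModel"); shown here only as a sketch.
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
```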
@@ -56,7 +56,7 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
         The non-linear activation function (function or string) in the encoder and pooler.
         If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
     hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
-        The dropout probabilitiy for all fully connected layers in the embeddings, encoder, and pooler.
+        The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
     attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
         The dropout ratio for the attention probabilities.
     max_position_embeddings (`int`, *optional*, defaults to 512):
......
@@ -17,7 +17,7 @@
 ##
 ## It is to be used as such:
 ## Put '# To replace in: "FILE_PATH"' in order to indicate the contents will be copied in the file at path FILE_PATH
-## Put '# Below: "STATEMENT"' in order to copy the contents below **the first occurence** of that line in the file at FILE_PATH
+## Put '# Below: "STATEMENT"' in order to copy the contents below **the first occurrence** of that line in the file at FILE_PATH
 ## Put '# Replace with:' followed by the lines containing the content to define the content
 ## End a statement with '# End.'. If starting a new statement without redefining the FILE_PATH, it will continue pasting
 ## content in that file.
......
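The directives documented in this hunk combine as in the sketch below; the target path and the anchor statement are placeholders, and the pasted content is reduced to a single line for illustration.

```python
# To replace in: "path/to/target_file.py"          <- placeholder path
# Below: "SOME_EXISTING_LINE_IN_THE_TARGET_FILE"   <- placeholder anchor statement
# Replace with:
print("content pasted below the first occurrence of the anchor statement")
# End.
```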
@@ -166,7 +166,7 @@ class ByT5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         self.assertEqual(expected_src_tokens, batch["input_ids"][0])
         self.assertEqual(expected_tgt_tokens, batch["labels"][0])
-    # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
+    # cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
     def test_save_and_load_tokenizer(self):
         # safety check on max_len default value so we are sure the test works
         tokenizers = self.get_tokenizers()
......
@@ -82,7 +82,7 @@ class CanineTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         )
         self.assertEqual(32, targets["input_ids"].shape[1])
-    # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
+    # cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
     def test_save_and_load_tokenizer(self):
         # safety check on max_len default value so we are sure the test works
         tokenizers = self.get_tokenizers()
......
@@ -367,10 +367,10 @@ class LlamaIntegrationTest(unittest.TestCase):
         fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
         assert fast == [319, 4559, 1243, 2]
-        slow_tokenzier = CodeLlamaTokenizer.from_pretrained(
+        slow_tokenizer = CodeLlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
         )
-        slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
+        slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
         assert slow == [319, 4559, 1243, 2]
         self.tokenizer.add_eos_token = False
......
@@ -360,10 +360,10 @@ class LlamaIntegrationTest(unittest.TestCase):
         fast = fast_tokenizer.encode("A sample test", add_special_tokens=True)
         assert fast == [319, 4559, 1243, 2]
-        slow_tokenzier = LlamaTokenizer.from_pretrained(
+        slow_tokenizer = LlamaTokenizer.from_pretrained(
             "hf-internal-testing/llama-tokenizer", add_eos_token=True, add_bos_token=False
         )
-        slow = slow_tokenzier.encode("A sample test", add_special_tokens=True)
+        slow = slow_tokenizer.encode("A sample test", add_special_tokens=True)
         assert slow == [319, 4559, 1243, 2]
         self.tokenizer.add_eos_token = False
......
@@ -148,7 +148,7 @@ class PerceiverTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         )
         self.assertEqual(32, targets["input_ids"].shape[1])
-    # cannot use default save_and_load_tokenzier test method because tokenzier has no vocab
+    # cannot use default save_and_load_tokenizer test method because tokenizer has no vocab
    def test_save_and_load_tokenizer(self):
         # safety check on max_len default value so we are sure the test works
         tokenizers = self.get_tokenizers()
......
@@ -158,7 +158,7 @@ class Qwen2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         self.assertEqual(tokenizer_output_string, output_string)
     def test_slow_tokenizer_decode_spaces_between_special_tokens_default(self):
-        # Qwen2Tokenzier changes the default `spaces_between_special_tokens` in `decode` to False
+        # Qwen2Tokenizer changes the default `spaces_between_special_tokens` in `decode` to False
         if not self.test_slow_tokenizer:
             return
......
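As background for the corrected comment: the slow Qwen2 tokenizer decodes with `spaces_between_special_tokens=False` by default, so the flag only needs to be passed explicitly to restore the spaced behaviour. A hedged sketch follows; the checkpoint name is an assumption for illustration.

```python
# Illustrative sketch; "Qwen/Qwen1.5-0.5B" is an assumed checkpoint that ships
# the slow tokenizer files. The two decode calls differ only in the flag, which
# matters when the decoded ids contain special tokens.
from transformers import Qwen2Tokenizer

tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
ids = tokenizer("Hello world").input_ids

print(tokenizer.decode(ids))  # uses the tokenizer's default (False for the slow Qwen2 tokenizer)
print(tokenizer.decode(ids, spaces_between_special_tokens=True))  # opt back into spaces around special tokens
```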