Unverified Commit 95113d13 authored by Yulv-git, committed by GitHub

Fix some typos. (#17560)



* Fix some typos.
Signed-off-by: Yulv-git <yulvchi@qq.com>

* Fix typo.
Signed-off-by: Yulv-git <yulvchi@qq.com>

* make fixup.
parent ad28ca29
@@ -231,7 +231,7 @@ def _compute_mask_indices(
 )
 spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
-# add offset to the starting indexes so that that indexes now create a span
+# add offset to the starting indexes so that indexes now create a span
 offsets = np.arange(mask_length)[None, None, :]
 offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
     batch_size, max_num_masked_span * mask_length
......
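For context, a minimal NumPy sketch of what the code around this hunk does: each mask start index is repeated `mask_length` times and the broadcast offsets `0..mask_length-1` are added, so every start index expands into a full contiguous span. The shapes and start positions below are made-up illustrations, not values from the model code.

```python
import numpy as np

# Illustrative values only: one batch item, two masked spans of length 3,
# starting at positions 2 and 7.
batch_size, max_num_masked_span, mask_length = 1, 2, 3
start_idxs = np.array([[2, 7]])

# Repeat each start index mask_length times (the reshape in the hunk above).
spec_aug_mask_idxs = np.broadcast_to(
    start_idxs[:, :, None], (batch_size, max_num_masked_span, mask_length)
).reshape(batch_size, max_num_masked_span * mask_length)

# Offsets 0..mask_length-1, broadcast to the same flattened shape.
offsets = np.arange(mask_length)[None, None, :]
offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
    batch_size, max_num_masked_span * mask_length
)

# Adding the offsets turns every start index into a full span of indices.
print(spec_aug_mask_idxs + offsets)  # [[2 3 4 7 8 9]]
```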
@@ -82,10 +82,10 @@ class WavLMConfig(PretrainedConfig):
     feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
 conv_stride (`Tuple[int]` or `List[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
     A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
-    of *conv_stride* defines the number of convolutional layers and has to match the the length of *conv_dim*.
+    of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
 conv_kernel (`Tuple[int]` or `List[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
     A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
-    length of *conv_kernel* defines the number of convolutional layers and has to match the the length of
+    length of *conv_kernel* defines the number of convolutional layers and has to match the length of
     *conv_dim*.
 conv_bias (`bool`, *optional*, defaults to `False`):
     Whether the 1D convolutional layers have a bias.
......
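To illustrate the constraint this docstring describes: `conv_dim`, `conv_stride`, and `conv_kernel` must all have the same length, since each position describes one convolutional layer of the feature encoder. A small sketch; the `conv_dim` values are illustrative, the other two tuples are the defaults quoted above.

```python
from transformers import WavLMConfig

# All three tuples have length 7, so the feature encoder has 7 conv layers.
config = WavLMConfig(
    conv_dim=(512, 512, 512, 512, 512, 512, 512),
    conv_stride=(5, 2, 2, 2, 2, 2, 2),
    conv_kernel=(10, 3, 3, 3, 3, 3, 3),
)
```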
@@ -183,7 +183,7 @@ def _compute_mask_indices(
 )
 spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)
-# add offset to the starting indexes so that that indexes now create a span
+# add offset to the starting indexes so that indexes now create a span
 offsets = np.arange(mask_length)[None, None, :]
 offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
     batch_size, max_num_masked_span * mask_length
......
@@ -1069,7 +1069,7 @@ class YolosLoss(nn.Module):
 # Retrieve the matching between the outputs of the last layer and the targets
 indices = self.matcher(outputs_without_aux, targets)
-# Compute the average number of target boxes accross all nodes, for normalization purposes
+# Compute the average number of target boxes across all nodes, for normalization purposes
 num_boxes = sum(len(t["class_labels"]) for t in targets)
 num_boxes = torch.as_tensor([num_boxes], dtype=torch.float, device=next(iter(outputs.values())).device)
 # (Niels): comment out function below, distributed training to be added
......
@@ -487,7 +487,7 @@ class OnnxConfigWithPast(OnnxConfig, ABC):
 def fill_with_past_key_values_(self, inputs_or_outputs: Mapping[str, Mapping[int, str]], direction: str):
     """
-    Fill the input_or_ouputs mapping with past_key_values dynamic axes considering.
+    Fill the input_or_outputs mapping with past_key_values dynamic axes considering.
     Args:
         inputs_or_outputs: The mapping to fill.
......
@@ -412,8 +412,8 @@ class TrainingArguments:
     down the training and evaluation speed.
 push_to_hub (`bool`, *optional*, defaults to `False`):
     Whether or not to push the model to the Hub every time the model is saved. If this is activated,
-    `output_dir` will begin a git directory synced with the the repo (determined by `hub_model_id`) and the
-    content will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
+    `output_dir` will begin a git directory synced with the repo (determined by `hub_model_id`) and the content
+    will be pushed each time a save is triggered (depending on your `save_strategy`). Calling
     [`~Trainer.save_model`] will also trigger a push.
     <Tip warning={true}>
@@ -434,7 +434,7 @@ class TrainingArguments:
     `"organization_name/model"`. Will default to `user_name/output_dir_name` with *output_dir_name* being the
     name of `output_dir`.
-    Will default to to the name of `output_dir`.
+    Will default to the name of `output_dir`.
 hub_strategy (`str` or [`~trainer_utils.HubStrategy`], *optional*, defaults to `"every_save"`):
     Defines the scope of what is pushed to the Hub and when. Possible values are:
......
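A hedged sketch of the behaviour this docstring describes: with `push_to_hub=True`, `output_dir` is synced with a Hub repo whose name comes from `hub_model_id` (defaulting to the name of `output_dir`), and content is pushed whenever a save is triggered by `save_strategy`. The directory and repo names below are hypothetical.

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="my-finetuned-model",            # becomes a git directory synced with the Hub repo
    push_to_hub=True,                           # push the model every time it is saved
    hub_model_id="my-user/my-finetuned-model",  # hypothetical; defaults to the name of output_dir
    save_strategy="epoch",                      # each save triggers a push
)
```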
@@ -990,7 +990,7 @@ tokenizer.
 For [camelcase name of model], the tokenizer files can be found here:
 - [To be filled out by mentor]
-and having implemented the 🤗Transformers' version of the tokenizer can be loaded as follows:
+and having implemented the 🤗 Transformers' version of the tokenizer can be loaded as follows:
 [To be filled out by mentor]
......
@@ -2821,7 +2821,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
 self.model = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="model")
 self.model._set_save_spec(inputs=self.serving.input_signature)
 self.use_cache = config.use_cache
-# final_bias_logits is registered as a buffer in pytorch, so not trainable for the the sake of consistency.
+# final_bias_logits is registered as a buffer in pytorch, so not trainable for the sake of consistency.
 self.final_logits_bias = self.add_weight(
     name="final_logits_bias", shape=[1, config.vocab_size], initializer="zeros", trainable=False
 )
......
@@ -2183,7 +2183,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
 total_length = len(sequence["input_ids"])
-self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
+self.assertGreater(
+    total_length, 4, "Issue with the testing sequence, please update it, it's too short"
+)
 # Test with max model input length
 model_max_length = tokenizer.model_max_length
@@ -2193,7 +2195,9 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
 total_length1 = len(sequence1["input_ids"])
 self.assertGreater(
-    total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+    total_length1,
+    model_max_length,
+    "Issue with the testing sequence, please update it, it's too short",
 )
 # Simple
......
@@ -2097,7 +2097,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence = tokenizer(seq_0, boxes=boxes_0, add_special_tokens=False)
 total_length = len(sequence["input_ids"])
-self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
+self.assertGreater(
+    total_length, 4, "Issue with the testing sequence, please update it, it's too short"
+)
 # Test with max model input length
 model_max_length = tokenizer.model_max_length
@@ -2107,7 +2109,9 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
 sequence1 = tokenizer(seq_1, boxes=boxes_1, add_special_tokens=False)
 total_length1 = len(sequence1["input_ids"])
 self.assertGreater(
-    total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+    total_length1,
+    model_max_length,
+    "Issue with the testing sequence, please update it, it's too short",
 )
 # Simple
......
@@ -281,7 +281,7 @@ class TFViTMAEModelTest(TFModelTesterMixin, unittest.TestCase):
 super().check_pt_tf_models(tf_model, pt_model, tf_inputs_dict)
 # overwrite from common since TFViTMAEForPretraining outputs loss along with
-# logits and mask indices. loss and mask indicies are not suitable for integration
+# logits and mask indices. loss and mask indices are not suitable for integration
 # with other keras modules.
 def test_compile_tf_model(self):
     config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
@@ -278,7 +278,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
 NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"
 model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
 tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, use_fast=True)
-sentence = """Enzo works at the the UN"""
+sentence = """Enzo works at the UN"""
 token_classifier = pipeline("ner", model=model, tokenizer=tokenizer)
 output = token_classifier(sentence)
 self.assertEqual(
......
@@ -990,7 +990,9 @@ class TokenizerTesterMixin:
 sequence = tokenizer.encode(seq_0, add_special_tokens=False)
 total_length = len(sequence)
-self.assertGreater(total_length, 4, "Issue with the testing sequence, please update it it's too short")
+self.assertGreater(
+    total_length, 4, "Issue with the testing sequence, please update it, it's too short"
+)
 # Test with max model input length
 model_max_length = tokenizer.model_max_length
@@ -1000,7 +1002,9 @@ class TokenizerTesterMixin:
 sequence1 = tokenizer(seq_1, add_special_tokens=False)
 total_length1 = len(sequence1["input_ids"])
 self.assertGreater(
-    total_length1, model_max_length, "Issue with the testing sequence, please update it it's too short"
+    total_length1,
+    model_max_length,
+    "Issue with the testing sequence, please update it, it's too short",
 )
 # Simple
......
...@@ -53,7 +53,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt): ...@@ -53,7 +53,7 @@ def _find_text_in_file(filename, start_prompt, end_prompt):
return "".join(lines[start_index:end_index]), start_index, end_index, lines return "".join(lines[start_index:end_index]), start_index, end_index, lines
# Add here suffixes that are used to identify models, seperated by | # Add here suffixes that are used to identify models, separated by |
ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration" ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration"
# Regexes that match TF/Flax/PT model names. # Regexes that match TF/Flax/PT model names.
_re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)") _re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)")
......
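As a small illustration (not part of this diff) of how these suffix regexes are meant to work, the capture group picks out the bare model type sitting between the framework prefix and an allowed suffix:

```python
import re

ALLOWED_MODEL_SUFFIXES = "Model|Encoder|Decoder|ForConditionalGeneration"
_re_tf_models = re.compile(r"TF(.*)(?:Model|Encoder|Decoder|ForConditionalGeneration)")

# The group captures the model type in front of the suffix.
print(_re_tf_models.match("TFBertModel").groups())                   # ('Bert',)
print(_re_tf_models.match("TFT5ForConditionalGeneration").groups())  # ('T5',)
```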