Unverified Commit 721ee783 authored by Klaus Hipp, committed by GitHub

[Docs] Fix spelling and grammar mistakes (#28825)

* Fix typos and grammar mistakes in docs and examples

* Fix typos in docstrings and comments

* Fix spelling of `tokenizer` in model tests

* Remove erroneous spaces in decorators

* Remove extra spaces in Markdown link texts
parent 2418c64a
......@@ -154,7 +154,7 @@ def run_generate():
parser.add_argument("--src_lang", type=str, default=None, required=False)
parser.add_argument("--tgt_lang", type=str, default=None, required=False)
parser.add_argument(
-"--prefix", type=str, required=False, default=None, help="will be added to the begininng of src examples"
+"--prefix", type=str, required=False, default=None, help="will be added to the beginning of src examples"
)
parser.add_argument("--fp16", action="store_true")
parser.add_argument("--debug", action="store_true")
......
......@@ -107,7 +107,7 @@ def run_generate(verbose=True):
parser.add_argument("--score_path", type=str, required=False, default="metrics.json", help="where to save metrics")
parser.add_argument("--device", type=str, required=False, default=DEFAULT_DEVICE, help="cuda, cuda:1, cpu etc.")
parser.add_argument(
-"--prefix", type=str, required=False, default=None, help="will be added to the begininng of src examples"
+"--prefix", type=str, required=False, default=None, help="will be added to the beginning of src examples"
)
parser.add_argument("--task", type=str, default="summarization", help="used for task_specific_params + metrics")
parser.add_argument("--bs", type=int, default=8, required=False, help="batch size")
......
......@@ -65,7 +65,7 @@ class Seq2SeqTrainer(Trainer):
if self.args.label_smoothing != 0 or (self.data_args is not None and self.data_args.ignore_pad_token_for_loss):
assert self.config.pad_token_id is not None, (
-"Make sure that `config.pad_token_id` is correcly defined when ignoring `pad_token` for loss"
+"Make sure that `config.pad_token_id` is correctly defined when ignoring `pad_token` for loss"
" calculation or doing label smoothing."
)
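The assertion above only passes when `config.pad_token_id` is defined, which matters whenever padded positions are ignored for the loss or label smoothing is applied. A minimal sketch of one way to guarantee that before building the trainer, assuming an illustrative BART checkpoint that is not part of this diff:

```python
from transformers import AutoConfig, AutoTokenizer

# Illustrative checkpoint; any seq2seq model with a pad token works the same way.
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")
config = AutoConfig.from_pretrained("facebook/bart-base")

# Define the pad token id so padded positions can be masked out of the loss.
if config.pad_token_id is None:
    config.pad_token_id = tokenizer.pad_token_id
```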
......
......@@ -31,7 +31,7 @@ class Seq2SeqTrainingArguments(TrainingArguments):
label_smoothing (:obj:`float`, `optional`, defaults to 0):
The label smoothing epsilon to apply (if not zero).
sortish_sampler (:obj:`bool`, `optional`, defaults to :obj:`False`):
-Whether to SortishSamler or not. It sorts the inputs according to lengths in-order to minimizing the padding size.
+Whether to SortishSampler or not. It sorts the inputs according to lengths in-order to minimizing the padding size.
predict_with_generate (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether to use generate to calculate generative metrics (ROUGE, BLEU).
"""
......@@ -39,7 +39,7 @@ class Seq2SeqTrainingArguments(TrainingArguments):
label_smoothing: Optional[float] = field(
default=0.0, metadata={"help": "The label smoothing epsilon to apply (if not zero)."}
)
-sortish_sampler: bool = field(default=False, metadata={"help": "Whether to SortishSamler or not."})
+sortish_sampler: bool = field(default=False, metadata={"help": "Whether to SortishSampler or not."})
predict_with_generate: bool = field(
default=False, metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."}
)
......
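These flags mirror the `Seq2SeqTrainingArguments` exposed by the main library, where label smoothing is configured as `label_smoothing_factor`. A minimal sketch of typical settings, with a placeholder `output_dir`:

```python
from transformers import Seq2SeqTrainingArguments

# Illustrative values; output_dir is a placeholder path.
training_args = Seq2SeqTrainingArguments(
    output_dir="./seq2seq-output",
    sortish_sampler=True,         # sort inputs by length to reduce padding
    predict_with_generate=True,   # run generate() so ROUGE/BLEU can be computed
    label_smoothing_factor=0.1,   # label smoothing epsilon (0.0 disables it)
)
```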
......@@ -289,7 +289,7 @@ def main():
)
logger.info(f"Training/evaluation parameters {training_args}")
-# 3. Detecting last checkpoint and eventualy continue from last checkpoint
+# 3. Detecting last checkpoint and eventually continue from last checkpoint
last_checkpoint = None
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
last_checkpoint = get_last_checkpoint(training_args.output_dir)
......@@ -528,7 +528,7 @@ def main():
# Transform images on the fly as doing it on the whole dataset takes too much time.
test_dataset.set_transform(transform_images)
-# 8. Initalize our trainer
+# 8. Initialize our trainer
trainer = Trainer(
model=model,
args=training_args,
......
......@@ -114,10 +114,10 @@ from datasets import load_dataset
# example 1: local folder
dataset = load_dataset("imagefolder", data_dir="path_to_your_folder")
-# example 2: local files (suppoted formats are tar, gzip, zip, xz, rar, zstd)
+# example 2: local files (supported formats are tar, gzip, zip, xz, rar, zstd)
dataset = load_dataset("imagefolder", data_files="path_to_zip_file")
-# example 3: remote files (suppoted formats are tar, gzip, zip, xz, rar, zstd)
+# example 3: remote files (supported formats are tar, gzip, zip, xz, rar, zstd)
dataset = load_dataset("imagefolder", data_files="https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip")
# example 4: providing several splits
......
......@@ -404,7 +404,7 @@ def main():
# Set the validation transforms
dataset["validation"].set_transform(val_transforms)
-# Initalize our trainer
+# Initialize our trainer
trainer = Trainer(
model=model,
args=training_args,
......
......@@ -25,7 +25,7 @@ NOTE: If you encounter problems/have suggestions for improvement, open an issue
## SimMIM
-The `run_mim.py` script can be used to pre-train any Transformer-based vision model in the library (concretly, any model supported by the `AutoModelForMaskedImageModeling` API) for masked image modeling as proposed in [SimMIM: A Simple Framework for Masked Image Modeling](https://arxiv.org/abs/2111.09886) using PyTorch.
+The `run_mim.py` script can be used to pre-train any Transformer-based vision model in the library (concretely, any model supported by the `AutoModelForMaskedImageModeling` API) for masked image modeling as proposed in [SimMIM: A Simple Framework for Masked Image Modeling](https://arxiv.org/abs/2111.09886) using PyTorch.
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/simmim_architecture.jpg"
alt="drawing" width="300"/>
......
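The `AutoModelForMaskedImageModeling` API referenced above can also be used directly outside `run_mim.py`. A minimal sketch, assuming an illustrative SimMIM-style checkpoint name:

```python
from transformers import AutoImageProcessor, AutoModelForMaskedImageModeling

# The checkpoint name below is an assumption for illustration; any model with a
# masked-image-modeling head supported by the Auto API can be used.
checkpoint = "microsoft/swin-base-simmim-window6-192"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModelForMaskedImageModeling.from_pretrained(checkpoint)

# During pre-training the script feeds pixel values plus a boolean patch mask
# (bool_masked_pos) telling the model which patches to reconstruct.
```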
......@@ -90,7 +90,7 @@ def parse_args():
default=128,
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
-" sequences shorter will be padded if `--pad_to_max_lengh` is passed."
+" sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(
......
......@@ -378,7 +378,7 @@ def main():
)
# Preprocessing the datasets.
-# Preprocessing is slighlty different for training and evaluation.
+# Preprocessing is slightly different for training and evaluation.
if training_args.do_train:
column_names = raw_datasets["train"].column_names
elif training_args.do_eval:
......
......@@ -354,7 +354,7 @@ def main():
)
# Preprocessing the datasets.
-# Preprocessing is slighlty different for training and evaluation.
+# Preprocessing is slightly different for training and evaluation.
if training_args.do_train:
column_names = raw_datasets["train"].column_names
elif training_args.do_eval:
......
......@@ -119,7 +119,7 @@ def parse_args():
default=384,
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
-" sequences shorter will be padded if `--pad_to_max_lengh` is passed."
+" sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(
......@@ -385,7 +385,7 @@ def main():
)
# Preprocessing the datasets.
-# Preprocessing is slighlty different for training and evaluation.
+# Preprocessing is slightly different for training and evaluation.
column_names = raw_datasets["train"].column_names
question_column_name = "question" if "question" in column_names else column_names[0]
......@@ -508,7 +508,7 @@ def main():
raise ValueError("--do_train requires a train dataset")
train_dataset = raw_datasets["train"]
if args.max_train_samples is not None:
-# We will select sample from whole data if agument is specified
+# We will select sample from whole data if argument is specified
train_dataset = train_dataset.select(range(args.max_train_samples))
# Create train feature from dataset
with accelerator.main_process_first():
......@@ -877,7 +877,7 @@ def main():
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)
-# intialize all lists to collect the batches
+# initialize all lists to collect the batches
all_start_top_log_probs = []
all_start_top_index = []
all_end_top_log_probs = []
......@@ -936,7 +936,7 @@ def main():
logger.info(f"Evaluation metrics: {eval_metric}")
if args.do_predict:
-# intialize all lists to collect the batches
+# initialize all lists to collect the batches
all_start_top_log_probs = []
all_start_top_index = []
......
......@@ -123,7 +123,7 @@ def parse_args():
default=384,
help=(
"The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
-" sequences shorter will be padded if `--pad_to_max_lengh` is passed."
+" sequences shorter will be padded if `--pad_to_max_length` is passed."
),
)
parser.add_argument(
......@@ -460,7 +460,7 @@ def main():
model = AutoModelForQuestionAnswering.from_config(config, trust_remote_code=args.trust_remote_code)
# Preprocessing the datasets.
-# Preprocessing is slighlty different for training and evaluation.
+# Preprocessing is slightly different for training and evaluation.
column_names = raw_datasets["train"].column_names
......@@ -561,7 +561,7 @@ def main():
raise ValueError("--do_train requires a train dataset")
train_dataset = raw_datasets["train"]
if args.max_train_samples is not None:
-# We will select sample from whole data if agument is specified
+# We will select sample from whole data if argument is specified
train_dataset = train_dataset.select(range(args.max_train_samples))
# Create train feature from dataset
......
......@@ -559,7 +559,7 @@ def main():
raise ValueError("--do_train requires a train dataset")
train_dataset = raw_datasets["train"]
if data_args.max_train_samples is not None:
-# We will select sample from whole data if agument is specified
+# We will select sample from whole data if argument is specified
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
train_dataset = train_dataset.select(range(max_train_samples))
# Create train feature from dataset
......
......@@ -503,7 +503,7 @@ def main():
# Set the validation transforms
dataset["validation"].set_transform(preprocess_val)
-# Initalize our trainer
+# Initialize our trainer
trainer = Trainer(
model=model,
args=training_args,
......
......@@ -446,7 +446,7 @@ A very common use case is to leverage a pretrained speech encoder model,
By pairing a pretrained speech model with a pretrained text model, the warm-started model has prior knowledge of both the source audio and target text domains. However, the cross-attention weights between the encoder and decoder are randomly initialised. Thus, the model requires fine-tuning to learn the cross-attention weights and align the encoder mapping with that of the decoder. We can perform this very fine-tuning procedure using the example script.
-As an example, let's instantiate a *Wav2Vec2-2-Bart* model with the `SpeechEnocderDecoderModel` framework. First create an empty repo on `hf.co`:
+As an example, let's instantiate a *Wav2Vec2-2-Bart* model with the `SpeechEncoderDecoderModel` framework. First create an empty repo on `hf.co`:
```bash
huggingface-cli repo create wav2vec2-2-bart-base
......@@ -506,7 +506,7 @@ Having warm-started the speech-encoder-decoder model under `<your-user-name>/wav
In the script [`run_speech_recognition_seq2seq`], we load the warm-started model,
feature extractor, and tokenizer, process a speech recognition dataset,
and subsequently make use of the [`Seq2SeqTrainer`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Seq2SeqTrainer) to train our system.
-Note that it is important to align the target transcriptions with the decoder's vocabulary. For example, the [`Librispeech`](https://huggingface.co/datasets/librispeech_asr) dataset only contains captilized letters in the transcriptions,
+Note that it is important to align the target transcriptions with the decoder's vocabulary. For example, the [`Librispeech`](https://huggingface.co/datasets/librispeech_asr) dataset only contains capitalized letters in the transcriptions,
whereas BART was pretrained mostly on normalized text. Thus, it is recommended to add the argument
`--do_lower_case` to the fine-tuning script when using a warm-started `SpeechEncoderDecoderModel`.
The model is fine-tuned on the standard cross-entropy language modeling
......
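The warm-starting step described above boils down to pairing a pretrained speech encoder with a pretrained text decoder. A minimal sketch with `SpeechEncoderDecoderModel`, using illustrative encoder/decoder checkpoints and a placeholder save path:

```python
from transformers import SpeechEncoderDecoderModel

# Illustrative encoder/decoder checkpoints; the cross-attention weights that
# connect them are randomly initialised and still require fine-tuning.
model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
    "facebook/wav2vec2-base", "facebook/bart-base"
)

# Generation-relevant token ids must be defined before fine-tuning.
model.config.decoder_start_token_id = model.decoder.config.bos_token_id
model.config.pad_token_id = model.decoder.config.pad_token_id

model.save_pretrained("./wav2vec2-2-bart-base")  # placeholder local path
```

Because the cross-attention weights connecting the two models are freshly initialised, the saved model still needs the fine-tuning run described above to align the encoder with the decoder.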
......@@ -146,7 +146,7 @@ class DataTrainingArguments:
" should be trained on in ISO 693-3 code, e.g. `tur` for Turkish"
" Wav2Vec2's MMS ISO codes can be looked up here: https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html"
" If you are not training the adapter layers on a language, simply choose"
-" another accronym that fits your data."
+" another acronym that fits your data."
)
},
)
......
......@@ -129,7 +129,7 @@ python run_classification.py \
--num_train_epochs 15 \
--output_dir /tmp/${dataset}_${subset}/
```
-It results in a Micro F1 score of around 0.82 without any text and label filtering. Note that you have to explictly remove the "unused" split from the dataset, since it is not used for classification.
+It results in a Micro F1 score of around 0.82 without any text and label filtering. Note that you have to explicitly remove the "unused" split from the dataset, since it is not used for classification.
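One hedged way to drop that split is directly with the `datasets` library, since a `DatasetDict` behaves like a regular dictionary (the dataset and config names below are placeholders, and the script itself may expose its own option for this):

```python
from datasets import load_dataset

# Placeholder dataset/config names for illustration.
dataset = load_dataset("some_org/some_dataset", "some_subset")

# DatasetDict behaves like a dict, so the extra split can simply be dropped.
dataset.pop("unused", None)
```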
### Mixed precision training
......
......@@ -83,7 +83,7 @@ class DataTrainingArguments:
metadata={
"help": (
"The name of the text column in the input dataset or a CSV/JSON file. "
-'If not specified, will use the "sentence" column for single/multi-label classifcation task.'
+'If not specified, will use the "sentence" column for single/multi-label classification task.'
)
},
)
......@@ -121,7 +121,7 @@ class DataTrainingArguments:
metadata={
"help": (
"The name of the label column in the input dataset or a CSV/JSON file. "
-'If not specified, will use the "label" column for single/multi-label classifcation task'
+'If not specified, will use the "label" column for single/multi-label classification task'
)
},
)
......@@ -260,7 +260,7 @@ class ModelArguments:
def get_label_list(raw_dataset, split="train") -> List[str]:
-"""Get the list of labels from a mutli-label dataset"""
+"""Get the list of labels from a multi-label dataset"""
if isinstance(raw_dataset[split]["label"][0], list):
label_list = [label for sample in raw_dataset[split]["label"] for label in sample]
......@@ -343,7 +343,7 @@ def main():
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files, or specify a dataset name
# to load from huggingface/datasets. In ether case, you can specify a the key of the column(s) containing the text and
-# the key of the column containing the label. If multiple columns are specified for the text, they will be joined togather
+# the key of the column containing the label. If multiple columns are specified for the text, they will be joined together
# for the actual text value.
# In distributed training, the load_dataset function guarantee that only one local process can concurrently
# download the dataset.
......
......@@ -18,7 +18,7 @@ limitations under the License.
Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py).
-Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, GPTJ, Transformer-XL, XLNet, CTRL, BLOOM, LLAMA, OPT.
+Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, GPT-J, Transformer-XL, XLNet, CTRL, BLOOM, LLAMA, OPT.
A similar script is used for our official demo [Write With Transfomer](https://transformer.huggingface.co), where you
can try out the different models available in the library.
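For a quick impression of what such conditional generation looks like, a minimal sketch with the high-level `pipeline` API and GPT-2 (the prompt and generation settings are arbitrary examples):

```python
from transformers import pipeline, set_seed

set_seed(42)  # make sampling reproducible
generator = pipeline("text-generation", model="gpt2")

# Condition generation on a prompt, similar to what run_generation.py does from the CLI.
outputs = generator(
    "In a distant future,", max_new_tokens=40, do_sample=True, num_return_sequences=2
)
for sample in outputs:
    print(sample["generated_text"])
```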
......