"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "b189226e8cb31361916d638c5a1e2540e3208a61"
Unverified Commit 40ea9ab2 authored by Tom Aarsen, committed by GitHub

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
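
For context on what this change fixes: Python concatenates adjacent string literals at parse time, so when a long help message is split across several literals, a missing trailing space silently fuses the last word of one literal with the first word of the next. A minimal sketch, using one of the strings touched by this commit:

```python
# Adjacent string literals are joined at parse time; without a trailing
# space, the words on either side of the boundary run together.
before = (
    "The token to force as the first generated token after the decoder_start_token_id."
    "Useful for multilingual models like mBART where the first generated token"
)
after = (
    "The token to force as the first generated token after the decoder_start_token_id. "
    "Useful for multilingual models like mBART where the first generated token"
)

print("decoder_start_token_id.Useful" in before)  # True: the words are fused
print("decoder_start_token_id. Useful" in after)  # True: the added space separates them
```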
@@ -120,7 +120,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -205,7 +205,7 @@ class DataTrainingArguments:
     metadata={
         "help": (
             "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
             "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
             "during ``evaluate`` and ``predict``."
         )
@@ -271,7 +271,7 @@ class DataTrainingArguments:
     default=None,
     metadata={
         "help": (
-            "The token to force as the first generated token after the decoder_start_token_id."
+            "The token to force as the first generated token after the decoder_start_token_id. "
             "Useful for multilingual models like mBART where the first generated token"
             "needs to be the target language token (Usually it is the target language token)"
         )
@@ -556,7 +556,7 @@ def main():
 if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
     logger.warning(
-        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
+        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for "
         f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
     )
...
@@ -146,7 +146,7 @@ def parse_args():
     default=128,
     help=(
         "The maximum total sequence length for target text after "
-        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
+        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded. "
         "during ``evaluate`` and ``predict``."
     ),
 )
@@ -272,7 +272,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -299,7 +299,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
         "Only applicable when `--with_tracking` is passed."
     ),
 )
@@ -433,7 +433,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -82,7 +82,7 @@ class DataTrainingArguments:
     default=None,
     metadata={
         "help": (
-            "The name of the text column in the input dataset or a CSV/JSON file."
+            "The name of the text column in the input dataset or a CSV/JSON file. "
             'If not specified, will use the "sentence" column for single/multi-label classifcation task.'
         )
     },
@@ -120,7 +120,7 @@ class DataTrainingArguments:
     default=None,
     metadata={
         "help": (
-            "The name of the label column in the input dataset or a CSV/JSON file."
+            "The name of the label column in the input dataset or a CSV/JSON file. "
             'If not specified, will use the "label" column for single/multi-label classifcation task'
         )
     },
@@ -248,7 +248,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -552,7 +552,7 @@ def main():
 if data_args.max_seq_length > tokenizer.model_max_length:
     logger.warning(
-        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
         f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
     )
 max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
...
@@ -209,7 +209,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -456,7 +456,7 @@ def main():
 if data_args.max_seq_length > tokenizer.model_max_length:
     logger.warning(
-        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
         f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
     )
 max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
...
@@ -162,7 +162,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -189,7 +189,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
         "Only applicable when `--with_tracking` is passed."
     ),
 )
...
@@ -173,7 +173,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
...
@@ -100,7 +100,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
...
@@ -216,7 +216,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -243,7 +243,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
         "Only applicable when `--with_tracking` is passed."
     ),
 )
@@ -412,7 +412,7 @@ def main():
 tokenizer_name_or_path = args.tokenizer_name if args.tokenizer_name else args.model_name_or_path
 if not tokenizer_name_or_path:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -110,7 +110,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -173,7 +173,7 @@ class DataTrainingArguments:
     metadata={
         "help": (
             "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
             "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
             "during ``evaluate`` and ``predict``."
         )
@@ -468,7 +468,7 @@ def main():
 if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
     logger.warning(
-        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
+        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for "
         f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
     )
...
@@ -118,7 +118,7 @@ def parse_args():
     default=128,
     help=(
         "The maximum total sequence length for target text after "
-        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
+        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded "
         "during ``evaluate`` and ``predict``."
     ),
 )
@@ -139,7 +139,7 @@ def parse_args():
     default=False,
     help=(
         "Whether to pad all samples to model maximum sentence "
-        "length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More"
+        "length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
         "efficient on GPU but very bad for TPU."
     ),
 )
@@ -263,7 +263,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -290,7 +290,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
        "Only applicable when `--with_tracking` is passed."
     ),
 )
@@ -413,7 +413,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -575,7 +575,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -532,7 +532,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -696,7 +696,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -208,7 +208,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -354,7 +354,7 @@ def main():
     default=10,
     type=int,
     help=(
-        "decay the selectivity of our secondary learner filter from"
+        "decay the selectivity of our secondary learner filter from "
         "1 standard deviation above average to 1 below average after 10 batches"
     ),
 )
...
@@ -341,7 +341,7 @@ if __name__ == "__main__":
     and not training_args.overwrite_output_dir
 ):
     raise ValueError(
-        f"Output directory ({training_args.output_dir}) already exists and is not empty."
+        f"Output directory ({training_args.output_dir}) already exists and is not empty. "
         "Use --overwrite_output_dir to overcome."
     )
@@ -399,7 +399,7 @@ if __name__ == "__main__":
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -78,7 +78,7 @@ class ModelArguments:
 text_model_name_or_path: str = field(
     metadata={
         "help": (
-            "The text model checkpoint for weights initialization."
+            "The text model checkpoint for weights initialization. "
             "Don't set if you want to train a model from scratch."
         )
     },
@@ -86,7 +86,7 @@ class ModelArguments:
 vision_model_name_or_path: str = field(
     metadata={
         "help": (
-            "The vision model checkpoint for weights initialization."
+            "The vision model checkpoint for weights initialization. "
             "Don't set if you want to train a model from scratch."
         )
     },
@@ -311,7 +311,7 @@ def main():
     and not training_args.overwrite_output_dir
 ):
     raise ValueError(
-        f"Output directory ({training_args.output_dir}) already exists and is not empty."
+        f"Output directory ({training_args.output_dir}) already exists and is not empty. "
         "Use --overwrite_output_dir to overcome."
     )
@@ -341,7 +341,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -246,7 +246,7 @@ def main():
     and not training_args.overwrite_output_dir
 ):
     raise ValueError(
-        f"Output directory ({training_args.output_dir}) already exists and is not empty."
+        f"Output directory ({training_args.output_dir}) already exists and is not empty. "
         "Use --overwrite_output_dir to overcome."
     )
@@ -325,7 +325,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
@@ -368,7 +368,7 @@ def main():
 else:
     if data_args.block_size > tokenizer.model_max_length:
         logger.warning(
-            f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
+            f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
             f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
         )
     block_size = min(data_args.block_size, tokenizer.model_max_length)
...
@@ -357,7 +357,7 @@ def main():
 tokenizer_name_or_path = args.tokenizer_name if args.tokenizer_name else args.model_name_or_path
 if not tokenizer_name_or_path:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -314,7 +314,7 @@ def main():
     tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
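
As a closing aside, defects like these are easy to surface mechanically. The sketch below is a hypothetical checker, not part of this commit or of the transformers tooling, that flags pairs of adjacent double-quoted literals where one ends and the next begins with a word character, the pattern corrected throughout this diff:

```python
import re
import sys

# Heuristic only: a double-quoted literal ending in a word character,
# followed across a newline by another literal starting with a word
# character. Escaped quotes and intentional joins cause misses and
# false positives, so treat hits as candidates for manual review.
ADJACENT_LITERALS = re.compile(r'f?"[^"\n]*\w"\s*\n\s*f?"\w')

def report_candidates(path: str) -> None:
    text = open(path, encoding="utf-8").read()
    for match in ADJACENT_LITERALS.finditer(text):
        # Convert the match offset to a 1-based line number.
        line = text.count("\n", 0, match.start()) + 1
        print(f"{path}:{line}: possible missing space between adjacent string literals")

if __name__ == "__main__":
    for path in sys.argv[1:]:
        report_candidates(path)
```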