chenpangpang / transformers / Commits

Commit 40ea9ab2 (Unverified)
Authored Oct 12, 2023 by Tom Aarsen; committed by GitHub on Oct 12, 2023

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
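Python concatenates adjacent string literals at parse time, so when a long message is split across source lines and a fragment lacks its trailing space, the words on either side of the seam fuse together in the resulting string. A minimal illustration of the bug class this commit fixes (the variable names are ours; the text comes from the first hunk below):

    # Adjacent string literals are joined at parse time; a missing trailing
    # space silently fuses the words at the seam.
    broken = (
        "Probability of each feature vector along the time axis to be chosen as the start of the vector"
        "span to be masked."
    )
    fixed = (
        "Probability of each feature vector along the time axis to be chosen as the start of the vector "
        "span to be masked."
    )
    print(broken)  # ...the start of the vectorspan to be masked.
    print(fixed)   # ...the start of the vector span to be masked.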
parent 3bc65505

Showing 20 changed files with 48 additions and 48 deletions (+48 -48)
examples/research_projects/xtreme-s/run_xtreme_s.py (+3 -3)
examples/research_projects/zero-shot-distillation/distill_classifier.py (+4 -4)
examples/tensorflow/contrastive-image-text/run_clip.py (+2 -2)
examples/tensorflow/image-classification/run_image_classification.py (+1 -1)
examples/tensorflow/language-modeling/run_clm.py (+3 -3)
examples/tensorflow/language-modeling/run_mlm.py (+3 -3)
examples/tensorflow/multiple-choice/run_swag.py (+2 -2)
examples/tensorflow/question-answering/run_qa.py (+2 -2)
examples/tensorflow/summarization/run_summarization.py (+2 -2)
examples/tensorflow/text-classification/run_glue.py (+2 -2)
examples/tensorflow/text-classification/run_text_classification.py (+3 -3)
examples/tensorflow/token-classification/run_ner.py (+2 -2)
examples/tensorflow/translation/run_translation.py (+2 -2)
src/transformers/commands/serving.py (+1 -1)
src/transformers/dynamic_module_utils.py (+3 -3)
src/transformers/generation/tf_utils.py (+1 -1)
src/transformers/generation/utils.py (+2 -2)
src/transformers/integrations/integration_utils.py (+1 -1)
src/transformers/modeling_tf_utils.py (+1 -1)
src/transformers/modeling_utils.py (+8 -8)
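Every hunk below follows the same mechanical pattern: the first of two implicitly concatenated string literals gains a trailing space. As a rough sketch of how such seams can be found automatically, here is a small checker built on Python's tokenize module (the script, its name, and its heuristics are our illustration, not part of this commit; dedicated linters such as flake8-implicit-str-concat target the same pattern):

    import io
    import sys
    import tokenize

    def missing_space_seams(source):
        """Yield (lineno, literal) for a string literal immediately followed
        by another string literal when the first does not end in whitespace --
        the pattern fixed throughout this commit. Crude heuristic: it ignores
        escape sequences, and on Python 3.12+ f-strings tokenize into separate
        FSTRING_* tokens, so f-string seams are not caught there."""
        tokens = tokenize.generate_tokens(io.StringIO(source).readline)
        # Skip comments and non-logical newlines so literals split across
        # physical lines inside parentheses still count as adjacent.
        significant = [t for t in tokens if t.type not in (tokenize.NL, tokenize.COMMENT)]
        for prev, curr in zip(significant, significant[1:]):
            if prev.type == tokenize.STRING and curr.type == tokenize.STRING:
                body = prev.string.rstrip("\"'")  # drop the closing quote(s)
                if body and not body[-1].isspace():
                    yield prev.start[0], prev.string

    if __name__ == "__main__":
        for path in sys.argv[1:]:
            with open(path, encoding="utf-8") as handle:
                for lineno, literal in missing_space_seams(handle.read()):
                    print(f"{path}:{lineno}: {literal[:72]}")

Run over the pre-commit files, this would flag the "-" lines in the hunks that follow, along with some intentional seams (for example fragments ending in "\n"), which is why it is a heuristic rather than a fixer.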
examples/research_projects/xtreme-s/run_xtreme_s.py

@@ -116,8 +116,8 @@ class ModelArguments:
         default=0.05,
         metadata={
             "help": (
-                "Probability of each feature vector along the time axis to be chosen as the start of the vector"
-                "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
+                "Probability of each feature vector along the time axis to be chosen as the start of the vector "
+                "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature "
                 "vectors will be masked along the time axis."
             )
         },

@@ -455,7 +455,7 @@ def main():
     # Log on each process the small summary:
     logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
         f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
     )
     # Set the verbosity to info of the Transformers logger (on main process only):
examples/research_projects/zero-shot-distillation/distill_classifier.py

@@ -41,7 +41,7 @@ class TeacherModelArguments:
         default="This example is {}.",
         metadata={
             "help": (
-                "Template used to turn class names into mock hypotheses for teacher NLI model. Must include {{}}"
+                "Template used to turn class names into mock hypotheses for teacher NLI model. Must include {{}} "
                 "where class name is inserted."
             )
         },

@@ -53,7 +53,7 @@ class TeacherModelArguments:
         default=False,
         metadata={
             "help": (
-                "Allow multiple classes to be true rather than forcing them to sum to 1 (sometimes called"
+                "Allow multiple classes to be true rather than forcing them to sum to 1 (sometimes called "
                 "multi-class multi-label classification)."
             )
         },

@@ -98,7 +98,7 @@ class DistillTrainingArguments(TrainingArguments):
         default=True,
         metadata={
             "help": (
-                "Whether to evaluate the agreement of the final student predictions and the teacher predictions"
+                "Whether to evaluate the agreement of the final student predictions and the teacher predictions "
                 "after training."
             )
         },

@@ -107,7 +107,7 @@ class DistillTrainingArguments(TrainingArguments):
         default=0,
         metadata={
             "help": (
-                "Limit the total amount of checkpoints."
+                "Limit the total amount of checkpoints. "
                 "Deletes the older checkpoints in the output_dir. Default is 0 (no checkpoints)."
             )
         },
examples/tensorflow/contrastive-image-text/run_clip.py

@@ -113,7 +113,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -387,7 +387,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
examples/tensorflow/image-classification/run_image_classification.py

@@ -178,7 +178,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
examples/tensorflow/language-modeling/run_clm.py

@@ -133,7 +133,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -399,7 +399,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
     # endregion

@@ -432,7 +432,7 @@ def main():
     else:
         if data_args.block_size > tokenizer.model_max_length:
             logger.warning(
-                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
+                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
                 f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
             )
         block_size = min(data_args.block_size, tokenizer.model_max_length)
examples/tensorflow/language-modeling/run_mlm.py

@@ -131,7 +131,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -383,7 +383,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
     # endregion

@@ -404,7 +404,7 @@ def main():
     else:
         if data_args.max_seq_length > tokenizer.model_max_length:
             logger.warning(
-                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
                 f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
             )
         max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
examples/tensorflow/multiple-choice/run_swag.py

@@ -167,7 +167,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -383,7 +383,7 @@ def main():
     else:
         if data_args.max_seq_length > tokenizer.model_max_length:
             logger.warning(
-                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
                 f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
             )
         max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
examples/tensorflow/question-answering/run_qa.py

@@ -98,7 +98,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -400,7 +400,7 @@ def main():
     if data_args.max_seq_length > tokenizer.model_max_length:
         logger.warning(
-            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
             f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
         )
     max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
examples/tensorflow/summarization/run_summarization.py

@@ -120,7 +120,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -194,7 +194,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
                 "during ``evaluate`` and ``predict``."
             )
examples/tensorflow/text-classification/run_glue.py

@@ -185,7 +185,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -353,7 +353,7 @@ def main():
     if data_args.max_seq_length > tokenizer.model_max_length:
         logger.warning(
-            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
             f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
         )
     max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
examples/tensorflow/text-classification/run_text_classification.py

@@ -101,7 +101,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "Whether to pad all samples to `max_seq_length`. "
-                "If False, will pad the samples dynamically when batching to the maximum length in the batch."
+                "If False, will pad the samples dynamically when batching to the maximum length in the batch. "
                 "Data will always be padded when using TPUs."
             )
         },

@@ -191,7 +191,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -360,7 +360,7 @@ def main():
     if data_args.max_seq_length > tokenizer.model_max_length:
         logger.warning(
-            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
             f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
         )
     max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
examples/tensorflow/token-classification/run_ner.py

@@ -96,7 +96,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -334,7 +334,7 @@ def main():
     tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
     if not tokenizer_name_or_path:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
examples/tensorflow/translation/run_translation.py

@@ -114,7 +114,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },

@@ -182,7 +182,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
                 "during ``evaluate`` and ``predict``."
             )
src/transformers/commands/serving.py

@@ -131,7 +131,7 @@ class ServeCommand(BaseTransformersCLICommand):
         if not _serve_dependencies_installed:
             raise RuntimeError(
                 "Using serve command requires FastAPI and uvicorn. "
-                'Please install transformers with [serving]: pip install "transformers[serving]".'
+                'Please install transformers with [serving]: pip install "transformers[serving]". '
                 "Or install FastAPI and uvicorn separately."
             )
         else:
src/transformers/dynamic_module_utils.py

@@ -575,7 +575,7 @@ def custom_object_save(obj: Any, folder: Union[str, os.PathLike], config: Option
 def _raise_timeout_error(signum, frame):
     raise ValueError(
-        "Loading this model requires you to execute custom code contained in the model repository on your local"
+        "Loading this model requires you to execute custom code contained in the model repository on your local "
         "machine. Please set the option `trust_remote_code=True` to permit loading of this model."
     )

@@ -593,7 +593,7 @@ def resolve_trust_remote_code(trust_remote_code, model_name, has_local_code, has
         signal.alarm(TIME_OUT_REMOTE_CODE)
         while trust_remote_code is None:
             answer = input(
-                f"The repository for {model_name} contains custom code which must be executed to correctly"
+                f"The repository for {model_name} contains custom code which must be executed to correctly "
                 f"load the model. You can inspect the repository content at https://hf.co/{model_name}.\n"
                 f"You can avoid this prompt in future by passing the argument `trust_remote_code=True`.\n\n"
                 f"Do you wish to run the custom code? [y/N] "

@@ -606,7 +606,7 @@ def resolve_trust_remote_code(trust_remote_code, model_name, has_local_code, has
     except Exception:
         # OS which does not support signal.SIGALRM
         raise ValueError(
-            f"The repository for {model_name} contains custom code which must be executed to correctly"
+            f"The repository for {model_name} contains custom code which must be executed to correctly "
             f"load the model. You can inspect the repository content at https://hf.co/{model_name}.\n"
             f"Please pass the argument `trust_remote_code=True` to allow custom code to be run."
         )
src/transformers/generation/tf_utils.py

@@ -1198,7 +1198,7 @@ class TFGenerationMixin:
         inputs_kwarg = model_kwargs.pop(input_name, None)
         if inputs_kwarg is not None and inputs is not None:
             raise ValueError(
-                f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed."
+                f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. "
                 f"Make sure to either pass {inputs} or {input_name}=..."
             )
         elif inputs_kwarg is not None:
src/transformers/generation/utils.py

@@ -544,7 +544,7 @@ class GenerationMixin:
         inputs_kwarg = model_kwargs.pop(input_name, None)
         if inputs_kwarg is not None and inputs is not None:
             raise ValueError(
-                f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed."
+                f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. "
                 f"Make sure to either pass {inputs} or {input_name}=..."
             )
         elif inputs_kwarg is not None:

@@ -1816,7 +1816,7 @@ class GenerationMixin:
         def typeerror():
             raise ValueError(
-                "`force_words_ids` has to either be a `List[List[List[int]]]` or `List[List[int]]`"
+                "`force_words_ids` has to either be a `List[List[List[int]]]` or `List[List[int]]` "
                 f"of positive integers, but is {generation_config.force_words_ids}."
             )
src/transformers/integrations/integration_utils.py

@@ -1312,7 +1312,7 @@ class NeptuneCallback(TrainerCallback):
                 target_path = consistent_checkpoint_path
             except IOError as e:
                 logger.warning(
-                    "NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'."
+                    "NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'. "
                     "Could fail trying to upload.".format(e)
                 )
src/transformers/modeling_tf_utils.py

@@ -850,7 +850,7 @@ def load_tf_shard(model, model_layer_map, resolved_archive_file, ignore_mismatch
         raise OSError(
             f"Unable to load weights from TF checkpoint file for '{resolved_archive_file}' "
             f"at '{resolved_archive_file}'. "
-            "If you tried to load a TF model from a sharded checkpoint, you should try converting the model"
+            "If you tried to load a TF model from a sharded checkpoint, you should try converting the model "
             "by loading it in pytorch and saving it localy. A convertion script should be realeased soon."
         )
src/transformers/modeling_utils.py

@@ -2635,8 +2635,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
             else:
                 raise RuntimeError("No GPU found. A GPU is needed for quantization.")
             logger.info(
-                "The device_map was not initialized."
-                "Setting device_map to {'':torch.cuda.current_device()}."
+                "The device_map was not initialized. "
+                "Setting device_map to {'':torch.cuda.current_device()}. "
                 "If you want to use the model for inference, please set device_map ='auto' "
             )
         if low_cpu_mem_usage is None:

@@ -2762,8 +2762,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
             else:
                 raise RuntimeError("No GPU found. A GPU is needed for quantization.")
             logger.info(
-                "The device_map was not initialized."
-                "Setting device_map to {'':torch.cuda.current_device()}."
+                "The device_map was not initialized. "
+                "Setting device_map to {'':torch.cuda.current_device()}. "
                 "If you want to use the model for inference, please set device_map ='auto' "
            )
         if low_cpu_mem_usage is None:

@@ -3149,7 +3149,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
         if load_in_8bit and torch_dtype is None:
             logger.warning(
-                "You are loading your model in 8bit but you did not specify a `torch_dtype` attribute."
+                "You are loading your model in 8bit but you did not specify a `torch_dtype` attribute. "
                 "All non-linear modules will be loaded in full precision."
                 " If you want to load the other modules in other precision, please specify a `torch_dtype` attribute."
             )

@@ -3193,8 +3193,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
             else:
                 raise ValueError(
                     "You are using `device_map='auto'` on a 4bit loaded version of the model. To automatically compute"
-                    " the appropriate device map, you should upgrade your `accelerate` library,"
-                    "`pip install --upgrade accelerate` or install it from source to support fp4 auto device map"
+                    " the appropriate device map, you should upgrade your `accelerate` library, "
+                    "`pip install --upgrade accelerate` or install it from source to support fp4 auto device map "
                     "calculation. You may encounter unexpected behavior, or pass your own device map"
                 )
         elif load_in_8bit:

@@ -3202,7 +3202,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
         if model._no_split_modules is None:
             raise ValueError(
-                f"{model.__class__.__name__} does not support `device_map='{device_map}'`. To implement support, the model"
+                f"{model.__class__.__name__} does not support `device_map='{device_map}'`. To implement support, the model "
                 "class needs to implement the `_no_split_modules` attribute."
             )
         no_split_modules = model._no_split_modules