Unverified commit 40ea9ab2, authored by Tom Aarsen, committed by GitHub

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
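The whole fix is one trailing space per string, but the effect is real: Python concatenates adjacent string literals at parse time, so when the first literal lacks a trailing space the joined message runs two words together. A minimal standalone sketch of the failure mode (illustrative only, not part of the diff):

# Adjacent string literals are implicitly concatenated before any code runs.
broken = (
    "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
    "Please use the equivalent `freeze_feature_encoder` method instead."
)
fixed = (
    "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
    "Please use the equivalent `freeze_feature_encoder` method instead."
)
print("v5.Please" in broken)  # True: the sentences run together
print("v5. Please" in fixed)  # True: the trailing space keeps them readable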
@@ -1146,7 +1146,7 @@ class WavLMModel(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1322,7 +1322,7 @@ class WavLMForCTC(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1455,7 +1455,7 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1578,7 +1578,7 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1745,7 +1745,7 @@ class WavLMForXVector(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1452,8 +1452,8 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLanguageModelingLoss):
if return_timestamps is not None:
if not hasattr(generation_config, "no_timestamps_token_id"):
raise ValueError(
- "You are trying to return timestamps, but the generation config is not properly set."
- "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`."
+ "You are trying to return timestamps, but the generation config is not properly set. "
+ "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`. "
"For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363"
)
@@ -1652,8 +1652,8 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if return_timestamps is not None:
if not hasattr(generation_config, "no_timestamps_token_id"):
raise ValueError(
- "You are trying to return timestamps, but the generation config is not properly set."
- "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`."
+ "You are trying to return timestamps, but the generation config is not properly set. "
+ "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`. "
"For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363"
)
@@ -1664,7 +1664,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if language is not None:
if not hasattr(generation_config, "lang_to_id"):
raise ValueError(
- "The generation config is outdated and is thus not compatible with the `language` argument"
+ "The generation config is outdated and is thus not compatible with the `language` argument "
"to `generate`. Either set the language using the `forced_decoder_ids` in the model config, "
"or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224"
)
@@ -1673,7 +1673,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if task is not None:
if not hasattr(generation_config, "task_to_id"):
raise ValueError(
- "The generation config is outdated and is thus not compatible with the `task` argument"
+ "The generation config is outdated and is thus not compatible with the `task` argument "
"to `generate`. Either set the task using the `forced_decoder_ids` in the model config, "
"or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224"
)
@@ -1137,7 +1137,7 @@ class YolosImageProcessor(BaseImageProcessor):
if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError(
- "Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts"
+ "Invalid COCO detection annotations. Annotations must be a dict (single image) or a list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
@@ -766,7 +766,7 @@ def pipeline(
if task is None and model is not None:
if not isinstance(model, str):
raise RuntimeError(
- "Inferring the task automatically requires to check the hub with a model_id defined as a `str`."
+ "Inferring the task automatically requires checking the hub with a model_id defined as a `str`. "
f"{model} is not a valid model_id."
)
task = get_task(model, use_auth_token)
@@ -403,7 +403,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
raise ValueError("CTC with LM can only predict word level timestamps, set `return_timestamps='word'`")
if self.type == "ctc" and return_timestamps not in ["char", "word"]:
raise ValueError(
- "CTC can either predict character level timestamps, or word level timestamps."
+ "CTC can either predict character level timestamps, or word level timestamps. "
"Set `return_timestamps='char'` or `return_timestamps='word'` as required."
)
if self.type == "seq2seq_whisper" and return_timestamps == "char":
@@ -28,7 +28,7 @@ if is_vision_available():
class ImageSegmentationTool(PipelineTool):
description = (
- "This is a tool that creates a segmentation mask of an image according to a label. It cannot create an image."
+ "This is a tool that creates a segmentation mask of an image according to a label. It cannot create an image. "
"It takes two arguments named `image` which should be the original image, and `label` which should be a text "
"describing the elements that should be identified in the segmentation mask. The tool returns the mask."
)
@@ -508,7 +508,7 @@ class Trainer:
self.optimizer is not None or self.lr_scheduler is not None
):
raise RuntimeError(
- "Passing `optimizers` is not allowed if Deepspeed or PyTorch FSDP is enabled."
+ "Passing `optimizers` is not allowed if Deepspeed or PyTorch FSDP is enabled. "
"You should subclass `Trainer` and override the `create_optimizer_and_scheduler` method."
)
default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to)
@@ -563,8 +563,8 @@ class Trainer:
# When there's mismatch between SMP config and trainer argument, use SMP config as truth
if args.fp16 != smp.state.cfg.fp16:
logger.warning(
- f"FP16 provided in SM_HP_MP_PARAMETERS is {smp.state.cfg.fp16},"
- f"but FP16 provided in trainer argument is {args.fp16},"
+ f"FP16 provided in SM_HP_MP_PARAMETERS is {smp.state.cfg.fp16}, "
+ f"but FP16 provided in trainer argument is {args.fp16}, "
f"setting to {smp.state.cfg.fp16}"
)
args.fp16 = smp.state.cfg.fp16
@@ -756,7 +756,7 @@ class TrainingArguments:
default=500,
metadata={
"help": (
- "Log every X updates steps. Should be an integer or a float in range `[0,1)`."
+ "Log every X update steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
},
@@ -770,7 +770,7 @@ class TrainingArguments:
default=500,
metadata={
"help": (
- "Save checkpoint every X updates steps. Should be an integer or a float in range `[0,1)`."
+ "Save checkpoint every X update steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
},
@@ -923,7 +923,7 @@ class TrainingArguments:
default=None,
metadata={
"help": (
- "Run an evaluation every X steps. Should be an integer or a float in range `[0,1)`."
+ "Run an evaluation every X steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
},
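The three help strings above (for logging_steps, save_steps, and eval_steps) describe the same convention. A hedged usage sketch, with hypothetical values, assuming a transformers version recent enough to accept float ratios:

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",             # hypothetical output directory
    logging_steps=100,            # integer: log every 100 update steps
    evaluation_strategy="steps",
    eval_steps=0.1,               # float in [0,1): evaluate every 10% of total steps
    save_steps=0.1,               # keep save and eval aligned (see the check further below)
)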
@@ -1006,7 +1006,7 @@ class TrainingArguments:
default=None,
metadata={
"help": (
- "Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a"
+ "Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a "
"fsdp json config file (e.g., `fsdp_config.json`) or an already loaded json file as `dict`."
)
},
@@ -1207,7 +1207,7 @@ class TrainingArguments:
dispatch_batches: Optional[bool] = field(
default=None,
metadata={
- "help": "Whether to dispatch batches across devices in distributed training. If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process"
+ "help": "Whether to dispatch batches across devices in distributed training. If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process "
"and then the batches are split and broadcast to each process. Will default to `True` for `DataLoader` whose "
"underlying dataset is an `IterableDataset`, `False` otherwise."
},
@@ -1297,7 +1297,7 @@ class TrainingArguments:
if not (self.eval_steps < 1 and self.save_steps < 1):
raise ValueError(
"--load_best_model_at_end requires the saving steps to be a multiple of the evaluation "
- "steps, which cannot get guaranteed when mixing ratio and absolute steps for save_steps"
+ "steps, which cannot be guaranteed when mixing ratio and absolute steps for save_steps "
f"{self.save_steps} and eval_steps {self.eval_steps}."
)
# Work around floating point precision issues
@@ -1807,7 +1807,7 @@ class TrainingArguments:
elif self.distributed_state.distributed_type == DistributedType.NO:
if self.use_mps_device:
warnings.warn(
- "`use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers."
+ "`use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. "
"`mps` device will be used by default if available, similar to the way `cuda` device is used. "
"Therefore, no action from user is required. "
)
@@ -124,7 +124,7 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
params_docstring = _convert_output_args_doc(params_docstring)
else:
raise ValueError(
- f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it has"
+ f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it has "
"a docstring containing either `Args` or `Parameters`."
)
@@ -67,7 +67,7 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
- "help": "The model checkpoint for weights initialization."
+ "help": "The model checkpoint for weights initialization. "
"Don't set if you want to train a model from scratch."
},
)
@@ -128,8 +128,8 @@ class ModelArguments:
default=False,
metadata={
"help": (
- "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
- "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+ "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -325,7 +325,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
else:
raise ValueError(
- "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+ "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -756,7 +756,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, use_fast=not args.use_slow_tokenizer)
else:
raise ValueError(
- "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+ "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -1172,7 +1172,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
# CTC models must specify return_timestamps type - cannot set `return_timestamps=True` blindly
with self.assertRaisesRegex(
ValueError,
- "^CTC can either predict character level timestamps, or word level timestamps."
+ "^CTC can either predict character level timestamps, or word level timestamps. "
"Set `return_timestamps='char'` or `return_timestamps='word'` as required.$",
):
_ = speech_recognizer(audio, return_timestamps=True)
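A side effect visible in this test: `assertRaisesRegex` treats the expected message as a regular expression, so the pattern had to be edited in lockstep with the message. Escaping the literal text avoids that coupling; a hypothetical variant, not what the commit does:

import re

with self.assertRaisesRegex(
    ValueError,
    re.escape(
        "CTC can either predict character level timestamps, or word level timestamps. "
        "Set `return_timestamps='char'` or `return_timestamps='word'` as required."
    ),
):
    _ = speech_recognizer(audio, return_timestamps=True)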
@@ -375,7 +375,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
- f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+ f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -1509,7 +1509,7 @@ if __name__ == "__main__":
"--models_to_skip",
type=list_str,
help=(
- "Comma-separated list of model class names(s) from which the tiny models won't be created.\nThis is usually"
+ "Comma-separated list of model class name(s) from which the tiny models won't be created.\nThis is usually "
"the list of model classes that have their tiny versions already uploaded to the Hub."
),
)
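Misses of this kind are easy to catch mechanically: linters such as flake8-implicit-str-concat, or Ruff's ISC rules, flag implicit string concatenation outright. A rough standard-library sketch of the core check; the function and sample below are illustrative, not a real tool:

import io
import tokenize

# Token kinds that may sit between the two halves of an implicit concatenation.
SKIP = {tokenize.NL, tokenize.NEWLINE, tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT}

def suspicious_concats(source: str):
    tokens = [
        tok
        for tok in tokenize.generate_tokens(io.StringIO(source).readline)
        if tok.type not in SKIP
    ]
    for prev, cur in zip(tokens, tokens[1:]):
        if prev.type == tokenize.STRING and cur.type == tokenize.STRING:
            body = prev.string.strip("'\"")  # naive unquoting; enough for plain literals
            if body and not body[-1].isspace():
                yield prev.start[0], body

sample = 'msg = ("word level timestamps."\n       "Set `return_timestamps` as required.")\n'
for lineno, text in suspicious_concats(sample):
    print(f"line {lineno}: {text!r} runs straight into the next literal")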