Unverified Commit 40ea9ab2 authored by Tom Aarsen's avatar Tom Aarsen Committed by GitHub
Browse files

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
...@@ -1146,7 +1146,7 @@ class WavLMModel(WavLMPreTrainedModel): ...@@ -1146,7 +1146,7 @@ class WavLMModel(WavLMPreTrainedModel):
not be updated during training. not be updated during training.
""" """
warnings.warn( warnings.warn(
"The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5." "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.", "Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning, FutureWarning,
) )
...@@ -1322,7 +1322,7 @@ class WavLMForCTC(WavLMPreTrainedModel): ...@@ -1322,7 +1322,7 @@ class WavLMForCTC(WavLMPreTrainedModel):
not be updated during training. not be updated during training.
""" """
warnings.warn( warnings.warn(
"The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5." "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.", "Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning, FutureWarning,
) )
...@@ -1455,7 +1455,7 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel): ...@@ -1455,7 +1455,7 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel):
not be updated during training. not be updated during training.
""" """
warnings.warn( warnings.warn(
"The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5." "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.", "Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning, FutureWarning,
) )
...@@ -1578,7 +1578,7 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel): ...@@ -1578,7 +1578,7 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel):
not be updated during training. not be updated during training.
""" """
warnings.warn( warnings.warn(
"The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5." "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.", "Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning, FutureWarning,
) )
...@@ -1745,7 +1745,7 @@ class WavLMForXVector(WavLMPreTrainedModel): ...@@ -1745,7 +1745,7 @@ class WavLMForXVector(WavLMPreTrainedModel):
not be updated during training. not be updated during training.
""" """
warnings.warn( warnings.warn(
"The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5." "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.", "Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning, FutureWarning,
) )
......
...@@ -1452,8 +1452,8 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua ...@@ -1452,8 +1452,8 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLangua
if return_timestamps is not None: if return_timestamps is not None:
if not hasattr(generation_config, "no_timestamps_token_id"): if not hasattr(generation_config, "no_timestamps_token_id"):
raise ValueError( raise ValueError(
"You are trying to return timestamps, but the generation config is not properly set." "You are trying to return timestamps, but the generation config is not properly set. "
"Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`." "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`. "
"For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363" "For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363"
) )
......
...@@ -1652,8 +1652,8 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel): ...@@ -1652,8 +1652,8 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if return_timestamps is not None: if return_timestamps is not None:
if not hasattr(generation_config, "no_timestamps_token_id"): if not hasattr(generation_config, "no_timestamps_token_id"):
raise ValueError( raise ValueError(
"You are trying to return timestamps, but the generation config is not properly set." "You are trying to return timestamps, but the generation config is not properly set. "
"Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`." "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`. "
"For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363" "For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363"
) )
...@@ -1664,7 +1664,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel): ...@@ -1664,7 +1664,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if language is not None: if language is not None:
if not hasattr(generation_config, "lang_to_id"): if not hasattr(generation_config, "lang_to_id"):
raise ValueError( raise ValueError(
"The generation config is outdated and is thus not compatible with the `language` argument" "The generation config is outdated and is thus not compatible with the `language` argument "
"to `generate`. Either set the language using the `forced_decoder_ids` in the model config, " "to `generate`. Either set the language using the `forced_decoder_ids` in the model config, "
"or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224" "or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224"
) )
...@@ -1673,7 +1673,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel): ...@@ -1673,7 +1673,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if task is not None: if task is not None:
if not hasattr(generation_config, "task_to_id"): if not hasattr(generation_config, "task_to_id"):
raise ValueError( raise ValueError(
"The generation config is outdated and is thus not compatible with the `task` argument" "The generation config is outdated and is thus not compatible with the `task` argument "
"to `generate`. Either set the task using the `forced_decoder_ids` in the model config, " "to `generate`. Either set the task using the `forced_decoder_ids` in the model config, "
"or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224" "or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224"
) )
......
...@@ -1137,7 +1137,7 @@ class YolosImageProcessor(BaseImageProcessor): ...@@ -1137,7 +1137,7 @@ class YolosImageProcessor(BaseImageProcessor):
if annotations is not None: if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations): if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError( raise ValueError(
"Invalid COCO detection annotations. Annotations must be a dict (single image) or list of dicts" "Invalid COCO detection annotations. Annotations must be a dict (single image) or list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter " "(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format." "being a list of annotations in the COCO format."
) )
......
...@@ -766,7 +766,7 @@ def pipeline( ...@@ -766,7 +766,7 @@ def pipeline(
if task is None and model is not None: if task is None and model is not None:
if not isinstance(model, str): if not isinstance(model, str):
raise RuntimeError( raise RuntimeError(
"Inferring the task automatically requires to check the hub with a model_id defined as a `str`." "Inferring the task automatically requires to check the hub with a model_id defined as a `str`. "
f"{model} is not a valid model_id." f"{model} is not a valid model_id."
) )
task = get_task(model, use_auth_token) task = get_task(model, use_auth_token)
......
...@@ -403,7 +403,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline): ...@@ -403,7 +403,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
raise ValueError("CTC with LM can only predict word level timestamps, set `return_timestamps='word'`") raise ValueError("CTC with LM can only predict word level timestamps, set `return_timestamps='word'`")
if self.type == "ctc" and return_timestamps not in ["char", "word"]: if self.type == "ctc" and return_timestamps not in ["char", "word"]:
raise ValueError( raise ValueError(
"CTC can either predict character level timestamps, or word level timestamps." "CTC can either predict character level timestamps, or word level timestamps. "
"Set `return_timestamps='char'` or `return_timestamps='word'` as required." "Set `return_timestamps='char'` or `return_timestamps='word'` as required."
) )
if self.type == "seq2seq_whisper" and return_timestamps == "char": if self.type == "seq2seq_whisper" and return_timestamps == "char":
......
...@@ -28,7 +28,7 @@ if is_vision_available(): ...@@ -28,7 +28,7 @@ if is_vision_available():
class ImageSegmentationTool(PipelineTool): class ImageSegmentationTool(PipelineTool):
description = ( description = (
"This is a tool that creates a segmentation mask of an image according to a label. It cannot create an image." "This is a tool that creates a segmentation mask of an image according to a label. It cannot create an image. "
"It takes two arguments named `image` which should be the original image, and `label` which should be a text " "It takes two arguments named `image` which should be the original image, and `label` which should be a text "
"describing the elements what should be identified in the segmentation mask. The tool returns the mask." "describing the elements what should be identified in the segmentation mask. The tool returns the mask."
) )
......
...@@ -508,7 +508,7 @@ class Trainer: ...@@ -508,7 +508,7 @@ class Trainer:
self.optimizer is not None or self.lr_scheduler is not None self.optimizer is not None or self.lr_scheduler is not None
): ):
raise RuntimeError( raise RuntimeError(
"Passing `optimizers` is not allowed if Deepspeed or PyTorch FSDP is enabled." "Passing `optimizers` is not allowed if Deepspeed or PyTorch FSDP is enabled. "
"You should subclass `Trainer` and override the `create_optimizer_and_scheduler` method." "You should subclass `Trainer` and override the `create_optimizer_and_scheduler` method."
) )
default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to) default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to)
...@@ -563,8 +563,8 @@ class Trainer: ...@@ -563,8 +563,8 @@ class Trainer:
# When there's mismatch between SMP config and trainer argument, use SMP config as truth # When there's mismatch between SMP config and trainer argument, use SMP config as truth
if args.fp16 != smp.state.cfg.fp16: if args.fp16 != smp.state.cfg.fp16:
logger.warning( logger.warning(
f"FP16 provided in SM_HP_MP_PARAMETERS is {smp.state.cfg.fp16}," f"FP16 provided in SM_HP_MP_PARAMETERS is {smp.state.cfg.fp16}, "
f"but FP16 provided in trainer argument is {args.fp16}," f"but FP16 provided in trainer argument is {args.fp16}, "
f"setting to {smp.state.cfg.fp16}" f"setting to {smp.state.cfg.fp16}"
) )
args.fp16 = smp.state.cfg.fp16 args.fp16 = smp.state.cfg.fp16
......
...@@ -756,7 +756,7 @@ class TrainingArguments: ...@@ -756,7 +756,7 @@ class TrainingArguments:
default=500, default=500,
metadata={ metadata={
"help": ( "help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`." "Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps." "If smaller than 1, will be interpreted as ratio of total training steps."
) )
}, },
...@@ -770,7 +770,7 @@ class TrainingArguments: ...@@ -770,7 +770,7 @@ class TrainingArguments:
default=500, default=500,
metadata={ metadata={
"help": ( "help": (
"Save checkpoint every X updates steps. Should be an integer or a float in range `[0,1)`." "Save checkpoint every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps." "If smaller than 1, will be interpreted as ratio of total training steps."
) )
}, },
...@@ -923,7 +923,7 @@ class TrainingArguments: ...@@ -923,7 +923,7 @@ class TrainingArguments:
default=None, default=None,
metadata={ metadata={
"help": ( "help": (
"Run an evaluation every X steps. Should be an integer or a float in range `[0,1)`." "Run an evaluation every X steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps." "If smaller than 1, will be interpreted as ratio of total training steps."
) )
}, },
...@@ -1006,7 +1006,7 @@ class TrainingArguments: ...@@ -1006,7 +1006,7 @@ class TrainingArguments:
default=None, default=None,
metadata={ metadata={
"help": ( "help": (
"Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a" "Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a "
"fsdp json config file (e.g., `fsdp_config.json`) or an already loaded json file as `dict`." "fsdp json config file (e.g., `fsdp_config.json`) or an already loaded json file as `dict`."
) )
}, },
...@@ -1207,7 +1207,7 @@ class TrainingArguments: ...@@ -1207,7 +1207,7 @@ class TrainingArguments:
dispatch_batches: Optional[bool] = field( dispatch_batches: Optional[bool] = field(
default=None, default=None,
metadata={ metadata={
"help": "Whether to dispatch batches across devices in distributed training. If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process" "help": "Whether to dispatch batches across devices in distributed training. If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process "
"and then the batches are split and broadcast to each process. Will default to `True` for `DataLoader` whose" "and then the batches are split and broadcast to each process. Will default to `True` for `DataLoader` whose "
"underlying dataset is an `IterableDataset`, `False` otherwise." "underlying dataset is an `IterableDataset`, `False` otherwise."
}, },
...@@ -1297,7 +1297,7 @@ class TrainingArguments: ...@@ -1297,7 +1297,7 @@ class TrainingArguments:
if not (self.eval_steps < 1 and self.save_steps < 1): if not (self.eval_steps < 1 and self.save_steps < 1):
raise ValueError( raise ValueError(
"--load_best_model_at_end requires the saving steps to be a multiple of the evaluation " "--load_best_model_at_end requires the saving steps to be a multiple of the evaluation "
"steps, which cannot get guaranteed when mixing ratio and absolute steps for save_steps" "steps, which cannot get guaranteed when mixing ratio and absolute steps for save_steps "
f"{self.save_steps} and eval_steps {self.eval_steps}." f"{self.save_steps} and eval_steps {self.eval_steps}."
) )
# Work around floating point precision issues # Work around floating point precision issues
...@@ -1807,7 +1807,7 @@ class TrainingArguments: ...@@ -1807,7 +1807,7 @@ class TrainingArguments:
elif self.distributed_state.distributed_type == DistributedType.NO: elif self.distributed_state.distributed_type == DistributedType.NO:
if self.use_mps_device: if self.use_mps_device:
warnings.warn( warnings.warn(
"`use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers." "`use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. "
"`mps` device will be used by default if available similar to the way `cuda` device is used." "`mps` device will be used by default if available similar to the way `cuda` device is used."
"Therefore, no action from user is required. " "Therefore, no action from user is required. "
) )
......
...@@ -124,7 +124,7 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None): ...@@ -124,7 +124,7 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
params_docstring = _convert_output_args_doc(params_docstring) params_docstring = _convert_output_args_doc(params_docstring)
else: else:
raise ValueError( raise ValueError(
f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it has" f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it has "
"docstring and contain either `Args` or `Parameters`." "docstring and contain either `Args` or `Parameters`."
) )
......
...@@ -67,7 +67,7 @@ class ModelArguments: ...@@ -67,7 +67,7 @@ class ModelArguments:
model_name_or_path: Optional[str] = field( model_name_or_path: Optional[str] = field(
default=None, default=None,
metadata={ metadata={
"help": "The model checkpoint for weights initialization." "help": "The model checkpoint for weights initialization. "
"Don't set if you want to train a model from scratch." "Don't set if you want to train a model from scratch."
}, },
) )
...@@ -128,8 +128,8 @@ class ModelArguments: ...@@ -128,8 +128,8 @@ class ModelArguments:
default=False, default=False,
metadata={ metadata={
"help": ( "help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option" "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
"should only be set to `True` for repositories you trust and in which you have read the code, as it will" "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine." "execute code present on the Hub on your local machine."
) )
}, },
...@@ -325,7 +325,7 @@ def main(): ...@@ -325,7 +325,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs) tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
else: else:
raise ValueError( raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script." "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name." "You can do it from another script, save it, and load it from here, using --tokenizer_name."
) )
...@@ -756,7 +756,7 @@ def main(): ...@@ -756,7 +756,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, use_fast=not args.use_slow_tokenizer) tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, use_fast=not args.use_slow_tokenizer)
else: else:
raise ValueError( raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script." "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name." "You can do it from another script, save it, and load it from here, using --tokenizer_name."
) )
......
...@@ -1172,7 +1172,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase): ...@@ -1172,7 +1172,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
# CTC models must specify return_timestamps type - cannot set `return_timestamps=True` blindly # CTC models must specify return_timestamps type - cannot set `return_timestamps=True` blindly
with self.assertRaisesRegex( with self.assertRaisesRegex(
ValueError, ValueError,
"^CTC can either predict character level timestamps, or word level timestamps." "^CTC can either predict character level timestamps, or word level timestamps. "
"Set `return_timestamps='char'` or `return_timestamps='word'` as required.$", "Set `return_timestamps='char'` or `return_timestamps='word'` as required.$",
): ):
_ = speech_recognizer(audio, return_timestamps=True) _ = speech_recognizer(audio, return_timestamps=True)
......
...@@ -375,7 +375,7 @@ def main(): ...@@ -375,7 +375,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length: if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning( logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
) )
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
......
...@@ -1509,7 +1509,7 @@ if __name__ == "__main__": ...@@ -1509,7 +1509,7 @@ if __name__ == "__main__":
"--models_to_skip", "--models_to_skip",
type=list_str, type=list_str,
help=( help=(
"Comma-separated list of model class name(s) from which the tiny models won't be created.\nThis is usually" "Comma-separated list of model class name(s) from which the tiny models won't be created.\nThis is usually "
"the list of model classes that have their tiny versions already uploaded to the Hub." "the list of model classes that have their tiny versions already uploaded to the Hub."
), ),
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment