Unverified commit 40ea9ab2, authored by Tom Aarsen, committed by GitHub

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
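The whole fix is one trailing space per string, but the effect is real: Python concatenates adjacent string literals at parse time, so when the first literal lacks a trailing space the joined message runs two words together. A minimal standalone sketch of the failure mode (illustrative only, not part of the diff):

# Adjacent string literals are implicitly concatenated before any code runs.
broken = (
    "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
    "Please use the equivalent `freeze_feature_encoder` method instead."
)
fixed = (
    "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
    "Please use the equivalent `freeze_feature_encoder` method instead."
)
print("v5.Please" in broken)  # True: the sentences run together
print("v5. Please" in fixed)  # True: the trailing space keeps them readable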
@@ -1146,7 +1146,7 @@ class WavLMModel(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1322,7 +1322,7 @@ class WavLMForCTC(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1455,7 +1455,7 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1578,7 +1578,7 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1745,7 +1745,7 @@ class WavLMForXVector(WavLMPreTrainedModel):
not be updated during training.
"""
warnings.warn(
- "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5."
+ "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
"Please use the equivalent `freeze_feature_encoder` method instead.",
FutureWarning,
)
@@ -1452,8 +1452,8 @@ class TFWhisperForConditionalGeneration(TFWhisperPreTrainedModel, TFCausalLanguageModelingLoss):
if return_timestamps is not None:
if not hasattr(generation_config, "no_timestamps_token_id"):
raise ValueError(
- "You are trying to return timestamps, but the generation config is not properly set."
- "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`."
+ "You are trying to return timestamps, but the generation config is not properly set. "
+ "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`. "
"For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363"
)
@@ -1652,8 +1652,8 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if return_timestamps is not None:
if not hasattr(generation_config, "no_timestamps_token_id"):
raise ValueError(
- "You are trying to return timestamps, but the generation config is not properly set."
- "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`."
+ "You are trying to return timestamps, but the generation config is not properly set. "
+ "Make sure to initialize the generation config with the correct attributes that are needed such as `no_timestamps_token_id`. "
"For more details on how to generate the appropriate config, refer to https://github.com/huggingface/transformers/issues/21878#issuecomment-1451902363"
)
@@ -1664,7 +1664,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if language is not None:
if not hasattr(generation_config, "lang_to_id"):
raise ValueError(
- "The generation config is outdated and is thus not compatible with the `language` argument"
+ "The generation config is outdated and is thus not compatible with the `language` argument "
"to `generate`. Either set the language using the `forced_decoder_ids` in the model config, "
"or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224"
)
@@ -1673,7 +1673,7 @@ class WhisperForConditionalGeneration(WhisperPreTrainedModel):
if task is not None:
if not hasattr(generation_config, "task_to_id"):
raise ValueError(
- "The generation config is outdated and is thus not compatible with the `task` argument"
+ "The generation config is outdated and is thus not compatible with the `task` argument "
"to `generate`. Either set the task using the `forced_decoder_ids` in the model config, "
"or update the generation config as per the instructions https://github.com/huggingface/transformers/issues/25084#issuecomment-1664398224"
)
@@ -1137,7 +1137,7 @@ class YolosImageProcessor(BaseImageProcessor):
if annotations is not None:
if format == AnnotionFormat.COCO_DETECTION and not valid_coco_detection_annotations(annotations):
raise ValueError(
- "Invalid COCO detection annotations. Annotations must a dict (single image) of list of dicts"
+ "Invalid COCO detection annotations. Annotations must be a dict (single image) or a list of dicts "
"(batch of images) with the following keys: `image_id` and `annotations`, with the latter "
"being a list of annotations in the COCO format."
)
@@ -766,7 +766,7 @@ def pipeline(
if task is None and model is not None:
if not isinstance(model, str):
raise RuntimeError(
- "Inferring the task automatically requires to check the hub with a model_id defined as a `str`."
+ "Inferring the task automatically requires checking the hub with a model_id defined as a `str`. "
f"{model} is not a valid model_id."
)
task = get_task(model, use_auth_token)
@@ -403,7 +403,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
raise ValueError("CTC with LM can only predict word level timestamps, set `return_timestamps='word'`")
if self.type == "ctc" and return_timestamps not in ["char", "word"]:
raise ValueError(
- "CTC can either predict character level timestamps, or word level timestamps."
+ "CTC can either predict character level timestamps, or word level timestamps. "
"Set `return_timestamps='char'` or `return_timestamps='word'` as required."
)
if self.type == "seq2seq_whisper" and return_timestamps == "char":
@@ -28,7 +28,7 @@ if is_vision_available():
class ImageSegmentationTool(PipelineTool):
description = (
- "This is a tool that creates a segmentation mask of an image according to a label. It cannot create an image."
+ "This is a tool that creates a segmentation mask of an image according to a label. It cannot create an image. "
"It takes two arguments named `image` which should be the original image, and `label` which should be a text "
"describing the elements that should be identified in the segmentation mask. The tool returns the mask."
)
@@ -508,7 +508,7 @@ class Trainer:
self.optimizer is not None or self.lr_scheduler is not None
):
raise RuntimeError(
- "Passing `optimizers` is not allowed if Deepspeed or PyTorch FSDP is enabled."
+ "Passing `optimizers` is not allowed if Deepspeed or PyTorch FSDP is enabled. "
"You should subclass `Trainer` and override the `create_optimizer_and_scheduler` method."
)
default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to)
@@ -563,8 +563,8 @@ class Trainer:
# When there's mismatch between SMP config and trainer argument, use SMP config as truth
if args.fp16 != smp.state.cfg.fp16:
logger.warning(
- f"FP16 provided in SM_HP_MP_PARAMETERS is {smp.state.cfg.fp16},"
- f"but FP16 provided in trainer argument is {args.fp16},"
+ f"FP16 provided in SM_HP_MP_PARAMETERS is {smp.state.cfg.fp16}, "
+ f"but FP16 provided in trainer argument is {args.fp16}, "
f"setting to {smp.state.cfg.fp16}"
)
args.fp16 = smp.state.cfg.fp16
@@ -756,7 +756,7 @@ class TrainingArguments:
default=500,
metadata={
"help": (
- "Log every X updates steps. Should be an integer or a float in range `[0,1)`."
+ "Log every X update steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
},
@@ -770,7 +770,7 @@ class TrainingArguments:
default=500,
metadata={
"help": (
- "Save checkpoint every X updates steps. Should be an integer or a float in range `[0,1)`."
+ "Save checkpoint every X update steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
},
@@ -923,7 +923,7 @@ class TrainingArguments:
default=None,
metadata={
"help": (
- "Run an evaluation every X steps. Should be an integer or a float in range `[0,1)`."
+ "Run an evaluation every X steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
},
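The three help strings above (for logging_steps, save_steps, and eval_steps) describe the same convention. A hedged usage sketch, with hypothetical values, assuming a transformers version recent enough to accept float ratios:

from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",             # hypothetical output directory
    logging_steps=100,            # integer: log every 100 update steps
    evaluation_strategy="steps",
    eval_steps=0.1,               # float in [0,1): evaluate every 10% of total steps
    save_steps=0.1,               # keep save and eval aligned (see the check further below)
)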
@@ -1006,7 +1006,7 @@ class TrainingArguments:
default=None,
metadata={
"help": (
- "Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a"
+ "Config to be used with FSDP (Pytorch Fully Sharded Data Parallel). The value is either a "
"fsdp json config file (e.g., `fsdp_config.json`) or an already loaded json file as `dict`."
)
},
@@ -1207,7 +1207,7 @@ class TrainingArguments:
dispatch_batches: Optional[bool] = field(
default=None,
metadata={
- "help": "Whether to dispatch batches across devices in distributed training. If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process"
+ "help": "Whether to dispatch batches across devices in distributed training. If set to `True`, the dataloader prepared by the Accelerator is only iterated through on the main process "
"and then the batches are split and broadcast to each process. Will default to `True` for `DataLoader` whose "
"underlying dataset is an `IterableDataset`, `False` otherwise."
},
@@ -1297,7 +1297,7 @@ class TrainingArguments:
if not (self.eval_steps < 1 and self.save_steps < 1):
raise ValueError(
"--load_best_model_at_end requires the saving steps to be a multiple of the evaluation "
- "steps, which cannot get guaranteed when mixing ratio and absolute steps for save_steps"
+ "steps, which cannot be guaranteed when mixing ratio and absolute steps for save_steps "
f"{self.save_steps} and eval_steps {self.eval_steps}."
)
# Work around floating point precision issues
@@ -1807,7 +1807,7 @@ class TrainingArguments:
elif self.distributed_state.distributed_type == DistributedType.NO:
if self.use_mps_device:
warnings.warn(
- "`use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers."
+ "`use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. "
"`mps` device will be used by default if available, similar to the way `cuda` device is used. "
"Therefore, no action from user is required. "
)
@@ -124,7 +124,7 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
params_docstring = _convert_output_args_doc(params_docstring)
else:
raise ValueError(
- f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it has"
+ f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it has "
"a docstring containing either `Args` or `Parameters`."
)
@@ -67,7 +67,7 @@ class ModelArguments:
model_name_or_path: Optional[str] = field(
default=None,
metadata={
- "help": "The model checkpoint for weights initialization."
+ "help": "The model checkpoint for weights initialization. "
"Don't set if you want to train a model from scratch."
},
)
@@ -128,8 +128,8 @@ class ModelArguments:
default=False,
metadata={
"help": (
- "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
- "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+ "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -325,7 +325,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
else:
raise ValueError(
- "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+ "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -756,7 +756,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, use_fast=not args.use_slow_tokenizer)
else:
raise ValueError(
- "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+ "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -1172,7 +1172,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
# CTC models must specify return_timestamps type - cannot set `return_timestamps=True` blindly
with self.assertRaisesRegex(
ValueError,
- "^CTC can either predict character level timestamps, or word level timestamps."
+ "^CTC can either predict character level timestamps, or word level timestamps. "
"Set `return_timestamps='char'` or `return_timestamps='word'` as required.$",
):
_ = speech_recognizer(audio, return_timestamps=True)
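A side effect visible in this test: `assertRaisesRegex` treats the expected message as a regular expression, so the pattern had to be edited in lockstep with the message. Escaping the literal text avoids that coupling; a hypothetical variant, not what the commit does:

import re

with self.assertRaisesRegex(
    ValueError,
    re.escape(
        "CTC can either predict character level timestamps, or word level timestamps. "
        "Set `return_timestamps='char'` or `return_timestamps='word'` as required."
    ),
):
    _ = speech_recognizer(audio, return_timestamps=True)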
@@ -375,7 +375,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
- f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+ f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -1509,7 +1509,7 @@ if __name__ == "__main__":
"--models_to_skip",
type=list_str,
help=(
- "Comma-separated list of model class names(s) from which the tiny models won't be created.\nThis is usually"
+ "Comma-separated list of model class name(s) from which the tiny models won't be created.\nThis is usually "
"the list of model classes that have their tiny versions already uploaded to the Hub."
),
)
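Misses of this kind are easy to catch mechanically: linters such as flake8-implicit-str-concat, or Ruff's ISC rules, flag implicit string concatenation outright. A rough standard-library sketch of the core check; the function and sample below are illustrative, not a real tool:

import io
import tokenize

# Token kinds that may sit between the two halves of an implicit concatenation.
SKIP = {tokenize.NL, tokenize.NEWLINE, tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT}

def suspicious_concats(source: str):
    tokens = [
        tok
        for tok in tokenize.generate_tokens(io.StringIO(source).readline)
        if tok.type not in SKIP
    ]
    for prev, cur in zip(tokens, tokens[1:]):
        if prev.type == tokenize.STRING and cur.type == tokenize.STRING:
            body = prev.string.strip("'\"")  # naive unquoting; enough for plain literals
            if body and not body[-1].isspace():
                yield prev.start[0], body

sample = 'msg = ("word level timestamps."\n       "Set `return_timestamps` as required.")\n'
for lineno, text in suspicious_concats(sample):
    print(f"line {lineno}: {text!r} runs straight into the next literal")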