Unverified Commit 40ea9ab2 authored by Tom Aarsen, committed by GitHub

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
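
The fix pattern throughout this diff is the same: Python concatenates adjacent string literals at parse time with no separator, so a multi-line message that lacks a trailing space inside the quotes silently runs two words together. A minimal illustration, using strings taken from one of the hunks below:

```python
# Adjacent string literals are joined by the parser with no separator.
broken = (
    "This is not supported by this script."   # missing trailing space
    "You can do it from another script."
)
print(broken)
# This is not supported by this script.You can do it from another script.

fixed = (
    "This is not supported by this script. "  # trailing space added, as in this commit
    "You can do it from another script."
)
print(fixed)
# This is not supported by this script. You can do it from another script.
```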
@@ -116,8 +116,8 @@ class ModelArguments:
default=0.05,
metadata={
"help": (
"Probability of each feature vector along the time axis to be chosen as the start of the vector"
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
"Probability of each feature vector along the time axis to be chosen as the start of the vector "
"span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature "
"vectors will be masked along the time axis."
)
},
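
As a quick check of the formula quoted in this help text, with the default mask_time_prob=0.05 from this hunk and illustrative values for the other two quantities:

```python
mask_time_prob = 0.05   # default from this hunk
sequence_length = 1000  # illustrative, not a script default
mask_time_length = 10   # illustrative, not a script default

# "Approximately mask_time_prob * sequence_length // mask_time_length
#  feature vectors will be masked along the time axis."
print(int(mask_time_prob * sequence_length // mask_time_length))  # 5
```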
@@ -455,7 +455,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
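
Adjacent f-strings concatenate the same way, so before this fix the rendered log ran the fragments together (values below are hypothetical):

```python
n_gpu, local_rank = 1, 0  # hypothetical values
print(
    f"n_gpu: {n_gpu}"     # missing ", " before this fix
    f"distributed training: {bool(local_rank != -1)}"
)
# n_gpu: 1distributed training: True
```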
# Set the verbosity to info of the Transformers logger (on main process only):
@@ -41,7 +41,7 @@ class TeacherModelArguments:
default="This example is {}.",
metadata={
"help": (
"Template used to turn class names into mock hypotheses for teacher NLI model. Must include {{}}"
"Template used to turn class names into mock hypotheses for teacher NLI model. Must include {{}} "
"where class name is inserted."
)
},
@@ -53,7 +53,7 @@ class TeacherModelArguments:
default=False,
metadata={
"help": (
"Allow multiple classes to be true rather than forcing them to sum to 1 (sometimes called"
"Allow multiple classes to be true rather than forcing them to sum to 1 (sometimes called "
"multi-class multi-label classification)."
)
},
@@ -98,7 +98,7 @@ class DistillTrainingArguments(TrainingArguments):
default=True,
metadata={
"help": (
"Whether to evaluate the agreement of the final student predictions and the teacher predictions"
"Whether to evaluate the agreement of the final student predictions and the teacher predictions "
"after training."
)
},
@@ -107,7 +107,7 @@ class DistillTrainingArguments(TrainingArguments):
default=0,
metadata={
"help": (
"Limit the total amount of checkpoints."
"Limit the total amount of checkpoints. "
"Deletes the older checkpoints in the output_dir. Default is 0 (no checkpoints)."
)
},
@@ -113,7 +113,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -387,7 +387,7 @@ def main():
)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -178,7 +178,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -133,7 +133,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -399,7 +399,7 @@ def main():
)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
# endregion
@@ -432,7 +432,7 @@ def main():
else:
if data_args.block_size > tokenizer.model_max_length:
logger.warning(
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
)
block_size = min(data_args.block_size, tokenizer.model_max_length)
@@ -131,7 +131,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -383,7 +383,7 @@ def main():
)
else:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
# endregion
@@ -404,7 +404,7 @@ def main():
else:
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -167,7 +167,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -383,7 +383,7 @@ def main():
else:
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -98,7 +98,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -400,7 +400,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -120,7 +120,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -194,7 +194,7 @@ class DataTrainingArguments:
metadata={
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
@@ -185,7 +185,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -353,7 +353,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -101,7 +101,7 @@ class DataTrainingArguments:
metadata={
"help": (
"Whether to pad all samples to `max_seq_length`. "
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
"If False, will pad the samples dynamically when batching to the maximum length in the batch. "
"Data will always be padded when using TPUs."
)
},
@@ -191,7 +191,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -360,7 +360,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -96,7 +96,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -334,7 +334,7 @@ def main():
tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
if not tokenizer_name_or_path:
raise ValueError(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -114,7 +114,7 @@ class ModelArguments:
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
)
},
@@ -182,7 +182,7 @@ class DataTrainingArguments:
metadata={
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
@@ -131,7 +131,7 @@ class ServeCommand(BaseTransformersCLICommand):
if not _serve_dependencies_installed:
raise RuntimeError(
"Using serve command requires FastAPI and uvicorn. "
-'Please install transformers with [serving]: pip install "transformers[serving]".'
+'Please install transformers with [serving]: pip install "transformers[serving]". '
"Or install FastAPI and uvicorn separately."
)
else:
@@ -575,7 +575,7 @@ def custom_object_save(obj: Any, folder: Union[str, os.PathLike], config: Option
def _raise_timeout_error(signum, frame):
raise ValueError(
"Loading this model requires you to execute custom code contained in the model repository on your local"
"Loading this model requires you to execute custom code contained in the model repository on your local "
"machine. Please set the option `trust_remote_code=True` to permit loading of this model."
)
@@ -593,7 +593,7 @@ def resolve_trust_remote_code(trust_remote_code, model_name, has_local_code, has
signal.alarm(TIME_OUT_REMOTE_CODE)
while trust_remote_code is None:
answer = input(
f"The repository for {model_name} contains custom code which must be executed to correctly"
f"The repository for {model_name} contains custom code which must be executed to correctly "
f"load the model. You can inspect the repository content at https://hf.co/{model_name}.\n"
f"You can avoid this prompt in future by passing the argument `trust_remote_code=True`.\n\n"
f"Do you wish to run the custom code? [y/N] "
@@ -606,7 +606,7 @@ def resolve_trust_remote_code(trust_remote_code, model_name, has_local_code, has
except Exception:
# OS which does not support signal.SIGALRM
raise ValueError(
f"The repository for {model_name} contains custom code which must be executed to correctly"
f"The repository for {model_name} contains custom code which must be executed to correctly "
f"load the model. You can inspect the repository content at https://hf.co/{model_name}.\n"
f"Please pass the argument `trust_remote_code=True` to allow custom code to be run."
)
@@ -1198,7 +1198,7 @@ class TFGenerationMixin:
inputs_kwarg = model_kwargs.pop(input_name, None)
if inputs_kwarg is not None and inputs is not None:
raise ValueError(
f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed."
f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. "
f"Make sure to either pass {inputs} or {input_name}=..."
)
elif inputs_kwarg is not None:
@@ -544,7 +544,7 @@ class GenerationMixin:
inputs_kwarg = model_kwargs.pop(input_name, None)
if inputs_kwarg is not None and inputs is not None:
raise ValueError(
f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed."
f"`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. "
f"Make sure to either pass {inputs} or {input_name}=..."
)
elif inputs_kwarg is not None:
@@ -1816,7 +1816,7 @@ class GenerationMixin:
def typeerror():
raise ValueError(
"`force_words_ids` has to either be a `List[List[List[int]]]` or `List[List[int]]`"
"`force_words_ids` has to either be a `List[List[List[int]]]` or `List[List[int]]` "
f"of positive integers, but is {generation_config.force_words_ids}."
)
@@ -1312,7 +1312,7 @@ class NeptuneCallback(TrainerCallback):
target_path = consistent_checkpoint_path
except IOError as e:
logger.warning(
"NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'."
"NeptuneCallback was unable to made a copy of checkpoint due to I/O exception: '{}'. "
"Could fail trying to upload.".format(e)
)
@@ -850,7 +850,7 @@ def load_tf_shard(model, model_layer_map, resolved_archive_file, ignore_mismatch
raise OSError(
f"Unable to load weights from TF checkpoint file for '{resolved_archive_file}' "
f"at '{resolved_archive_file}'. "
"If you tried to load a TF model from a sharded checkpoint, you should try converting the model"
"If you tried to load a TF model from a sharded checkpoint, you should try converting the model "
"by loading it in pytorch and saving it localy. A convertion script should be realeased soon."
)
@@ -2635,8 +2635,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
else:
raise RuntimeError("No GPU found. A GPU is needed for quantization.")
logger.info(
"The device_map was not initialized."
"Setting device_map to {'':torch.cuda.current_device()}."
"The device_map was not initialized. "
"Setting device_map to {'':torch.cuda.current_device()}. "
"If you want to use the model for inference, please set device_map ='auto' "
)
if low_cpu_mem_usage is None:
@@ -2762,8 +2762,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
else:
raise RuntimeError("No GPU found. A GPU is needed for quantization.")
logger.info(
"The device_map was not initialized."
"Setting device_map to {'':torch.cuda.current_device()}."
"The device_map was not initialized. "
"Setting device_map to {'':torch.cuda.current_device()}. "
"If you want to use the model for inference, please set device_map ='auto' "
)
if low_cpu_mem_usage is None:
@@ -3149,7 +3149,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
if load_in_8bit and torch_dtype is None:
logger.warning(
"You are loading your model in 8bit but you did not specify a `torch_dtype` attribute."
"You are loading your model in 8bit but you did not specify a `torch_dtype` attribute. "
"All non-linear modules will be loaded in full precision."
" If you want to load the other modules in other precision, please specify a `torch_dtype` attribute."
)
@@ -3193,8 +3193,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
else:
raise ValueError(
"You are using `device_map='auto'` on a 4bit loaded version of the model. To automatically compute"
" the appropriate device map, you should upgrade your `accelerate` library,"
"`pip install --upgrade accelerate` or install it from source to support fp4 auto device map"
" the appropriate device map, you should upgrade your `accelerate` library, "
"`pip install --upgrade accelerate` or install it from source to support fp4 auto device map "
"calculation. You may encounter unexpected behavior, or pass your own device map"
)
elif load_in_8bit:
@@ -3202,7 +3202,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
if model._no_split_modules is None:
raise ValueError(
f"{model.__class__.__name__} does not support `device_map='{device_map}'`. To implement support, the model"
f"{model.__class__.__name__} does not support `device_map='{device_map}'`. To implement support, the model "
"class needs to implement the `_no_split_modules` attribute."
)
no_split_modules = model._no_split_modules
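
Fixes like these span dozens of files, so spotting the remaining offenders by eye is unreliable. Below is a minimal sketch of one way to flag suspicious adjacent string literals with the standard tokenize module; it is a heuristic written for illustration, not the tool actually used for this PR:

```python
import io
import tokenize

# Token types that may sit between the two halves of an implicit concatenation.
SKIP = {tokenize.NL, tokenize.NEWLINE, tokenize.INDENT, tokenize.DEDENT, tokenize.COMMENT}

def find_missing_spaces(source: str):
    """Yield line numbers where two adjacent string literals join with no
    whitespace at the seam (a likely missing-space bug).

    Heuristic only: raw strings and escapes get no special handling, and on
    Python 3.12+ f-strings tokenize as FSTRING_* tokens and are not flagged."""
    tokens = [t for t in tokenize.generate_tokens(io.StringIO(source).readline)
              if t.type not in SKIP]
    for prev, curr in zip(tokens, tokens[1:]):
        if prev.type == curr.type == tokenize.STRING:
            left = prev.string.rstrip("\"'")                      # drop closing quotes
            right = curr.string.lstrip("bfruBFRU").lstrip("\"'")  # drop prefix and opening quotes
            if left and right and not left[-1].isspace() and not right[0].isspace():
                yield curr.start[0]

sample = 'msg = ("not supported by this script."\n       "You can do it elsewhere.")\n'
print(list(find_missing_spaces(sample)))  # [2]
```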