"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "81db12c3ba0c2067f43c4a63edf5e45f54161042"
Unverified commit 40ea9ab2, authored by Tom Aarsen, committed by GitHub

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
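
Why this matters: Python implicitly concatenates adjacent string literals at compile time, with nothing inserted between them. When a long help or error message is split across several literals inside parentheses, a missing trailing space silently glues the last word of one line to the first word of the next. A minimal sketch of the failure mode, reusing the strings from the first hunk below:

```python
# Adjacent string literals are joined at compile time with no separator,
# so the missing trailing space glues "initialization." to "Don't".
broken = (
    "The encoder model checkpoint for weights initialization."
    "Don't set if you want to train an encoder model from scratch."
)
fixed = (
    "The encoder model checkpoint for weights initialization. "
    "Don't set if you want to train an encoder model from scratch."
)

print(broken)  # ...weights initialization.Don't set if you want...
print(fixed)   # ...weights initialization. Don't set if you want...
```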
@@ -37,7 +37,7 @@ class ModelArguments:
     encoder_model_name_or_path: str = field(
         metadata={
             "help": (
-                "The encoder model checkpoint for weights initialization."
+                "The encoder model checkpoint for weights initialization. "
                 "Don't set if you want to train an encoder model from scratch."
             )
         },
@@ -45,7 +45,7 @@ class ModelArguments:
     decoder_model_name_or_path: str = field(
         metadata={
             "help": (
-                "The decoder model checkpoint for weights initialization."
+                "The decoder model checkpoint for weights initialization. "
                 "Don't set if you want to train a decoder model from scratch."
             )
         },
......
@@ -203,7 +203,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -256,7 +256,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the `max_length` param of `model.generate`, which is used "
                 "during evaluation."
             )
@@ -423,7 +423,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -685,7 +685,7 @@ def main():
     eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
     if training_args.block_size % train_batch_size > 0 or training_args.block_size % eval_batch_size > 0:
         raise ValueError(
-            "`training_args.block_size` needs to be a multiple of the global train/eval batch size."
+            "`training_args.block_size` needs to be a multiple of the global train/eval batch size. "
             f"Got {training_args.block_size}, {train_batch_size} and {eval_batch_size} respectively instead."
         )
......
@@ -487,7 +487,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -606,7 +606,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
......
@@ -190,7 +190,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -368,7 +368,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -524,7 +524,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
@@ -586,7 +586,7 @@ def main():
     else:
         if data_args.block_size > tokenizer.model_max_length:
             logger.warning(
-                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
+                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
                 f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
             )
         block_size = min(data_args.block_size, tokenizer.model_max_length)
......
@@ -195,7 +195,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -411,7 +411,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -556,7 +556,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
......
@@ -528,7 +528,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -647,7 +647,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
......
@@ -176,7 +176,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -582,7 +582,7 @@ def main():
     if data_args.max_seq_length > tokenizer.model_max_length:
         logger.warning(
-            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
             f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
         )
     max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
......
@@ -415,7 +415,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use `--overwrite_output_dir` to overcome."
         )
......
@@ -209,7 +209,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -268,7 +268,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the `max_length` param of `model.generate`, which is used "
                 "during evaluation."
             )
@@ -451,7 +451,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -558,7 +558,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
......
@@ -122,7 +122,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
......
@@ -170,7 +170,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
......
@@ -180,7 +180,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -291,7 +291,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
......
@@ -379,7 +379,7 @@ class SwagProcessor(DataProcessor):
         """See base class."""
         logger.info("LOOKING AT {} dev".format(data_dir))
         raise ValueError(
-            "For swag testing, the input file does not contain a label column. It can not be tested in current code"
+            "For swag testing, the input file does not contain a label column. It can not be tested in current code "
             "setting!"
         )
         return self._create_examples(self._read_csv(os.path.join(data_dir, "test.csv")), "test")
@@ -541,7 +541,7 @@ def convert_examples_to_features(
         if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
             logger.info(
                 "Attention! you are cropping tokens (swag task is ok). "
-                "If you are training ARC and RACE and you are poping question + options,"
+                "If you are training ARC and RACE and you are poping question + options, "
                 "you need to try to use a bigger max seq length!"
             )
......
@@ -313,7 +313,7 @@ def add_generic_args(parser, root_dir) -> None:
         type=str,
         default="O2",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
......
@@ -663,7 +663,7 @@ def main():
         type=str,
         default="O1",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
......
@@ -149,7 +149,7 @@ class DataTrainingArguments:
         default=-1,
         metadata={
             "help": (
-                "Optional input sequence length after tokenization."
+                "Optional input sequence length after tokenization. "
                 "The training dataset will be truncated in block of this size for training."
                 "Default to the model max input length for single sentence inputs (take into account special tokens)."
             )
@@ -283,7 +283,7 @@ def main():
     if config.model_type in ["bert", "roberta", "distilbert", "camembert"] and not data_args.mlm:
         raise ValueError(
-            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the"
+            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the "
             "--mlm flag (masked language modeling)."
         )
......
@@ -579,7 +579,7 @@ def main():
         type=str,
         default="O1",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
    )
......
@@ -172,7 +172,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -189,14 +189,14 @@ class ModelArguments:
         if not self.freeze_feature_extractor and self.freeze_feature_encoder:
             warnings.warn(
                 "The argument `--freeze_feature_extractor` is deprecated and "
-                "will be removed in a future version. Use `--freeze_feature_encoder`"
+                "will be removed in a future version. Use `--freeze_feature_encoder` "
                 "instead. Setting `freeze_feature_encoder==True`.",
                 FutureWarning,
             )
         if self.freeze_feature_extractor and not self.freeze_feature_encoder:
             raise ValueError(
                 "The argument `--freeze_feature_extractor` is deprecated and "
-                "should not be used in combination with `--freeze_feature_encoder`."
+                "should not be used in combination with `--freeze_feature_encoder`. "
                 "Only make use of `--freeze_feature_encoder`."
             )
......
@@ -107,7 +107,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -358,7 +358,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
......
@@ -163,7 +163,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
......
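
A way to catch this class of bug mechanically (an illustrative sketch only, not something this commit or transformers ships): walk the token stream with the standard tokenize module and flag adjacent string literals whose seam has no whitespace on either side. The filename check_seams.py and the helper name below are hypothetical.

```python
#!/usr/bin/env python
"""check_seams.py -- hypothetical linter for implicit string concatenation.

Flags adjacent string literals whose seam has no whitespace on either
side, the bug class fixed in this commit (illustrative only).
"""
import ast
import io
import sys
import tokenize


def missing_space_seams(source):
    """Yield (lineno, preview) for each suspicious pair of literals."""
    prev = None  # previous STRING token while literals remain adjacent
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type == tokenize.STRING:
            if prev is not None:
                try:
                    left = ast.literal_eval(prev.string)
                    right = ast.literal_eval(tok.string)
                except (ValueError, SyntaxError):
                    left = right = None  # f-strings etc.: skip quietly
                if (
                    isinstance(left, str)
                    and isinstance(right, str)
                    and left
                    and right
                    and not left[-1].isspace()
                    and not right[0].isspace()
                ):
                    yield tok.start[0], f"...{left[-15:]}<|>{right[:15]}..."
            prev = tok
        elif tok.type not in (tokenize.NL, tokenize.COMMENT):
            prev = None  # any other token breaks adjacency


if __name__ == "__main__":
    for path in sys.argv[1:]:
        with open(path, encoding="utf-8") as handle:
            for lineno, preview in missing_space_seams(handle.read()):
                print(f"{path}:{lineno}: {preview}")
```

Run it as `python check_seams.py examples/flax/*.py`. Expect some false positives: deliberate no-space joins (for example, assembling a URL from pieces) trip the same check, so hits are candidates for review rather than guaranteed bugs.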