"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "2156619f10b33fb58520830bff0a92c36d13128e"
Unverified commit 40ea9ab2, authored by Tom Aarsen, committed by GitHub

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
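For context on what the commit fixes: Python joins adjacent string literals at parse time with nothing in between, so a help or error message split across literals runs its words together unless every literal except the last ends in a space. A minimal sketch of the effect, reusing one of the strings touched by this diff:

# Adjacent string literals are concatenated verbatim; without a trailing
# space the two sentences are glued together.
before = (
    "The encoder model checkpoint for weights initialization."
    "Don't set if you want to train an encoder model from scratch."
)
after = (
    "The encoder model checkpoint for weights initialization. "
    "Don't set if you want to train an encoder model from scratch."
)
print(before)  # ...weights initialization.Don't set if you want...
print(after)   # ...weights initialization. Don't set if you want...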
@@ -37,7 +37,7 @@ class ModelArguments:
     encoder_model_name_or_path: str = field(
         metadata={
             "help": (
-                "The encoder model checkpoint for weights initialization."
+                "The encoder model checkpoint for weights initialization. "
                 "Don't set if you want to train an encoder model from scratch."
             )
         },
@@ -45,7 +45,7 @@ class ModelArguments:
     decoder_model_name_or_path: str = field(
         metadata={
             "help": (
-                "The decoder model checkpoint for weights initialization."
+                "The decoder model checkpoint for weights initialization. "
                 "Don't set if you want to train a decoder model from scratch."
             )
         },
...
@@ -203,7 +203,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -256,7 +256,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the `max_length` param of `model.generate`, which is used "
                 "during evaluation."
             )
@@ -423,7 +423,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -685,7 +685,7 @@ def main():
     eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
     if training_args.block_size % train_batch_size > 0 or training_args.block_size % eval_batch_size > 0:
         raise ValueError(
-            "`training_args.block_size` needs to be a multiple of the global train/eval batch size."
+            "`training_args.block_size` needs to be a multiple of the global train/eval batch size. "
             f"Got {training_args.block_size}, {train_batch_size} and {eval_batch_size} respectively instead."
         )
...
@@ -487,7 +487,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -606,7 +606,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
...
@@ -190,7 +190,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -368,7 +368,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -524,7 +524,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
@@ -586,7 +586,7 @@ def main():
     else:
         if data_args.block_size > tokenizer.model_max_length:
             logger.warning(
-                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
+                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
                 f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
             )
         block_size = min(data_args.block_size, tokenizer.model_max_length)
...
@@ -195,7 +195,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -411,7 +411,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -556,7 +556,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
...
@@ -528,7 +528,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -647,7 +647,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
...
@@ -176,7 +176,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -582,7 +582,7 @@ def main():
     if data_args.max_seq_length > tokenizer.model_max_length:
         logger.warning(
-            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
             f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
         )
     max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
...
@@ -415,7 +415,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use `--overwrite_output_dir` to overcome."
         )
...
@@ -209,7 +209,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -268,7 +268,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the `max_length` param of `model.generate`, which is used "
                 "during evaluation."
             )
@@ -451,7 +451,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
@@ -558,7 +558,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
...
@@ -122,7 +122,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
...
@@ -170,7 +170,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
...
@@ -180,7 +180,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -291,7 +291,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
...
@@ -379,7 +379,7 @@ class SwagProcessor(DataProcessor):
         """See base class."""
         logger.info("LOOKING AT {} dev".format(data_dir))
         raise ValueError(
-            "For swag testing, the input file does not contain a label column. It can not be tested in current code"
+            "For swag testing, the input file does not contain a label column. It can not be tested in current code "
            "setting!"
         )
         return self._create_examples(self._read_csv(os.path.join(data_dir, "test.csv")), "test")
@@ -541,7 +541,7 @@ def convert_examples_to_features(
         if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
             logger.info(
                 "Attention! you are cropping tokens (swag task is ok). "
-                "If you are training ARC and RACE and you are poping question + options,"
+                "If you are training ARC and RACE and you are poping question + options, "
                 "you need to try to use a bigger max seq length!"
             )
...
@@ -313,7 +313,7 @@ def add_generic_args(parser, root_dir) -> None:
         type=str,
         default="O2",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
...
@@ -663,7 +663,7 @@ def main():
         type=str,
         default="O1",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
...
@@ -149,7 +149,7 @@ class DataTrainingArguments:
         default=-1,
         metadata={
             "help": (
-                "Optional input sequence length after tokenization."
+                "Optional input sequence length after tokenization. "
                 "The training dataset will be truncated in block of this size for training."
                 "Default to the model max input length for single sentence inputs (take into account special tokens)."
             )
@@ -283,7 +283,7 @@ def main():
     if config.model_type in ["bert", "roberta", "distilbert", "camembert"] and not data_args.mlm:
         raise ValueError(
-            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the"
+            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the "
             "--mlm flag (masked language modeling)."
         )
...
@@ -579,7 +579,7 @@ def main():
         type=str,
         default="O1",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
...
@@ -172,7 +172,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -189,14 +189,14 @@ class ModelArguments:
         if not self.freeze_feature_extractor and self.freeze_feature_encoder:
             warnings.warn(
                 "The argument `--freeze_feature_extractor` is deprecated and "
-                "will be removed in a future version. Use `--freeze_feature_encoder`"
+                "will be removed in a future version. Use `--freeze_feature_encoder` "
                 "instead. Setting `freeze_feature_encoder==True`.",
                 FutureWarning,
             )
         if self.freeze_feature_extractor and not self.freeze_feature_encoder:
             raise ValueError(
                 "The argument `--freeze_feature_extractor` is deprecated and "
-                "should not be used in combination with `--freeze_feature_encoder`."
+                "should not be used in combination with `--freeze_feature_encoder`. "
                 "Only make use of `--freeze_feature_encoder`."
             )
...
@@ -107,7 +107,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -358,7 +358,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
...
@@ -163,7 +163,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
...