"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "b189226e8cb31361916d638c5a1e2540e3208a61"
Unverified Commit 40ea9ab2 authored by Tom Aarsen, committed by GitHub

Add many missing spaces in adjacent strings (#26751)

Add missing spaces in adjacent strings
parent 3bc65505
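
For context on what this change fixes: Python concatenates adjacent string literals at parse time, so when a long help message is split across several literals, a missing trailing space silently fuses the last word of one literal with the first word of the next. A minimal sketch, using one of the strings touched by this commit:

```python
# Adjacent string literals are joined at parse time; without a trailing
# space, the words on either side of the boundary run together.
before = (
    "The token to force as the first generated token after the decoder_start_token_id."
    "Useful for multilingual models like mBART where the first generated token"
)
after = (
    "The token to force as the first generated token after the decoder_start_token_id. "
    "Useful for multilingual models like mBART where the first generated token"
)

print("decoder_start_token_id.Useful" in before)  # True: the words are fused
print("decoder_start_token_id. Useful" in after)  # True: the added space separates them
```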
@@ -120,7 +120,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -205,7 +205,7 @@ class DataTrainingArguments:
     metadata={
         "help": (
             "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
             "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
             "during ``evaluate`` and ``predict``."
         )
@@ -271,7 +271,7 @@ class DataTrainingArguments:
     default=None,
     metadata={
         "help": (
-            "The token to force as the first generated token after the decoder_start_token_id."
+            "The token to force as the first generated token after the decoder_start_token_id. "
             "Useful for multilingual models like mBART where the first generated token"
             "needs to be the target language token (Usually it is the target language token)"
         )
@@ -556,7 +556,7 @@ def main():
 if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
     logger.warning(
-        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
+        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for "
         f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
     )
...
@@ -146,7 +146,7 @@ def parse_args():
     default=128,
     help=(
         "The maximum total sequence length for target text after "
-        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
+        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded. "
         "during ``evaluate`` and ``predict``."
     ),
 )
@@ -272,7 +272,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -299,7 +299,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
         "Only applicable when `--with_tracking` is passed."
     ),
 )
@@ -433,7 +433,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -82,7 +82,7 @@ class DataTrainingArguments:
     default=None,
     metadata={
         "help": (
-            "The name of the text column in the input dataset or a CSV/JSON file."
+            "The name of the text column in the input dataset or a CSV/JSON file. "
             'If not specified, will use the "sentence" column for single/multi-label classifcation task.'
         )
     },
@@ -120,7 +120,7 @@ class DataTrainingArguments:
     default=None,
     metadata={
         "help": (
-            "The name of the label column in the input dataset or a CSV/JSON file."
+            "The name of the label column in the input dataset or a CSV/JSON file. "
             'If not specified, will use the "label" column for single/multi-label classifcation task'
         )
     },
@@ -248,7 +248,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -552,7 +552,7 @@ def main():
 if data_args.max_seq_length > tokenizer.model_max_length:
     logger.warning(
-        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
         f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
     )
 max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
...
@@ -209,7 +209,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -456,7 +456,7 @@ def main():
 if data_args.max_seq_length > tokenizer.model_max_length:
     logger.warning(
-        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+        f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
         f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
     )
 max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
...
@@ -162,7 +162,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -189,7 +189,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
         "Only applicable when `--with_tracking` is passed."
     ),
 )
...
@@ -173,7 +173,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
...
@@ -100,7 +100,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
...
@@ -216,7 +216,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -243,7 +243,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
         "Only applicable when `--with_tracking` is passed."
     ),
 )
@@ -412,7 +412,7 @@ def main():
 tokenizer_name_or_path = args.tokenizer_name if args.tokenizer_name else args.model_name_or_path
 if not tokenizer_name_or_path:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -110,7 +110,7 @@ class ModelArguments:
     metadata={
         "help": (
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         )
     },
@@ -173,7 +173,7 @@ class DataTrainingArguments:
     metadata={
         "help": (
             "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+            "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
             "This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
             "during ``evaluate`` and ``predict``."
         )
@@ -468,7 +468,7 @@ def main():
 if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
     logger.warning(
-        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
+        "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for "
         f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
     )
...
@@ -118,7 +118,7 @@ def parse_args():
     default=128,
     help=(
         "The maximum total sequence length for target text after "
-        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
+        "tokenization. Sequences longer than this will be truncated, sequences shorter will be padded "
         "during ``evaluate`` and ``predict``."
     ),
 )
@@ -139,7 +139,7 @@ def parse_args():
     default=False,
     help=(
         "Whether to pad all samples to model maximum sentence "
-        "length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More"
+        "length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
         "efficient on GPU but very bad for TPU."
     ),
 )
@@ -263,7 +263,7 @@ def parse_args():
     default=False,
     help=(
         "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-        "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+        "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
         "execute code present on the Hub on your local machine."
     ),
 )
@@ -290,7 +290,7 @@ def parse_args():
     default="all",
     help=(
         'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+        ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
        "Only applicable when `--with_tracking` is passed."
     ),
 )
@@ -413,7 +413,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -575,7 +575,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -532,7 +532,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -696,7 +696,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -208,7 +208,7 @@ def main():
     type=str,
     default="O1",
     help=(
-        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
         "See details at https://nvidia.github.io/apex/amp.html"
     ),
 )
...
@@ -354,7 +354,7 @@ def main():
     default=10,
     type=int,
     help=(
-        "decay the selectivity of our secondary learner filter from"
+        "decay the selectivity of our secondary learner filter from "
         "1 standard deviation above average to 1 below average after 10 batches"
     ),
 )
...
@@ -341,7 +341,7 @@ if __name__ == "__main__":
     and not training_args.overwrite_output_dir
 ):
     raise ValueError(
-        f"Output directory ({training_args.output_dir}) already exists and is not empty."
+        f"Output directory ({training_args.output_dir}) already exists and is not empty. "
         "Use --overwrite_output_dir to overcome."
     )
@@ -399,7 +399,7 @@ if __name__ == "__main__":
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -78,7 +78,7 @@ class ModelArguments:
 text_model_name_or_path: str = field(
     metadata={
         "help": (
-            "The text model checkpoint for weights initialization."
+            "The text model checkpoint for weights initialization. "
             "Don't set if you want to train a model from scratch."
         )
     },
@@ -86,7 +86,7 @@ class ModelArguments:
 vision_model_name_or_path: str = field(
     metadata={
         "help": (
-            "The vision model checkpoint for weights initialization."
+            "The vision model checkpoint for weights initialization. "
             "Don't set if you want to train a model from scratch."
         )
     },
@@ -311,7 +311,7 @@ def main():
     and not training_args.overwrite_output_dir
 ):
     raise ValueError(
-        f"Output directory ({training_args.output_dir}) already exists and is not empty."
+        f"Output directory ({training_args.output_dir}) already exists and is not empty. "
         "Use --overwrite_output_dir to overcome."
     )
@@ -341,7 +341,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -246,7 +246,7 @@ def main():
     and not training_args.overwrite_output_dir
 ):
     raise ValueError(
-        f"Output directory ({training_args.output_dir}) already exists and is not empty."
+        f"Output directory ({training_args.output_dir}) already exists and is not empty. "
         "Use --overwrite_output_dir to overcome."
     )
@@ -325,7 +325,7 @@ def main():
     )
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
@@ -368,7 +368,7 @@ def main():
 else:
     if data_args.block_size > tokenizer.model_max_length:
         logger.warning(
-            f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
+            f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
             f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
         )
     block_size = min(data_args.block_size, tokenizer.model_max_length)
...
@@ -357,7 +357,7 @@ def main():
 tokenizer_name_or_path = args.tokenizer_name if args.tokenizer_name else args.model_name_or_path
 if not tokenizer_name_or_path:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
@@ -314,7 +314,7 @@ def main():
     tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
 else:
     raise ValueError(
-        "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+        "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
         "You can do it from another script, save it, and load it from here, using --tokenizer_name."
     )
...
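
As a closing aside, defects like these are easy to surface mechanically. The sketch below is a hypothetical checker, not part of this commit or of the transformers tooling, that flags pairs of adjacent double-quoted literals where one ends and the next begins with a word character, the pattern corrected throughout this diff:

```python
import re
import sys

# Heuristic only: a double-quoted literal ending in a word character,
# followed across a newline by another literal starting with a word
# character. Escaped quotes and intentional joins cause misses and
# false positives, so treat hits as candidates for manual review.
ADJACENT_LITERALS = re.compile(r'f?"[^"\n]*\w"\s*\n\s*f?"\w')

def report_candidates(path: str) -> None:
    text = open(path, encoding="utf-8").read()
    for match in ADJACENT_LITERALS.finditer(text):
        # Convert the match offset to a 1-based line number.
        line = text.count("\n", 0, match.start()) + 1
        print(f"{path}:{line}: possible missing space between adjacent string literals")

if __name__ == "__main__":
    for path in sys.argv[1:]:
        report_candidates(path)
```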