Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
40ea9ab2
Unverified
Commit
40ea9ab2
authored
Oct 12, 2023
by
Tom Aarsen
Committed by
GitHub
Oct 12, 2023
Browse files
Add many missing spaces in adjacent strings (#26751)
Add missing spaces in adjacent strings
parent
3bc65505
Changes
154
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
45 additions
and
45 deletions
+45
-45
examples/pytorch/summarization/run_summarization.py
examples/pytorch/summarization/run_summarization.py
+4
-4
examples/pytorch/summarization/run_summarization_no_trainer.py
...les/pytorch/summarization/run_summarization_no_trainer.py
+4
-4
examples/pytorch/text-classification/run_classification.py
examples/pytorch/text-classification/run_classification.py
+4
-4
examples/pytorch/text-classification/run_glue.py
examples/pytorch/text-classification/run_glue.py
+2
-2
examples/pytorch/text-classification/run_glue_no_trainer.py
examples/pytorch/text-classification/run_glue_no_trainer.py
+2
-2
examples/pytorch/text-classification/run_xnli.py
examples/pytorch/text-classification/run_xnli.py
+1
-1
examples/pytorch/token-classification/run_ner.py
examples/pytorch/token-classification/run_ner.py
+1
-1
examples/pytorch/token-classification/run_ner_no_trainer.py
examples/pytorch/token-classification/run_ner_no_trainer.py
+3
-3
examples/pytorch/translation/run_translation.py
examples/pytorch/translation/run_translation.py
+3
-3
examples/pytorch/translation/run_translation_no_trainer.py
examples/pytorch/translation/run_translation_no_trainer.py
+5
-5
examples/research_projects/bert-loses-patience/run_glue_with_pabee.py
...earch_projects/bert-loses-patience/run_glue_with_pabee.py
+1
-1
examples/research_projects/deebert/run_glue_deebert.py
examples/research_projects/deebert/run_glue_deebert.py
+1
-1
examples/research_projects/distillation/run_squad_w_distillation.py
...esearch_projects/distillation/run_squad_w_distillation.py
+1
-1
examples/research_projects/distillation/train.py
examples/research_projects/distillation/train.py
+1
-1
examples/research_projects/information-gain-filtration/run_clm_igf.py
...earch_projects/information-gain-filtration/run_clm_igf.py
+1
-1
examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py
...cts/jax-projects/dataset-streaming/run_mlm_flax_stream.py
+2
-2
examples/research_projects/jax-projects/hybrid_clip/run_hybrid_clip.py
...arch_projects/jax-projects/hybrid_clip/run_hybrid_clip.py
+4
-4
examples/research_projects/jax-projects/model_parallel/run_clm_mp.py
...search_projects/jax-projects/model_parallel/run_clm_mp.py
+3
-3
examples/research_projects/luke/run_luke_ner_no_trainer.py
examples/research_projects/luke/run_luke_ner_no_trainer.py
+1
-1
examples/research_projects/mlm_wwm/run_mlm_wwm.py
examples/research_projects/mlm_wwm/run_mlm_wwm.py
+1
-1
No files found.
examples/pytorch/summarization/run_summarization.py
View file @
40ea9ab2
...
...
@@ -120,7 +120,7 @@ class ModelArguments:
metadata
=
{
"help"
:
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
)
},
...
...
@@ -205,7 +205,7 @@ class DataTrainingArguments:
metadata
=
{
"help"
:
(
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`.
"
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
...
...
@@ -271,7 +271,7 @@ class DataTrainingArguments:
default
=
None
,
metadata
=
{
"help"
:
(
"The token to force as the first generated token after the decoder_start_token_id."
"The token to force as the first generated token after the decoder_start_token_id.
"
"Useful for multilingual models like mBART where the first generated token"
"needs to be the target language token (Usually it is the target language token)"
)
...
...
@@ -556,7 +556,7 @@ def main():
if
training_args
.
label_smoothing_factor
>
0
and
not
hasattr
(
model
,
"prepare_decoder_input_ids_from_labels"
):
logger
.
warning
(
"label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
"label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for
"
f
"`
{
model
.
__class__
.
__name__
}
`. This will lead to loss being calculated twice and will take up more memory"
)
...
...
examples/pytorch/summarization/run_summarization_no_trainer.py
View file @
40ea9ab2
...
...
@@ -146,7 +146,7 @@ def parse_args():
default
=
128
,
help
=
(
"The maximum total sequence length for target text after "
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded."
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.
"
"during ``evaluate`` and ``predict``."
),
)
...
...
@@ -272,7 +272,7 @@ def parse_args():
default
=
False
,
help
=
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
),
)
...
...
@@ -299,7 +299,7 @@ def parse_args():
default
=
"all"
,
help
=
(
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.
'
"Only applicable when `--with_tracking` is passed."
),
)
...
...
@@ -433,7 +433,7 @@ def main():
)
else
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
examples/pytorch/text-classification/run_classification.py
View file @
40ea9ab2
...
...
@@ -82,7 +82,7 @@ class DataTrainingArguments:
default
=
None
,
metadata
=
{
"help"
:
(
"The name of the text column in the input dataset or a CSV/JSON file."
"The name of the text column in the input dataset or a CSV/JSON file.
"
'If not specified, will use the "sentence" column for single/multi-label classifcation task.'
)
},
...
...
@@ -120,7 +120,7 @@ class DataTrainingArguments:
default
=
None
,
metadata
=
{
"help"
:
(
"The name of the label column in the input dataset or a CSV/JSON file."
"The name of the label column in the input dataset or a CSV/JSON file.
"
'If not specified, will use the "label" column for single/multi-label classifcation task'
)
},
...
...
@@ -248,7 +248,7 @@ class ModelArguments:
metadata
=
{
"help"
:
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
)
},
...
...
@@ -552,7 +552,7 @@ def main():
if
data_args
.
max_seq_length
>
tokenizer
.
model_max_length
:
logger
.
warning
(
f
"The max_seq_length passed (
{
data_args
.
max_seq_length
}
) is larger than the maximum length for the"
f
"The max_seq_length passed (
{
data_args
.
max_seq_length
}
) is larger than the maximum length for the
"
f
"model (
{
tokenizer
.
model_max_length
}
). Using max_seq_length=
{
tokenizer
.
model_max_length
}
."
)
max_seq_length
=
min
(
data_args
.
max_seq_length
,
tokenizer
.
model_max_length
)
...
...
examples/pytorch/text-classification/run_glue.py
View file @
40ea9ab2
...
...
@@ -209,7 +209,7 @@ class ModelArguments:
metadata
=
{
"help"
:
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
)
},
...
...
@@ -456,7 +456,7 @@ def main():
if
data_args
.
max_seq_length
>
tokenizer
.
model_max_length
:
logger
.
warning
(
f
"The max_seq_length passed (
{
data_args
.
max_seq_length
}
) is larger than the maximum length for the"
f
"The max_seq_length passed (
{
data_args
.
max_seq_length
}
) is larger than the maximum length for the
"
f
"model (
{
tokenizer
.
model_max_length
}
). Using max_seq_length=
{
tokenizer
.
model_max_length
}
."
)
max_seq_length
=
min
(
data_args
.
max_seq_length
,
tokenizer
.
model_max_length
)
...
...
examples/pytorch/text-classification/run_glue_no_trainer.py
View file @
40ea9ab2
...
...
@@ -162,7 +162,7 @@ def parse_args():
default
=
False
,
help
=
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
),
)
...
...
@@ -189,7 +189,7 @@ def parse_args():
default
=
"all"
,
help
=
(
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.
'
"Only applicable when `--with_tracking` is passed."
),
)
...
...
examples/pytorch/text-classification/run_xnli.py
View file @
40ea9ab2
...
...
@@ -173,7 +173,7 @@ class ModelArguments:
metadata
=
{
"help"
:
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
)
},
...
...
examples/pytorch/token-classification/run_ner.py
View file @
40ea9ab2
...
...
@@ -100,7 +100,7 @@ class ModelArguments:
metadata
=
{
"help"
:
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
)
},
...
...
examples/pytorch/token-classification/run_ner_no_trainer.py
View file @
40ea9ab2
...
...
@@ -216,7 +216,7 @@ def parse_args():
default
=
False
,
help
=
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
),
)
...
...
@@ -243,7 +243,7 @@ def parse_args():
default
=
"all"
,
help
=
(
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.
'
"Only applicable when `--with_tracking` is passed."
),
)
...
...
@@ -412,7 +412,7 @@ def main():
tokenizer_name_or_path
=
args
.
tokenizer_name
if
args
.
tokenizer_name
else
args
.
model_name_or_path
if
not
tokenizer_name_or_path
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
examples/pytorch/translation/run_translation.py
View file @
40ea9ab2
...
...
@@ -110,7 +110,7 @@ class ModelArguments:
metadata
=
{
"help"
:
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
)
},
...
...
@@ -173,7 +173,7 @@ class DataTrainingArguments:
metadata
=
{
"help"
:
(
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`.
"
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
...
...
@@ -468,7 +468,7 @@ def main():
if
training_args
.
label_smoothing_factor
>
0
and
not
hasattr
(
model
,
"prepare_decoder_input_ids_from_labels"
):
logger
.
warning
(
"label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
"label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for
"
f
"`
{
model
.
__class__
.
__name__
}
`. This will lead to loss being calculated twice and will take up more memory"
)
...
...
examples/pytorch/translation/run_translation_no_trainer.py
View file @
40ea9ab2
...
...
@@ -118,7 +118,7 @@ def parse_args():
default
=
128
,
help
=
(
"The maximum total sequence length for target text after "
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded
.
"
"tokenization. Sequences longer than this will be truncated, sequences shorter will be padded
"
"during ``evaluate`` and ``predict``."
),
)
...
...
@@ -139,7 +139,7 @@ def parse_args():
default
=
False
,
help
=
(
"Whether to pad all samples to model maximum sentence "
"length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More"
"length. If False, will pad the samples dynamically when batching to the maximum length in the batch. More
"
"efficient on GPU but very bad for TPU."
),
)
...
...
@@ -263,7 +263,7 @@ def parse_args():
default
=
False
,
help
=
(
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will"
"should only be set to `True` for repositories you trust and in which you have read the code, as it will
"
"execute code present on the Hub on your local machine."
),
)
...
...
@@ -290,7 +290,7 @@ def parse_args():
default
=
"all"
,
help
=
(
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.
'
"Only applicable when `--with_tracking` is passed."
),
)
...
...
@@ -413,7 +413,7 @@ def main():
)
else
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
examples/research_projects/bert-loses-patience/run_glue_with_pabee.py
View file @
40ea9ab2
...
...
@@ -575,7 +575,7 @@ def main():
type
=
str
,
default
=
"O1"
,
help
=
(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3'].
"
"See details at https://nvidia.github.io/apex/amp.html"
),
)
...
...
examples/research_projects/deebert/run_glue_deebert.py
View file @
40ea9ab2
...
...
@@ -532,7 +532,7 @@ def main():
type
=
str
,
default
=
"O1"
,
help
=
(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3'].
"
"See details at https://nvidia.github.io/apex/amp.html"
),
)
...
...
examples/research_projects/distillation/run_squad_w_distillation.py
View file @
40ea9ab2
...
...
@@ -696,7 +696,7 @@ def main():
type
=
str
,
default
=
"O1"
,
help
=
(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3'].
"
"See details at https://nvidia.github.io/apex/amp.html"
),
)
...
...
examples/research_projects/distillation/train.py
View file @
40ea9ab2
...
...
@@ -208,7 +208,7 @@ def main():
type
=
str
,
default
=
"O1"
,
help
=
(
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3'].
"
"See details at https://nvidia.github.io/apex/amp.html"
),
)
...
...
examples/research_projects/information-gain-filtration/run_clm_igf.py
View file @
40ea9ab2
...
...
@@ -354,7 +354,7 @@ def main():
default
=
10
,
type
=
int
,
help
=
(
"decay the selectivity of our secondary learner filter from"
"decay the selectivity of our secondary learner filter from
"
"1 standard deviation above average to 1 below average after 10 batches"
),
)
...
...
examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py
View file @
40ea9ab2
...
...
@@ -341,7 +341,7 @@ if __name__ == "__main__":
and
not
training_args
.
overwrite_output_dir
):
raise
ValueError
(
f
"Output directory (
{
training_args
.
output_dir
}
) already exists and is not empty."
f
"Output directory (
{
training_args
.
output_dir
}
) already exists and is not empty.
"
"Use --overwrite_output_dir to overcome."
)
...
...
@@ -399,7 +399,7 @@ if __name__ == "__main__":
)
else
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
examples/research_projects/jax-projects/hybrid_clip/run_hybrid_clip.py
View file @
40ea9ab2
...
...
@@ -78,7 +78,7 @@ class ModelArguments:
text_model_name_or_path
:
str
=
field
(
metadata
=
{
"help"
:
(
"The text model checkpoint for weights initialization."
"The text model checkpoint for weights initialization.
"
"Don't set if you want to train a model from scratch."
)
},
...
...
@@ -86,7 +86,7 @@ class ModelArguments:
vision_model_name_or_path
:
str
=
field
(
metadata
=
{
"help"
:
(
"The vision model checkpoint for weights initialization."
"The vision model checkpoint for weights initialization.
"
"Don't set if you want to train a model from scratch."
)
},
...
...
@@ -311,7 +311,7 @@ def main():
and
not
training_args
.
overwrite_output_dir
):
raise
ValueError
(
f
"Output directory (
{
training_args
.
output_dir
}
) already exists and is not empty."
f
"Output directory (
{
training_args
.
output_dir
}
) already exists and is not empty.
"
"Use --overwrite_output_dir to overcome."
)
...
...
@@ -341,7 +341,7 @@ def main():
)
else
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
examples/research_projects/jax-projects/model_parallel/run_clm_mp.py
View file @
40ea9ab2
...
...
@@ -246,7 +246,7 @@ def main():
and
not
training_args
.
overwrite_output_dir
):
raise
ValueError
(
f
"Output directory (
{
training_args
.
output_dir
}
) already exists and is not empty."
f
"Output directory (
{
training_args
.
output_dir
}
) already exists and is not empty.
"
"Use --overwrite_output_dir to overcome."
)
...
...
@@ -325,7 +325,7 @@ def main():
)
else
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
@@ -368,7 +368,7 @@ def main():
else
:
if
data_args
.
block_size
>
tokenizer
.
model_max_length
:
logger
.
warning
(
f
"The block_size passed (
{
data_args
.
block_size
}
) is larger than the maximum length for the model"
f
"The block_size passed (
{
data_args
.
block_size
}
) is larger than the maximum length for the model
"
f
"(
{
tokenizer
.
model_max_length
}
). Using block_size=
{
tokenizer
.
model_max_length
}
."
)
block_size
=
min
(
data_args
.
block_size
,
tokenizer
.
model_max_length
)
...
...
examples/research_projects/luke/run_luke_ner_no_trainer.py
View file @
40ea9ab2
...
...
@@ -357,7 +357,7 @@ def main():
tokenizer_name_or_path
=
args
.
tokenizer_name
if
args
.
tokenizer_name
else
args
.
model_name_or_path
if
not
tokenizer_name_or_path
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
examples/research_projects/mlm_wwm/run_mlm_wwm.py
View file @
40ea9ab2
...
...
@@ -314,7 +314,7 @@ def main():
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_args
.
model_name_or_path
,
**
tokenizer_kwargs
)
else
:
raise
ValueError
(
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
"You are instantiating a new tokenizer from scratch. This is not supported by this script.
"
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment