Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
39fa4009
Unverified
Commit
39fa4009
authored
Jan 29, 2024
by
Klaus Hipp
Committed by
GitHub
Jan 29, 2024
Browse files
Fix input data file extension in examples (#28741)
parent
5649c0cb
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
43 additions
and
20 deletions
+43
-20
examples/flax/language-modeling/run_bart_dlm_flax.py
examples/flax/language-modeling/run_bart_dlm_flax.py
+2
-1
examples/flax/language-modeling/run_clm_flax.py
examples/flax/language-modeling/run_clm_flax.py
+2
-1
examples/flax/language-modeling/run_mlm_flax.py
examples/flax/language-modeling/run_mlm_flax.py
+2
-1
examples/flax/language-modeling/run_t5_mlm_flax.py
examples/flax/language-modeling/run_t5_mlm_flax.py
+2
-1
examples/pytorch/language-modeling/run_clm_no_trainer.py
examples/pytorch/language-modeling/run_clm_no_trainer.py
+2
-1
examples/pytorch/language-modeling/run_mlm_no_trainer.py
examples/pytorch/language-modeling/run_mlm_no_trainer.py
+2
-1
examples/pytorch/language-modeling/run_plm.py
examples/pytorch/language-modeling/run_plm.py
+2
-1
examples/pytorch/multiple-choice/run_swag.py
examples/pytorch/multiple-choice/run_swag.py
+2
-1
examples/pytorch/multiple-choice/run_swag_no_trainer.py
examples/pytorch/multiple-choice/run_swag_no_trainer.py
+2
-1
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
...torch/question-answering/run_qa_beam_search_no_trainer.py
+3
-1
examples/pytorch/question-answering/run_qa_no_trainer.py
examples/pytorch/question-answering/run_qa_no_trainer.py
+3
-1
examples/pytorch/summarization/run_summarization_no_trainer.py
...les/pytorch/summarization/run_summarization_no_trainer.py
+2
-1
examples/pytorch/token-classification/run_ner.py
examples/pytorch/token-classification/run_ner.py
+3
-1
examples/pytorch/token-classification/run_ner_no_trainer.py
examples/pytorch/token-classification/run_ner_no_trainer.py
+2
-1
examples/pytorch/translation/run_translation_no_trainer.py
examples/pytorch/translation/run_translation_no_trainer.py
+2
-1
examples/research_projects/jax-projects/model_parallel/run_clm_mp.py
...search_projects/jax-projects/model_parallel/run_clm_mp.py
+2
-1
examples/research_projects/luke/run_luke_ner_no_trainer.py
examples/research_projects/luke/run_luke_ner_no_trainer.py
+2
-1
examples/research_projects/mlm_wwm/run_mlm_wwm.py
examples/research_projects/mlm_wwm/run_mlm_wwm.py
+2
-1
examples/research_projects/performer/run_mlm_performer.py
examples/research_projects/performer/run_mlm_performer.py
+2
-1
examples/tensorflow/language-modeling/run_mlm.py
examples/tensorflow/language-modeling/run_mlm.py
+2
-1
No files found.
examples/flax/language-modeling/run_bart_dlm_flax.py
View file @
39fa4009
...
...
@@ -558,9 +558,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
datasets
=
load_dataset
(
...
...
examples/flax/language-modeling/run_clm_flax.py
View file @
39fa4009
...
...
@@ -449,9 +449,10 @@ def main():
dataset_args
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
dataset_args
[
"keep_linebreaks"
]
=
data_args
.
keep_linebreaks
...
...
examples/flax/language-modeling/run_mlm_flax.py
View file @
39fa4009
...
...
@@ -485,9 +485,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
datasets
=
load_dataset
(
...
...
examples/flax/language-modeling/run_t5_mlm_flax.py
View file @
39fa4009
...
...
@@ -599,9 +599,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
datasets
=
load_dataset
(
...
...
examples/pytorch/language-modeling/run_clm_no_trainer.py
View file @
39fa4009
...
...
@@ -345,9 +345,10 @@ def main():
dataset_args
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
dataset_args
[
"keep_linebreaks"
]
=
not
args
.
no_keep_linebreaks
...
...
examples/pytorch/language-modeling/run_mlm_no_trainer.py
View file @
39fa4009
...
...
@@ -351,9 +351,10 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
...
...
examples/pytorch/language-modeling/run_plm.py
View file @
39fa4009
...
...
@@ -328,9 +328,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
...
...
examples/pytorch/multiple-choice/run_swag.py
View file @
39fa4009
...
...
@@ -311,9 +311,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
...
...
examples/pytorch/multiple-choice/run_swag_no_trainer.py
View file @
39fa4009
...
...
@@ -357,9 +357,10 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# Trim a number of training examples
if
args
.
debug
:
...
...
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
View file @
39fa4009
...
...
@@ -362,11 +362,13 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
validation_file
.
split
(
"."
)[
-
1
]
if
args
.
test_file
is
not
None
:
data_files
[
"test"
]
=
args
.
test_file
extension
=
args
.
t
rain
_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
t
est
_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
field
=
"data"
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/question-answering/run_qa_no_trainer.py
View file @
39fa4009
...
...
@@ -410,11 +410,13 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
validation_file
.
split
(
"."
)[
-
1
]
if
args
.
test_file
is
not
None
:
data_files
[
"test"
]
=
args
.
test_file
extension
=
args
.
t
rain
_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
t
est
_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
field
=
"data"
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/summarization/run_summarization_no_trainer.py
View file @
39fa4009
...
...
@@ -404,9 +404,10 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/token-classification/run_ner.py
View file @
39fa4009
...
...
@@ -311,11 +311,13 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
validation_file
.
split
(
"."
)[
-
1
]
if
data_args
.
test_file
is
not
None
:
data_files
[
"test"
]
=
data_args
.
test_file
extension
=
data_args
.
t
rain
_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
t
est
_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/token-classification/run_ner_no_trainer.py
View file @
39fa4009
...
...
@@ -339,9 +339,10 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# Trim a number of training examples
if
args
.
debug
:
...
...
examples/pytorch/translation/run_translation_no_trainer.py
View file @
39fa4009
...
...
@@ -384,9 +384,10 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/research_projects/jax-projects/model_parallel/run_clm_mp.py
View file @
39fa4009
...
...
@@ -297,9 +297,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
dataset
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
...
...
examples/research_projects/luke/run_luke_ner_no_trainer.py
View file @
39fa4009
...
...
@@ -285,9 +285,10 @@ def main():
data_files
=
{}
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# Trim a number of training examples
if
args
.
debug
:
...
...
examples/research_projects/mlm_wwm/run_mlm_wwm.py
View file @
39fa4009
...
...
@@ -271,9 +271,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
...
...
examples/research_projects/performer/run_mlm_performer.py
View file @
39fa4009
...
...
@@ -517,9 +517,10 @@ if __name__ == "__main__":
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
...
...
examples/tensorflow/language-modeling/run_mlm.py
View file @
39fa4009
...
...
@@ -341,9 +341,10 @@ def main():
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
extension
=
"text"
raw_datasets
=
load_dataset
(
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment