Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
39fa4009
"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "51e81e589521e816bbdf6cbc09ebccc677a18778"
Unverified
Commit
39fa4009
authored
Jan 29, 2024
by
Klaus Hipp
Committed by
GitHub
Jan 29, 2024
Browse files
Fix input data file extension in examples (#28741)
parent
5649c0cb
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
43 additions
and
20 deletions
+43
-20
examples/flax/language-modeling/run_bart_dlm_flax.py
examples/flax/language-modeling/run_bart_dlm_flax.py
+2
-1
examples/flax/language-modeling/run_clm_flax.py
examples/flax/language-modeling/run_clm_flax.py
+2
-1
examples/flax/language-modeling/run_mlm_flax.py
examples/flax/language-modeling/run_mlm_flax.py
+2
-1
examples/flax/language-modeling/run_t5_mlm_flax.py
examples/flax/language-modeling/run_t5_mlm_flax.py
+2
-1
examples/pytorch/language-modeling/run_clm_no_trainer.py
examples/pytorch/language-modeling/run_clm_no_trainer.py
+2
-1
examples/pytorch/language-modeling/run_mlm_no_trainer.py
examples/pytorch/language-modeling/run_mlm_no_trainer.py
+2
-1
examples/pytorch/language-modeling/run_plm.py
examples/pytorch/language-modeling/run_plm.py
+2
-1
examples/pytorch/multiple-choice/run_swag.py
examples/pytorch/multiple-choice/run_swag.py
+2
-1
examples/pytorch/multiple-choice/run_swag_no_trainer.py
examples/pytorch/multiple-choice/run_swag_no_trainer.py
+2
-1
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
...torch/question-answering/run_qa_beam_search_no_trainer.py
+3
-1
examples/pytorch/question-answering/run_qa_no_trainer.py
examples/pytorch/question-answering/run_qa_no_trainer.py
+3
-1
examples/pytorch/summarization/run_summarization_no_trainer.py
...les/pytorch/summarization/run_summarization_no_trainer.py
+2
-1
examples/pytorch/token-classification/run_ner.py
examples/pytorch/token-classification/run_ner.py
+3
-1
examples/pytorch/token-classification/run_ner_no_trainer.py
examples/pytorch/token-classification/run_ner_no_trainer.py
+2
-1
examples/pytorch/translation/run_translation_no_trainer.py
examples/pytorch/translation/run_translation_no_trainer.py
+2
-1
examples/research_projects/jax-projects/model_parallel/run_clm_mp.py
...search_projects/jax-projects/model_parallel/run_clm_mp.py
+2
-1
examples/research_projects/luke/run_luke_ner_no_trainer.py
examples/research_projects/luke/run_luke_ner_no_trainer.py
+2
-1
examples/research_projects/mlm_wwm/run_mlm_wwm.py
examples/research_projects/mlm_wwm/run_mlm_wwm.py
+2
-1
examples/research_projects/performer/run_mlm_performer.py
examples/research_projects/performer/run_mlm_performer.py
+2
-1
examples/tensorflow/language-modeling/run_mlm.py
examples/tensorflow/language-modeling/run_mlm.py
+2
-1
No files found.
examples/flax/language-modeling/run_bart_dlm_flax.py
View file @
39fa4009
...
@@ -558,9 +558,10 @@ def main():
...
@@ -558,9 +558,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
datasets
=
load_dataset
(
datasets
=
load_dataset
(
...
...
examples/flax/language-modeling/run_clm_flax.py
View file @
39fa4009
...
@@ -449,9 +449,10 @@ def main():
...
@@ -449,9 +449,10 @@ def main():
dataset_args
=
{}
dataset_args
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
dataset_args
[
"keep_linebreaks"
]
=
data_args
.
keep_linebreaks
dataset_args
[
"keep_linebreaks"
]
=
data_args
.
keep_linebreaks
...
...
examples/flax/language-modeling/run_mlm_flax.py
View file @
39fa4009
...
@@ -485,9 +485,10 @@ def main():
...
@@ -485,9 +485,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
datasets
=
load_dataset
(
datasets
=
load_dataset
(
...
...
examples/flax/language-modeling/run_t5_mlm_flax.py
View file @
39fa4009
...
@@ -599,9 +599,10 @@ def main():
...
@@ -599,9 +599,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
datasets
=
load_dataset
(
datasets
=
load_dataset
(
...
...
examples/pytorch/language-modeling/run_clm_no_trainer.py
View file @
39fa4009
...
@@ -345,9 +345,10 @@ def main():
...
@@ -345,9 +345,10 @@ def main():
dataset_args
=
{}
dataset_args
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
dataset_args
[
"keep_linebreaks"
]
=
not
args
.
no_keep_linebreaks
dataset_args
[
"keep_linebreaks"
]
=
not
args
.
no_keep_linebreaks
...
...
examples/pytorch/language-modeling/run_mlm_no_trainer.py
View file @
39fa4009
...
@@ -351,9 +351,10 @@ def main():
...
@@ -351,9 +351,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
...
...
examples/pytorch/language-modeling/run_plm.py
View file @
39fa4009
...
@@ -328,9 +328,10 @@ def main():
...
@@ -328,9 +328,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
...
...
examples/pytorch/multiple-choice/run_swag.py
View file @
39fa4009
...
@@ -311,9 +311,10 @@ def main():
...
@@ -311,9 +311,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
raw_datasets
=
load_dataset
(
extension
,
extension
,
data_files
=
data_files
,
data_files
=
data_files
,
...
...
examples/pytorch/multiple-choice/run_swag_no_trainer.py
View file @
39fa4009
...
@@ -357,9 +357,10 @@ def main():
...
@@ -357,9 +357,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# Trim a number of training examples
# Trim a number of training examples
if
args
.
debug
:
if
args
.
debug
:
...
...
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
View file @
39fa4009
...
@@ -362,11 +362,13 @@ def main():
...
@@ -362,11 +362,13 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
validation_file
.
split
(
"."
)[
-
1
]
if
args
.
test_file
is
not
None
:
if
args
.
test_file
is
not
None
:
data_files
[
"test"
]
=
args
.
test_file
data_files
[
"test"
]
=
args
.
test_file
extension
=
args
.
t
rain
_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
t
est
_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
field
=
"data"
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
field
=
"data"
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/question-answering/run_qa_no_trainer.py
View file @
39fa4009
...
@@ -410,11 +410,13 @@ def main():
...
@@ -410,11 +410,13 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
validation_file
.
split
(
"."
)[
-
1
]
if
args
.
test_file
is
not
None
:
if
args
.
test_file
is
not
None
:
data_files
[
"test"
]
=
args
.
test_file
data_files
[
"test"
]
=
args
.
test_file
extension
=
args
.
t
rain
_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
t
est
_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
field
=
"data"
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
field
=
"data"
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/summarization/run_summarization_no_trainer.py
View file @
39fa4009
...
@@ -404,9 +404,10 @@ def main():
...
@@ -404,9 +404,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/token-classification/run_ner.py
View file @
39fa4009
...
@@ -311,11 +311,13 @@ def main():
...
@@ -311,11 +311,13 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
validation_file
.
split
(
"."
)[
-
1
]
if
data_args
.
test_file
is
not
None
:
if
data_args
.
test_file
is
not
None
:
data_files
[
"test"
]
=
data_args
.
test_file
data_files
[
"test"
]
=
data_args
.
test_file
extension
=
data_args
.
t
rain
_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
t
est
_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/pytorch/token-classification/run_ner_no_trainer.py
View file @
39fa4009
...
@@ -339,9 +339,10 @@ def main():
...
@@ -339,9 +339,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# Trim a number of training examples
# Trim a number of training examples
if
args
.
debug
:
if
args
.
debug
:
...
...
examples/pytorch/translation/run_translation_no_trainer.py
View file @
39fa4009
...
@@ -384,9 +384,10 @@ def main():
...
@@ -384,9 +384,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.
# https://huggingface.co/docs/datasets/loading_datasets.
...
...
examples/research_projects/jax-projects/model_parallel/run_clm_mp.py
View file @
39fa4009
...
@@ -297,9 +297,10 @@ def main():
...
@@ -297,9 +297,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
dataset
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
dataset
=
load_dataset
(
extension
,
data_files
=
data_files
,
cache_dir
=
model_args
.
cache_dir
)
...
...
examples/research_projects/luke/run_luke_ner_no_trainer.py
View file @
39fa4009
...
@@ -285,9 +285,10 @@ def main():
...
@@ -285,9 +285,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
args
.
train_file
is
not
None
:
if
args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
args
.
train_file
data_files
[
"train"
]
=
args
.
train_file
extension
=
args
.
train_file
.
split
(
"."
)[
-
1
]
if
args
.
validation_file
is
not
None
:
if
args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
args
.
validation_file
data_files
[
"validation"
]
=
args
.
validation_file
extension
=
args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
raw_datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
# Trim a number of training examples
# Trim a number of training examples
if
args
.
debug
:
if
args
.
debug
:
...
...
examples/research_projects/mlm_wwm/run_mlm_wwm.py
View file @
39fa4009
...
@@ -271,9 +271,10 @@ def main():
...
@@ -271,9 +271,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
...
...
examples/research_projects/performer/run_mlm_performer.py
View file @
39fa4009
...
@@ -517,9 +517,10 @@ if __name__ == "__main__":
...
@@ -517,9 +517,10 @@ if __name__ == "__main__":
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
datasets
=
load_dataset
(
extension
,
data_files
=
data_files
)
...
...
examples/tensorflow/language-modeling/run_mlm.py
View file @
39fa4009
...
@@ -341,9 +341,10 @@ def main():
...
@@ -341,9 +341,10 @@ def main():
data_files
=
{}
data_files
=
{}
if
data_args
.
train_file
is
not
None
:
if
data_args
.
train_file
is
not
None
:
data_files
[
"train"
]
=
data_args
.
train_file
data_files
[
"train"
]
=
data_args
.
train_file
extension
=
data_args
.
train_file
.
split
(
"."
)[
-
1
]
if
data_args
.
validation_file
is
not
None
:
if
data_args
.
validation_file
is
not
None
:
data_files
[
"validation"
]
=
data_args
.
validation_file
data_files
[
"validation"
]
=
data_args
.
validation_file
extension
=
data_args
.
trai
n_file
.
split
(
"."
)[
-
1
]
extension
=
data_args
.
validatio
n_file
.
split
(
"."
)[
-
1
]
if
extension
==
"txt"
:
if
extension
==
"txt"
:
extension
=
"text"
extension
=
"text"
raw_datasets
=
load_dataset
(
raw_datasets
=
load_dataset
(
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment