Commit 70c10caa
Authored Aug 05, 2019 by thomwolf

    add option mentioned in #940

Parent: 077ad693
Showing 2 changed files with 12 additions and 0 deletions:

    examples/run_glue.py   +6 -0
    examples/run_squad.py  +6 -0
examples/run_glue.py

@@ -247,6 +247,9 @@ def evaluate(args, model, tokenizer, prefix=""):
 
 def load_and_cache_examples(args, task, tokenizer, evaluate=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     processor = processors[task]()
     output_mode = output_modes[task]
     # Load data features from cache or dataset file
@@ -273,6 +276,9 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)
 
+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
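Both hunks implement the same rank-0 caching pattern: in a distributed run, every process except rank 0 blocks at the first torch.distributed.barrier() while rank 0 (or a single-process run, where local_rank is -1) converts the dataset and writes the feature cache; rank 0 then enters the second barrier, releasing the waiting ranks, which find the cached file on disk and load it instead of re-processing. A minimal sketch of the pattern in isolation, assuming a process group has already been initialized; the helper and file names here are illustrative, not taken from the commit:

    import os
    import torch
    import torch.distributed as dist

    def build_features():
        # Stand-in for the expensive tokenization / feature-conversion step.
        return torch.arange(10)

    def load_and_cache(local_rank, cache_path="cached_features.pt"):
        if local_rank not in [-1, 0]:
            dist.barrier()  # non-zero ranks wait here until rank 0 has written the cache
        if os.path.exists(cache_path):
            features = torch.load(cache_path)
        else:
            features = build_features()
            torch.save(features, cache_path)
        if local_rank == 0:
            dist.barrier()  # rank 0 joins last, releasing the waiting ranks
        return features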
examples/run_squad.py

@@ -272,6 +272,9 @@ def evaluate(args, model, tokenizer, prefix=""):
 
 def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     # Load data features from cache or dataset file
     input_file = args.predict_file if evaluate else args.train_file
     cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(
@@ -296,6 +299,9 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)
 
+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training process the dataset, and the others will use the cache
+
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
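The run_squad.py change mirrors run_glue.py exactly. For the barriers to work, a process group must already exist and local_rank must be set; in these example scripts that happens at startup, roughly along these lines (a condensed sketch, not the verbatim script code):

    import argparse
    import torch

    parser = argparse.ArgumentParser()
    # Supplied automatically when the script is launched with the PyTorch
    # launcher, e.g.: python -m torch.distributed.launch --nproc_per_node=4 run_squad.py ...
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="-1 disables distributed training (single process)")
    args = parser.parse_args()

    if args.local_rank != -1:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl")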