chenpangpang / transformers / Commits / 56301bd9

Commit 56301bd9
Authored Oct 05, 2019 by jinoobaek-qz
Committed by Lysandre Debut, Oct 09, 2019

Extract method

parent d6c54697

Changes 1
Showing 1 changed file with 21 additions and 17 deletions

examples/run_lm_finetuning.py  +21  -17
examples/run_lm_finetuning.py  View file @ 56301bd9

...
@@ -106,6 +106,26 @@ def set_seed(args):
         torch.cuda.manual_seed_all(args.seed)
 
 
+def rotate_checkpoints(args):
+    if args.save_total_limit and args.save_total_limit > 0:
+        # Check if we should delete older checkpoint(s)
+        glob_checkpoints = glob.glob(os.path.join(args.output_dir, 'checkpoint-*'))
+        if len(glob_checkpoints) > args.save_total_limit:
+            checkpoints_sorted = []
+            for path in glob_checkpoints:
+                regex_match = re.match('.*checkpoint-([0-9]+)', path)
+                if regex_match and regex_match.groups():
+                    checkpoints_sorted.append((int(regex_match.groups()[0]), path))
+            checkpoints_sorted = sorted(checkpoints_sorted)
+            checkpoints_sorted = [checkpoint[1] for checkpoint in checkpoints_sorted]
+            number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - args.save_total_limit)
+            checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
+            for checkpoint in checkpoints_to_be_deleted:
+                logger.info("Deleting older checkpoint [{}] due to args.save_total_limit".format(checkpoint))
+                shutil.rmtree(checkpoint)
+
+
 def mask_tokens(inputs, tokenizer, args):
     """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """
     labels = inputs.clone()
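
For illustration only, not part of the commit: the extracted rotate_checkpoints() helper sorts the checkpoint-<step> directories numerically by their step suffix and deletes the oldest ones once more than args.save_total_limit exist. A standalone sketch of that sorting-and-selection logic, using hypothetical directory names in place of what glob.glob() would return:

import re

# Hypothetical directories, in the unordered form glob.glob() might return them.
glob_checkpoints = ['out/checkpoint-1500', 'out/checkpoint-500', 'out/checkpoint-1000']
save_total_limit = 2  # stands in for args.save_total_limit

checkpoints_sorted = []
for path in glob_checkpoints:
    regex_match = re.match('.*checkpoint-([0-9]+)', path)
    if regex_match and regex_match.groups():
        # Sort by the integer step so 500 < 1000 < 1500; a plain string sort would misorder them.
        checkpoints_sorted.append((int(regex_match.groups()[0]), path))
checkpoints_sorted = [checkpoint[1] for checkpoint in sorted(checkpoints_sorted)]

number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - save_total_limit)
print(checkpoints_sorted[:number_of_checkpoints_to_delete])
# prints ['out/checkpoint-500'] -- the oldest checkpoint, which would be removed
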
...
@@ -233,23 +253,7 @@ def train(args, train_dataset, model, tokenizer):
                     torch.save(args, os.path.join(output_dir, 'training_args.bin'))
                     logger.info("Saving model checkpoint to %s", output_dir)
 
-                    if args.save_total_limit and args.save_total_limit > 0:
-                        # Check if we should delete older checkpoint(s)
-                        glob_checkpoints = glob.glob(os.path.join(args.output_dir, 'checkpoint-*'))
-                        if len(glob_checkpoints) > args.save_total_limit:
-                            checkpoints_sorted = []
-                            for path in glob_checkpoints:
-                                regex_match = re.match('.*checkpoint-([0-9]+)', path)
-                                if regex_match and regex_match.groups():
-                                    checkpoints_sorted.append((int(regex_match.groups()[0]), path))
-                            checkpoints_sorted = sorted(checkpoints_sorted)
-                            checkpoints_sorted = [checkpoint[1] for checkpoint in checkpoints_sorted]
-                            number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - args.save_total_limit)
-                            checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
-                            for checkpoint in checkpoints_to_be_deleted:
-                                logger.info("Deleting older checkpoint [{}] due to args.save_total_limit".format(checkpoint))
-                                shutil.rmtree(checkpoint)
+                    rotate_checkpoints(args)
 
             if args.max_steps > 0 and global_step > args.max_steps:
                 epoch_iterator.close()
...
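
Also for context rather than from the diff: rotate_checkpoints() reads only args.output_dir and args.save_total_limit, which run_lm_finetuning.py takes from the command line. A minimal, hypothetical argparse sketch covering just those two fields (the real script defines many more options, and these help strings are assumptions):

import argparse

# Hypothetical minimal parser covering only the fields rotate_checkpoints() uses.
parser = argparse.ArgumentParser()
parser.add_argument('--output_dir', type=str, required=True,
                    help='Directory containing the checkpoint-<step> subdirectories.')
parser.add_argument('--save_total_limit', type=int, default=None,
                    help='If set, keep at most this many checkpoints and delete the oldest.')

args = parser.parse_args(['--output_dir', 'out', '--save_total_limit', '2'])
# rotate_checkpoints(args)  # would prune out/checkpoint-* down to the two highest steps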