Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
f24232cd
Commit
f24232cd
authored
Jan 08, 2020
by
Lysandre Debut
Browse files
Fix error with global step in run_squad.py
parent
1b59b57b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
9 deletions
+13
-9
examples/run_squad.py
examples/run_squad.py
+13
-9
No files found.
examples/run_squad.py
View file @
f24232cd
...
...
@@ -170,15 +170,19 @@ def train(args, train_dataset, model, tokenizer):
steps_trained_in_current_epoch
=
0
# Check if continuing training from a checkpoint
if
os
.
path
.
exists
(
args
.
model_name_or_path
):
# set global_step to global_step of last saved checkpoint from model path
global_step
=
int
(
args
.
model_name_or_path
.
split
(
"-"
)[
-
1
].
split
(
"/"
)[
0
])
epochs_trained
=
global_step
//
(
len
(
train_dataloader
)
//
args
.
gradient_accumulation_steps
)
steps_trained_in_current_epoch
=
global_step
%
(
len
(
train_dataloader
)
//
args
.
gradient_accumulation_steps
)
logger
.
info
(
" Continuing training from checkpoint, will skip to saved global_step"
)
logger
.
info
(
" Continuing training from epoch %d"
,
epochs_trained
)
logger
.
info
(
" Continuing training from global step %d"
,
global_step
)
logger
.
info
(
" Will skip the first %d steps in the first epoch"
,
steps_trained_in_current_epoch
)
try
:
# set global_step to global_step of last saved checkpoint from model path
checkpoint_suffix
=
args
.
model_name_or_path
.
split
(
"-"
)[
-
1
].
split
(
"/"
)[
0
]
global_step
=
int
(
checkpoint_suffix
)
epochs_trained
=
global_step
//
(
len
(
train_dataloader
)
//
args
.
gradient_accumulation_steps
)
steps_trained_in_current_epoch
=
global_step
%
(
len
(
train_dataloader
)
//
args
.
gradient_accumulation_steps
)
logger
.
info
(
" Continuing training from checkpoint, will skip to saved global_step"
)
logger
.
info
(
" Continuing training from epoch %d"
,
epochs_trained
)
logger
.
info
(
" Continuing training from global step %d"
,
global_step
)
logger
.
info
(
" Will skip the first %d steps in the first epoch"
,
steps_trained_in_current_epoch
)
except
ValueError
:
logger
.
info
(
" Starting fine-tuning."
)
tr_loss
,
logging_loss
=
0.0
,
0.0
model
.
zero_grad
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment