chenpangpang / transformers · Commits

Commit b191d7db (unverified)

Update all no_trainer with skip_first_batches (#23664)

Authored May 22, 2023 by Zachary Mueller; committed via GitHub on May 22, 2023.
Parent: 26a06814

Showing 12 changed files with 115 additions and 98 deletions (+115 -98).
examples/pytorch/image-classification/run_image_classification_no_trainer.py    +11 -7
examples/pytorch/image-pretraining/run_mim_no_trainer.py                        +9 -11
examples/pytorch/language-modeling/run_clm_no_trainer.py                        +9 -11
examples/pytorch/language-modeling/run_mlm_no_trainer.py                        +9 -11
examples/pytorch/multiple-choice/run_swag_no_trainer.py                         +11 -7
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py            +11 -7
examples/pytorch/question-answering/run_qa_no_trainer.py                        +11 -7
examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py  +12 -8
examples/pytorch/summarization/run_summarization_no_trainer.py                  +11 -7
examples/pytorch/text-classification/run_glue_no_trainer.py                     +6 -6
examples/pytorch/token-classification/run_ner_no_trainer.py                     +6 -6
examples/pytorch/translation/run_translation_no_trainer.py                      +9 -10
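
Every hunk below makes the same change: the old pattern of iterating the full train_dataloader and manually `continue`-ing past already-seen batches is replaced by a call to `accelerator.skip_first_batches`, which returns a dataloader that starts at the resume point. A rough, self-contained sketch of that call outside the example scripts (the toy dataset, batch size, and `resume_step` value here are invented for illustration):

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from accelerate import Accelerator

    accelerator = Accelerator()

    # Toy data: 10 batches of 4 samples each (purely illustrative).
    dataset = TensorDataset(torch.arange(40).float().unsqueeze(1))
    train_dataloader = accelerator.prepare(DataLoader(dataset, batch_size=4))

    resume_step = 3  # pretend a checkpoint was saved after 3 batches

    # The approach introduced by this commit: let Accelerate skip the first `resume_step` batches.
    active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)

    for step, (batch,) in enumerate(active_dataloader):
        # The first batch seen here is the 4th batch of the epoch.
        print(step, batch.flatten().tolist())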
examples/pytorch/image-classification/run_image_classification_no_trainer.py
@@ -451,22 +451,26 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             resume_step = int(training_difference.replace("step_", ""))
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
+    # update the progress_bar if load from checkpoint
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

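The checkpoint bookkeeping in this hunk turns a flat `step_<N>` folder name back into an epoch index plus an offset within that epoch. A small worked example with made-up numbers (not taken from any real run), mirroring the assignments above:

    # Hypothetical values, for illustration only.
    training_difference = "step_2500"          # name of the checkpoint folder
    batches_per_epoch = 1000                   # stands in for len(train_dataloader)

    resume_step = int(training_difference.replace("step_", ""))    # 2500 batches seen so far
    starting_epoch = resume_step // batches_per_epoch               # 2 -> resume in the third epoch
    resume_step -= starting_epoch * batches_per_epoch               # 500 batches into that epoch
    completed_steps = resume_step                                   # seeds the progress bar, as above

    print(starting_epoch, resume_step, completed_steps)             # 2 500 500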
examples/pytorch/image-pretraining/run_mim_no_trainer.py
@@ -660,29 +660,27 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             # need to multiply `gradient_accumulation_steps` to reflect real steps
             resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
     # update the progress_bar if load from checkpoint
-    progress_bar.update(starting_epoch * num_update_steps_per_epoch)
-    completed_steps = starting_epoch * num_update_steps_per_epoch
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    if step % args.gradient_accumulation_steps == 0:
-                        progress_bar.update(1)
-                        completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

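This script (like run_clm, run_mlm, and run_translation below) trains with gradient accumulation, so the `step_<N>` counter saved at checkpoint time counts optimizer updates, not batches, and is multiplied by `gradient_accumulation_steps` before the epoch/offset split. A short illustration with hypothetical numbers:

    # Hypothetical values, for illustration only.
    gradient_accumulation_steps = 4
    saved_optimizer_steps = 600        # the N in a "step_600" checkpoint folder
    batches_per_epoch = 1000           # stands in for len(train_dataloader)

    # One optimizer update consumes `gradient_accumulation_steps` batches,
    # so convert optimizer steps back into raw dataloader batches first.
    resume_step = saved_optimizer_steps * gradient_accumulation_steps   # 2400 batches
    starting_epoch = resume_step // batches_per_epoch                    # 2
    resume_step -= starting_epoch * batches_per_epoch                    # 400 batches into epoch 2

    print(starting_epoch, resume_step)  # 2 400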
examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -566,29 +566,27 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             # need to multiply `gradient_accumulation_steps` to reflect real steps
             resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
     # update the progress_bar if load from checkpoint
-    progress_bar.update(starting_epoch * num_update_steps_per_epoch)
-    completed_steps = starting_epoch * num_update_steps_per_epoch
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    if step % args.gradient_accumulation_steps == 0:
-                        progress_bar.update(1)
-                        completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -610,29 +610,27 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             # need to multiply `gradient_accumulation_steps` to reflect real steps
             resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
     # update the progress_bar if load from checkpoint
-    progress_bar.update(starting_epoch * num_update_steps_per_epoch)
-    completed_steps = starting_epoch * num_update_steps_per_epoch
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    if step % args.gradient_accumulation_steps == 0:
-                        progress_bar.update(1)
-                        completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

examples/pytorch/multiple-choice/run_swag_no_trainer.py
@@ -557,22 +557,26 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             resume_step = int(training_difference.replace("step_", ""))
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
+    # update the progress_bar if load from checkpoint
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
@@ -809,22 +809,26 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             resume_step = int(training_difference.replace("step_", ""))
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
+    # update the progress_bar if load from checkpoint
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

examples/pytorch/question-answering/run_qa_no_trainer.py
@@ -825,22 +825,26 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             resume_step = int(training_difference.replace("step_", ""))
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
+    # update the progress_bar if load from checkpoint
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
@@ -554,22 +554,26 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             resume_step = int(training_difference.replace("step_", ""))
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
+    # update the progress_bar if load from checkpoint
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
+        model.train()
         if args.with_tracking:
             total_loss = 0
-        model.train()
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

examples/pytorch/summarization/run_summarization_no_trainer.py
@@ -626,22 +626,26 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             resume_step = int(training_difference.replace("step_", ""))
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
+    # update the progress_bar if load from checkpoint
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             with accelerator.accumulate(model):
                 outputs = model(**batch)
                 loss = outputs.loss

examples/pytorch/text-classification/run_glue_no_trainer.py
@@ -510,12 +510,12 @@ def main():
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             outputs = model(**batch)
             loss = outputs.loss
             # We keep track of the loss at each epoch

examples/pytorch/token-classification/run_ner_no_trainer.py
@@ -668,12 +668,12 @@ def main():
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             outputs = model(**batch)
             loss = outputs.loss
             # We keep track of the loss at each epoch

examples/pytorch/translation/run_translation_no_trainer.py
@@ -607,28 +607,27 @@ def main():
         if "epoch" in training_difference:
             starting_epoch = int(training_difference.replace("epoch_", "")) + 1
             resume_step = None
+            completed_steps = starting_epoch * num_update_steps_per_epoch
         else:
             # need to multiply `gradient_accumulation_steps` to reflect real steps
             resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
             starting_epoch = resume_step // len(train_dataloader)
             resume_step -= starting_epoch * len(train_dataloader)
+            completed_steps = resume_step
     # update the progress_bar if load from checkpoint
-    progress_bar.update(starting_epoch * num_update_steps_per_epoch)
-    completed_steps = starting_epoch * num_update_steps_per_epoch
+    progress_bar.update(completed_steps)
 
     for epoch in range(starting_epoch, args.num_train_epochs):
         model.train()
         if args.with_tracking:
             total_loss = 0
-        for step, batch in enumerate(train_dataloader):
-            # We need to skip steps until we reach the resumed step
-            if args.resume_from_checkpoint and epoch == starting_epoch:
-                if resume_step is not None and step < resume_step:
-                    if step % args.gradient_accumulation_steps == 0:
-                        progress_bar.update(1)
-                        completed_steps += 1
-                    continue
+        if args.resume_from_checkpoint and epoch == starting_epoch and resume_step is not None:
+            # We skip the first `n` batches in the dataloader when resuming from a checkpoint
+            active_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        else:
+            active_dataloader = train_dataloader
+        for step, batch in enumerate(active_dataloader):
             outputs = model(**batch)
             loss = outputs.loss
             # We keep track of the loss at each epoch

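For intuition, the old manual skip loop and the new `skip_first_batches` call should visit the same remaining batches. A self-contained sanity check on a toy, unshuffled dataloader (not part of this commit; the dataset size and `resume_step` are arbitrary):

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from accelerate import Accelerator

    accelerator = Accelerator()
    loader = accelerator.prepare(DataLoader(TensorDataset(torch.arange(12).float()), batch_size=2))
    resume_step = 2

    # Old style: iterate everything and skip the first `resume_step` batches by hand.
    old_batches = [b[0].tolist() for step, b in enumerate(loader) if step >= resume_step]

    # New style: ask Accelerate for a dataloader that starts after `resume_step` batches.
    new_batches = [b[0].tolist() for b in accelerator.skip_first_batches(loader, resume_step)]

    assert old_batches == new_batches
    print(new_batches)  # the last four batches of the epoch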