Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
4fb64e28
Unverified
Commit
4fb64e28
authored
Sep 13, 2023
by
Phuc Van Phan
Committed by
GitHub
Sep 13, 2023
Browse files
chore: correct update_step and correct gradient_accumulation_steps (#26068)
parent
8f609ab9
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
13 additions
and
12 deletions
+13
-12
examples/pytorch/image-classification/run_image_classification_no_trainer.py
...age-classification/run_image_classification_no_trainer.py
+1
-1
examples/pytorch/image-pretraining/run_mim_no_trainer.py
examples/pytorch/image-pretraining/run_mim_no_trainer.py
+1
-1
examples/pytorch/language-modeling/run_mlm_no_trainer.py
examples/pytorch/language-modeling/run_mlm_no_trainer.py
+1
-1
examples/pytorch/multiple-choice/run_swag_no_trainer.py
examples/pytorch/multiple-choice/run_swag_no_trainer.py
+1
-1
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
...torch/question-answering/run_qa_beam_search_no_trainer.py
+1
-1
examples/pytorch/question-answering/run_qa_no_trainer.py
examples/pytorch/question-answering/run_qa_no_trainer.py
+3
-2
examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
...ntic-segmentation/run_semantic_segmentation_no_trainer.py
+1
-1
examples/pytorch/summarization/run_summarization_no_trainer.py
...les/pytorch/summarization/run_summarization_no_trainer.py
+1
-1
examples/pytorch/text-classification/run_glue_no_trainer.py
examples/pytorch/text-classification/run_glue_no_trainer.py
+1
-1
examples/pytorch/token-classification/run_ner_no_trainer.py
examples/pytorch/token-classification/run_ner_no_trainer.py
+1
-1
examples/pytorch/translation/run_translation_no_trainer.py
examples/pytorch/translation/run_translation_no_trainer.py
+1
-1
No files found.
examples/pytorch/image-classification/run_image_classification_no_trainer.py
View file @
4fb64e28
...
@@ -477,8 +477,8 @@ def main():
...
@@ -477,8 +477,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/image-pretraining/run_mim_no_trainer.py
View file @
4fb64e28
...
@@ -701,8 +701,8 @@ def main():
...
@@ -701,8 +701,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/language-modeling/run_mlm_no_trainer.py
View file @
4fb64e28
...
@@ -636,8 +636,8 @@ def main():
...
@@ -636,8 +636,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/multiple-choice/run_swag_no_trainer.py
View file @
4fb64e28
...
@@ -583,8 +583,8 @@ def main():
...
@@ -583,8 +583,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py
View file @
4fb64e28
...
@@ -820,8 +820,8 @@ def main():
...
@@ -820,8 +820,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/question-answering/run_qa_no_trainer.py
View file @
4fb64e28
...
@@ -848,10 +848,11 @@ def main():
...
@@ -848,10 +848,11 @@ def main():
resume_step
=
None
resume_step
=
None
completed_steps
=
starting_epoch
*
num_update_steps_per_epoch
completed_steps
=
starting_epoch
*
num_update_steps_per_epoch
else
:
else
:
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
View file @
4fb64e28
...
@@ -581,8 +581,8 @@ def main():
...
@@ -581,8 +581,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/summarization/run_summarization_no_trainer.py
View file @
4fb64e28
...
@@ -652,8 +652,8 @@ def main():
...
@@ -652,8 +652,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/text-classification/run_glue_no_trainer.py
View file @
4fb64e28
...
@@ -530,8 +530,8 @@ def main():
...
@@ -530,8 +530,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/token-classification/run_ner_no_trainer.py
View file @
4fb64e28
...
@@ -690,8 +690,8 @@ def main():
...
@@ -690,8 +690,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
examples/pytorch/translation/run_translation_no_trainer.py
View file @
4fb64e28
...
@@ -633,8 +633,8 @@ def main():
...
@@ -633,8 +633,8 @@ def main():
# need to multiply `gradient_accumulation_steps` to reflect real steps
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
resume_step
=
int
(
training_difference
.
replace
(
"step_"
,
""
))
*
args
.
gradient_accumulation_steps
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
starting_epoch
=
resume_step
//
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
resume_step
-=
starting_epoch
*
len
(
train_dataloader
)
completed_steps
=
resume_step
//
args
.
gradient_accumulation_steps
# update the progress_bar if load from checkpoint
# update the progress_bar if load from checkpoint
progress_bar
.
update
(
completed_steps
)
progress_bar
.
update
(
completed_steps
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment