Unverified commit 4fb64e28, authored by Phuc Van Phan, committed by GitHub

chore: correct update_step and correct gradient_accumulation_steps (#26068)

parent 8f609ab9
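
The hunks below apply the same two corrections across several example training scripts: `completed_steps` is now derived from the global batch count before `resume_step` is reduced to an offset inside the current epoch, and the misspelled attribute lookups removed on the `-` lines (`args.gradient_accumulation_step`, `args.gradient_accumulation_stepp`) become `args.gradient_accumulation_steps`. The sketch below restates the corrected resume arithmetic as a standalone function; the helper name and the sample numbers are illustrative, not taken from the repository.

# Minimal sketch of the resume arithmetic after this commit; the helper name
# and the sample values are hypothetical.
def resume_state(checkpoint_name: str, batches_per_epoch: int, gradient_accumulation_steps: int):
    # Checkpoint names of the form "step_N" count optimizer updates (per the
    # comment in the diff), so multiply to recover dataloader batches consumed.
    resume_step = int(checkpoint_name.replace("step_", "")) * gradient_accumulation_steps
    starting_epoch = resume_step // batches_per_epoch
    # Derive completed_steps from the global batch count first ...
    completed_steps = resume_step // gradient_accumulation_steps
    # ... then shrink resume_step to the batch offset inside the current epoch.
    resume_step -= starting_epoch * batches_per_epoch
    return starting_epoch, resume_step, completed_steps

# Hypothetical run: 100 batches per epoch, accumulation of 4, resuming from "step_60".
# 60 updates * 4 = 240 batches -> epoch 2, batch 40 of that epoch, 60 updates completed.
print(resume_state("step_60", batches_per_epoch=100, gradient_accumulation_steps=4))  # (2, 40, 60)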
@@ -477,8 +477,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_step
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -701,8 +701,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
-resume_step -= starting_epoch * len(train_dataloader)
 completed_steps = resume_step // args.gradient_accumulation_steps
+resume_step -= starting_epoch * len(train_dataloader)
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -636,8 +636,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
-resume_step -= starting_epoch * len(train_dataloader)
 completed_steps = resume_step // args.gradient_accumulation_steps
+resume_step -= starting_epoch * len(train_dataloader)
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -583,8 +583,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_stepp
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -820,8 +820,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_stepp
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -848,10 +848,11 @@ def main():
     resume_step = None
     completed_steps = starting_epoch * num_update_steps_per_epoch
 else:
-    resume_step = int(training_difference.replace("step_", ""))
+    # need to multiply `gradient_accumulation_steps` to reflect real steps
+    resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
     starting_epoch = resume_step // len(train_dataloader)
+    completed_steps = resume_step // args.gradient_accumulation_steps
     resume_step -= starting_epoch * len(train_dataloader)
-    completed_steps = resume_step // args.gradient_accumulation_stepp
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
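
Unlike the other hunks, this one also adds the `* args.gradient_accumulation_steps` multiplication and its comment: the number parsed out of a `step_N` checkpoint name counts optimizer updates, while `resume_step` is measured in dataloader batches (it is reduced by `starting_epoch * len(train_dataloader)`), so the saved value has to be scaled up before the epoch arithmetic. A small illustration with made-up numbers:

# Illustrative numbers only: resuming from "step_75" with accumulation of 8.
saved_updates = int("step_75".replace("step_", ""))          # 75 optimizer updates
gradient_accumulation_steps = 8
resume_step = saved_updates * gradient_accumulation_steps    # 600 batches already seen
# Without the multiplication, resume_step would stay at 75 and the script would
# resume much earlier in the epoch than where the checkpoint was written.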
@@ -581,8 +581,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_stepp
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -652,8 +652,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_stepp
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -530,8 +530,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_step
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -690,8 +690,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_stepp
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
@@ -633,8 +633,8 @@ def main():
 # need to multiply `gradient_accumulation_steps` to reflect real steps
 resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
 starting_epoch = resume_step // len(train_dataloader)
+completed_steps = resume_step // args.gradient_accumulation_steps
 resume_step -= starting_epoch * len(train_dataloader)
-completed_steps = resume_step // args.gradient_accumulation_stepp
 # update the progress_bar if load from checkpoint
 progress_bar.update(completed_steps)
...
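
For contrast, the ordering removed by these hunks derived `completed_steps` from the within-epoch remainder, so the `progress_bar.update(completed_steps)` call shown in the diff resumed from an undercounted value once training was past the first epoch. A sketch with hypothetical numbers, not taken from the diff:

# Hypothetical: 100 batches per epoch, accumulation of 4, checkpoint "step_60" (240 batches).
batches_per_epoch, accumulation = 100, 4
resume_step = 60 * accumulation                                   # 240
starting_epoch = resume_step // batches_per_epoch                 # 2

# Old ordering: subtract first, then divide -> only the partial epoch is counted.
old_completed = (resume_step - starting_epoch * batches_per_epoch) // accumulation  # 10

# New ordering: divide first, then subtract -> matches the 60 updates in the checkpoint name.
new_completed = resume_step // accumulation                       # 60
new_resume = resume_step - starting_epoch * batches_per_epoch     # 40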