Unverified Commit f275e593 authored by Zachary Mueller, committed by GitHub

Fix no_trainer examples to properly calculate the number of samples (#17046)

* Update all examples to properly calculate progress bar
parent 35d48db8
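For context (not part of this diff): the recalculation is needed because accelerator.prepare() shards the dataloader across processes, so len(train_dataloader) after prepare() can be smaller than before it, and the original max_train_steps, computed from the unsharded length, would over-count the optimizer updates and mis-size the progress bar. Below is a minimal sketch of that effect, assuming torch and accelerate are installed; the dataset size, batch size, gradient accumulation value, and epoch count are illustrative, not taken from the examples.

import math

import torch
from accelerate import Accelerator
from torch.utils.data import DataLoader, TensorDataset

# Illustrative setup: 1000 samples, batch size 8 -> 125 batches on one process.
accelerator = Accelerator()
dataloader = DataLoader(TensorDataset(torch.randn(1000, 4)), batch_size=8)

steps_before = len(dataloader)                 # length before sharding
dataloader = accelerator.prepare(dataloader)   # shards batches across processes
steps_after = len(dataloader)                  # per-process length; smaller on multi-GPU runs

# Same arithmetic as the added lines, with illustrative constants in place of args.*
gradient_accumulation_steps = 2
num_train_epochs = 3
num_update_steps_per_epoch = math.ceil(steps_after / gradient_accumulation_steps)
max_train_steps = num_train_epochs * num_update_steps_per_epoch
print(steps_before, steps_after, max_train_steps)

On a single process the two lengths match, but on, say, two GPUs steps_after drops to roughly half of steps_before, which is why max_train_steps has to be recomputed after prepare(), as each hunk below does.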
@@ -359,6 +359,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -472,6 +472,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -518,6 +518,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -472,6 +472,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -733,6 +733,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -739,6 +739,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -475,6 +475,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Instantiate metric
    metric = load_metric("mean_iou")
...
@@ -535,6 +535,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -418,6 +418,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -532,6 +532,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
@@ -513,6 +513,10 @@ def main():
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch

    # Figure out how many steps we should save the Accelerator states
    if hasattr(args.checkpointing_steps, "isdigit"):
        checkpointing_steps = args.checkpointing_steps
...
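The recomputed args.max_train_steps is also what sizes the training progress bar in these no_trainer scripts, which is why the commit message mentions the progress bar. A hedged sketch of that pattern, with a plain integer standing in for args.max_train_steps and the real epoch/batch loop elided:

from tqdm.auto import tqdm

max_train_steps = 1250                       # stands in for the recalculated args.max_train_steps
progress_bar = tqdm(range(max_train_steps))  # bar length now matches the sharded dataloader
completed_steps = 0
while completed_steps < max_train_steps:     # placeholder for the nested epoch/step loop
    progress_bar.update(1)
    completed_steps += 1

With the stale, pre-prepare() value the bar would never reach 100% on multi-process runs; after this fix its total matches the number of optimizer steps that actually happen.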