Commit f777e6b3 authored by zihanl

update finetune_utils.py

parent 5aa3d5e3
@@ -177,6 +177,7 @@ def _train(model, optimizer, lr_scheduler, forward_step,
     report_memory_flag = True
     # For each remaining epoch
+    args.consumed_train_samples = 0
     timers('interval-time').start()
     for epoch in range(start_epoch, args.epochs):
         print_rank_0('working on epoch {} ...'.format(epoch + 1))
@@ -196,6 +197,10 @@ def _train(model, optimizer, lr_scheduler, forward_step,
             # Train for one step.
             out = train_step(forward_step, batch, model, optimizer, lr_scheduler)
+            args.consumed_train_samples += mpu.get_data_parallel_world_size() * \
+                                           args.micro_batch_size * \
+                                           get_num_microbatches()
             losses_dict, skipped_iter, grad_norm, num_zeros_in_grad = out
             iteration += 1
...
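
The added lines keep args.consumed_train_samples in step with the effective global batch size: each call to train_step processes micro_batch_size samples for each of get_num_microbatches() micro-batches on every data-parallel replica. Below is a minimal sketch of that accounting; the helper name samples_per_step and the example numbers are illustrative, not part of the repository.

# Minimal sketch of the per-iteration sample accounting introduced above.
# The helper name and the example values are illustrative only.

def samples_per_step(data_parallel_world_size: int,
                     micro_batch_size: int,
                     num_microbatches: int) -> int:
    """Samples consumed by one train_step: the effective global batch size."""
    return data_parallel_world_size * micro_batch_size * num_microbatches


# Example: 8 data-parallel replicas, micro batch of 4, and 16 micro-batches per
# optimizer step -> args.consumed_train_samples grows by 512 each iteration.
assert samples_per_step(8, 4, 16) == 512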