Commit a84a5fa0 authored by Jared Casper's avatar Jared Casper Committed by Deepak Narayanan
Browse files

Make an eval iteration the same number of samples as a training iteration

parent 2cf1d6d0
@@ -761,6 +761,7 @@ def evaluate(forward_step_func, data_iterator, model, verbose=False):
             print_rank_0('Evaluating iter {}/{}'.format(iteration,
                                                         args.eval_iters))
+        for _ in range(args.num_microbatches_in_minibatch):
         if not mpu.is_pipeline_first_stage():
             input_tensor, _ = communicate(
                 tensor_send_next=None,
@@ -793,7 +794,7 @@ def evaluate(forward_step_func, data_iterator, model, verbose=False):
     model.train()
     for key in total_loss_dict:
-        total_loss_dict[key] /= args.eval_iters
+        total_loss_dict[key] /= args.eval_iters * args.num_microbatches_in_minibatch
     return total_loss_dict
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment