Commit 00c7fd2b authored by burcturkoglu

The division of global_step by num_train_optimization_steps in the lr_this_step computation is removed.

parent fa37b4da
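The removed division was redundant: warmup_linear here is a schedule object whose get_lr is expected to receive the absolute optimizer step and normalize it against the total step count itself, so dividing global_step by num_train_optimization_steps before the call scaled the progress down a second time and kept the computed learning rate pinned near the very start of warmup. The sketch below is a minimal, illustrative warmup-linear schedule written for this note (the class name, defaults, and decay formula are assumptions, not the library's exact implementation); it only shows where the single division belongs.

    # Minimal illustrative sketch, assuming the schedule stores t_total and
    # performs the step / t_total normalization itself.
    # Not the exact code from pytorch_pretrained_bert.optimization.
    class WarmupLinear:
        def __init__(self, warmup=0.1, t_total=1000):
            self.warmup = warmup      # fraction of total steps spent warming up
            self.t_total = t_total    # total number of optimizer steps

        def get_lr(self, step):
            progress = float(step) / self.t_total        # the one and only division by t_total
            if progress < self.warmup:
                return progress / self.warmup            # linear ramp from 0 to 1
            return max((1.0 - progress) / (1.0 - self.warmup), 0.0)  # linear decay to 0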
@@ -315,8 +315,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                             args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
@@ -604,8 +604,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                             args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
@@ -855,8 +855,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                             args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
@@ -1016,8 +1016,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used and handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                             args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
@@ -467,8 +467,7 @@ def main():
                 if args.fp16:
                     # modify learning rate with special warm up BERT uses
                     # if args.fp16 is False, BertAdam is used that handles this automatically
-                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step/num_train_optimization_steps,
-                                                                             args.warmup_proportion)
+                    lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
                     for param_group in optimizer.param_groups:
                         param_group['lr'] = lr_this_step
                 optimizer.step()
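Each hunk applies the same one-line fix to a different example script. A quick check with the sketch above (hypothetical numbers, chosen only to show the size of the effect):

    sched = WarmupLinear(warmup=0.1, t_total=1000)

    global_step = 500
    print(sched.get_lr(global_step))          # ~0.556: halfway through the linear decay, as intended
    print(sched.get_lr(global_step / 1000))   # 0.005: the old extra division never gets past early warmup

With the single division inside get_lr, the multiplier follows the intended warmup-then-decay curve, and the fp16 branch writes args.learning_rate times that multiplier into each param_group because, as the in-line comment notes, only BertAdam applies the warmup schedule on its own.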