Commit 1d15a7f2 authored by Andreas Daiminger

swap order of optimizer.step() and scheduler.step()

parent 0ecfd17f
...
@@ -157,8 +157,8 @@ def train(args, train_dataset, model, tokenizer):
                 tr_loss += loss.item()
                 if (step + 1) % args.gradient_accumulation_steps == 0:
-                    scheduler.step()  # Update learning rate schedule
                     optimizer.step()
+                    scheduler.step()  # Update learning rate schedule
                     model.zero_grad()
                     global_step += 1
...
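For reference, a minimal runnable sketch of the corrected ordering (the toy model, data, and StepLR scheduler here are illustrative assumptions, not the repository's actual training setup): since PyTorch 1.1.0, optimizer.step() should be called before scheduler.step(); the reverse order makes the schedule skip its first learning-rate value and triggers a UserWarning.

```python
import torch

# Hypothetical stand-ins for the model/optimizer/scheduler in the diff.
model = torch.nn.Linear(4, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
gradient_accumulation_steps = 2

for step in range(100):
    inputs, targets = torch.randn(8, 4), torch.randn(8, 1)
    loss = torch.nn.functional.mse_loss(model(inputs), targets)
    # Scale the loss so accumulated gradients average over the accumulation window.
    (loss / gradient_accumulation_steps).backward()
    if (step + 1) % gradient_accumulation_steps == 0:
        optimizer.step()    # update the weights first ...
        scheduler.step()    # ... then advance the learning-rate schedule
        model.zero_grad()
```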