Commit af1ee9e6 authored by Victor SANH, committed by Lysandre Debut

Move `torch.nn.utils.clip_grad_norm_`

parent 164c794e
@@ -204,13 +204,16 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
             if args.fp16:
                 with amp.scale_loss(loss, optimizer) as scaled_loss:
                     scaled_loss.backward()
-                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
             else:
                 loss.backward()
-                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
 
             tr_loss += loss.item()
             if (step + 1) % args.gradient_accumulation_steps == 0:
+                if args.fp16:
+                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
+                else:
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
+
                 optimizer.step()
                 scheduler.step()  # Update learning rate schedule
                 model.zero_grad()
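For context on why clipping moves: with gradient accumulation, gradients from several backward passes are summed before each optimizer step, so the norm clip should be applied once per optimizer step, on the fully accumulated gradients, rather than after every backward pass. Below is a minimal standalone sketch of that pattern; the model, loader, and hyperparameter names are placeholders for illustration, not the actual example script.

    # Sketch of gradient clipping with accumulation (hypothetical model/data;
    # mirrors the pattern this commit establishes, not the script itself).
    import torch

    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    loader = [(torch.randn(4, 10), torch.randint(0, 2, (4,))) for _ in range(8)]
    accumulation_steps = 4
    max_grad_norm = 1.0

    model.zero_grad()
    for step, (x, y) in enumerate(loader):
        loss = torch.nn.functional.cross_entropy(model(x), y)
        (loss / accumulation_steps).backward()  # grads accumulate across steps
        if (step + 1) % accumulation_steps == 0:
            # Clip once, on the accumulated gradients, right before the step.
            # Clipping after every backward() (as before this commit) would
            # clip partial gradients and change the effective update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            model.zero_grad()

In the fp16 branch of the diff, the clip is applied to amp.master_params(optimizer) rather than model.parameters() because Apex AMP steps the FP32 master copies of the weights, so those are the gradients the optimizer actually consumes.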