"git@developer.sourcefind.cn:gaoqiong/flash-attention.git" did not exist on "22339db185027324f334a7f59e2584da266bfd4c"
Commit 25f73add authored by thomwolf

update optimizer run_squad

parent f514cbbf
...
@@ -800,10 +800,14 @@ def main():
     if n_gpu > 1:
         model = torch.nn.DataParallel(model)
-    optimizer = BERTAdam([{'params': [p for n, p in model.named_parameters() if n != 'bias'], 'l2': 0.01},
-                          {'params': [p for n, p in model.named_parameters() if n == 'bias'], 'l2': 0.}
-                          ],
-                         lr=args.learning_rate, schedule='warmup_linear',
+    no_decay = ['bias', 'gamma', 'beta']
+    optimizer_parameters = [
+        {'params': [p for n, p in model.named_parameters() if n not in no_decay], 'weight_decay_rate': 0.01},
+        {'params': [p for n, p in model.named_parameters() if n in no_decay], 'weight_decay_rate': 0.0}
+        ]
+    optimizer = BERTAdam(optimizer_parameters,
+                         lr=args.learning_rate,
                          warmup=args.warmup_proportion,
                          t_total=num_train_steps)
...
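For context: the change splits the model's parameters into two groups so that weight decay (formerly the 'l2' key) applies to the weights but not to biases or to the LayerNorm gamma/beta parameters. Below is a minimal sketch of the same grouping pattern, using stock torch.optim.AdamW as a stand-in for this repo's BERTAdam (which additionally handles warmup scheduling). The substring match is an assumption on my part: named_parameters() yields dotted paths such as "encoder.layer.0.output.dense.bias" rather than bare names, so an exact-name comparison would match nothing.

# Minimal sketch, not the repo's code: decay/no-decay parameter grouping
# with stock torch.optim.AdamW in place of BERTAdam.
import torch

def build_optimizer(model: torch.nn.Module, lr: float = 5e-5) -> torch.optim.AdamW:
    # Biases and LayerNorm parameters (named gamma/beta in this codebase)
    # are exempted from weight decay.
    no_decay = ['bias', 'gamma', 'beta']
    grouped_parameters = [
        # decayed group: everything except biases and LayerNorm parameters
        {'params': [p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        # no-decay group: biases and LayerNorm parameters
        {'params': [p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]
    return torch.optim.AdamW(grouped_parameters, lr=lr)

Exempting biases and normalization parameters from decay is a common recipe for BERT fine-tuning: these parameters are few, and decaying them tends to hinder optimization rather than reduce overfitting.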