"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "80a169451479f97d737e2be433a7cbd30c39c6bb"
Unverified Commit 28ba345e authored by Ethan Perez, committed by GitHub

Fixing unused weight_decay argument

Currently the L2 regularization is hard-coded to 0.01, even though the script implements a --weight_decay flag that goes unused. This change makes the flag control the weight decay used for fine-tuning in this script.
parent 44dd941e
@@ -205,7 +205,7 @@ def main():
     param_optimizer = list(model.named_parameters())
     no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
     optimizer_grouped_parameters = [
-        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
         {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
     ]
     optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
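For reference, here is a minimal, self-contained sketch of how the flag now flows into the optimizer's parameter groups. torch.optim.AdamW stands in for the script's AdamW import, and TinyModel is a hypothetical placeholder for the fine-tuned model; only the grouping logic mirrors the script.

```python
import argparse

import torch
from torch import nn


class TinyModel(nn.Module):
    """Toy stand-in for the fine-tuned model (the real script loads a pretrained model)."""
    def __init__(self):
        super().__init__()
        self.dense = nn.Linear(8, 8)
        self.LayerNorm = nn.LayerNorm(8)

    def forward(self, x):
        return self.LayerNorm(self.dense(x))


parser = argparse.ArgumentParser()
parser.add_argument("--weight_decay", default=0.0, type=float,
                    help="Weight decay for parameters outside the no-decay group.")
parser.add_argument("--learning_rate", default=5e-5, type=float)
parser.add_argument("--adam_epsilon", default=1e-8, type=float)
args = parser.parse_args(["--weight_decay", "0.01"])  # e.g. value passed on the command line

model = TinyModel()

# Same grouping as the script: biases and LayerNorm parameters get no weight decay;
# everything else now uses args.weight_decay instead of the hard-coded 0.01.
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': args.weight_decay},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = torch.optim.AdamW(optimizer_grouped_parameters,
                              lr=args.learning_rate, eps=args.adam_epsilon)
print(optimizer.param_groups[0]['weight_decay'])  # 0.01, taken from the flag
```

With the flag wired through, passing --weight_decay 0.0 genuinely disables L2 regularization, whereas before this commit it was silently fixed at 0.01 regardless of the flag.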