"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "ecb923da9cea390742a1262327a139852c5493e9"
Commit bab5d130 authored by thomwolf

update optimizer documentation

parent 7394eb47
@@ -42,17 +42,18 @@ SCHEDULES = {
 class BERTAdam(Optimizer):
-    """Implements Open AI version of Adam algorithm with weight decay fix.
+    """Implements BERT version of Adam algorithm with weight decay fix (and no bias correction).
     Params:
-        lr,
-        warmup=-1,
-        t_total=-1,
-        schedule='warmup_linear',
-        b1=0.9,
-        b2=0.999,
-        e=1e-6,
-        weight_decay_rate=0.01,
-        max_grad_norm=1.0
+        lr: learning rate
+        warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
+        t_total: total number of training steps for the learning
+            rate schedule, -1 means constant learning rate. Default: -1
+        schedule: schedule to use for the warmup (see above). Default: 'warmup_linear'
+        b1: Adam's b1. Default: 0.9
+        b2: Adam's b2. Default: 0.999
+        e: Adam's epsilon. Default: 1e-6
+        weight_decay_rate: Weight decay. Default: 0.01
+        max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
     """
     def __init__(self, params, lr, warmup=-1, t_total=-1, schedule='warmup_linear',
                  b1=0.9, b2=0.999, e=1e-6, weight_decay_rate=0.01,
...
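
For readers unfamiliar with these schedules: the 'warmup_linear' entry in the SCHEDULES dict referenced by the hunk header scales the base learning rate by a multiplier that ramps up linearly over the warmup portion of training and then decays linearly toward zero. A minimal sketch of that shape (the function body below is an illustration of the documented behavior, not the committed implementation):

    def warmup_linear(x, warmup=0.002):
        # x is training progress in [0, 1], i.e. current_step / t_total.
        # Ramp the lr multiplier from 0 up to 1 across the warmup portion...
        if x < warmup:
            return x / warmup
        # ...then decay it linearly back toward 0 by the end of training.
        return 1.0 - x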
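
And a minimal usage sketch of the documented constructor; the import path, stand-in model, and hyperparameter values are assumptions for illustration, not part of this commit:

    import torch
    from optimization import BERTAdam  # assumed module path within this repo

    model = torch.nn.Linear(768, 2)  # stand-in model for illustration

    optimizer = BERTAdam(model.parameters(),
                         lr=5e-5,                   # base learning rate
                         warmup=0.1,                # warm up over the first 10% of steps
                         t_total=1000,              # total number of training steps
                         schedule='warmup_linear',
                         b1=0.9, b2=0.999, e=1e-6,
                         weight_decay_rate=0.01,
                         max_grad_norm=1.0)         # clip gradients to unit L2 norm

    for step in range(1000):
        loss = model(torch.randn(8, 768)).sum()    # dummy forward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

Note that warmup and t_total both default to -1 (constant learning rate, no warmup), so the schedule only takes effect when both are set, as above.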