"torchvision/csrc/io/video_reader/VideoReader.cpp" did not exist on "32e16805a17401f5ef5ec825c808d645f5c26509"
Commit 9a951216 authored by Sergey Edunov
Browse files

Adjust weight decay by the current learning rate to make it work correctly during annealing

parent e4c935aa
......@@ -96,7 +96,7 @@ class Adam(Optimizer):
step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
if group['weight_decay'] != 0:
-p.data.add_(-group['weight_decay'], p.data)
+p.data.add_(-group['weight_decay'] * group['lr'], p.data)
p.data.addcdiv_(-step_size, exp_avg, denom)
......
......@@ -44,7 +44,7 @@ class NAG(Optimizer):
buf = param_state['momentum_buffer']
if weight_decay != 0:
-p.data.mul_(1 - weight_decay)
+p.data.mul_(1 - lr * weight_decay)
p.data.add_(momentum * momentum * lr_correct, buf)
p.data.add_(-(1 + momentum) * lr, d_p)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment