class NoamOpt(object):
    """Optimizer wrapper implementing the Noam learning-rate schedule."""

    def __init__(self, model_size, factor, warmup, optimizer):
        """
        model_size: hidden size of the model (d_model)
        factor: scaling coefficient for the rate
        warmup: number of warm-up steps; step ** (-0.5) == step * warmup ** (-1.5)
            exactly when step == warmup, i.e. the two branches of min() cross there
        """
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0

    def rate(self, step=None):
        """Compute the learning rate for a given step (defaults to the current one)."""
        if step is None:
            step = self._step
        # Linear warm-up (step * warmup ** (-1.5)) followed by
        # inverse-square-root decay (step ** (-0.5)).
        return self.factor * \
            (self.model_size ** (-0.5) *
             min(step ** (-0.5), step * self.warmup ** (-1.5)))

    def step(self):
        """Advance one step: set the new learning rate, then step the wrapped optimizer."""
        self._step += 1
        rate = self.rate()
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()


"""
Default setting:
def get_std_opt(model):
    return NoamOpt(model.src_embed[0].d_model, 2, 4000,
                   torch.optim.Adam(model.parameters(), lr=0,
                                    betas=(0.9, 0.98), eps=1e-9))
"""
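# --- Usage sketch (not from the original document) ---
# A minimal example of driving NoamOpt for a few steps, assuming PyTorch is
# installed. The toy Linear model and the factor/warmup values here are
# illustrative assumptions; only NoamOpt itself comes from the code above.
import torch
import torch.nn as nn

model = nn.Linear(512, 512)  # hypothetical toy model with d_model = 512
opt = NoamOpt(model_size=512, factor=1, warmup=10,
              optimizer=torch.optim.Adam(model.parameters(), lr=0,
                                         betas=(0.9, 0.98), eps=1e-9))

for _ in range(3):
    loss = model(torch.randn(4, 512)).sum()
    loss.backward()
    opt.step()                  # sets the new lr, then steps Adam
    opt.optimizer.zero_grad()
    print(f"step {opt._step}: lr = {opt._rate:.3e}")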