Commit 886cb497 authored by thomwolf

updating readme and notebooks

parent fd647e8c
@@ -42,7 +42,7 @@ SCHEDULES = {
 class BERTAdam(Optimizer):
-    """Implements BERT version of Adam algorithm with weight decay fix (and no ).
+    """Implements BERT version of Adam algorithm with weight decay fix.
     Params:
         lr: learning rate
         warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
@@ -136,7 +136,7 @@ class BERTAdam(Optimizer):
                 # the correct way of using L2 regularization/weight decay with Adam,
                 # since that will interact with the m and v parameters in strange ways.
                 #
-                # Instead we want ot decay the weights in a manner that doesn't interact
+                # Instead we want to decay the weights in a manner that doesn't interact
                 # with the m/v parameters. This is equivalent to adding the square
                 # of the weights to the loss with plain (non-momentum) SGD.
                 if group['weight_decay_rate'] > 0.0:
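The comment in this hunk describes decoupled weight decay: the L2 penalty is applied directly to the parameter update instead of being added to the gradient, where Adam's m/v moving averages would smooth it. A minimal sketch of that difference in plain PyTorch; the tensors and hyperparameter values below are illustrative stand-ins, not the optimizer's actual state:

import torch

# Illustrative parameter, gradient, moment buffers, and hyperparameters.
p = torch.randn(10)
grad = torch.randn(10)
exp_avg = torch.zeros(10)
exp_avg_sq = torch.zeros(10)
lr, beta1, beta2, eps, weight_decay = 1e-3, 0.9, 0.999, 1e-6, 0.01

# Classic L2 regularization folds the penalty into the gradient, so it
# passes through the m/v averages (the interaction the comment warns about):
#     grad = grad + weight_decay * p

# Decoupled decay: compute the Adam update first, then add the penalty
# to the update itself, bypassing the moment estimates.
exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)               # m
exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)  # v
update = exp_avg / (exp_avg_sq.sqrt() + eps)
update = update + weight_decay * p
p = p - lr * update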
@@ -154,6 +154,7 @@ class BERTAdam(Optimizer):
                 state['step'] += 1
                 # step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1
+                # No bias correction
                 # bias_correction1 = 1 - beta1 ** state['step']
                 # bias_correction2 = 1 - beta2 ** state['step']
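The line added in this hunk notes that BERTAdam omits Adam's bias correction; the commented-out lines show exactly what it would have computed. A short sketch comparing the two step sizes, assuming lr_scheduled stands for the warmup-scheduled learning rate this optimizer computes; the concrete values are made up for illustration:

import math

beta1, beta2, lr_scheduled = 0.9, 0.999, 1e-4
step = 100  # plays the role of state['step'] in the optimizer

# Standard Adam rescales the raw step to undo the zero-initialization
# bias of the moment estimates:
bias_correction1 = 1 - beta1 ** step
bias_correction2 = 1 - beta2 ** step
adam_step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1

# BERTAdam skips the correction and uses the scheduled rate directly:
bert_step_size = lr_scheduled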