Commit 63ed224b authored by Santiago Castro, committed by Lysandre Debut

initialy -> initially

parent 391db836
@@ -118,7 +118,7 @@ def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
 def gelu(x):
-    """ Original Implementation of the gelu activation function in Google Bert repo when initialy created.
+    """ Original Implementation of the gelu activation function in Google Bert repo when initially created.
         For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
         0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
         Also see https://arxiv.org/abs/1606.08415
...
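For reference, a minimal runnable sketch of the two GELU variants the docstring contrasts. The function bodies are not part of this diff, so the exact erf-based form is an assumption based on the quoted formula and the standard definition; the name gelu_new is used here only for illustration.

import math
import torch

def gelu(x):
    # Exact GELU: x * Phi(x), where Phi is the standard normal CDF (erf-based form).
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))

def gelu_new(x):
    # Tanh approximation quoted in the docstring (OpenAI GPT's variant);
    # it gives slightly different values from the exact form above.
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))

# The two curves agree closely but not exactly:
x = torch.linspace(-3.0, 3.0, steps=7)
print(gelu(x) - gelu_new(x))  # small nonzero differences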
@@ -62,7 +62,7 @@ def load_bert_pt_weights_in_tf2(tf_model, pytorch_checkpoint_path):
 def gelu(x):
     """ Gaussian Error Linear Unit.
-    Original Implementation of the gelu activation function in Google Bert repo when initialy created.
+    Original Implementation of the gelu activation function in Google Bert repo when initially created.
     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
     Also see https://arxiv.org/abs/1606.08415
...
@@ -45,7 +45,7 @@ TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
 ### UTILS AND BUILDING BLOCKS OF THE ARCHITECTURE ###
 def gelu(x):
     """ Gaussian Error Linear Unit.
-    Original Implementation of the gelu activation function in Google Bert repo when initialy created.
+    Original Implementation of the gelu activation function in Google Bert repo when initially created.
     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
     Also see https://arxiv.org/abs/1606.08415
...
@@ -69,7 +69,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
 def gelu(x):
     """ Gaussian Error Linear Unit.
-    Original Implementation of the gelu activation function in Google Bert repo when initialy created.
+    Original Implementation of the gelu activation function in Google Bert repo when initially created.
     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
     Also see https://arxiv.org/abs/1606.08415
...