Commit c9cb7f8a authored by Lysandre, committed by Lysandre Debut

Torch 1.1.0 compatibility + FP16 O1 + TF checkpoints

Co-authored-by: wassname
parent b18509c2
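The "FP16 O1" in the title refers to NVIDIA apex mixed precision at opt_level O1. A minimal sketch of that setup, not part of this commit and with illustrative model/optimizer choices, assuming apex is installed:

    import torch
    from apex import amp  # assumes NVIDIA apex is installed

    # Under O1, apex patches torch functions to run in fp16 where safe while
    # module weights themselves stay fp32 -- the dtype mismatch the first
    # hunk below casts around with .to(context_layer.dtype).
    model = torch.nn.Linear(768, 768).cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")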
@@ -203,8 +203,8 @@ class AlbertAttention(BertSelfAttention):
         # Should find a better way to do this
-        w = self.dense.weight.T.view(self.num_attention_heads, self.attention_head_size, self.hidden_size)
-        b = self.dense.bias
+        w = self.dense.weight.t().view(self.num_attention_heads, self.attention_head_size, self.hidden_size).to(context_layer.dtype)
+        b = self.dense.bias.to(context_layer.dtype)
         projected_context_layer = torch.einsum("bfnd,ndh->bfh", context_layer, w) + b
         projected_context_layer_dropout = self.dropout(projected_context_layer)
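Two things change in this hunk: the Tensor.T property (which torch 1.1.0 lacks) becomes the .t() method, and both weight and bias are cast to the activation dtype so einsum does not mix fp16 activations with fp32 master weights under apex O1. A minimal sketch of the failure mode, not from the commit; shapes are illustrative, and half-precision matmul may need a CUDA device on older torch builds:

    import torch

    b, f, n, d, h = 2, 4, 12, 64, 768                # illustrative ALBERT-base-like sizes
    context_layer = torch.randn(b, f, n, d).half()   # fp16 activations under amp O1
    dense = torch.nn.Linear(h, h)                    # weights stay fp32 under O1

    w_fp32 = dense.weight.t().view(n, d, h)
    try:
        torch.einsum("bfnd,ndh->bfh", context_layer, w_fp32)
    except RuntimeError as e:
        print(e)  # dtype mismatch: Half vs Float

    # The commit's fix: cast weight and bias to the activation dtype first.
    w = w_fp32.to(context_layer.dtype)
    bias = dense.bias.to(context_layer.dtype)
    projected = torch.einsum("bfnd,ndh->bfh", context_layer, w) + bias
    print(projected.shape, projected.dtype)          # torch.Size([2, 4, 768]) torch.float16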
@@ -36,7 +36,14 @@ import logging
 logger = logging.getLogger(__name__)
 TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    # TODO FILL THAT UP
+    'albert-base-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-tf_model.h5",
+    'albert-large-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-tf_model.h5",
+    'albert-xlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-tf_model.h5",
+    'albert-xxlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-tf_model.h5",
+    'albert-base-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-tf_model.h5",
+    'albert-large-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-tf_model.h5",
+    'albert-xlarge-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-tf_model.h5",
+    'albert-xxlarge-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-tf_model.h5",
 }
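These eight .h5 entries are what let from_pretrained() resolve ALBERT shortcut names to TF checkpoints. A minimal usage sketch, not part of the diff, assuming the TFAlbertModel class defined in this same file is exported by the package:

    from transformers import TFAlbertModel

    # Any key in TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP works as a shortcut
    # name; the matching *-tf_model.h5 checkpoint is downloaded and cached.
    model = TFAlbertModel.from_pretrained("albert-base-v2")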
......