Fix ALBERT exports with pretraining + sp classifier; Fix naming for ALBERT TF models

e85855f2 · Lysandre · b3d834ae · e85855f2 · e85855f2
Commit e85855f2 authored Dec 02, 2019 by Lysandre
Show whitespace changes
Inline Side-by-side

Showing with 20 additions and 5 deletions

transformers/modeling_albert.py transformers/modeling_albert.py +16 -1

transformers/modeling_tf_albert.py transformers/modeling_tf_albert.py +4 -4

No files found.
--- a/transformers/modeling_albert.py
+++ b/transformers/modeling_albert.py
@@ -100,7 +100,22 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
        name = name.replace("embeddings/attention", "embeddings")    
        name = name.replace("inner_group_", "albert_layers/") 
        name = name.replace("group_", "albert_layer_groups/")   
+
+        # Classifier
+        if len(name.split("/")) == 1 and ("output_bias" in name or "output_weights" in name):
+            name = "classifier/" + name
+
+        # No ALBERT model currently handles the next sentence prediction task 
+        if "seq_relationship" in name:
+            continue
+
        name = name.split('/')
+
+        # Skip optimizer state (the adam_m/adam_v slots of the LAMB/ADAM optimizers) and the global_step counter; these are not model weights.
+        if "adam_m" in name or "adam_v" in name or "global_step" in name:
+            logger.info("Skipping {}".format("/".join(name)))
+            continue
+
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):

--- a/transformers/modeling_tf_albert.py
+++ b/transformers/modeling_tf_albert.py
@@ -31,10 +31,10 @@ import logging
 logger = logging.getLogger(__name__)

 TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
-    'albert-base-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-tf_model.h5",
-    'albert-large-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-tf_model.h5",
-    'albert-xlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-tf_model.h5",
-    'albert-xxlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-tf_model.h5",
+    'albert-base-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v1-tf_model.h5",
+    'albert-large-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v1-tf_model.h5",
+    'albert-xlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v1-tf_model.h5",
+    'albert-xxlarge-v1': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v1-tf_model.h5",
    'albert-base-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-v2-tf_model.h5",
    'albert-large-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-v2-tf_model.h5",
    'albert-xlarge-v2': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-v2-tf_model.h5",