clean up pr

7ba83730 · lukovnikov · fa0c5a2e · 7ba83730 · 7ba83730
Commit 7ba83730 authored Nov 13, 2018 by lukovnikov
Hide whitespace changes
Inline Side-by-side

Showing with 15 additions and 5 deletions

convert_tf_checkpoint_to_pytorch.py convert_tf_checkpoint_to_pytorch.py +9 -3

modeling.py modeling.py +6 -2

No files found.
--- a/convert_tf_checkpoint_to_pytorch.py
+++ b/convert_tf_checkpoint_to_pytorch.py
@@ -68,11 +68,17 @@ def convert():
        arrays.append(array)
    for name, array in zip(names, arrays):
-        name = name[5:]  # skip "bert/"
+        if not name.startswith("bert"):
+            print("Skipping {}".format(name))
+            continue
+        else:
+            name = name.replace("bert/", "")  # skip "bert/"
        print("Loading {}".format(name))
        name = name.split('/')
-        if name[0] in ['redictions', 'eq_relationship']:
+        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v
-            print("Skipping")
+        # which are not required for using pretrained model
+        if name[0] in ['redictions', 'eq_relationship'] or name[-1] == "adam_v" or  name[-1] == "adam_m":
+            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:

--- a/modeling.py
+++ b/modeling.py
@@ -26,6 +26,10 @@ import torch
 import torch.nn as nn
 from torch.nn import CrossEntropyLoss
+ACT2FN = {"gelu": gelu, "relu": torch.nn.ReLU, "swish": swish}
 def gelu(x):
    """Implementation of the gelu activation function.
        For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
@@ -241,8 +245,8 @@ class BERTIntermediate(nn.Module):
    def __init__(self, config):
        super(BERTIntermediate, self).__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
-        act2fn = {"gelu": gelu, "relu": torch.nn.ReLU, "swish": swish}
+        self.intermediate_act_fn = ACT2FN[config.hidden_act] \
-        self.intermediate_act_fn = act2fn[config.hidden_act] if isinstance(config.hidden_act, str) else config.hidden_act
+            if isinstance(config.hidden_act, str) else config.hidden_act
    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)