Unverified Commit 2d458b2c authored by abhishek thakur, committed by GitHub
Browse files

ConvBERT fix torch <> tf weights conversion (#10314)



* convbert conversion test

* fin

* fin

* fin

* clean up tf<->pt conversion

* remove from_pt
Co-authored-by: patrickvonplaten <patrick.v.platen@gmail.com>
parent 3437d121
...@@ -56,7 +56,11 @@ def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_remove="") ...@@ -56,7 +56,11 @@ def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_remove="")
tf_name = tf_name[1:] # Remove level zero tf_name = tf_name[1:] # Remove level zero
# When should we transpose the weights # When should we transpose the weights
transpose = bool(tf_name[-1] == "kernel" or "emb_projs" in tf_name or "out_projs" in tf_name) transpose = bool(
tf_name[-1] in ["kernel", "pointwise_kernel", "depthwise_kernel"]
or "emb_projs" in tf_name
or "out_projs" in tf_name
)
# Convert standard TF2.0 names in PyTorch names # Convert standard TF2.0 names in PyTorch names
if tf_name[-1] == "kernel" or tf_name[-1] == "embeddings" or tf_name[-1] == "gamma": if tf_name[-1] == "kernel" or tf_name[-1] == "embeddings" or tf_name[-1] == "gamma":
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
import argparse import argparse
from transformers import ConvBertConfig, ConvBertModel, load_tf_weights_in_convbert from transformers import ConvBertConfig, ConvBertModel, TFConvBertModel, load_tf_weights_in_convbert
from transformers.utils import logging from transformers.utils import logging
...@@ -30,6 +30,9 @@ def convert_orig_tf1_checkpoint_to_pytorch(tf_checkpoint_path, convbert_config_f ...@@ -30,6 +30,9 @@ def convert_orig_tf1_checkpoint_to_pytorch(tf_checkpoint_path, convbert_config_f
model = load_tf_weights_in_convbert(model, conf, tf_checkpoint_path) model = load_tf_weights_in_convbert(model, conf, tf_checkpoint_path)
model.save_pretrained(pytorch_dump_path) model.save_pretrained(pytorch_dump_path)
tf_model = TFConvBertModel.from_pretrained(pytorch_dump_path, from_pt=True)
tf_model.save_pretrained(pytorch_dump_path)
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
......
...@@ -343,7 +343,7 @@ class GroupedLinearLayer(tf.keras.layers.Layer): ...@@ -343,7 +343,7 @@ class GroupedLinearLayer(tf.keras.layers.Layer):
def build(self, input_shape): def build(self, input_shape):
self.kernel = self.add_weight( self.kernel = self.add_weight(
"kernel", "kernel",
shape=[self.num_groups, self.group_in_dim, self.group_out_dim], shape=[self.group_out_dim, self.group_in_dim, self.num_groups],
initializer=self.kernel_initializer, initializer=self.kernel_initializer,
trainable=True, trainable=True,
) )
...@@ -355,7 +355,7 @@ class GroupedLinearLayer(tf.keras.layers.Layer): ...@@ -355,7 +355,7 @@ class GroupedLinearLayer(tf.keras.layers.Layer):
def call(self, hidden_states): def call(self, hidden_states):
batch_size = shape_list(hidden_states)[0] batch_size = shape_list(hidden_states)[0]
x = tf.transpose(tf.reshape(hidden_states, [-1, self.num_groups, self.group_in_dim]), [1, 0, 2]) x = tf.transpose(tf.reshape(hidden_states, [-1, self.num_groups, self.group_in_dim]), [1, 0, 2])
x = tf.matmul(x, self.kernel) x = tf.matmul(x, tf.transpose(self.kernel, [2, 1, 0]))
x = tf.transpose(x, [1, 0, 2]) x = tf.transpose(x, [1, 0, 2])
x = tf.reshape(x, [batch_size, -1, self.output_size]) x = tf.reshape(x, [batch_size, -1, self.output_size])
x = tf.nn.bias_add(value=x, bias=self.bias) x = tf.nn.bias_add(value=x, bias=self.bias)
......
...@@ -399,14 +399,12 @@ class TFConvBertModelIntegrationTest(unittest.TestCase): ...@@ -399,14 +399,12 @@ class TFConvBertModelIntegrationTest(unittest.TestCase):
expected_shape = [1, 6, 768] expected_shape = [1, 6, 768]
self.assertEqual(output.shape, expected_shape) self.assertEqual(output.shape, expected_shape)
print(output[:, :3, :3])
expected_slice = tf.constant( expected_slice = tf.constant(
[ [
[ [
[-0.10334751, -0.37152207, -0.2682219], [-0.03475493, -0.4686034, -0.30638832],
[0.20078957, -0.3918426, -0.78811496], [0.22637248, -0.26988646, -0.7423424],
[0.08000169, -0.509474, -0.59314483], [0.10324868, -0.45013508, -0.58280784],
] ]
] ]
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment