Commit 64d83c7a authored by thomwolf's avatar thomwolf
Browse files

WIP

parent 01597e5b
This diff is collapsed.
......@@ -255,3 +255,21 @@ class TFPreTrainedModel(tf.keras.Model):
ret = model(inputs, training=False) # Make sure restore ops are run
return model
class TFConv1D(tf.keras.layers.Layer):
    """TFConv1D layer as defined by Radford et al. for OpenAI GPT (also used in GPT-2).

    Basically works like a Linear layer but the weights are transposed:
    the kernel is stored with shape ``(nx, nf)`` and applied to the last
    axis of the input as ``x @ weight + bias``.

    Args:
        nf: Size of the last axis of the output (number of output features).
        nx: Size of the last axis of the input (number of input features).
        initializer_range: Standard deviation of the normal distribution used
            to initialise ``weight``. Defaults to 0.02.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, nf, nx, initializer_range=0.02, **kwargs):
        super(TFConv1D, self).__init__(**kwargs)
        self.nf = nf
        self.nx = nx
        self.initializer_range = initializer_range

    def build(self, input_shape):
        """Create the layer variables: ``weight`` (nx, nf) and ``bias`` (nf,)."""
        # Keyword arguments only: the positional order of ``add_weight``
        # differs between Keras versions.
        self.weight = self.add_weight(
            name="weight",
            shape=(self.nx, self.nf),
            initializer=tf.keras.initializers.RandomNormal(
                stddev=self.initializer_range
            ),
        )
        self.bias = self.add_weight(
            name="bias",
            shape=(self.nf,),
            initializer=tf.keras.initializers.Zeros(),
        )
        super(TFConv1D, self).build(input_shape)

    def call(self, x):
        """Apply the affine map to the last axis of ``x``.

        Args:
            x: Tensor whose last axis is contracted with ``weight``.

        Returns:
            Tensor with the same leading axes as ``x`` and a last axis of
            size ``nf``.
        """
        in_shape = tf.shape(x)
        # Output keeps every leading axis of the input; only the last changes.
        size_out = tf.concat([in_shape[:-1], [self.nf]], axis=0)
        # Collapse leading axes so a single 2-D matmul covers the whole input.
        flat = tf.reshape(x, [-1, in_shape[-1]])
        out = tf.matmul(flat, self.weight) + self.bias
        return tf.reshape(out, size_out)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment