Commit 34f28b2a authored by thomwolf

WIP GPT2

parent ad88563b
@@ -684,13 +684,13 @@ class TFBertModel(TFBertPreTrainedModel):
         tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
         model = TFBertModel.from_pretrained('bert-base-uncased')
-        input_ids = tf.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
         outputs = model(input_ids)
         last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
 
     """
-    def __init__(self, config):
-        super(TFBertModel, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertModel, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
 
     @tf.function
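One detail worth noting about the usage example in the hunk above: the change swaps `tf.tensor` for `tf.constant`, but `.unsqueeze(0)` is a PyTorch tensor method that TensorFlow tensors do not provide. A small illustrative sketch of the TF-idiomatic way to add the batch dimension (the token ids below are made up and are not part of this commit):

```python
import tensorflow as tf

# Made-up token ids standing in for tokenizer.encode("Hello, my dog is cute").
token_ids = [101, 7592, 1010, 2026, 3899, 2003, 10140, 102]

# TensorFlow tensors have no .unsqueeze(); add the batch dimension with
# tf.expand_dims (or wrap the list before passing it to tf.constant).
input_ids = tf.expand_dims(tf.constant(token_ids), 0)  # shape [1, 8] -> batch size 1
```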
@@ -739,8 +739,8 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
         prediction_scores, seq_relationship_scores = outputs[:2]
 
     """
-    def __init__(self, config):
-        super(TFBertForPreTraining, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForPreTraining, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
         self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
@@ -790,8 +790,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
         loss, prediction_scores = outputs[:2]
 
     """
-    def __init__(self, config):
-        super(TFBertForMaskedLM, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForMaskedLM, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
@@ -839,8 +839,8 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
         seq_relationship_scores = outputs[0]
 
     """
-    def __init__(self, config):
-        super(TFBertForNextSentencePrediction, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForNextSentencePrediction, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
         self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
@@ -891,8 +891,8 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel):
         loss, logits = outputs[:2]
 
     """
-    def __init__(self, config):
-        super(TFBertForSequenceClassification, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForSequenceClassification, self).__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
         self.bert = TFBertMainLayer(config, name='bert')
@@ -984,8 +984,8 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
         loss, classification_scores = outputs[:2]
 
     """
-    def __init__(self, config):
-        super(TFBertForMultipleChoice, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForMultipleChoice, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
         self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
@@ -1066,8 +1066,8 @@ class TFBertForTokenClassification(TFBertPreTrainedModel):
         loss, scores = outputs[:2]
 
     """
-    def __init__(self, config):
-        super(TFBertForTokenClassification, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForTokenClassification, self).__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
         self.bert = TFBertMainLayer(config, name='bert')
@@ -1128,8 +1128,8 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel):
         loss, start_scores, end_scores = outputs[:2]
 
     """
-    def __init__(self, config):
-        super(TFBertForQuestionAnswering, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForQuestionAnswering, self).__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
         self.bert = TFBertMainLayer(config, name='bert')
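Every BERT class above gets the same two-line change: `__init__` now accepts `*inputs, **kwargs` and passes them on through `super().__init__`. As a minimal sketch of that pattern (the `SketchConfig`/`SketchModel` names are made up for illustration and are not part of this commit), forwarding the extra arguments lets Keras-level keyword arguments such as `name` reach the `tf.keras.Model` base class:

```python
import tensorflow as tf

class SketchConfig:
    """Made-up stand-in for a PretrainedConfig object."""
    hidden_size = 4

class SketchModel(tf.keras.Model):
    def __init__(self, config, *inputs, **kwargs):
        # Forward everything except the config to the Keras base class.
        super(SketchModel, self).__init__(*inputs, **kwargs)
        self.config = config
        self.dense = tf.keras.layers.Dense(config.hidden_size, name='dense')

    def call(self, inputs):
        return self.dense(inputs)

# Keras-level kwargs such as `name` now work on the subclass.
model = SketchModel(SketchConfig(), name='sketch_model')
outputs = model(tf.random.normal([1, 4]))  # shape [1, 4]
```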
This diff is collapsed.
@@ -52,7 +52,7 @@ class TFPreTrainedModel(tf.keras.Model):
     base_model_prefix = ""
 
     def __init__(self, config, *inputs, **kwargs):
-        super(TFPreTrainedModel, self).__init__()
+        super(TFPreTrainedModel, self).__init__(*inputs, **kwargs)
         if not isinstance(config, PretrainedConfig):
             raise ValueError(
                 "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
@@ -257,11 +257,11 @@ class TFPreTrainedModel(tf.keras.Model):
         return model
 
 
 class TFConv1D(tf.keras.layers.Layer):
-    def __init__(self, nf, nx):
+    def __init__(self, nf, nx, *inputs, **kwargs):
         """ TFConv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2)
             Basically works like a Linear layer but the weights are transposed
         """
-        super(TFConv1D, self).__init__()
+        super(TFConv1D, self).__init__(*inputs, **kwargs)
         self.nf = nf
         self.nx = nx
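The docstring kept in the hunk above describes `TFConv1D` as a linear layer whose weights are transposed. A minimal sketch of such a layer, based only on that description (the `SketchConv1D` name is made up; this is not the repository's exact implementation): the weight matrix is stored as `[nx, nf]` (input features x output features) and applied with a plain matmul:

```python
import tensorflow as tf

class SketchConv1D(tf.keras.layers.Layer):
    """Linear layer with a [nx, nf] (i.e. transposed) weight matrix."""
    def __init__(self, nf, nx, **kwargs):
        super(SketchConv1D, self).__init__(**kwargs)
        self.nf = nf  # output features
        self.nx = nx  # input features

    def build(self, input_shape):
        self.weight = self.add_weight(
            name='weight', shape=[self.nx, self.nf],
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
        self.bias = self.add_weight(
            name='bias', shape=[self.nf], initializer='zeros')

    def call(self, x):
        # Flatten every leading dimension, apply x @ W + b, then restore the shape.
        out_shape = tf.concat([tf.shape(x)[:-1], [self.nf]], axis=0)
        x = tf.reshape(x, [-1, self.nx])
        x = tf.matmul(x, self.weight) + self.bias
        return tf.reshape(x, out_shape)

layer = SketchConv1D(nf=8, nx=4, name='sketch_conv1d')
out = layer(tf.random.normal([2, 3, 4]))  # -> shape [2, 3, 8]
```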