WIP GPT2

34f28b2a · thomwolf · ad88563b · 34f28b2a · 34f28b2a · 34f28b2a
Commit 34f28b2a authored Sep 08, 2019 by thomwolf
3 changed files
--- a/pytorch_transformers/modeling_tf_bert.py
+++ b/pytorch_transformers/modeling_tf_bert.py
@@ -684,13 +684,13 @@ class TFBertModel(TFBertPreTrainedModel):

        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = TFBertModel.from_pretrained('bert-base-uncased')
-        input_ids = tf.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
        outputs = model(input_ids)
        last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple

    """
-    def __init__(self, config):
-        super(TFBertModel, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertModel, self).__init__(config, *inputs, **kwargs)
        self.bert = TFBertMainLayer(config, name='bert')

    @tf.function
@@ -739,8 +739,8 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
        prediction_scores, seq_relationship_scores = outputs[:2]

    """
-    def __init__(self, config):
-        super(TFBertForPreTraining, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForPreTraining, self).__init__(config, *inputs, **kwargs)

        self.bert = TFBertMainLayer(config, name='bert')
        self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
@@ -790,8 +790,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
        loss, prediction_scores = outputs[:2]

    """
-    def __init__(self, config):
-        super(TFBertForMaskedLM, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForMaskedLM, self).__init__(config, *inputs, **kwargs)

        self.bert = TFBertMainLayer(config, name='bert')

@@ -839,8 +839,8 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
        seq_relationship_scores = outputs[0]

    """
-    def __init__(self, config):
-        super(TFBertForNextSentencePrediction, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForNextSentencePrediction, self).__init__(config, *inputs, **kwargs)

        self.bert = TFBertMainLayer(config, name='bert')
        self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
@@ -891,8 +891,8 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel):
        loss, logits = outputs[:2]

    """
-    def __init__(self, config):
-        super(TFBertForSequenceClassification, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForSequenceClassification, self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
@@ -984,8 +984,8 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
        loss, classification_scores = outputs[:2]

    """
-    def __init__(self, config):
-        super(TFBertForMultipleChoice, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForMultipleChoice, self).__init__(config, *inputs, **kwargs)

        self.bert = TFBertMainLayer(config, name='bert')
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
@@ -1066,8 +1066,8 @@ class TFBertForTokenClassification(TFBertPreTrainedModel):
        loss, scores = outputs[:2]

    """
-    def __init__(self, config):
-        super(TFBertForTokenClassification, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForTokenClassification, self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')
@@ -1128,8 +1128,8 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel):
        loss, start_scores, end_scores = outputs[:2]

    """
-    def __init__(self, config):
-        super(TFBertForQuestionAnswering, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForQuestionAnswering, self).__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels

        self.bert = TFBertMainLayer(config, name='bert')

--- a/pytorch_transformers/modeling_tf_gpt2.py
+++ b/pytorch_transformers/modeling_tf_gpt2.py
--- a/pytorch_transformers/modeling_tf_utils.py
+++ b/pytorch_transformers/modeling_tf_utils.py
@@ -52,7 +52,7 @@ class TFPreTrainedModel(tf.keras.Model):
    base_model_prefix = ""

    def __init__(self, config, *inputs, **kwargs):
-        super(TFPreTrainedModel, self).__init__()
+        super(TFPreTrainedModel, self).__init__(*inputs, **kwargs)
        if not isinstance(config, PretrainedConfig):
            raise ValueError(
                "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
@@ -257,11 +257,11 @@ class TFPreTrainedModel(tf.keras.Model):
        return model

 class TFConv1D(tf.keras.layers.Layer):
-    def __init__(self, nf, nx):
+    def __init__(self, nf, nx, *inputs, **kwargs):
        """ TFConv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2)
            Basically works like a Linear layer but the weights are transposed
        """
-        super(TFConv1D, self).__init__()
+        super(TFConv1D, self).__init__(*inputs, **kwargs)
        self.nf = nf
        self.nx = nx