chenpangpang / transformers · Commits

Commit 34f28b2a authored Sep 08, 2019 by thomwolf
Parent: ad88563b

WIP GPT2
Showing 3 changed files with 292 additions and 76 deletions (+292 -76):
pytorch_transformers/modeling_tf_bert.py   +17 -17
pytorch_transformers/modeling_tf_gpt2.py   +272 -56
pytorch_transformers/modeling_tf_utils.py  +3 -3
pytorch_transformers/modeling_tf_bert.py
```diff
@@ -684,13 +684,13 @@ class TFBertModel(TFBertPreTrainedModel):
         tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
         model = TFBertModel.from_pretrained('bert-base-uncased')
-        input_ids = tf.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
+        input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0)  # Batch size 1
         outputs = model(input_ids)
         last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
     """
-    def __init__(self, config):
-        super(TFBertModel, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertModel, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')

     @tf.function
```
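This first hunk fixes the docstring example: `tf.tensor` does not exist in TensorFlow, and `tf.constant` is the usual way to build a tensor from Python data. Note that the example still chains `.unsqueeze(0)`, a PyTorch method that TensorFlow tensors do not have. A minimal sketch of the TF-native way to add the batch dimension (the token ids below are made up for illustration, not produced by the tokenizer):

```python
import tensorflow as tf

# tf.constant builds an EagerTensor from Python data; there is no tf.tensor.
token_ids = [101, 7592, 1010, 2026, 3899, 2003, 10140, 102]  # hypothetical ids
input_ids = tf.constant(token_ids)            # shape (8,)

# TF tensors have no .unsqueeze(); the equivalents for adding a batch dim:
batched = tf.expand_dims(input_ids, 0)        # shape (1, 8)
batched_alt = input_ids[tf.newaxis, :]        # same result via slicing
```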
```diff
@@ -739,8 +739,8 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
         prediction_scores, seq_relationship_scores = outputs[:2]
     """
-    def __init__(self, config):
-        super(TFBertForPreTraining, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForPreTraining, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
         self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
@@ -790,8 +790,8 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
         loss, prediction_scores = outputs[:2]
     """
-    def __init__(self, config):
-        super(TFBertForMaskedLM, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForMaskedLM, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
@@ -839,8 +839,8 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
         seq_relationship_scores = outputs[0]
     """
-    def __init__(self, config):
-        super(TFBertForNextSentencePrediction, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForNextSentencePrediction, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
         self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
@@ -891,8 +891,8 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel):
         loss, logits = outputs[:2]
     """
-    def __init__(self, config):
-        super(TFBertForSequenceClassification, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForSequenceClassification, self).__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
         self.bert = TFBertMainLayer(config, name='bert')
@@ -984,8 +984,8 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
         loss, classification_scores = outputs[:2]
     """
-    def __init__(self, config):
-        super(TFBertForMultipleChoice, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForMultipleChoice, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
         self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
@@ -1066,8 +1066,8 @@ class TFBertForTokenClassification(TFBertPreTrainedModel):
         loss, scores = outputs[:2]
     """
-    def __init__(self, config):
-        super(TFBertForTokenClassification, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForTokenClassification, self).__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
         self.bert = TFBertMainLayer(config, name='bert')
@@ -1128,8 +1128,8 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel):
         loss, start_scores, end_scores = outputs[:2]
     """
-    def __init__(self, config):
-        super(TFBertForQuestionAnswering, self).__init__(config)
+    def __init__(self, config, *inputs, **kwargs):
+        super(TFBertForQuestionAnswering, self).__init__(config, *inputs, **kwargs)
         self.num_labels = config.num_labels
         self.bert = TFBertMainLayer(config, name='bert')
```
pytorch_transformers/modeling_tf_gpt2.py

This diff is collapsed in the page capture, so its contents (+272 -56) are not shown here.
pytorch_transformers/modeling_tf_utils.py
```diff
@@ -52,7 +52,7 @@ class TFPreTrainedModel(tf.keras.Model):
     base_model_prefix = ""

     def __init__(self, config, *inputs, **kwargs):
-        super(TFPreTrainedModel, self).__init__()
+        super(TFPreTrainedModel, self).__init__(*inputs, **kwargs)
         if not isinstance(config, PretrainedConfig):
             raise ValueError(
                 "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
```
```diff
@@ -257,11 +257,11 @@ class TFPreTrainedModel(tf.keras.Model):
         return model


 class TFConv1D(tf.keras.layers.Layer):
-    def __init__(self, nf, nx):
+    def __init__(self, nf, nx, *inputs, **kwargs):
         """ TFConv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2)
             Basically works like a Linear layer but the weights are transposed
         """
-        super(TFConv1D, self).__init__()
+        super(TFConv1D, self).__init__(*inputs, **kwargs)
         self.nf = nf
         self.nx = nx
```
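`TFConv1D`'s docstring says it "works like a Linear layer but the weights are transposed". A hedged sketch of such a layer with the same `(nf, nx, *inputs, **kwargs)` signature the hunk introduces; this is a hypothetical re-implementation for illustration, not the file's actual `build`/`call` (which the capture does not show):

```python
import tensorflow as tf

class Conv1DSketch(tf.keras.layers.Layer):
    """Sketch of a GPT-style Conv1D: a Linear layer with transposed weights.

    `nx` is the input width, `nf` the output width.
    """
    def __init__(self, nf, nx, *inputs, **kwargs):
        # Keras Layer kwargs (e.g. name=) flow through, as in the commit.
        super(Conv1DSketch, self).__init__(*inputs, **kwargs)
        self.nf = nf
        self.nx = nx

    def build(self, input_shape):
        # Weight is stored as (nx, nf) -- the transpose of a typical
        # Linear layer's (out_features, in_features) layout.
        self.weight = self.add_weight(
            name='weight', shape=[self.nx, self.nf],
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
        self.bias = self.add_weight(
            name='bias', shape=[self.nf], initializer='zeros')

    def call(self, x):
        # Flatten leading dims, matmul against the (nx, nf) weight,
        # then restore the original leading shape with nf as last dim.
        size_out = tf.concat([tf.shape(x)[:-1], [self.nf]], axis=0)
        x = tf.reshape(x, [-1, self.nx])
        x = tf.matmul(x, self.weight) + self.bias
        return tf.reshape(x, size_out)

layer = Conv1DSketch(nf=8, nx=4, name='c_attn_sketch')
y = layer(tf.zeros([2, 3, 4]))  # -> shape (2, 3, 8)
```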