chenpangpang / transformers · Commits

Commit 0537139b, authored Sep 09, 2019 by thomwolf

removing tf.function

Parent: 33cb00f4
Showing 3 changed files with 0 additions and 36 deletions (+0 / -36):

    pytorch_transformers/modeling_tf_bert.py    +0 / -22
    pytorch_transformers/modeling_tf_gpt2.py    +0 / -12
    pytorch_transformers/modeling_tf_utils.py   +0 / -2
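Context for the change (an editor's gloss, not stated in the commit itself): tf.function traces a Python callable into a TensorFlow graph and caches one concrete graph per input signature; without it, a Keras layer's call method simply runs eagerly. Note that in the two modeling files below the deleted lines were already commented out ("# @tf.function"), while in modeling_tf_utils.py the decorators were still live. A minimal sketch of what the decorator changes, assuming TF 2.x:

    import tensorflow as tf

    class Square(tf.keras.layers.Layer):
        def call(self, x):
            print("tracing")   # a Python side effect: runs on every eager call,
            return x * x       # but only at trace time under tf.function

    layer = Square()
    layer(tf.constant([1.0, 2.0]))       # eager: prints every time, easy to debug

    graph_call = tf.function(layer)      # same layer, traced into a graph
    graph_call(tf.constant([1.0, 2.0]))  # prints once while tracing
    graph_call(tf.constant([3.0, 4.0]))  # same signature: cached graph, no print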
pytorch_transformers/modeling_tf_bert.py
@@ -164,7 +164,6 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
                     mean=0., stddev=self.hidden_size**-0.5))
         super(TFBertEmbeddings, self).build(input_shape)
 
-    # @tf.function
     def call(self, inputs, mode="embedding", training=False):
         """Get token embeddings of inputs.
         Args:
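The docstring above belongs to a dual-mode embedding layer: with mode="embedding" it maps token ids to vectors, and with mode="linear" it reuses the same weight matrix as the output projection (weight tying). A hedged standalone sketch of that pattern (the real TFBertEmbeddings also adds position and token-type embeddings):

    import tensorflow as tf

    class TiedEmbeddings(tf.keras.layers.Layer):
        def __init__(self, vocab_size, hidden_size, **kwargs):
            super(TiedEmbeddings, self).__init__(**kwargs)
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size

        def build(self, input_shape):
            self.word_embeddings = self.add_weight(
                "weight", shape=[self.vocab_size, self.hidden_size],
                initializer=tf.random_normal_initializer(
                    mean=0., stddev=self.hidden_size**-0.5))
            super(TiedEmbeddings, self).build(input_shape)

        def call(self, inputs, mode="embedding"):
            if mode == "embedding":
                return tf.gather(self.word_embeddings, inputs)  # ids -> vectors
            elif mode == "linear":
                # hidden states -> vocabulary logits, reusing the embedding matrix
                return tf.matmul(inputs, self.word_embeddings, transpose_b=True)
            raise ValueError("mode {} is not valid.".format(mode))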
@@ -248,7 +247,6 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
         x = tf.reshape(x, (batch_size, -1, self.num_attention_heads, self.attention_head_size))
         return tf.transpose(x, perm=[0, 2, 1, 3])
 
-    # @tf.function
     def call(self, inputs, training=False):
         hidden_states, attention_mask, head_mask = inputs
@@ -297,7 +295,6 @@ class TFBertSelfOutput(tf.keras.layers.Layer):
         self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
         self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
 
-    # @tf.function
     def call(self, inputs, training=False):
         hidden_states, input_tensor = inputs
@@ -317,7 +314,6 @@ class TFBertAttention(tf.keras.layers.Layer):
     def prune_heads(self, heads):
         raise NotImplementedError
 
-    # @tf.function
     def call(self, inputs, training=False):
         input_tensor, attention_mask, head_mask = inputs
@@ -336,7 +332,6 @@ class TFBertIntermediate(tf.keras.layers.Layer):
         else:
             self.intermediate_act_fn = config.hidden_act
 
-    # @tf.function
     def call(self, hidden_states):
         hidden_states = self.dense(hidden_states)
         hidden_states = self.intermediate_act_fn(hidden_states)
@@ -350,7 +345,6 @@ class TFBertOutput(tf.keras.layers.Layer):
         self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
         self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
 
-    # @tf.function
     def call(self, inputs, training=False):
         hidden_states, input_tensor = inputs
@@ -368,7 +362,6 @@ class TFBertLayer(tf.keras.layers.Layer):
         self.intermediate = TFBertIntermediate(config, name='intermediate')
         self.bert_output = TFBertOutput(config, name='output')
 
-    # @tf.function
     def call(self, inputs, training=False):
         hidden_states, attention_mask, head_mask = inputs
@@ -387,7 +380,6 @@ class TFBertEncoder(tf.keras.layers.Layer):
         self.output_hidden_states = config.output_hidden_states
         self.layer = [TFBertLayer(config, name='layer_{}'.format(i)) for i in range(config.num_hidden_layers)]
 
-    # @tf.function
     def call(self, inputs, training=False):
         hidden_states, attention_mask, head_mask = inputs
@@ -420,7 +412,6 @@ class TFBertPooler(tf.keras.layers.Layer):
         super(TFBertPooler, self).__init__(**kwargs)
         self.dense = tf.keras.layers.Dense(config.hidden_size, activation='tanh', name='dense')
 
-    # @tf.function
     def call(self, hidden_states):
         # We "pool" the model by simply taking the hidden state corresponding
         # to the first token.
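As the comment says, the pooling here is just first-token ([CLS]) selection followed by a tanh-activated dense projection; a minimal sketch with made-up shapes:

    import tensorflow as tf

    hidden_states = tf.random.normal([2, 8, 16])    # [batch, seq_len, hidden]
    dense = tf.keras.layers.Dense(16, activation='tanh')

    first_token_tensor = hidden_states[:, 0]        # [batch, hidden]
    pooled_output = dense(first_token_tensor)       # as in TFBertPooler.call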
@@ -439,7 +430,6 @@ class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
         self.transform_act_fn = config.hidden_act
         self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name='LayerNorm')
 
-    # @tf.function
     def call(self, hidden_states):
         hidden_states = self.dense(hidden_states)
         hidden_states = self.transform_act_fn(hidden_states)
@@ -463,7 +453,6 @@ class TFBertLMPredictionHead(tf.keras.layers.Layer):
                                      trainable=True,
                                      name='bias')
 
-    # @tf.function
     def call(self, hidden_states):
         hidden_states = self.transform(hidden_states)
         hidden_states = self.decoder(hidden_states) + self.bias
@@ -475,7 +464,6 @@ class TFBertMLMHead(tf.keras.layers.Layer):
         super(TFBertMLMHead, self).__init__(**kwargs)
         self.predictions = TFBertLMPredictionHead(config, name='predictions')
 
-    # @tf.function
     def call(self, sequence_output):
         prediction_scores = self.predictions(sequence_output)
         return prediction_scores
@@ -486,7 +474,6 @@ class TFBertNSPHead(tf.keras.layers.Layer):
         super(TFBertNSPHead, self).__init__(**kwargs)
         self.seq_relationship = tf.keras.layers.Dense(2, name='seq_relationship')
 
-    # @tf.function
     def call(self, pooled_output):
         seq_relationship_score = self.seq_relationship(pooled_output)
         return seq_relationship_score
@@ -511,7 +498,6 @@ class TFBertMainLayer(tf.keras.layers.Layer):
         """
         raise NotImplementedError
 
-    # @tf.function
     def call(self, inputs, training=False):
         if not isinstance(inputs, (dict, tuple, list)):
             input_ids = inputs
@@ -693,7 +679,6 @@ class TFBertModel(TFBertPreTrainedModel):
         super(TFBertModel, self).__init__(config, *inputs, **kwargs)
         self.bert = TFBertMainLayer(config, name='bert')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.bert(inputs, training=training)
         return outputs
@@ -732,7 +717,6 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
         self.bert = TFBertMainLayer(config, name='bert')
         self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.bert(inputs, training=training)
@@ -774,7 +758,6 @@ class TFBertForMaskedLM(TFBertPreTrainedModel):
         self.bert = TFBertMainLayer(config, name='bert')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.bert(inputs, training=training)
@@ -818,7 +801,6 @@ class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
         self.bert = TFBertMainLayer(config, name='bert')
         self.cls_nsp = TFBertNSPHead(config, name='cls_nsp')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.bert(inputs, training=training)
@@ -863,7 +845,6 @@ class TFBertForSequenceClassification(TFBertPreTrainedModel):
         self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
         self.classifier = tf.keras.layers.Dense(config.num_labels, name='classifier')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.bert(inputs, training=training)
@@ -912,7 +893,6 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
         self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
         self.classifier = tf.keras.layers.Dense(1, name='classifier')
 
-    # @tf.function
     def call(self, inputs, training=False):
         if not isinstance(inputs, (dict, tuple, list)):
             input_ids = inputs
@@ -989,7 +969,6 @@ class TFBertForTokenClassification(TFBertPreTrainedModel):
         self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
         self.classifier = tf.keras.layers.Dense(config.num_labels, name='classifier')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.bert(inputs, training=training)
@@ -1040,7 +1019,6 @@ class TFBertForQuestionAnswering(TFBertPreTrainedModel):
         self.bert = TFBertMainLayer(config, name='bert')
         self.qa_outputs = tf.keras.layers.Dense(config.num_labels, name='qa_outputs')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.bert(inputs, training=training)
pytorch_transformers/modeling_tf_gpt2.py
@@ -143,7 +143,6 @@ class TFAttention(tf.keras.layers.Layer):
         pass
 
     @staticmethod
-    # @tf.function
     def causal_attention_mask(nd, ns, dtype):
         """1's in the lower triangle, counting from the lower right corner.
         Same as tf.matrix_band_part(tf.ones([nd, ns]), -1, ns-nd), but doesn't produce garbage on TPUs.
@@ -153,7 +152,6 @@ class TFAttention(tf.keras.layers.Layer):
         m = i >= j - ns + nd
         return tf.cast(m, dtype)
 
-    # @tf.function
     def _attn(self, inputs, training=False):
         q, k, v, attention_mask, head_mask = inputs
         # q, k, v have shape [batch, heads, sequence, features]
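For reference, the mask the docstring above describes, sketched as a standalone function; the two index lines preceding "m = i >= j - ns + nd" are elided from the diff context and filled in here as an assumption:

    import tensorflow as tf

    def causal_attention_mask(nd, ns, dtype):
        i = tf.range(nd)[:, None]   # query positions (assumed from context)
        j = tf.range(ns)            # key positions (assumed from context)
        m = i >= j - ns + nd
        return tf.cast(m, dtype)

    print(causal_attention_mask(3, 3, tf.int32))
    # [[1 0 0]
    #  [1 1 0]
    #  [1 1 1]]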
@@ -185,21 +183,18 @@ class TFAttention(tf.keras.layers.Layer):
             outputs.append(w)
         return outputs
 
-    # @tf.function
     def merge_heads(self, x):
         x = tf.transpose(x, [0, 2, 1, 3])
         x_shape = shape_list(x)
         new_x_shape = x_shape[:-2] + [x_shape[-2] * x_shape[-1]]
         return tf.reshape(x, new_x_shape)
 
-    # @tf.function
     def split_heads(self, x):
         x_shape = shape_list(x)
         new_x_shape = x_shape[:-1] + [self.n_head, x_shape[-1] // self.n_head]
         x = tf.reshape(x, new_x_shape)
         return tf.transpose(x, (0, 2, 1, 3))  # (batch, head, seq_length, head_features)
 
-    # @tf.function
     def call(self, inputs, training=False):
         x, layer_past, attention_mask, head_mask = inputs
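A quick shape walk-through of split_heads/merge_heads as defined above, using made-up dimensions:

    import tensorflow as tf

    batch, n_head, seq, head_features = 2, 4, 8, 16
    x = tf.random.normal([batch, seq, n_head * head_features])   # [2, 8, 64]

    # split_heads: [batch, seq, hidden] -> [batch, head, seq, head_features]
    split = tf.transpose(tf.reshape(x, [batch, seq, n_head, head_features]),
                         (0, 2, 1, 3))
    print(split.shape)   # (2, 4, 8, 16)

    # merge_heads inverts it: back to [batch, seq, hidden]
    merged = tf.reshape(tf.transpose(split, [0, 2, 1, 3]),
                        [batch, seq, n_head * head_features])
    print(merged.shape)  # (2, 8, 64)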
@@ -235,7 +230,6 @@ class TFMLP(tf.keras.layers.Layer):
         self.act = gelu
         self.dropout = tf.keras.layers.Dropout(config.resid_pdrop)
 
-    # @tf.function
     def call(self, x, training=False):
         h = self.act(self.c_fc(x))
         h2 = self.c_proj(h)
@@ -253,7 +247,6 @@ class TFBlock(tf.keras.layers.Layer):
         self.ln_2 = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_epsilon, name='ln_2')
         self.mlp = TFMLP(4 * nx, config, name='mlp')
 
-    # @tf.function
     def call(self, inputs, training=False):
         x, layer_past, attention_mask, head_mask = inputs
@@ -289,7 +282,6 @@ class TFGPT2Embeddings(tf.keras.layers.Layer):
                     mean=0., stddev=self.hidden_size**-0.5))
         super(TFGPT2Embeddings, self).build(input_shape)
 
-    # @tf.function
     def call(self, inputs, mode="embedding"):
         """Get token embeddings of inputs.
         Args:
@@ -354,7 +346,6 @@ class TFGPT2MainLayer(tf.keras.layers.Layer):
         """
         raise NotImplementedError
 
-    # @tf.function
     def call(self, inputs, training=False):
         if not isinstance(inputs, (dict, tuple, list)):
             input_ids = inputs
@@ -568,7 +559,6 @@ class TFGPT2Model(TFGPT2PreTrainedModel):
         super(TFGPT2Model, self).__init__(config, *inputs, **kwargs)
         self.transformer = TFGPT2MainLayer(config, name='transformer')
 
-    # @tf.function
     def call(self, inputs, training=False):
         outputs = self.transformer(inputs, training=training)
         return outputs
@@ -610,7 +600,6 @@ class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
         super(TFGPT2LMHeadModel, self).__init__(config, *inputs, **kwargs)
         self.transformer = TFGPT2MainLayer(config, name='transformer')
 
-    # @tf.function
     def call(self, inputs, training=False):
         transformer_outputs = self.transformer(inputs, training=training)
         hidden_states = transformer_outputs[0]
@@ -680,7 +669,6 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
         self.multiple_choice_head = TFSequenceSummary(config, name='multiple_choice_head')
 
-    # @tf.function
     def call(self, inputs, training=False):
         if not isinstance(inputs, (dict, tuple, list)):
             input_ids = inputs
pytorch_transformers/modeling_tf_utils.py
@@ -277,7 +277,6 @@ class TFConv1D(tf.keras.layers.Layer):
                                     shape=[1, self.nf],
                                     initializer=tf.zeros_initializer())
 
-    @tf.function
     def call(self, x):
         bz, sl = shape_list(x)[:2]
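Note that in this file, unlike the two modeling files above, the deleted decorators were still live. One plausible motivation, offered as an inference rather than the author's stated rationale: tf.function builds and caches a new concrete graph for every new input signature, so decorating layers that see varying batch and sequence lengths causes repeated retracing, while an undecorated call simply runs eagerly. A small sketch of the retracing behavior, assuming TF 2.x:

    import tensorflow as tf

    @tf.function
    def project(x):
        print("tracing for shape", x.shape)          # runs only at trace time
        return tf.matmul(x, tf.ones([x.shape[-1], 4]))

    project(tf.random.normal([1, 3]))   # traces
    project(tf.random.normal([1, 3]))   # same shape: cached graph, no print
    project(tf.random.normal([2, 3]))   # new shape: traces again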
@@ -334,7 +333,6 @@ class TFSequenceSummary(tf.keras.layers.Layer):
         if hasattr(config, 'summary_last_dropout') and config.summary_last_dropout > 0:
             self.last_dropout = tf.keras.layers.Dropout(config.summary_last_dropout)
 
-    @tf.function
     def call(self, inputs, training=False):
         """ hidden_states: float Tensor in shape [bsz, seq_len, hidden_size], the hidden-states of the last layer.
             cls_index: [optional] position of the classification token if summary_type == 'cls_index',